From ishai at dev.mellanox.co.il Mon Jan 1 01:17:19 2007 From: ishai at dev.mellanox.co.il (ishai) Date: Mon, 01 Jan 2007 11:17:19 +0200 Subject: [openib-general] [PATCH][TRIVIAL] srp_tools: trivial log message fix In-Reply-To: <4593CC39.9947.00D4.0@novell.com> References: <4593CC39.9947.00D4.0@novell.com> Message-ID: <4598D19F.7020809@dev.mellanox.co.il> Thanks, applied Ishai From kliteyn at dev.mellanox.co.il Mon Jan 1 05:28:00 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 01 Jan 2007 15:28:00 +0200 Subject: [openib-general] [PATCH 0/4] osm: Improving fat-tree routing engine. Message-ID: <45990C60.2030909@dev.mellanox.co.il> Hi Hal. Happy New Year :) The following series of four patches are all about fat-tree routing: [PATCH 1/4] osm: TRIVIAL - making comparison functions static. [PATCH 2/4] osm: LMC > 0 is not supported by fat-tree routing. [PATCH 3/4] osm: TRIVIAL - making some getter and setter inline. [PATCH 4/4] osm: building LID matrices in fat-tree routing. All the changes are in the same file (osm_ucast_ftree.c). -- Yevgeny Signed-off-by: Yevgeny Kliteynik From kliteyn at dev.mellanox.co.il Mon Jan 1 05:30:45 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 01 Jan 2007 15:30:45 +0200 Subject: [openib-general] [PATCH 1/4] osm: TRIVIAL - making comparison functions static (fat-tree routing) Message-ID: <45990D05.60905@dev.mellanox.co.il> Making two comparison functions in ftree static. 
-- Yevgeny Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_ucast_ftree.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c index 0473135..32d1b3b 100644 --- a/osm/opensm/osm_ucast_ftree.c +++ b/osm/opensm/osm_ucast_ftree.c @@ -226,7 +226,7 @@ typedef struct ftree_fabric_t_ ** ***************************************************/ -int OSM_CDECL +static int OSM_CDECL __osm_ftree_compare_switches_by_index( IN const void * p1, IN const void * p2) @@ -247,7 +247,7 @@ __osm_ftree_compare_switches_by_index( /***************************************************/ -int OSM_CDECL +static int OSM_CDECL __osm_ftree_compare_port_groups_by_remote_switch_index( IN const void * p1, IN const void * p2) -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Mon Jan 1 05:31:09 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 01 Jan 2007 15:31:09 +0200 Subject: [openib-general] [PATCH 2/4] osm: LMC > 0 is not supported by fat-tree routing. Message-ID: <45990D1D.6090305@dev.mellanox.co.il> LMC > 0 is not supported by fat-tree routing. Removing all the related code and adding check to inform the user in case LMC is set. 
Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_ucast_ftree.c | 31 +++++++++---------------------- 1 files changed, 9 insertions(+), 22 deletions(-) diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c index 32d1b3b..e767e64 100644 --- a/osm/opensm/osm_ucast_ftree.c +++ b/osm/opensm/osm_ucast_ftree.c @@ -151,9 +151,7 @@ typedef struct ftree_port_group_t_ { cl_map_item_t map_item; ib_net16_t base_lid; /* base lid of the current node */ - uint8_t lmc; /* LMC of the current node */ ib_net16_t remote_base_lid; /* base lid of the remote node */ - uint8_t remote_lmc; /* LMC of the remote node */ ib_net64_t port_guid; /* port guid of this port */ ib_net64_t remote_port_guid; /* port guid of the remote port */ ib_net64_t remote_node_guid; /* node guid of the remote node */ @@ -179,7 +177,6 @@ typedef struct ftree_sw_t_ uint8_t rank; ftree_tuple_t tuple; ib_net16_t base_lid; - uint8_t lmc; ftree_port_group_t ** down_port_groups; uint16_t down_port_groups_num; ftree_port_group_t ** up_port_groups; @@ -434,9 +431,7 @@ __osm_ftree_port_destroy( static ftree_port_group_t * __osm_ftree_port_group_create( IN ib_net16_t base_lid, - IN uint8_t lmc, IN ib_net16_t remote_base_lid, - IN uint8_t remote_lmc, IN ib_net64_t * p_port_guid, IN ib_net64_t * p_remote_port_guid, IN ib_net64_t * p_remote_node_guid, @@ -450,9 +445,7 @@ __osm_ftree_port_group_create( memset(p_group, 0, sizeof(ftree_port_group_t)); p_group->base_lid = base_lid; - p_group->lmc = lmc; p_group->remote_base_lid = remote_base_lid; - p_group->remote_lmc = remote_lmc; memcpy(&p_group->port_guid, p_port_guid, sizeof(ib_net64_t)); memcpy(&p_group->remote_port_guid, p_remote_port_guid, sizeof(ib_net64_t)); memcpy(&p_group->remote_node_guid, p_remote_node_guid, sizeof(ib_net64_t)); @@ -725,9 +718,7 @@ __osm_ftree_sw_add_port( IN uint8_t port_num, IN uint8_t remote_port_num, IN ib_net16_t base_lid, - IN uint8_t lmc, IN ib_net16_t remote_base_lid, - IN uint8_t remote_lmc, IN ib_net64_t port_guid, IN 
ib_net64_t remote_port_guid, IN ib_net64_t remote_node_guid, @@ -742,9 +733,7 @@ __osm_ftree_sw_add_port( { p_group = __osm_ftree_port_group_create( base_lid, - lmc, remote_base_lid, - remote_lmc, &port_guid, &remote_port_guid, &remote_node_guid, @@ -875,9 +864,7 @@ __osm_ftree_hca_add_port( IN uint8_t port_num, IN uint8_t remote_port_num, IN ib_net16_t base_lid, - IN uint8_t lmc, IN ib_net16_t remote_base_lid, - IN uint8_t remote_lmc, IN ib_net64_t port_guid, IN ib_net64_t remote_port_guid, IN ib_net64_t remote_node_guid, @@ -896,9 +883,7 @@ __osm_ftree_hca_add_port( { p_group = __osm_ftree_port_group_create( base_lid, - lmc, remote_base_lid, - remote_lmc, &port_guid, &remote_port_guid, &remote_node_guid, @@ -2559,9 +2544,7 @@ __osm_ftree_fabric_construct_hca_ports( i, /* local port number */ remote_port_num, /* remote port number */ osm_node_get_base_lid(p_node, i), /* local lid */ - osm_node_get_lmc(p_node, i), /* local lmc */ osm_node_get_base_lid(p_remote_node, 0), /* remote lid */ - osm_node_get_lmc(p_remote_node, 0), /* remote lmc */ osm_physp_get_port_guid(p_osm_port), /* local port guid */ osm_physp_get_port_guid(p_remote_osm_port),/* remote port guid */ remote_node_guid, /* remote node guid */ @@ -2586,7 +2569,6 @@ __osm_ftree_fabric_construct_sw_ports( osm_node_t * p_node = osm_switch_get_node_ptr(p_sw->p_osm_sw); osm_node_t * p_remote_node; ib_net16_t remote_base_lid; - uint8_t remote_lmc; uint8_t remote_node_type; ib_net64_t remote_node_guid; osm_physp_t * p_remote_osm_port; @@ -2634,7 +2616,6 @@ __osm_ftree_fabric_construct_sw_ports( direction = FTREE_DIRECTION_DOWN; remote_base_lid = osm_physp_get_base_lid(p_remote_osm_port); - remote_lmc = osm_physp_get_lmc(p_remote_osm_port); break; case IB_NODE_TYPE_SWITCH: @@ -2652,7 +2633,6 @@ __osm_ftree_fabric_construct_sw_ports( /* switch LID is only in port 0 port_info structure */ remote_base_lid = osm_node_get_base_lid(p_remote_node, 0); - remote_lmc = osm_node_get_lmc(p_remote_node, 0); break; @@ -2670,9 
+2650,7 @@ __osm_ftree_fabric_construct_sw_ports( i, /* local port number */ remote_port_num, /* remote port number */ p_sw->base_lid, /* local lid */ - p_sw->lmc, /* local lmc */ remote_base_lid, /* remote lid */ - remote_lmc, /* remote lmc */ osm_physp_get_port_guid(p_osm_port), /* local port guid */ osm_physp_get_port_guid(p_remote_osm_port), /* remote port guid */ remote_node_guid, /* remote node guid */ @@ -2804,6 +2782,15 @@ __osm_ftree_construct_fabric( OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_construct_fabric); + if (p_ftree->p_osm->subn.opt.lmc > 0) + { + osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, + "Subnet LMC > 0 not supported by fat-tree routing.\n" + "Falling back to default routing.\n"); + status = -1; + goto Exit; + } + if ( cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl) < 2 ) { osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Mon Jan 1 05:31:25 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 01 Jan 2007 15:31:25 +0200 Subject: [openib-general] [PATCH 3/4] osm: TRIVIAL - making some getter and setter inline (fat-tree routing) Message-ID: <45990D2D.9060104@dev.mellanox.co.il> Making some getter and setter inline. 
Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_ucast_ftree.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c index e767e64..9a1ac69 100644 --- a/osm/opensm/osm_ucast_ftree.c +++ b/osm/opensm/osm_ucast_ftree.c @@ -751,7 +751,7 @@ __osm_ftree_sw_add_port( /***************************************************/ -static void +static inline void __osm_ftree_sw_set_fwd_table_block( IN ftree_sw_t * p_sw, IN uint16_t lid_ho, @@ -762,7 +762,7 @@ __osm_ftree_sw_set_fwd_table_block( /***************************************************/ -static uint8_t +static inline uint8_t __osm_ftree_sw_get_fwd_table_block( IN ftree_sw_t * p_sw, IN uint16_t lid_ho) -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Mon Jan 1 05:31:35 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 01 Jan 2007 15:31:35 +0200 Subject: [openib-general] [PATCH 4/4] osm: building LID matrices in fat-tree routing Message-ID: <45990D37.6050402@dev.mellanox.co.il> Building LID matrices in fat-tree routing Lid matrices are now built while setting routing w/o any additional complexity, and removed the temporary ftree dump function (that differed from the ucast mgr dump function only by not dumping anything that is related to lid matrices). 
Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_ucast_ftree.c | 173 ++++++++++++++++++++++++++++-------------- 1 files changed, 115 insertions(+), 58 deletions(-) diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c index 9a1ac69..6f72791 100644 --- a/osm/opensm/osm_ucast_ftree.c +++ b/osm/opensm/osm_ucast_ftree.c @@ -770,6 +770,26 @@ __osm_ftree_sw_get_fwd_table_block( return p_sw->lft_buf[lid_ho]; } +/***************************************************/ + +static inline cl_status_t +__osm_ftree_sw_set_hops( + IN ftree_sw_t * p_sw, + IN uint16_t max_lid_ho, + IN uint16_t lid_ho, + IN uint8_t port_num, + IN uint8_t hops) +{ + /* make sure the lid matrix has enough room */ + osm_switch_set_min_lid_size(p_sw->p_osm_sw, max_lid_ho); + + /* set local min hop table(LID) */ + return osm_switch_set_hops(p_sw->p_osm_sw, + lid_ho, + port_num, + hops); +} + /*************************************************** ** ** ftree_hca_t functions @@ -1747,8 +1767,10 @@ __osm_ftree_fabric_route_upgoing_by_goin IN ftree_sw_t * p_sw, IN ftree_sw_t * p_prev_sw, IN ib_net16_t target_lid, + IN uint8_t target_rank, IN boolean_t is_real_lid, - IN boolean_t is_main_path) + IN boolean_t is_main_path, + IN uint8_t highest_rank_in_route) { ftree_sw_t * p_remote_sw; uint16_t ports_num; @@ -1813,6 +1835,7 @@ __osm_ftree_fabric_route_upgoing_by_goin * - going DOWN(TRUE,TRUE) through ALL the groups * + promoting port counter * + setting path in remote switch fwd tbl + * + setting hops in remote switch on all the ports of each group * * 2. is_real_lid == TRUE && is_main_path == FALSE: * - going DOWN(TRUE,FALSE) through ALL the groups but only if @@ -1820,11 +1843,14 @@ __osm_ftree_fabric_route_upgoing_by_goin * for this target LID * + NOT promoting port counter * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet * * 3. 
is_real_lid == FALSE && is_main_path == TRUE: * - going DOWN(FALSE,TRUE) through ALL the groups * + promoting port counter * + NOT setting path in remote switch fwd tbl + * + NOT setting hops in remote switch * * 4. is_real_lid == FALSE && is_main_path == FALSE: * - illegal state - we shouldn't get here @@ -1849,6 +1875,23 @@ __osm_ftree_fabric_route_upgoing_by_goin __osm_ftree_tuple_to_str(p_remote_sw->tuple), cl_ntoh16(target_lid), p_min_port->remote_port_num); + + /* On the remote switch that is pointed by the p_group, + set hops for ALL the ports in the remote group. */ + + for (j = 0; j < ports_num; j++) + { + cl_ptr_vector_at(&p_group->ports, j, (void **)&p_port); + + __osm_ftree_sw_set_hops(p_remote_sw, + p_ftree->lft_max_lid_ho, + cl_ntoh16(target_lid), + p_port->remote_port_num, + ( (target_rank - highest_rank_in_route) + + (p_remote_sw->rank - highest_rank_in_route) )); + } + + } /* The number of upgoing routes is tracked in the @@ -1869,8 +1912,10 @@ __osm_ftree_fabric_route_upgoing_by_goin p_remote_sw, /* remote switch - used as a route-upgoing alg. start point */ NULL, /* prev. position - NULL to mark that we went down and not up */ target_lid, /* LID that we're routing to */ + target_rank, /* rank of the LID that we're routing to */ is_real_lid, /* whether the target LID is real or dummy */ - is_main_path); /* whether this is path to HCA that should by tracked by counters */ + is_main_path, /* whether this is path to HCA that should by tracked by counters */ + highest_rank_in_route); /* highest visited point in the tree before going down */ } } /* done scanning all the down-going port groups */ @@ -1896,6 +1941,7 @@ __osm_ftree_fabric_route_downgoing_by_go IN ftree_sw_t * p_sw, IN ftree_sw_t * p_prev_sw, IN ib_net16_t target_lid, + IN uint8_t target_rank, IN boolean_t is_real_lid, IN boolean_t is_main_path) { @@ -1920,8 +1966,10 @@ __osm_ftree_fabric_route_downgoing_by_go p_sw, /* local switch - used as a route-upgoing alg. 
start point */ p_prev_sw, /* switch that we went up from (NULL means that we went down) */ target_lid, /* LID that we're routing to */ + target_rank, /* rank of the LID that we're routing to */ is_real_lid, /* whether this target LID is real or dummy */ - is_main_path); /* whether this path to HCA should by tracked by counters */ + is_main_path, /* whether this path to HCA should by tracked by counters */ + p_sw->rank); /* the highest visited point in the tree before going down */ } /* recursion stop condition - if it's a root switch, */ @@ -1971,9 +2019,12 @@ __osm_ftree_fabric_route_downgoing_by_go * - going UP(TRUE,TRUE) on selected min_group and min_port * + promoting port counter * + setting path in remote switch fwd tbl + * + setting hops in remote switch on all the ports of selected group * - going UP(TRUE,FALSE) on rest of the groups, each time on port 0 * + NOT promoting port counter * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet * * 2. is_real_lid == TRUE && is_main_path == FALSE: * - going UP(TRUE,FALSE) on ALL the groups, each time on port 0, @@ -1981,11 +2032,14 @@ __osm_ftree_fabric_route_downgoing_by_go * configured for this target LID * + NOT promoting port counter * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet * * 3. is_real_lid == FALSE && is_main_path == TRUE: * - going UP(FALSE,TRUE) ONLY on selected min_group and min_port * + promoting port counter * + NOT setting path in remote switch fwd tbl + * + NOT setting hops in remote switch * * 4. 
is_real_lid == FALSE && is_main_path == FALSE: * - illegal state - we shouldn't get here @@ -2013,12 +2067,25 @@ __osm_ftree_fabric_route_downgoing_by_go __osm_ftree_sw_set_fwd_table_block(p_remote_sw, cl_ntoh16(target_lid), p_min_port->remote_port_num); - p_remote_sw->lft_buf[cl_ntoh16(target_lid)] = p_min_port->remote_port_num; osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "__osm_ftree_fabric_route_downgoing_by_going_up: " "Switch %s: set path to HCA LID 0x%x through port %u\n", __osm_ftree_tuple_to_str(p_remote_sw->tuple), cl_ntoh16(target_lid),p_min_port->remote_port_num); + + /* On the remote switch that is pointed by the min_group, + set hops for ALL the ports in the remote group. */ + + ports_num = (uint16_t)cl_ptr_vector_get_size(&p_min_group->ports); + for (j = 0; j < ports_num; j++) + { + cl_ptr_vector_at(&p_min_group->ports, j, (void **)&p_port); + __osm_ftree_sw_set_hops(p_remote_sw, + p_ftree->lft_max_lid_ho, + cl_ntoh16(target_lid), + p_port->remote_port_num, + target_rank - p_remote_sw->rank); + } } /* Recursion step: @@ -2028,6 +2095,7 @@ __osm_ftree_fabric_route_downgoing_by_go p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ p_sw, /* this switch - prev. position switch for the function */ target_lid, /* LID that we're routing to */ + target_rank, /* rank of the LID that we're routing to */ is_real_lid, /* whether this target LID is real or dummy */ is_main_path); /* whether this is path to HCA that should by tracked by counters */ } @@ -2044,6 +2112,8 @@ __osm_ftree_fabric_route_downgoing_by_go * configured for this target LID * + NOT promoting port counter * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet * * 2. 
is_real_lid == TRUE && is_main_path == FALSE: * - going UP(TRUE,FALSE) on ALL the groups, each time on port 0, @@ -2051,6 +2121,8 @@ __osm_ftree_fabric_route_downgoing_by_go * configured for this target LID * + NOT promoting port counter * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet * * These two rules can be rephrased this way: * - foreach UP port group @@ -2060,6 +2132,7 @@ __osm_ftree_fabric_route_downgoing_by_go * - select port 0 * - do NOT promote port counter * - set path in remote switch fwd tbl + * - set hops in remote switch on all the ports of this group * - go UP(TRUE,FALSE) to the remote switch */ @@ -2087,6 +2160,22 @@ __osm_ftree_fabric_route_downgoing_by_go __osm_ftree_sw_set_fwd_table_block(p_remote_sw, cl_ntoh16(target_lid), p_port->remote_port_num); + + /* On the remote switch that is pointed by the p_group, + set hops for ALL the ports in the remote group. */ + + ports_num = (uint16_t)cl_ptr_vector_get_size(&p_group->ports); + for (j = 0; j < ports_num; j++) + { + cl_ptr_vector_at(&p_group->ports, j, (void **)&p_port); + + __osm_ftree_sw_set_hops(p_remote_sw, + p_ftree->lft_max_lid_ho, + cl_ntoh16(target_lid), + p_port->remote_port_num, + target_rank - p_remote_sw->rank); + } + /* Recursion step: Assign downgoing ports by stepping up, starting on REMOTE switch. */ __osm_ftree_fabric_route_downgoing_by_going_up( @@ -2094,6 +2183,7 @@ __osm_ftree_fabric_route_downgoing_by_go p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ p_sw, /* this switch - prev. 
position switch for the function */ target_lid, /* LID that we're routing to */ + target_rank, /* rank of the LID that we're routing to */ TRUE, /* whether the target LID is real or dummy */ FALSE); /* whether this is path to HCA that should by tracked by counters */ } @@ -2150,17 +2240,26 @@ __osm_ftree_fabric_route_to_hcas( cl_ntoh16(remote_lid), p_port->port_num); + /* set local min hop table(LID) to route to the CA */ + __osm_ftree_sw_set_hops(p_sw, + p_ftree->lft_max_lid_ho, + cl_ntoh16(remote_lid), + p_port->port_num, + 1); + /* assign downgoing ports by stepping up */ __osm_ftree_fabric_route_downgoing_by_going_up( p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ NULL, /* prev. position switch */ remote_lid, /* LID that we're routing to */ + __osm_ftree_fabric_get_rank(p_ftree), /* rank of the LID that we're routing to */ TRUE, /* whether this HCA LID is real or dummy */ TRUE); /* whether this path to HCA should by tracked by counters */ } - /* We're done with the real HCAs. Now route the dummy HCAs that are missing.*/ + /* We're done with the real HCAs. Now route the dummy HCAs that are missing. + When routing to dummy HCAs we don't fill lid matrices. */ if (p_ftree->max_hcas_per_leaf > p_sw->down_port_groups_num) { @@ -2177,6 +2276,7 @@ __osm_ftree_fabric_route_to_hcas( p_sw, /* local switch - used as a route-downgoing alg. start point */ NULL, /* prev. 
position switch */ 0, /* LID that we're routing to - ignored for dummy HCA */ + 0, /* rank of the LID that we're routing to - ignored for dummy HCA */ FALSE, /* whether this HCA LID is real or dummy */ TRUE); /* whether this path to HCA should by tracked by counters */ } @@ -2226,11 +2326,19 @@ __osm_ftree_fabric_route_to_switches( __osm_ftree_tuple_to_str(p_sw->tuple), cl_ntoh16(p_sw->base_lid)); + /* set min hop table of the switch to itself */ + __osm_ftree_sw_set_hops(p_sw, + p_ftree->lft_max_lid_ho, + cl_ntoh16(p_sw->base_lid), + 0, /* port_num */ + 0);/* hops */ + __osm_ftree_fabric_route_downgoing_by_going_up( p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ NULL, /* prev. position switch */ p_sw->base_lid, /* LID that we're routing to */ + p_sw->rank, /* rank of the LID that we're routing to */ TRUE, /* whether the target LID is a real or dummy */ FALSE); /* whether this path should by tracked by counters */ } @@ -2971,48 +3079,6 @@ __osm_ftree_do_routing( /*************************************************** ***************************************************/ -static int -__osm_ftree_routing( - IN void * context) -{ - int status = __osm_ftree_construct_fabric(context); - if (status != 0) - return status; - - __osm_ftree_do_routing(context); - return 0; -} - -/*************************************************** - ***************************************************/ - -void -ucast_mgr_dump_to_file( - IN osm_ucast_mgr_t *p_mgr, - IN const char *file_name, - IN void (*func)(cl_map_item_t *, void *)); - -void -ucast_mgr_dump_lfts( - IN cl_map_item_t *p_map_item, - void *cxt); - -static void -__osm_ftree_dump_tables( - IN void * context) -{ - ftree_fabric_t * p_ftree = context; - if (!p_ftree) - return; - - ucast_mgr_dump_to_file(&p_ftree->p_osm->sm.ucast_mgr, - "opensm-lfts.dump", - ucast_mgr_dump_lfts); -} - -/*************************************************** - ***************************************************/ - static void 
__osm_ftree_delete( IN void * context) @@ -3034,17 +3100,8 @@ int osm_ucast_ftree_setup(osm_opensm_t * p_ftree->p_osm = p_osm; p_osm->routing_engine.context = (void *)p_ftree; - p_osm->routing_engine.ucast_build_fwd_tables = __osm_ftree_routing; - /* ToDo: Resolve multicast routing. - * Until then lid matrices are built, despite the - * fact that FatTree routing doesn't need them. - * When the multicast routing will be resolved, - * __osm_ftree_routing() function should be removed, - * and here's how the FatTree routing will be set: - * p_osm->routing_engine.build_lid_matrices = __osm_ftree_construct_fabric; - * p_osm->routing_engine.ucast_build_fwd_tables = __osm_ftree_do_routing; - */ - p_osm->routing_engine.ucast_dump_tables = __osm_ftree_dump_tables; + p_osm->routing_engine.build_lid_matrices = __osm_ftree_construct_fabric; + p_osm->routing_engine.ucast_build_fwd_tables = __osm_ftree_do_routing; p_osm->routing_engine.delete = __osm_ftree_delete; return 0; } -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Mon Jan 1 05:35:08 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 01 Jan 2007 15:35:08 +0200 Subject: [openib-general] [PATCH] osm: Fixing function return type in osm_switch.h Message-ID: <45990E0C.7020905@dev.mellanox.co.il> Hi Hal Fixing osm_switch_get_mft_max_block_in_use() function return type and updating the comments accordingly. Signed-off-by: Yevgeny Kliteynik --- osm/include/opensm/osm_switch.h | 10 ++++++++-- 1 files changed, 8 insertions(+), 2 deletions(-) diff --git a/osm/include/opensm/osm_switch.h b/osm/include/opensm/osm_switch.h index ab92ffd..bd4a6b0 100644 --- a/osm/include/opensm/osm_switch.h +++ b/osm/include/opensm/osm_switch.h @@ -1130,7 +1130,7 @@ osm_switch_get_mft_max_block( * * SYNOPSIS */ -static inline uint16_t +static inline int16_t osm_switch_get_mft_max_block_in_use( IN osm_switch_t* const p_sw ) { @@ -1142,7 +1142,13 @@ osm_switch_get_mft_max_block_in_use( * p_sw * [in] Pointer to the switch object. 
* -* RETURN VALUE +* RETURN VALUES +* Returns the maximum block ID in use in this switche's table. +* A value of -1 indicates no blocks are in use. +* +* NOTES +* +* SEE ALSO */ /****f* OpenSM: Switch/osm_switch_get_mft_max_position -- 1.4.4.1.GIT From halr at voltaire.com Mon Jan 1 07:13:05 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 01 Jan 2007 10:13:05 -0500 Subject: [openib-general] nightly osm_sim report 2006-12-30:normal completion In-Reply-To: <1167517497.29620.318774.camel@hal.voltaire.com> References: <200612300525.kBU5Pcr2016005@sw053.yok.mtl.com> <1167480536.29620.286425.camel@hal.voltaire.com> <4596DB45.5070108@mellanox.co.il> <1167517497.29620.318774.camel@hal.voltaire.com> Message-ID: <1167664361.29620.449602.camel@hal.voltaire.com> On Sat, 2006-12-30 at 17:25, Hal Rosenstock wrote: > On Sat, 2006-12-30 at 16:33, Eitan Zahavi wrote: > > Hal Rosenstock wrote: > > > Hi Eitan, > > > > > > On Sat, 2006-12-30 at 00:25, Eitan Zahavi wrote: > > > > > >> OSM Simulation Regression Summary > > >> OpenSM rev = Fri_Dec_29_12:19:08_2006 2e0f81 > > >> ibutils rev = Wed_Dec_27_23:39:30_2006 60aebe > > >> Total=405 Pass=330 Fail=75 > > >> > > >> Pass: > > >> 45 Stability IS1-16.topo > > >> 45 Pkey IS1-16.topo > > >> 45 OsmStress IS1-16.topo > > >> 45 Multicast IS1-16.topo > > >> 45 LidMgr IS1-16.topo > > >> 15 Stability IS3-loop.topo > > >> 15 Stability IS3-128.topo > > >> 15 Pkey IS3-128.topo > > >> 15 OsmStress IS3-128.topo > > >> 15 Multicast IS3-loop.topo > > >> 15 Multicast IS3-128.topo > > >> 15 LidMgr IS3-128.topo > > >> > > >> Failures: > > >> 45 OsmTest IS1-16.topo > > >> 15 OsmTest IS3-loop.topo > > >> 15 OsmTest IS3-128.topo > > >> > > > > > > Any idea on these osmtest failures ? I did add SA MFTRecord yesterday > > > and made a change to SA LFTRecord and SwitchInfoRecord the day before as > > > well as additional osmtests for MFTRecord and LFTRecord. > > > > > Actually I get a core dump: > > Thanks for providing this! 
> > > #0 0x0805c265 in osm_mcast_tbl_get_block (p_tbl=0x8f6ef6c, > > block_num=-32575, position=0 '\0', p_block=0xb19e4d2c) > > at osm_mcast_tbl.c:299 > > 299 p_block[i] = (*p_tbl->p_mask_tbl)[mlid_start_ho + i][position]; > > > > (gdb) p i > > $1 = 2 > > (gdb) p mlid_start_ho > > $2 = 6176 > > (gdb) p position > > $3 = 0 '\0' > > (gdb) where > > #0 0x0805c265 in osm_mcast_tbl_get_block (p_tbl=0x8f6ef6c, > > block_num=-32575, position=0 '\0', p_block=0xb19e4d2c) > > at osm_mcast_tbl.c:299 > > #1 0x08073d29 in osm_switch_get_mft_block (p_sw=0x8f6eed8, > > block_num=32961, position=0 '\0', p_block=0xb19e4d2c) > > at ./../include/opensm/osm_switch.h:1074 > > #2 0x08073b8c in __osm_mftr_rcv_new_mftr (p_rcv=0x80e9a6c, > > p_sw=0x8f6eed8, p_list=0xb61c0370, lid=512, block=32961, > ^^^^^ > max block number is 511 so this is what caused the core dump. > I just checked in a patch for this which should work. It didn't work. Can you dump p_sw->mcast_tbl ? Thanks. -- Hal > > -- Hal > > > position=0 '\0') at osm_sa_mft_record.c:181 > > #3 0x08074273 in __osm_mftr_rcv_by_comp_mask (p_map_item=0x8f6eed8, > > context=0xb61c0330) at osm_sa_mft_record.c:317 > > #4 0x00cd9747 in cl_qmap_apply_func (p_map=0x80e8584, > > pfn_func=0x8073f98 <__osm_mftr_rcv_by_comp_mask>, context=0xb61c0330) > > at cl_map.c:287 > > #5 0x08074653 in osm_mftr_rcv_process (p_rcv=0x80e9a6c, > > p_madw=0x8f29f0c) at osm_sa_mft_record.c:390 > > #6 0x08074ef2 in __osm_mftr_rcv_ctrl_disp_callback (context=0x80e9afc, > > p_data=0x8f29f0c) at osm_sa_mft_record_ctrl.c:63 > > #7 0x00cd3d4f in __cl_disp_worker (context=0x80e9d18) at > > cl_dispatcher.c:102 > > #8 0x00ce1297 in __cl_thread_pool_routine (context=0x80e9d5c) at > > cl_threadpool.c:74 > > #9 0x00ce0f61 in __cl_thread_wrapper (arg=0x8f1c690) at cl_thread.c:58 > > #10 0x00361371 in start_thread () from /lib/tls/libpthread.so.0 > > #11 0x001eaffe in clone () from /lib/tls/libc.so.6 > > > > > > > Also, why are osmtest failures allowed for "normal completion" 
? > > > > > > -- Hal > > > > > > > > > > > > _______________________________________________ > > > openib-general mailing list > > > openib-general at openib.org > > > http://openib.org/mailman/listinfo/openib-general > > > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > > > From halr at voltaire.com Mon Jan 1 08:19:54 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 01 Jan 2007 11:19:54 -0500 Subject: [openib-general] [PATCH] osm: Fixing function return type in osm_switch.h In-Reply-To: <45990E0C.7020905@dev.mellanox.co.il> References: <45990E0C.7020905@dev.mellanox.co.il> Message-ID: <1167668393.4596.645.camel@hal.voltaire.com> On Mon, 2007-01-01 at 08:35, Yevgeny Kliteynik wrote: > Hi Hal > > Fixing osm_switch_get_mft_max_block_in_use() function return type > and updating the comments accordingly. > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. -- Hal From halr at voltaire.com Mon Jan 1 08:25:45 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 01 Jan 2007 11:25:45 -0500 Subject: [openib-general] [PATCH 0/4] osm: Improving fat-tree routing engine. In-Reply-To: <45990C60.2030909@dev.mellanox.co.il> References: <45990C60.2030909@dev.mellanox.co.il> Message-ID: <1167668744.4596.970.camel@hal.voltaire.com> Hi Yevgeny, On Mon, 2007-01-01 at 08:28, Yevgeny Kliteynik wrote: > Hi Hal. > > Happy New Year :) And a Happy and Healthy New Year to you :-) > The following series of four patches are all about fat-tree routing: > > [PATCH 1/4] osm: TRIVIAL - making comparison functions static. > [PATCH 2/4] osm: LMC > 0 is not supported by fat-tree routing. > [PATCH 3/4] osm: TRIVIAL - making some getter and setter inline. > [PATCH 4/4] osm: building LID matrices in fat-tree routing. > > All the changes are in the same file (osm_ucast_ftree.c). Thanks. 
-- Hal > -- > Yevgeny > > Signed-off-by: Yevgeny Kliteynik > From halr at voltaire.com Mon Jan 1 08:28:11 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 01 Jan 2007 11:28:11 -0500 Subject: [openib-general] [PATCH 1/4] osm: TRIVIAL - making comparison functions static (fat-tree routing) In-Reply-To: <45990D05.60905@dev.mellanox.co.il> References: <45990D05.60905@dev.mellanox.co.il> Message-ID: <1167668746.4596.972.camel@hal.voltaire.com> On Mon, 2007-01-01 at 08:30, Yevgeny Kliteynik wrote: > Making two comparison functions in ftree static. > > -- > Yevgeny > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. -- Hal From halr at voltaire.com Mon Jan 1 08:32:41 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 01 Jan 2007 11:32:41 -0500 Subject: [openib-general] [PATCH 2/4] osm: LMC > 0 is not supported by fat-tree routing. In-Reply-To: <45990D1D.6090305@dev.mellanox.co.il> References: <45990D1D.6090305@dev.mellanox.co.il> Message-ID: <1167669160.4596.1360.camel@hal.voltaire.com> On Mon, 2007-01-01 at 08:31, Yevgeny Kliteynik wrote: > LMC > 0 is not supported by fat-tree routing. Might this be different in the future ? Can LMC > 0 be supported with fat tree routing ? > Removing all the related code and adding check to > inform the user in case LMC is set. > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. I'll also add a note to this effect to the documentation shortly. -- Hal From halr at voltaire.com Mon Jan 1 08:36:37 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 01 Jan 2007 11:36:37 -0500 Subject: [openib-general] [PATCH 3/4] osm: TRIVIAL - making some getter and setter inline (fat-tree routing) In-Reply-To: <45990D2D.9060104@dev.mellanox.co.il> References: <45990D2D.9060104@dev.mellanox.co.il> Message-ID: <1167669397.4596.1555.camel@hal.voltaire.com> On Mon, 2007-01-01 at 08:31, Yevgeny Kliteynik wrote: > Making some getter and setter inline. > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. 
-- Hal From nimrodg at mellanox.com Mon Jan 1 09:10:37 2007 From: nimrodg at mellanox.com (Nimrod Gindi) Date: Mon, 1 Jan 2007 09:10:37 -0800 Subject: [openib-general] OFED release testing Task force Message-ID: <1E3DCD1C63492545881FACB6063A57C1B22C8D@mtiexch01.mti.com> Happy and successful year everyone, I would like to thank all the people who've stepped forward to participate (in the "To" field). Let's start 2007 with having the kick-off meeting of the task-force and since we have people from various time zones I would consider the best time to be: 8:30am PDT=11:30am EDT = 6:30pm Israel time The suggested dates are: January-4th-2007 or January-8th-2007 Please respond timely and I'll set up everything on January-2nd-2007 and will send out time, bridge and agenda. Nimrod Gindi Mellanox Technologies Ltd. mail : nimrodg at mellanox.com Cell : +1-408-750-4801 Office: +1-347-342-0011 Fax : +1-212-987-0275 -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Mon Jan 1 11:56:32 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 1 Jan 2007 21:56:32 +0200 Subject: [openib-general] v2.6.20-rc2 merged into ofed 1.2 Message-ID: <6C2C79E72C305246B504CBA17B5500C905DB89@mtlexch01.mtl.com> OK, Linus seems to have tagged -rc3 as well, so I went ahead and pulled that too. -----Original Message----- From: openib-general-bounces at openib.org [mailto:openib-general-bounces at openib.org] On Behalf Of Michael S. Tsirkin Sent: Monday, January 01, 2007 9:03 AM To: openib-general at openib.org Subject: [openib-general] v2.6.20-rc2 merged into ofed 1.2 Upstream v2.6.20-rc2 has been merged into ofed 1.2 branch. There have been no API changes since -rc1, so no backports need to be updated. 
-- MST _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From eitan at mellanox.co.il Mon Jan 1 12:10:40 2007 From: eitan at mellanox.co.il (Eitan Zahavi) Date: Mon, 1 Jan 2007 22:10:40 +0200 Subject: [openib-general] path query question Message-ID: <6C2C79E72C305246B504CBA17B5500C994B3E3@mtlexch01.mtl.com> Hi Hal, Happy new year! We are working on SDP HA and wanted to implement APM for OFED 1.2 What would be the best way to utilize the existing OpenSM GetMulti(MultiPathRecord) ? Do you have plans for implementing the new IBTA errata for supporting the automatic remote node/system selection? Thanks Eitan From: Ami Perlmutter Sent: Monday, January 01, 2007 4:01 PM To: Eitan Zahavi Cc: Michael S. Tsirkin Subject: path query question Hi Eitan I'm currently implementing HA in SDP. I have my primary path from an SA query, and I want to know what is the fastest and simplest way to find an alternate path from my other port to the remote machine's other port. There is an example in the SDP spec of how such a thing might be accomplished, but it is too long and involves too many MADs. -------------- next part -------------- An HTML attachment was scrubbed... URL: From karun.sharma at qlogic.com Mon Jan 1 20:26:29 2007 From: karun.sharma at qlogic.com (Karun Sharma) Date: Mon, 1 Jan 2007 22:26:29 -0600 Subject: [openib-general] OFED release testing Task force References: <1E3DCD1C63492545881FACB6063A57C1B22C8D@mtiexch01.mti.com> Message-ID: Hi Nimrod: Jan 4th - 8:30 am PDT is OK with me. 
Thanks Karun ________________________________ From: Nimrod Gindi [mailto:nimrodg at mellanox.com] Sent: Mon 1/1/2007 10:40 PM To: Dotan Barak; Amit Krig; meder at de.ibm.com; Karun Sharma; Jeremy Brown; minich at ornl.gov; yosefe at voltaire.com; monil at voltaire.com; sweitzen at cisco.com; ggrundstrom at NetEffect.com; Nimrod Gindi Cc: openfabrics-ewg at openib.org; openib-general at openib.org Subject: OFED release testing Task force Happy and successful year every one, I would like to thank all the people who've stepped forward to participate (in the "To" field). Let's start 2007 with having the kick-off meeting of the task-force and since we have people from various time zones I would consider the best time to be: 8:30am PDT=11:30am EDT = 6:30pm Israel time The suggested dates are: January-4th-2007 or January-8th-2007 Please respond timely and I'll set up every thing on January-2nd-2007 and will send out time, bridge and agenda. Nimrod Gindi Mellanox Technologies Ltd. mail : nimrodg at mellanox.com Cell : +1-408-750-4801 Office: +1-347-342-0011 Fax : +1-212-987-0275 -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From eitan at sw053.yok.mtl.com Mon Jan 1 21:06:11 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Tue, 2 Jan 2007 07:06:11 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-02:normal completion Message-ID: <200701020506.l0256BnV004776@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_1_11:35:38_2007 877b52 ibutils rev = Wed_Dec_27_23:39:30_2006 60aebe Total=324 Pass=324 Fail=0 Pass: 36 Stability IS1-16.topo 36 Pkey IS1-16.topo 36 OsmTest IS1-16.topo 36 OsmStress IS1-16.topo 36 Multicast IS1-16.topo 36 LidMgr IS1-16.topo 12 Stability IS3-loop.topo 12 Stability IS3-128.topo 12 Pkey IS3-128.topo 12 OsmTest IS3-loop.topo 12 OsmTest IS3-128.topo 12 OsmStress IS3-128.topo 12 Multicast IS3-loop.topo 12 Multicast IS3-128.topo 12 LidMgr IS3-128.topo Failures: From monil at voltaire.com Mon Jan 1 22:20:13 2007 From: monil at voltaire.com (Moni Levy) Date: Tue, 2 Jan 2007 08:20:13 +0200 Subject: [openib-general] OFED release testing Task force In-Reply-To: <1E3DCD1C63492545881FACB6063A57C1B22C8D@mtiexch01.mti.com> Message-ID: <3857BB049D83424D9DB82753D37CEA550A565F@taurus.voltaire.com> Nimrod 6:30PM Israel time is ok for me but the dates are not. I can participate on 9th & 10th of Jan. ________________________________ From: Nimrod Gindi [mailto:nimrodg at mellanox.com] Sent: Monday, January 01, 2007 7:11 PM To: Dotan Barak; Amit Krig; meder at de.ibm.com; karun.sharma at qlogic.com; Jeremy.brown at qlogic.com; minich at ornl.gov; Yosef Eitgin; Moni Levy; sweitzen at cisco.com; ggrundstrom at NetEffect.com; Nimrod Gindi Cc: openfabrics-ewg at openib.org; openib-general at openib.org Subject: OFED release testing Task force Happy and successful year every one, I would like to thank all the people who've stepped forward to participate (in the "To" field). 
Let's start 2007 with having the kick-off meeting of the task-force and since we have people from various time zones I would consider the best time to be: 8:30am PDT=11:30am EDT = 6:30pm Israel time The suggested dates are: January-4th-2007 or January-8th-2007 Please respond timely and I'll set up every thing on January-2nd-2007 and will send out time, bridge and agenda. Nimrod Gindi Mellanox Technologies Ltd. mail : nimrodg at mellanox.com Cell : +1-408-750-4801 Office: +1-347-342-0011 Fax : +1-212-987-0275 -------------- next part -------------- An HTML attachment was scrubbed... URL: From ogerlitz at voltaire.com Mon Jan 1 23:24:57 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 02 Jan 2007 09:24:57 +0200 Subject: [openib-general] path query question In-Reply-To: <6C2C79E72C305246B504CBA17B5500C994B3E3@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C994B3E3@mtlexch01.mtl.com> Message-ID: <459A08C9.6030504@voltaire.com> Eitan Zahavi wrote: > Do you have plans for implementing the new IBTA errata for supporting > the automatic remote node/system selection? Eitan, I understand that this "automatic remote node/system selection" specification provides a way for the SA client to get based on a port GID a second GID of another active port on the remote system, can you send a pointer to the specification? thanks, Or. From ogerlitz at voltaire.com Tue Jan 2 00:05:41 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 02 Jan 2007 10:05:41 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20061231190942.GB32485@mellanox.co.il> References: <20061231190942.GB32485@mellanox.co.il> Message-ID: <459A1255.5040800@voltaire.com> Michael S. Tsirkin wrote: > ib_wc currently only includes the local QP number: this matches the IB spec, > but seems mostly useless. The following patch replaces this with the pointer > to qp itself, and updates all low level drivers and all users. 
> Use case: > > In IPoIB CM code, I have a common CQ shared by multiple QPs. > To track connection usage, I need a way to get at some per-QP context > upon the completion, and I would like to avoid allocating > context object per work request just to stick a QP pointer into it. > With this code, I can just use wc->qp->qp_context. IPoIB CM uses SRQ so you can not stick a qp context in advanced, simply b/c you can't tell on behalf of which QP this WR would be used. However, looking on ipoib_cm_handle_rx_wc and the code is calls, i don't see where it uses wc->qp_num. Actually, unless the wc status is not success, why would you care from which QP you have got this packet? Or. From dotanb at dev.mellanox.co.il Tue Jan 2 00:35:18 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Tue, 2 Jan 2007 10:35:18 +0200 (IST) Subject: [openib-general] second version of the libibverbs man pages Message-ID: <1376.85.65.223.184.1167726918.squirrel@dev.mellanox.co.il> Hi all and Happy new year. * I rewrote the man pages and removed all of the extra characters of the POD module (according to Roland request). * I tried to stick with the 80 characters limit (according to James request), without 100% success (when i described the attributes of the structures, i needed more than 80 characters in a line..) * Several spelling mistakes were fixed Roland, what do you think? can you use this version and check in those files? thanks Dotan -------------- next part -------------- A non-text attachment was scrubbed... Name: man_pages.tar.gz Type: application/x-gzip Size: 15181 bytes Desc: not available URL: From mst at mellanox.co.il Tue Jan 2 01:02:59 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 2 Jan 2007 11:02:59 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <459A1255.5040800@voltaire.com> References: <459A1255.5040800@voltaire.com> Message-ID: <20070102090259.GA4451@mellanox.co.il> > > ib_wc currently only includes the local QP number: this matches the IB spec, > > but seems mostly useless. The following patch replaces this with the pointer > > to qp itself, and updates all low level drivers and all users. > > > Use case: > > > > In IPoIB CM code, I have a common CQ shared by multiple QPs. > > To track connection usage, I need a way to get at some per-QP context > > upon the completion, and I would like to avoid allocating > > context object per work request just to stick a QP pointer into it. > > With this code, I can just use wc->qp->qp_context. > > IPoIB CM uses SRQ so you can not stick a qp context in advanced, simply > b/c you can't tell on behalf of which QP this WR would be used. Sorry, I could not parse this sentence. All my patch does is, put struct ib_qp * instead of qp_num in ib_wc - this is more useful for ULPs and turns out not to have extra cost for low-level drivers. > However, looking on ipoib_cm_handle_rx_wc and the code is calls, i don't > see where it uses wc->qp_num. Actually, unless the wc status is not > success, why would you care from which QP you have got this packet? To detect stale (unused) connections and remove them. I have not pushed this code yet. -- MST From dotanb at dev.mellanox.co.il Tue Jan 2 01:11:10 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Tue, 2 Jan 2007 11:11:10 +0200 (IST) Subject: [openib-general] does the libibverbs support static linkage? Message-ID: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> Hi Roland. 
Here are the props of my host/driver: ************************************************************* Host Architecture : x86_64 Linux Distribution: SUSE Linux Enterprise Server 10 (x86_64) VERSION = 10 Kernel Version : 2.6.16.21-0.8-smp GCC Version : gcc (GCC) 4.1.0 (SUSE Linux) Memory size : 4047700 kB Driver Version : gen2_devel-20070101-1730 HCA ID(s) : mthca0 HCA model(s) : 23108 FW version(s) : 3.5.000 Board(s) : MT_0030000001 ************************************************************* when tried to use static linking with our tests (i tried several tests) using the following change in the Makefile: OFED_PATH = /usr/local/ LDFLAGS += --static -Wl,--whole-archive -libverbs ${OFED_PATH}/lib64/infiniband/mthca.a LDFLAGS += -Wl,--no-whole-archive I got the following linkage warning: /usr/local/lib64/libibverbs.a(src_libibverbs_la-init.o): In function `load_driver': src/init.c:66: warning: Using 'dlopen' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking When i tried to run the executable i got segmentation fault in: load_driver (so_path=0x5e65f0 "/usr/local//lib64/infiniband/mthca.so") at src/init.c:61 (func: load_driver) It seems that the code that causes the seg fault is the following code line: dlhandle = dlopen(so_path, RTLD_NOW); When i executed ldd on the executable i got the following message: not a dynamic executable Do you know why does the debugger show that "/usr/local//lib64/infiniband/mthca.so" was used and not "/usr/local//lib64/infiniband/mthca.a"? Can you please check this issue? thanks Dotan From dotanb at dev.mellanox.co.il Tue Jan 2 02:23:18 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Tue, 2 Jan 2007 12:23:18 +0200 (IST) Subject: [openib-general] the valgrind support of libibverbs/libmthca is not full Message-ID: <1987.85.65.223.184.1167733398.squirrel@dev.mellanox.co.il> Hi Roland. 
I compiled the libibverbs/libmthca with valgrind support and executed one of our tests using valgrind. I noticed that not all of the verbs are supporting valgrind. For the following verbs (and I'm sure that this is not the full list) there is a memory warning from valgrind: ibv_query_device ibv_dealloc_pd ibv_create_cq thanks Dotan From ogerlitz at voltaire.com Tue Jan 2 02:54:28 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 02 Jan 2007 12:54:28 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20070102090259.GA4451@mellanox.co.il> References: <459A1255.5040800@voltaire.com> <20070102090259.GA4451@mellanox.co.il> Message-ID: <459A39E4.7000506@voltaire.com> Michael S. Tsirkin wrote: >>> Use case: >>> In IPoIB CM code, I have a common CQ shared by multiple QPs. >>> To track connection usage, I need a way to get at some per-QP context >>> upon the completion, and I would like to avoid allocating >>> context object per work request just to stick a QP pointer into it. >>> With this code, I can just use wc->qp->qp_context. >> IPoIB CM uses SRQ so you can not stick a qp context in advanced, simply >> b/c you can't tell on behalf of which QP this WR would be used. > Sorry, I could not parse this sentence. This sentence refers to your use case description, not to what this patch is doing. And again, when SRQ is used, the IB consumer can not stick a per QP context to the WR on post recv time b/c you can't tell on behalf of which QP the SRQ would consume the WR, is it clearer now? > All my patch does is, put struct ib_qp * instead of qp_num in ib_wc - > this is more useful for ULPs and turns out not to have extra cost > for low-level drivers. sure, it makes sense to me, it can help any IB ULP that uses SRQ or uses the same CQ to report completions from multiple QPs. >> However, looking on ipoib_cm_handle_rx_wc and the code is calls, i don't >> see where it uses wc->qp_num. 
Actually, unless the wc status is not >> success, why would you care from which QP you have got this packet? > To detect stale (unused) connections and remove them. > I have not pushed this code yet. So you don't really have a use case which is based on code that can be reviewed (or can it be reviewed from your openfabrics GIT?) I understand that the logic you suggest comes into play when wc->status is not success as i have guessed. Or. From mst at mellanox.co.il Tue Jan 2 04:05:12 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 2 Jan 2007 14:05:12 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <459A39E4.7000506@voltaire.com> References: <459A39E4.7000506@voltaire.com> Message-ID: <20070102120512.GA16290@mellanox.co.il> > >>> Use case: > >>> In IPoIB CM code, I have a common CQ shared by multiple QPs. > >>> To track connection usage, I need a way to get at some per-QP context > >>> upon the completion, and I would like to avoid allocating > >>> context object per work request just to stick a QP pointer into it. > >>> With this code, I can just use wc->qp->qp_context. > > >> IPoIB CM uses SRQ so you can not stick a qp context in advanced, simply > >> b/c you can't tell on behalf of which QP this WR would be used. > > > Sorry, I could not parse this sentence. > > This sentence refers to your use case description, not to what this > patch is doing. And again, when SRQ is used, the IB consumer can not > stick a per QP context to the WR on post recv time b/c you can't tell on > behalf of which QP the SRQ would consume the WR, is it clearer now? OK, but how does this refer to the use case description? > > All my patch does is, put struct ib_qp * instead of qp_num in ib_wc - > > this is more useful for ULPs and turns out not to have extra cost > > for low-level drivers. > > sure, it makes sense to me, it can help any IB ULP that uses SRQ or uses > the same CQ to report completions from multiple QPs. 
And that's exactly what IPoIB CM does. > >> However, looking on ipoib_cm_handle_rx_wc and the code is calls, i don't > >> see where it uses wc->qp_num. Actually, unless the wc status is not > >> success, why would you care from which QP you have got this packet? > > > To detect stale (unused) connections and remove them. > > I have not pushed this code yet. > > So you don't really have a use case which is based on code that can be > reviewed (or can it be reviewed from your openfabrics GIT?) Well, I did note the patch is untested. I'm testing the code in question, and plan to push to openfabrics RSN. I'll post a patch as well. > I understand > that the logic you suggest comes into play when wc->status is not > success as i have guessed. No, I want to have an LRU list of pages. -- MST From ogerlitz at voltaire.com Tue Jan 2 05:11:10 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 02 Jan 2007 15:11:10 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20070102120512.GA16290@mellanox.co.il> References: <459A39E4.7000506@voltaire.com> <20070102120512.GA16290@mellanox.co.il> Message-ID: <459A59EE.2020901@voltaire.com> Michael S. Tsirkin wrote: >> This sentence refers to your use case description, not to what this >> patch is doing. And again, when SRQ is used, the IB consumer can not >> stick a per QP context to the WR on post recv time b/c you can't tell on >> behalf of which QP the SRQ would consume the WR, is it clearer now? > OK, but how does this refer to the use case description? it comes to say that your multiple QPs per CQ use case description is in away not accurate since you actually have SRQ per multiple QPs and this is the point why with the current IB API you must have to maintain an association between wc->QPN to QP structure, anyway: >>> All my patch does is, put struct ib_qp * instead of qp_num in ib_wc - >>> this is more useful for ULPs and turns out not to have extra cost >>> for low-level drivers. 
>> sure, it makes sense to me, it can help any IB ULP that uses SRQ or uses >> the same CQ to report completions from multiple QPs. > And that's exactly what IPoIB CM does. does in the code which is not posted. >>> To detect stale (unused) connections and remove them. >>> I have not pushed this code yet. >> So you don't really have a use case which is based on code that can be >> reviewed (or can it be reviewed from your openfabrics GIT?) > Well, I did note the patch is untested. > I'm testing the code in question, and plan to push to openfabrics RSN. > I'll post a patch as well. OK >> that the logic you suggest comes into play when wc->status is not >> success as i have guessed. > No, I want to have an LRU list of pages. OK From mst at mellanox.co.il Tue Jan 2 05:26:24 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 2 Jan 2007 15:26:24 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <459A59EE.2020901@voltaire.com> References: <459A59EE.2020901@voltaire.com> Message-ID: <20070102132624.GB16290@mellanox.co.il> > it comes to say that your multiple QPs per CQ use case description is in > away not accurate since you actually have SRQ per multiple QPs and this > is the point why with the current IB API you must have to maintain an > association between wc->QPN to QP structure, anyway: I'm sorry, you lost me here. Was something supposed to come after the colon? Anyway, at some point you said you see the point of the patch. If that's still so, there's no reason to argue about wording, that's all the use case was supposed to provide. 
-- MST From halr at voltaire.com Tue Jan 2 05:37:29 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 08:37:29 -0500 Subject: [openib-general] [PATCH 4/4] osm: building LID matrices in fat-tree routing In-Reply-To: <45990D37.6050402@dev.mellanox.co.il> References: <45990D37.6050402@dev.mellanox.co.il> Message-ID: <1167745048.4596.71762.camel@hal.voltaire.com> On Mon, 2007-01-01 at 08:31, Yevgeny Kliteynik wrote: > Building LID matrices in fat-tree routing > > Lid matrices are now built while setting routing > w/o any additional complexity, and removed the > temporary ftree dump function (that differed from > the ucast mgr dump function only by not dumping > anything that is related to lid matrices). > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. -- Hal From halr at voltaire.com Tue Jan 2 06:07:31 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 09:07:31 -0500 Subject: [openib-general] path query question In-Reply-To: <6C2C79E72C305246B504CBA17B5500C994B3E3@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C994B3E3@mtlexch01.mtl.com> Message-ID: <1167746850.4596.73456.camel@hal.voltaire.com> Hi Eitan, On Mon, 2007-01-01 at 15:10, Eitan Zahavi wrote: > Hi Hal, > > > > Happy new year! Same to you :-) > We are working on SDP HA and wanted to implement APM for OFED 1.2 > > What would be the best way to utilize the existing OpenSM > GetMulti(MultiPathRecord) ? Current SA MPR support in OpenSM does try to return paths with lower "overlap" so I think it could be used as is. However, I'm not sure this is supported by all SMs so the SA CapabilityMask bit needs testing and there needs to be fallback to normal PRs. > Do you have plans for implementing the new IBTA errata for supporting > the automatic remote node/system selection? Yes; this is beyond the OFED 1.2 timeframe. -- Hal > > > Thanks > > Eitan > > > > From: Ami Perlmutter > Sent: Monday, January 01, 2007 4:01 PM > To: Eitan Zahavi > Cc: Michael S. 
Tsirkin > Subject: path query question > > > > > Hi Eitan > > I’m currently implementing HA in SDP. > > I have my primary path from an SA query, and I want to know what is > the fastest and simplest way to find an alternate path > > from my other port to the remote machine’s other port. > > There is an example in the SDP spec of how such a thing might be > accomplished, but it is too long and involves too many MADs. > > From kliteyn at dev.mellanox.co.il Tue Jan 2 06:58:48 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 02 Jan 2007 16:58:48 +0200 Subject: [openib-general] [PATCH 2/4] osm: LMC > 0 is not supported by fat-tree routing. In-Reply-To: <1167669160.4596.1360.camel@hal.voltaire.com> References: <45990D1D.6090305@dev.mellanox.co.il> <1167669160.4596.1360.camel@hal.voltaire.com> Message-ID: <459A7328.9090305@dev.mellanox.co.il> Hal Rosenstock wrote: > On Mon, 2007-01-01 at 08:31, Yevgeny Kliteynik wrote: >> LMC > 0 is not supported by fat-tree routing. > > Might this be different in the future ? Can LMC > 0 be supported with > fat tree routing ? Sure. We just have to figure out two things: 1. Figure out what should communication pattern look like when LMC is not 0. 2. How to make fat-tree routing choose different paths for different lids of the same CA, because otherwise using LMC>0 is pointless. --Yevgeny. >> Removing all the related code and adding check to >> inform the user in case LMC is set. >> >> Signed-off-by: Yevgeny Kliteynik > > Thanks. Applied. > > I'll also add a note to this effect to the documentation shortly. > > -- Hal > From halr at voltaire.com Tue Jan 2 07:23:00 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 10:23:00 -0500 Subject: [openib-general] [PATCH 2/4] osm: LMC > 0 is not supported by fat-tree routing. 
In-Reply-To: <459A7328.9090305@dev.mellanox.co.il> References: <45990D1D.6090305@dev.mellanox.co.il> <1167669160.4596.1360.camel@hal.voltaire.com> <459A7328.9090305@dev.mellanox.co.il> Message-ID: <1167751380.4596.77691.camel@hal.voltaire.com> On Tue, 2007-01-02 at 09:58, Yevgeny Kliteynik wrote: > Hal Rosenstock wrote: > > On Mon, 2007-01-01 at 08:31, Yevgeny Kliteynik wrote: > >> LMC > 0 is not supported by fat-tree routing. > > > > Might this be different in the future ? Can LMC > 0 be supported with > > fat tree routing ? > > Sure. We just have to figure out two things: > 1. Figure out what should communication pattern look like > when LMC is not 0. > 2. How to make fat-tree routing choose different paths for > different lids of the same CA, because otherwise using LMC>0 > is pointless. Not sure what you mean by this. How is this different for fat tree routing than any other routing algorithm ? Maybe this is an issue for all of them. Doesn't the PR/MPR request preselect the LID or the response determines the LID to use ? The only issue I see is whether there needs to be a separate GID for each possible LID. -- Hal > --Yevgeny. > > >> Removing all the related code and adding check to > >> inform the user in case LMC is set. > >> > >> Signed-off-by: Yevgeny Kliteynik > > > > Thanks. Applied. > > > > I'll also add a note to this effect to the documentation shortly. 
> > > > -- Hal > > From halr at voltaire.com Tue Jan 2 07:25:20 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 10:25:20 -0500 Subject: [openib-general] ib_gid_is_link_local Message-ID: <1167751520.4596.77827.camel@hal.voltaire.com> Hi Eitan, I started to look at changes for IB routers and the following in ib_types.h doesn't look quite right to me: static inline boolean_t OSM_API ib_gid_is_link_local( IN const ib_gid_t* const p_gid ) { return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); } Shouldn't it be either the default subnet prefix or the one supplied in PortInfo:GidPrefix (which might not be the default one) ? -- Hal From sashak at voltaire.com Tue Jan 2 08:17:46 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 2 Jan 2007 18:17:46 +0200 Subject: [openib-general] [PATCH 1/4] osm: TRIVIAL - making comparison functions static (fat-tree routing) In-Reply-To: <45990D05.60905@dev.mellanox.co.il> References: <45990D05.60905@dev.mellanox.co.il> Message-ID: <20070102161746.GB7265@sashak.voltaire.com> Hi Yevgeny, On 15:30 Mon 01 Jan , Yevgeny Kliteynik wrote: > Making two comparison functions in ftree static. Good. Any news about OSM_CDECL/__cdecl elimination? 
Sasha > > -- > Yevgeny > > Signed-off-by: Yevgeny Kliteynik > --- > osm/opensm/osm_ucast_ftree.c | 4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c > index 0473135..32d1b3b 100644 > --- a/osm/opensm/osm_ucast_ftree.c > +++ b/osm/opensm/osm_ucast_ftree.c > @@ -226,7 +226,7 @@ typedef struct ftree_fabric_t_ > ** > ***************************************************/ > > -int OSM_CDECL > +static int OSM_CDECL > __osm_ftree_compare_switches_by_index( > IN const void * p1, > IN const void * p2) > @@ -247,7 +247,7 @@ __osm_ftree_compare_switches_by_index( > > /***************************************************/ > > -int OSM_CDECL > +static int OSM_CDECL > __osm_ftree_compare_port_groups_by_remote_switch_index( > IN const void * p1, > IN const void * p2) > -- > 1.4.4.1.GIT > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From kliteyn at dev.mellanox.co.il Tue Jan 2 08:18:36 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 02 Jan 2007 18:18:36 +0200 Subject: [openib-general] [PATCH 1/4] osm: TRIVIAL - making comparison functions static (fat-tree routing) In-Reply-To: <20070102161746.GB7265@sashak.voltaire.com> References: <45990D05.60905@dev.mellanox.co.il> <20070102161746.GB7265@sashak.voltaire.com> Message-ID: <459A85DC.2060003@dev.mellanox.co.il> Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 15:30 Mon 01 Jan , Yevgeny Kliteynik wrote: >> Making two comparison functions in ftree static. > > Good. > > Any news about OSM_CDECL/__cdecl emilination? 
Here's what I know by now: __cdecl is default on windows, but osm is compiled with a flag that makes __stdcall default (there are some problems linking osm to other libraries without this flag), so OSM_CDECL is gonna stay. One thing I do need to check is removing the __stdcall and trying to recompile everything to see if the __stdcall is still needed. -- Yevgeny. > Sasha > >> -- >> Yevgeny >> >> Signed-off-by: Yevgeny Kliteynik >> --- >> osm/opensm/osm_ucast_ftree.c | 4 ++-- >> 1 files changed, 2 insertions(+), 2 deletions(-) >> >> diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c >> index 0473135..32d1b3b 100644 >> --- a/osm/opensm/osm_ucast_ftree.c >> +++ b/osm/opensm/osm_ucast_ftree.c >> @@ -226,7 +226,7 @@ typedef struct ftree_fabric_t_ >> ** >> ***************************************************/ >> >> -int OSM_CDECL >> +static int OSM_CDECL >> __osm_ftree_compare_switches_by_index( >> IN const void * p1, >> IN const void * p2) >> @@ -247,7 +247,7 @@ __osm_ftree_compare_switches_by_index( >> >> /***************************************************/ >> >> -int OSM_CDECL >> +static int OSM_CDECL >> __osm_ftree_compare_port_groups_by_remote_switch_index( >> IN const void * p1, >> IN const void * p2) >> -- >> 1.4.4.1.GIT >> >> >> >> _______________________________________________ >> openib-general mailing list >> openib-general at openib.org >> http://openib.org/mailman/listinfo/openib-general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >> > From kliteyn at dev.mellanox.co.il Tue Jan 2 08:22:46 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 02 Jan 2007 18:22:46 +0200 Subject: [openib-general] [PATCH] osm: Fat-tree routing failed to recognize non-fat-tree topology. Message-ID: <459A86D6.4020102@dev.mellanox.co.il> Hi Hal Fixing bug in fat-tree routing: it failed to recognize non-fat-tree topology. 
-- Yevgeny Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_ucast_ftree.c | 28 +++++++++++++++++++++++++--- 1 files changed, 25 insertions(+), 3 deletions(-) diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c index 7f8b556..eb7cf96 100644 --- a/osm/opensm/osm_ucast_ftree.c +++ b/osm/opensm/osm_ucast_ftree.c @@ -178,9 +178,9 @@ typedef struct ftree_sw_t_ ftree_tuple_t tuple; ib_net16_t base_lid; ftree_port_group_t ** down_port_groups; - uint16_t down_port_groups_num; + uint8_t down_port_groups_num; ftree_port_group_t ** up_port_groups; - uint16_t up_port_groups_num; + uint8_t up_port_groups_num; ftree_fwd_tbl_t lft_buf; } ftree_sw_t; @@ -747,6 +747,7 @@ __osm_ftree_sw_add_port( p_sw->down_port_groups[p_sw->down_port_groups_num++] = p_group; } __osm_ftree_port_group_add_port(p_group,port_num,remote_port_num); + } /* __osm_ftree_sw_add_port() */ /***************************************************/ @@ -2731,9 +2732,27 @@ __osm_ftree_fabric_construct_sw_ports( p_remote_sw = (ftree_sw_t *)cl_qmap_get(&p_ftree->sw_tbl,remote_node_guid); CL_ASSERT(p_remote_sw != (ftree_sw_t *)cl_qmap_end(&p_ftree->sw_tbl)); - CL_ASSERT(abs(p_sw->rank - p_remote_sw->rank) == 1); p_remote_hca_or_sw = (void *)p_remote_sw; + if (abs(p_sw->rank - p_remote_sw->rank) != 1) + { + osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "__osm_ftree_fabric_construct_sw_ports: ERR AB16: " + "Illegal link between switches with ranks %u and %u:\n" + " GUID 0x%016" PRIx64 ", LID 0x%x, rank %u\n" + " GUID 0x%016" PRIx64 ", LID 0x%x, rank %u\n", + p_sw->rank, + p_remote_sw->rank, + cl_ntoh64(osm_node_get_node_guid(osm_switch_get_node_ptr(p_sw->p_osm_sw))), + cl_ntoh16(p_sw->base_lid), + p_sw->rank, + cl_ntoh64(osm_node_get_node_guid(osm_switch_get_node_ptr(p_remote_sw->p_osm_sw))), + cl_ntoh16(p_remote_sw->base_lid), + p_remote_sw->rank); + res = -1; + goto Exit; + } + if (p_sw->rank > p_remote_sw->rank) direction = FTREE_DIRECTION_UP; else @@ -3012,6 +3031,9 @@ 
__osm_ftree_construct_fabric( if (! __osm_ftree_fabric_validate_topology(p_ftree)) { + osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, + "Fabric topology is not a fat-tree - " + "routing falls back to default routing\n"); status = -1; goto Exit; } -- 1.4.4.1.GIT From monis at voltaire.com Tue Jan 2 08:30:57 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 02 Jan 2007 18:30:57 +0200 Subject: [openib-general] CQ error handling in IPoIB Message-ID: <459A88C1.6010100@voltaire.com> Hi, I have a question regarding error handling in IPoIB. The spec says... When a CQ encounters an error, in order to be able to use the CQ again, the consumer should: * Destroy all the QPs that are attached to the CQ * Destroy the CQ * Recreate the CQ through the Create Completion Queue verb While (at least one part of) the code does... static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) { ... ... ... if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed send event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); } Since I don't see any error handling and I understand that the only way left to make the driver work again is to restart it with unload/load, my question is: What does the code assume about errors happening on the CQ? 
thanks MoniS From sashak at voltaire.com Tue Jan 2 08:54:36 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 2 Jan 2007 18:54:36 +0200 Subject: [openib-general] [PATCH 1/4] osm: TRIVIAL - making comparison functions static (fat-tree routing) In-Reply-To: <459A85DC.2060003@dev.mellanox.co.il> References: <45990D05.60905@dev.mellanox.co.il> <20070102161746.GB7265@sashak.voltaire.com> <459A85DC.2060003@dev.mellanox.co.il> Message-ID: <20070102165436.GD7265@sashak.voltaire.com> On 18:18 Tue 02 Jan , Yevgeny Kliteynik wrote: > Sasha Khapyorsky wrote: > > Hi Yevgeny, > > > > On 15:30 Mon 01 Jan , Yevgeny Kliteynik wrote: > >> Making two comparison functions in ftree static. > > > > Good. > > > > Any news about OSM_CDECL/__cdecl elimination? > > Here's what I know by now: > __cdecl is default on windows, but osm is compiled with a flag > that makes __stdcall default (there are some problems linking > osm to other libraries without this flag), Which problems? Isn't it better to solve those problems instead of such a strange workaround as putting __cdecl/__stdcall attribution in the common code? > so OSM_CDECL is gonna > stay. One thing I do need to check is removing the __stdcall > and trying to recompile everything to see if the __stdcall > is still needed. Thanks. Sasha From halr at voltaire.com Tue Jan 2 08:48:50 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 11:48:50 -0500 Subject: [openib-general] [PATCH] osm: Fat-tree routing failed to recognize non-fat-tree topology. In-Reply-To: <459A86D6.4020102@dev.mellanox.co.il> References: <459A86D6.4020102@dev.mellanox.co.il> Message-ID: <1167756529.4596.82466.camel@hal.voltaire.com> On Tue, 2007-01-02 at 11:22, Yevgeny Kliteynik wrote: > Hi Hal > > Fixing bug in fat-tree routing: > it failed to recognize non-fat-tree topology. > > -- > Yevgeny > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied.
-- Hal From halr at voltaire.com Tue Jan 2 09:01:25 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 12:01:25 -0500 Subject: [openib-general] [PATCH 1/2] OpenSM/osm_sa_(path multipath_record).c: Handle off subnet GID requests better in preparation for IB router/multisubnet support Message-ID: <1167757284.4596.83209.camel@hal.voltaire.com> OpenSM/osm_sa_(path multipath_record).c: Handle off subnet GID requests better in preparation for IB router/multisubnet support Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 7070ec4..1d2d203 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ b/osm/opensm/osm_sa_multipath_record.c @@ -1198,6 +1198,7 @@ __osm_mpr_rcv_get_gids( IN osm_mpr_rcv_t* const p_rcv, IN const ib_gid_t * gids, IN int ngids, + IN int is_sgid, OUT osm_port_t** pp_port ) { osm_port_t *p_port; @@ -1207,6 +1208,25 @@ __osm_mpr_rcv_get_gids( OSM_LOG_ENTER( p_rcv->p_log, __osm_mpr_rcv_get_gids ); for ( i = 0; i < ngids; i++, gids++ ) { + if ( !ib_gid_is_link_local ( gids ) ) { + if ( ( is_sgid && ib_gid_is_multicast( gids ) ) || + ( ib_gid_get_subnet_prefix ( gids ) != p_rcv->p_subn->opt.subnet_prefix ) ) { + /* + This 'error' is the client's fault (bad gid) so + don't enter it as an error in our own log. + Return an error response to the client. + */ + osm_log( p_rcv->p_log, OSM_LOG_VERBOSE, + "__osm_mpr_rcv_get_gids: ERR 451B: " + "Non local %sGID subnet prefix 0x%016" PRIx64 "\n", + is_sgid ? "S" : "D", + cl_ntoh64( gids->unicast.prefix ) ); + + ib_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } + } + p_port = (osm_port_t *)cl_qmap_get( &p_rcv->p_subn->port_guid_tbl, gids->unicast.interface_id ); if ( !p_port || @@ -1266,16 +1286,13 @@ __osm_mpr_rcv_get_end_points( Check a few easy disqualifying cases up front before getting into the endpoints. */ - - /* SDGIDs could be checked for multicast disqualification. 
*/ - *nsrc = *ndest = 0; if ( comp_mask & IB_MPR_COMPMASK_SGIDCOUNT ) { *nsrc = p_mpr->sgid_count; if ( *nsrc > IB_MULTIPATH_MAX_GIDS ) *nsrc = IB_MULTIPATH_MAX_GIDS; - sa_status = __osm_mpr_rcv_get_gids( p_rcv, gids, *nsrc, pp_ports ); + sa_status = __osm_mpr_rcv_get_gids( p_rcv, gids, *nsrc, 1, pp_ports ); if ( sa_status != IB_SUCCESS ) goto Exit; } @@ -1284,7 +1301,7 @@ __osm_mpr_rcv_get_end_points( *ndest = p_mpr->dgid_count; if ( *ndest + *nsrc > IB_MULTIPATH_MAX_GIDS ) *ndest = IB_MULTIPATH_MAX_GIDS - *nsrc; - sa_status = __osm_mpr_rcv_get_gids( p_rcv, gids + *nsrc, *ndest, + sa_status = __osm_mpr_rcv_get_gids( p_rcv, gids + *nsrc, *ndest, 0, pp_ports + *nsrc ); } diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index e61d617..0500a74 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -1166,6 +1166,25 @@ __osm_pr_rcv_get_end_points( if( comp_mask & IB_PR_COMPMASK_SGID ) { + if ( ! ib_gid_is_link_local ( &p_pr->sgid ) ) + { + if ( ib_gid_get_subnet_prefix ( &p_pr->sgid ) != p_rcv->p_subn->opt.subnet_prefix ) + { + /* + This 'error' is the client's fault (bad gid) so + don't enter it as an error in our own log. + Return an error response to the client. + */ + osm_log( p_rcv->p_log, OSM_LOG_VERBOSE, + "__osm_pr_rcv_get_end_points: " + "Non local SGID subnet prefix 0x%016" PRIx64 "\n", + cl_ntoh64( p_pr->sgid.unicast.prefix ) ); + + sa_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } + } + *pp_src_port = (osm_port_t*)cl_qmap_get( &p_rcv->p_subn->port_guid_tbl, p_pr->sgid.unicast.interface_id ); @@ -1215,6 +1234,26 @@ __osm_pr_rcv_get_end_points( if( comp_mask & IB_PR_COMPMASK_DGID ) { + if ( ! ib_gid_is_link_local ( &p_pr->dgid ) ) + { + if ( ! ib_gid_is_multicast ( &p_pr->dgid ) && + ib_gid_get_subnet_prefix ( &p_pr->dgid ) != p_rcv->p_subn->opt.subnet_prefix ) + { + /* + This 'error' is the client's fault (bad gid) so + don't enter it as an error in our own log. 
+ Return an error response to the client. + */ + osm_log( p_rcv->p_log, OSM_LOG_VERBOSE, + "__osm_pr_rcv_get_end_points: " + "Non local DGID subnet prefix 0x%016" PRIx64 "\n", + cl_ntoh64( p_pr->dgid.unicast.prefix ) ); + + sa_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } + } + *pp_dest_port = (osm_port_t*)cl_qmap_get( &p_rcv->p_subn->port_guid_tbl, p_pr->dgid.unicast.interface_id ); From halr at voltaire.com Tue Jan 2 09:01:33 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 12:01:33 -0500 Subject: [openib-general] [PATCH 1/2] osmtest/osmtest.c: Add off subnet PathRecord requests Message-ID: <1167757286.4596.83211.camel@hal.voltaire.com> osmtest/osmtest.c: Add off subnet PathRecord requests Signed-off-by: Hal Rosenstock diff --git a/osm/osmtest/osmtest.c b/osm/osmtest/osmtest.c index b4543ee..f8648da 100644 --- a/osm/osmtest/osmtest.c +++ b/osm/osmtest/osmtest.c @@ -5802,6 +5802,23 @@ osmtest_validate_against_db( IN osmtest_ /* Can't check status as don't know whether port is running IPoIB */ osmtest_get_path_rec_by_gid_pair( p_osmt, portgid, mgid, &context); + /* Off subnet unicast PathRecord */ + memset( &context, 0, sizeof( context ) ); + ib_gid_set_default( &portgid, portguid ); + ib_gid_set_default( &mgid, portguid ); + mgid.raw[7] = 0xff; /* not default GID prefix */ + /* Can't check status as don't know whether ??? 
*/ + osmtest_get_path_rec_by_gid_pair( p_osmt, portgid, mgid, &context); + + /* More than link local scope multicast PathRecord */ + memset( &context, 0, sizeof( context ) ); + ib_gid_set_default( &portgid, portguid ); + /* Set IPoIB broadcast MGID */ + mgid.unicast.prefix = CL_HTON64(0xff15401bffff0000ULL); /* site local */ + mgid.unicast.interface_id = CL_HTON64(0x00000000ffffffffULL); + /* Can't check status as don't know whether port is running IPoIB */ + osmtest_get_path_rec_by_gid_pair( p_osmt, portgid, mgid, &context); + #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) memset( &context, 0, sizeof( context ) ); memset( &request, 0, sizeof( request ) ); From halr at voltaire.com Tue Jan 2 09:03:35 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 12:03:35 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167751520.4596.77827.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> Message-ID: <1167757414.4596.83346.camel@hal.voltaire.com> On Tue, 2007-01-02 at 10:25, Hal Rosenstock wrote: > Hi Eitan, > > I started to look at changes for IB routers and the following in > ib_types.h doesn't look quite right to me: > > static inline boolean_t OSM_API > ib_gid_is_link_local( > IN const ib_gid_t* const p_gid ) > { > return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); > } > > Shouldn't it be either the default subnet prefix or the one supplied in > PortInfo:GidPrefix (which might not be the default one) ? I propose changing the routine name to ib_gid_is_default_prefix so as not to be misleading. Comments ? Patch to follow for this. 
-- Hal > -- Hal > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mst at mellanox.co.il Tue Jan 2 10:27:01 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 2 Jan 2007 20:27:01 +0200 Subject: [openib-general] CQ error handling in IPoIB In-Reply-To: <459A88C1.6010100@voltaire.com> References: <459A88C1.6010100@voltaire.com> Message-ID: <20070102182701.GA6637@mellanox.co.il> Hi, > I have a question regarding error handling in IPoIB. > > The spec says... > > When a CQ encounters an error, in order to be able to use the CQ again, > the consumer should: > * Destroy all the QPs that are attached to the CQ > * Destroy the CQ > * Recreate the CQ through the Create Completion Queue verb This speaks about CQ errors such as CQ overrun, not QP errors. > While (at least one part of) the code does... > > static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) > { > ... > ... > ... > if (wc->status != IB_WC_SUCCESS && > wc->status != IB_WC_WR_FLUSH_ERR) > ipoib_warn(priv, "failed send event " > "(status=%d, wrid=%d vend_err %x)\n", > wc->status, wr_id, wc->vendor_err); > } wc status reports QP errors, not CQ errors. This is what is handled here. > Since I don't see any error handling and I understand that the only way left to make the driver work again is to restart it with uload/load, my question is: What does the code assume about errors happening on th CQ? > > thanks > > MoniS IPoIB prevents CQ overrun errors by allocating CQ size > Rx size + TX size. 
-- MST From todd.rimmer at qlogic.com Tue Jan 2 10:39:40 2007 From: todd.rimmer at qlogic.com (Todd Rimmer) Date: Tue, 2 Jan 2007 12:39:40 -0600 Subject: [openib-general] CQ error handling in IPoIB Message-ID: <4FB1BCCAE6CAED44A1DC005B1DE06119171041@EPEXCH2.qlogic.org> > From: Moni Shoua > Sent: Tuesday, January 02, 2007 11:31 AM > To: openib-general at openib.org > Subject: [openib-general] CQ error handling in IPoIB > > Hi, > I have a question regarding error handling in IPoIB. > > The spec says... > > When a CQ encounters an error, in order to be able to use the CQ again, > the consumer should: > * Destroy all the QPs that are attached to the CQ > * Destroy the CQ > * Recreate the CQ through the Create Completion Queue verb > > While (at least one part of) the code does... > > static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc > *wc) > { > ... > ... > ... > if (wc->status != IB_WC_SUCCESS && > wc->status != IB_WC_WR_FLUSH_ERR) > ipoib_warn(priv, "failed send event " > "(status=%d, wrid=%d vend_err %x)\n", > wc->status, wr_id, wc->vendor_err); > } > In this context the spec is referring to CQ errors, not work request errors. For example, CQ overflow is considered a CQ error and would require the procedure you describe above (destroy QPs, CQ, etc). However a work request error is a WQE or QP error. As such the CQ does not need to be destroyed. Rather the recovery will be limited to QP level actions. Typically the QP has moved to the error state and the QP must be reset and moved back to RTS to resume operation (or the QP must be destroyed and recreated). If you check section 10.10.3.4 of IBTA 1.2 you will see a list of possible errors on a UD QP. Notice that the errors all involve Local Protection or Operation errors. Hence they cannot be caused by a remote node. 
Rather, they are only caused by invalid local requests (by IPoIB in this case) or possibly by hardware or OS problems (memory stomps, multi-bit undetected memory or bus errors, HCA hardware problem, etc). As you indicate, when such errors occur, the driver should recreate or reset the QP. Todd Rimmer From mshefty at ichips.intel.com Tue Jan 2 10:59:12 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 02 Jan 2007 10:59:12 -0800 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20061231190942.GB32485@mellanox.co.il> References: <20061231190942.GB32485@mellanox.co.il> Message-ID: <459AAB80.2070601@ichips.intel.com> Michael S. Tsirkin wrote: > ib_wc currently only includes the local QP number: this matches the IB spec, > but seems mostly useless. The following patch replaces this with the pointer > to qp itself, and updates all low level drivers and all users. > > This has the following advantages: > - Ability to get a per-qp context through wc->qp->qp_context > - Existing drivers already have the qp pointer ready in poll cq, so > this change actually saves a tiny bit (extra memory read) on data path > - We will be able to put NULL in there if some hardware does not support > reporting the qp number (it is optional in IB spec) - no such option with qpn > - Users that need the QP number can still get it through wc->qp->qp_num. IMO, if we can return the qp pointer rather than the qpn without additional cost, we should do so. - Sean From mst at mellanox.co.il Tue Jan 2 12:05:23 2007 From: mst at mellanox.co.il (Michael S.
Tsirkin) Date: Tue, 2 Jan 2007 22:05:23 +0200 Subject: [openib-general] outstanding patches Message-ID: <20070102200523.GA10451@mellanox.co.il> List of patches in OFED: http://git.openfabrics.org/git/?p=~vlad/ofed_1_2/.git;a=history;f=kernel_patches/fixes/kernel_patches/fixes;hb=HEAD Probably 2.6.20 material fix_query_qp_in_reset.patch ib_verbs_h_missing_kref.patch mthca_0_fmr_page_fix.patch set of patches fixing FMR on non-cache-coherent mthca_1_merge_mr_fmr_on_64bit.patch mthca_2_fast_registration.patch mthca_3_alloc_consistent.patch mthca_4_dma_align_reserved_mtts.patch sean_cm_limit_mra_timeout.patch Patches I'm less sure about mthca_wrid_swap.patch - safe, but very small benefit srp_1_recreate_at_reconnect.patch - this improves SRP spec compliance. Good idea? ib_wc_qpn_to_qp.patch - currently benefit is for IPoIB CM, but doing API updates early has its benefits. What do you think? Need more work ipoib_selector_updated.patch - improves IPoIB compliance -- MST From halr at voltaire.com Tue Jan 2 12:06:18 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 15:06:18 -0500 Subject: [openib-general] [PATCH] OpenSM: Change routine name from ib_gid_is_link_local to ib_gid_is_default_prefix Message-ID: <1167768377.4596.93960.camel@hal.voltaire.com> OpenSM: Change routine name from ib_gid_is_link_local to ib_gid_is_default_prefix Signed-off-by: Hal Rosenstock diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 573b294..90044a1 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -2154,18 +2154,18 @@ ib_gid_get_subnet_prefix( * ib_gid_t *********/ -/****f* IBA Base: Types/ib_gid_is_link_local +/****f* IBA Base: Types/ib_gid_is_default_prefix * NAME -* ib_gid_is_link_local +* ib_gid_is_default_prefix * * DESCRIPTION -* Returns TRUE if the unicast GID scoping indicates link local, +* Returns TRUE if the unicast GID prefix is the default prefix, * FALSE otherwise. 
* * SYNOPSIS */ static inline boolean_t OSM_API -ib_gid_is_link_local( +ib_gid_is_default_prefix( IN const ib_gid_t* const p_gid ) { return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); @@ -2176,7 +2176,7 @@ ib_gid_is_link_local( * [in] Pointer to the GID object. * * RETURN VALUES -* Returns TRUE if the unicast GID scoping indicates link local, +* Returns TRUE if the unicast GID prefix is the default prefix, * FALSE otherwise. * * NOTES diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 1d2d203..7ca44a6 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ b/osm/opensm/osm_sa_multipath_record.c @@ -1208,7 +1208,7 @@ __osm_mpr_rcv_get_gids( OSM_LOG_ENTER( p_rcv->p_log, __osm_mpr_rcv_get_gids ); for ( i = 0; i < ngids; i++, gids++ ) { - if ( !ib_gid_is_link_local ( gids ) ) { + if ( !ib_gid_is_default_prefix ( gids ) ) { if ( ( is_sgid && ib_gid_is_multicast( gids ) ) || ( ib_gid_get_subnet_prefix ( gids ) != p_rcv->p_subn->opt.subnet_prefix ) ) { /* diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index 0500a74..467144b 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -1166,7 +1166,7 @@ __osm_pr_rcv_get_end_points( if( comp_mask & IB_PR_COMPMASK_SGID ) { - if ( ! ib_gid_is_link_local ( &p_pr->sgid ) ) + if ( ! ib_gid_is_default_prefix ( &p_pr->sgid ) ) { if ( ib_gid_get_subnet_prefix ( &p_pr->sgid ) != p_rcv->p_subn->opt.subnet_prefix ) { @@ -1234,7 +1234,7 @@ __osm_pr_rcv_get_end_points( if( comp_mask & IB_PR_COMPMASK_DGID ) { - if ( ! ib_gid_is_link_local ( &p_pr->dgid ) ) + if ( ! ib_gid_is_default_prefix ( &p_pr->dgid ) ) { if ( ! 
ib_gid_is_multicast ( &p_pr->dgid ) && ib_gid_get_subnet_prefix ( &p_pr->dgid ) != p_rcv->p_subn->opt.subnet_prefix ) From halr at voltaire.com Tue Jan 2 13:42:23 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 16:42:23 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167757414.4596.83346.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> Message-ID: <1167774142.4596.99507.camel@hal.voltaire.com> On Tue, 2007-01-02 at 12:03, Hal Rosenstock wrote: > On Tue, 2007-01-02 at 10:25, Hal Rosenstock wrote: > > Hi Eitan, > > > > I started to look at changes for IB routers and the following in > > ib_types.h doesn't look quite right to me: > > > > static inline boolean_t OSM_API > > ib_gid_is_link_local( > > IN const ib_gid_t* const p_gid ) > > { > > return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); > > } > > > > Shouldn't it be either the default subnet prefix or the one supplied in > > PortInfo:GidPrefix (which might not be the default one) ? > > I propose changing the routine name to ib_gid_is_default_prefix so as > not to be misleading. Comments ? Patch to follow for this. What is confusing is the link local unicast GID definition in Figure 39. It says 54 bits of and is green (which I think means 0 as in Figure 40) on p. 144-145. So I think the code is correct as is... 
-- Hal > > -- Hal > > > -- Hal > > > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From halr at voltaire.com Tue Jan 2 13:42:30 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 16:42:30 -0500 Subject: [openib-general] [PATCH] OpenSM: Change routine name from ib_gid_is_link_local to ib_gid_is_default_prefix In-Reply-To: <1167768377.4596.93960.camel@hal.voltaire.com> References: <1167768377.4596.93960.camel@hal.voltaire.com> Message-ID: <1167774145.4596.99509.camel@hal.voltaire.com> On Tue, 2007-01-02 at 15:06, Hal Rosenstock wrote: > OpenSM: Change routine name from ib_gid_is_link_local to > ib_gid_is_default_prefix > > Signed-off-by: Hal Rosenstock See previous email on link local GID. -- Hal > diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h > index 573b294..90044a1 100644 > --- a/osm/include/iba/ib_types.h > +++ b/osm/include/iba/ib_types.h > @@ -2154,18 +2154,18 @@ ib_gid_get_subnet_prefix( > * ib_gid_t > *********/ > > -/****f* IBA Base: Types/ib_gid_is_link_local > +/****f* IBA Base: Types/ib_gid_is_default_prefix > * NAME > -* ib_gid_is_link_local > +* ib_gid_is_default_prefix > * > * DESCRIPTION > -* Returns TRUE if the unicast GID scoping indicates link local, > +* Returns TRUE if the unicast GID prefix is the default prefix, > * FALSE otherwise. > * > * SYNOPSIS > */ > static inline boolean_t OSM_API > -ib_gid_is_link_local( > +ib_gid_is_default_prefix( > IN const ib_gid_t* const p_gid ) > { > return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); > @@ -2176,7 +2176,7 @@ ib_gid_is_link_local( > * [in] Pointer to the GID object. 
> * > * RETURN VALUES > -* Returns TRUE if the unicast GID scoping indicates link local, > +* Returns TRUE if the unicast GID prefix is the default prefix, > * FALSE otherwise. > * > * NOTES > diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c > index 1d2d203..7ca44a6 100644 > --- a/osm/opensm/osm_sa_multipath_record.c > +++ b/osm/opensm/osm_sa_multipath_record.c > @@ -1208,7 +1208,7 @@ __osm_mpr_rcv_get_gids( > OSM_LOG_ENTER( p_rcv->p_log, __osm_mpr_rcv_get_gids ); > > for ( i = 0; i < ngids; i++, gids++ ) { > - if ( !ib_gid_is_link_local ( gids ) ) { > + if ( !ib_gid_is_default_prefix ( gids ) ) { > if ( ( is_sgid && ib_gid_is_multicast( gids ) ) || > ( ib_gid_get_subnet_prefix ( gids ) != p_rcv->p_subn->opt.subnet_prefix ) ) { > /* > diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c > index 0500a74..467144b 100644 > --- a/osm/opensm/osm_sa_path_record.c > +++ b/osm/opensm/osm_sa_path_record.c > @@ -1166,7 +1166,7 @@ __osm_pr_rcv_get_end_points( > > if( comp_mask & IB_PR_COMPMASK_SGID ) > { > - if ( ! ib_gid_is_link_local ( &p_pr->sgid ) ) > + if ( ! ib_gid_is_default_prefix ( &p_pr->sgid ) ) > { > if ( ib_gid_get_subnet_prefix ( &p_pr->sgid ) != p_rcv->p_subn->opt.subnet_prefix ) > { > @@ -1234,7 +1234,7 @@ __osm_pr_rcv_get_end_points( > > if( comp_mask & IB_PR_COMPMASK_DGID ) > { > - if ( ! ib_gid_is_link_local ( &p_pr->dgid ) ) > + if ( ! ib_gid_is_default_prefix ( &p_pr->dgid ) ) > { > if ( ! 
ib_gid_is_multicast ( &p_pr->dgid ) && > ib_gid_get_subnet_prefix ( &p_pr->dgid ) != p_rcv->p_subn->opt.subnet_prefix ) > > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From changquing.tang at hp.com Tue Jan 2 13:46:15 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 2 Jan 2007 15:46:15 -0600 Subject: [openib-general] APM support in openib stack In-Reply-To: <452E8D77.8010600@veritas.com> Message-ID: Hi, I don't see function 'ib_sa_path_rec_get()' anywhere in the trunk, Is this a new function to be checked in ? Thanks. --CQ > > thanks for ur reply. > I use > ib_sa_path_rec_get( > device, > HCA_PRM_PORT, /* first port =1, second > port=2 */ > &path_rec, > IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | > IB_SA_PATH_REC_NUMB_PATH > | IB_SA_PATH_REC_PKEY, > 5000, > GFP_KERNEL, > func_completion, > context, > &query) > > to get the primary path, and make the same call with > HCA_ALT_PORT (=2) to get the alternate path. > > primary path has the source and destination gid for the HCA > port 1, alternate path the source and destination gid for the > HCA port 2. > > using these two paths, I send the REQ, otherwise gets the REQ... > (I can dump the primary and alternate path received in req > handler to check everything is ok, will try that next..) > > do you remember when you checked in the working code? > I am wondering if the RHEL4 U4 binary distrition of redhat > has your changes. > > thanks, som. 
> > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From halr at voltaire.com Tue Jan 2 14:05:49 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 17:05:49 -0500 Subject: [openib-general] [PATCH 0/3] OpenSM: Add base support for IB router object Message-ID: <1167775546.4596.100840.camel@hal.voltaire.com> OpenSM: Add base support for IB router object Signed-off-by: Hal Rosenstock From halr at voltaire.com Tue Jan 2 14:05:57 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 17:05:57 -0500 Subject: [openib-general] [PATCH 1/3] OpenSM/osm_router.(h c): Add base support for IB router object Message-ID: <1167775550.4596.100842.camel@hal.voltaire.com> OpenSM/osm_router.(h c): Add base support for IB router object Signed-off-by: Hal Rosenstock diff --git a/osm/include/opensm/osm_router.h b/osm/include/opensm/osm_router.h new file mode 100644 index 0000000..168ce77 --- /dev/null +++ b/osm/include/opensm/osm_router.h @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_router_t. + * This object represents an IBA router. + * This object is part of the OpenSM family of objects. + * + * Environment: + * Linux User Mode + * + */ + +#ifndef _OSM_ROUTER_H_ +#define _OSM_ROUTER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +/****h* OpenSM/Router +* NAME +* Router +* +* DESCRIPTION +* The Router object encapsulates the information needed by the +* OpenSM to manage routers. The OpenSM allocates one router object +* per router in the IBA subnet. +* +* The Router object is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Hal Rosenstock, Voltaire +* +*********/ + +/****s* OpenSM: Router/osm_router_t +* NAME +* osm_router_t +* +* DESCRIPTION +* Router structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. 
+* +* SYNOPSIS +*/ +typedef struct _osm_router +{ + cl_map_item_t map_item; + osm_port_t *p_port; +} osm_router_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* p_port +* Pointer to the Port object for this router. +* +* SEE ALSO +* Router object +*********/ + +/****f* OpenSM: Router/osm_router_construct +* NAME +* osm_router_construct +* +* DESCRIPTION +* This function constructs a Router object. +* +* SYNOPSIS +*/ +void +osm_router_construct( + IN osm_router_t* const p_rtr ); +/* +* PARAMETERS +* p_rtr +* [in] Pointer to a Router object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_router_init, and osm_router_destroy. +* +* Calling osm_router_construct is a prerequisite to calling any other +* method except osm_router_init. +* +* SEE ALSO +* Router object, osm_router_init, osm_router_destroy +*********/ + +/****f* OpenSM: Router/osm_router_destroy +* NAME +* osm_router_destroy +* +* DESCRIPTION +* The osm_router_destroy function destroys the object, releasing +* all resources. +* +* SYNOPSIS +*/ +void +osm_router_destroy( + IN osm_router_t* const p_rtr ); +/* +* PARAMETERS +* p_rtr +* [in] Pointer to the object to destroy. +* +* RETURN VALUE +* None. +* +* NOTES +* Performs any necessary cleanup of the specified object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_router_construct +* or osm_router_init. +* +* SEE ALSO +* Router object, osm_router_construct, osm_router_init +*********/ + +/****f* OpenSM: Router/osm_router_delete +* NAME +* osm_router_delete +* +* DESCRIPTION +* Destroys and deallocates the object. +* +* SYNOPSIS +*/ +void +osm_router_delete( + IN OUT osm_router_t** const pp_rtr ); +/* +* PARAMETERS +* pp_rtr +* [in][out] Pointer to the pointer to the object to destroy. +* +* RETURN VALUE +* None.
+* +* NOTES +* +* SEE ALSO +* Router object, osm_router_construct, osm_router_init +*********/ + +/****f* OpenSM: Router/osm_router_init +* NAME +* osm_router_init +* +* DESCRIPTION +* The osm_router_init function initializes a Router object for use. +* +* SYNOPSIS +*/ +ib_api_status_t +osm_router_init( + IN osm_router_t* const p_rtr, + IN osm_port_t* const p_port ); +/* +* PARAMETERS +* p_rtr +* [in] Pointer to an osm_router_t object to initialize. +* +* p_port +* [in] Pointer to the port object of this router +* +* RETURN VALUES +* IB_SUCCESS if the Router object was initialized successfully. +* +* NOTES +* Allows calling other node methods. +* +* SEE ALSO +* Router object, osm_router_construct, osm_router_destroy +*********/ + +/****f* OpenSM: Router/osm_router_new +* NAME +* osm_router_new +* +* DESCRIPTION +* The osm_router_new function allocates and initializes a Router object for use. +* +* SYNOPSIS +*/ +osm_router_t* +osm_router_new( + IN osm_port_t* const p_port ); +/* +* PARAMETERS +* p_port +* [in] Pointer to the port object of this router +* +* RETURN VALUES +* Pointer to the new initialized router object. +* +* NOTES +* +* SEE ALSO +* Router object, osm_router_construct, osm_router_destroy, +*********/ + +/****f* OpenSM: Router/osm_router_get_port_ptr +* NAME +* osm_router_get_port_ptr +* +* DESCRIPTION +* Returns a pointer to the Port object for this router.
+* +* SYNOPSIS +*/ +static inline osm_node_t* +osm_router_get_node_ptr( + IN const osm_router_t* const p_rtr ) +{ + return( p_rtr->p_port->p_node ); +} +/* +* PARAMETERS +* p_rtr +* [in] Pointer to an osm_router_t object. +* +* RETURN VALUES +* Returns a pointer to the Node object for this router. +* +* NOTES +* +* SEE ALSO +* Router object +*********/ + +END_C_DECLS + +#endif /* _OSM_ROUTER_H_ */ diff --git a/osm/opensm/osm_router.c b/osm/opensm/osm_router.c new file mode 100644 index 0000000..4b6470c --- /dev/null +++ b/osm/opensm/osm_router.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_router_t. + * This object represents an Infiniband router. + * This object is part of the opensm family of objects. + * + * Environment: + * Linux User Mode + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include + +/********************************************************************** + **********************************************************************/ +void +osm_router_construct( + IN osm_router_t* const p_rtr ) +{ + CL_ASSERT( p_rtr ); + memset( p_rtr, 0, sizeof(*p_rtr) ); +} + +/********************************************************************** + **********************************************************************/ +ib_api_status_t +osm_router_init( + IN osm_router_t* const p_rtr, + IN osm_port_t* const p_port ) +{ + ib_api_status_t status = IB_SUCCESS; + + CL_ASSERT( p_rtr ); + CL_ASSERT( p_port ); + + osm_router_construct( p_rtr ); + + p_rtr->p_port = p_port; + + return( status ); +} + +/********************************************************************** + **********************************************************************/ +void +osm_router_destroy( + IN osm_router_t* const p_rtr ) +{ +} + +/********************************************************************** + **********************************************************************/ +void +osm_router_delete( + IN OUT osm_router_t** const pp_rtr ) +{ + osm_router_destroy( *pp_rtr ); + free( *pp_rtr ); + *pp_rtr = NULL; +} + +/********************************************************************** + **********************************************************************/ +osm_router_t* +osm_router_new( + 
IN osm_port_t* const p_port ) +{ + ib_api_status_t status; + osm_router_t *p_rtr; + + p_rtr = (osm_router_t*)malloc( sizeof(*p_rtr) ); + if( p_rtr ) + { + memset( p_rtr, 0, sizeof(*p_rtr) ); + status = osm_router_init( p_rtr, p_port ); + if( status != IB_SUCCESS ) + osm_router_delete( &p_rtr ); + } + + return( p_rtr ); +} From halr at voltaire.com Tue Jan 2 14:06:04 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 17:06:04 -0500 Subject: [openib-general] [PATCH 2/3] OpenSM: Add osm_router.h to includes and osm_router.c to compile Message-ID: <1167775552.4596.100844.camel@hal.voltaire.com> OpenSM: Add osm_router.h to includes and osm_router.c to compile Signed-off-by: Hal Rosenstock diff --git a/osm/include/Makefile.am b/osm/include/Makefile.am index ea8ab10..d6bdd84 100644 --- a/osm/include/Makefile.am +++ b/osm/include/Makefile.am @@ -54,6 +54,7 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_lin_fwd_rcv.h \ $(srcdir)/opensm/osm_service.h \ $(srcdir)/opensm/osm_switch.h \ + $(srcdir)/opensm/osm_router.h \ $(srcdir)/opensm/osm_sa_slvl_record.h \ $(srcdir)/opensm/osm_opensm.h \ $(srcdir)/opensm/osm_sa.h \ diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am index 8f42387..dd6dbae 100644 --- a/osm/opensm/Makefile.am +++ b/osm/opensm/Makefile.am @@ -66,7 +66,7 @@ opensm_SOURCES = main.c osm_console.c os osm_state_mgr_ctrl.c osm_subnet.c \ osm_sweep_fail_ctrl.c osm_sw_info_rcv.c \ osm_sw_info_rcv_ctrl.c osm_switch.c \ - osm_prtn.c osm_prtn_config.c osm_qos.c \ + osm_prtn.c osm_prtn_config.c osm_qos.c osm_router.c \ osm_trap_rcv.c osm_trap_rcv_ctrl.c \ osm_ucast_mgr.c osm_ucast_updn.c \ osm_ucast_file.c osm_ucast_ftree.c \ From halr at voltaire.com Tue Jan 2 14:06:16 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 17:06:16 -0500 Subject: [openib-general] [PATCH 3/3] OpenSM: Utilize base router object Message-ID: <1167775556.4596.100846.camel@hal.voltaire.com> OpenSM: Utilize base router object In osm_node_info_rcv.c and 
osm_drop_mgr.c, utilize base router object Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_drop_mgr.c b/osm/opensm/osm_drop_mgr.c index 5031f3b..ca5b8ac 100644 --- a/osm/opensm/osm_drop_mgr.c +++ b/osm/opensm/osm_drop_mgr.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -117,6 +118,28 @@ osm_drop_mgr_init( /********************************************************************** **********************************************************************/ static void +__osm_drop_mgr_remove_router( + IN const osm_drop_mgr_t* const p_mgr, + IN const ib_net64_t portguid ) +{ + osm_router_t *p_rtr; + cl_qmap_t* p_rtr_guid_tbl; + + p_rtr_guid_tbl = &p_mgr->p_subn->rtr_guid_tbl; + p_rtr = (osm_router_t*)cl_qmap_remove( p_rtr_guid_tbl, portguid ); + if( p_rtr != (osm_router_t*)cl_qmap_end( p_rtr_guid_tbl ) ) + { + osm_log( p_mgr->p_log, OSM_LOG_VERBOSE, + "__osm_drop_mgr_remove_router: " + "Cleaned router for port guid 0x%016" PRIx64 "\n", + cl_ntoh64( portguid ) ); + osm_router_delete( &p_rtr ); + } +} + +/********************************************************************** + **********************************************************************/ +static void __osm_drop_mgr_remove_port( IN const osm_drop_mgr_t* const p_mgr, IN osm_port_t* p_port ) @@ -195,6 +218,8 @@ __osm_drop_mgr_remove_port( free(p_sm); } + __osm_drop_mgr_remove_router( p_mgr, port_guid ); + osm_port_get_lid_range_ho( p_port, &min_lid_ho, &max_lid_ho ); osm_log( p_mgr->p_log, OSM_LOG_VERBOSE, diff --git a/osm/opensm/osm_node_info_rcv.c b/osm/opensm/osm_node_info_rcv.c index ebec592..11b7cb8 100644 --- a/osm/opensm/osm_node_info_rcv.c +++ b/osm/opensm/osm_node_info_rcv.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -713,8 +714,11 @@ __osm_ni_rcv_process_new( osm_node_t *p_node_check; osm_port_t *p_port; osm_port_t *p_port_check; + osm_router_t *p_rtr = NULL; + osm_router_t *p_rtr_check; cl_qmap_t *p_node_guid_tbl; 
cl_qmap_t *p_port_guid_tbl; + cl_qmap_t *p_rtr_guid_tbl; ib_node_info_t *p_ni; ib_smp_t *p_smp; osm_ni_context_t *p_ni_context; @@ -761,6 +765,19 @@ __osm_ni_rcv_process_new( goto Exit; } + /* If there were RouterInfo or other router attribute, + this would be elsewhere */ + if ( p_ni->node_type == IB_NODE_TYPE_ROUTER ) + { + p_rtr = osm_router_new( p_port ); + if ( p_rtr == NULL ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_ni_rcv_process_new: ERR 0D1A: " + "Unable to create new router object\n" ); + } + } + /* Add the new port object to the database. */ @@ -786,6 +803,8 @@ __osm_ni_rcv_process_new( osm_physp_get_dr_path_ptr( osm_port_get_default_phys_ptr ( p_port_check) ), OSM_LOG_ERROR); + if ( p_rtr ) + osm_router_delete( &p_rtr ); osm_port_delete( &p_port ); osm_node_delete( &p_node ); goto Exit; @@ -807,6 +826,8 @@ __osm_ni_rcv_process_new( "__osm_ni_rcv_process_new: ERR 0D05: " "Error %s adding to new_ports_list\n", CL_STATUS_MSG( status ) ); + if ( p_rtr ) + osm_router_delete( &p_rtr ); osm_port_delete( &p_port ); osm_node_delete( &p_node ); goto Exit; @@ -820,6 +841,21 @@ __osm_ni_rcv_process_new( } } + if ( p_rtr && p_ni->node_type == IB_NODE_TYPE_ROUTER ) + { + p_rtr_guid_tbl = &p_rcv->p_subn->rtr_guid_tbl; + p_rtr_check = (osm_router_t*)cl_qmap_insert( p_rtr_guid_tbl, + p_ni->port_guid, + &p_rtr->map_item ); + if( p_rtr_check != p_rtr ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_ni_rcv_process_new: ERR 0D1B: " + "Unable to add port GUID:0x%016" PRIx64 " to router table\n", + cl_ntoh64( p_ni->port_guid ) ); + } + } + p_node_guid_tbl = &p_rcv->p_subn->node_guid_tbl; p_node_check = (osm_node_t*)cl_qmap_insert( p_node_guid_tbl, p_ni->node_guid, From changquing.tang at hp.com Tue Jan 2 14:11:55 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 2 Jan 2007 16:11:55 -0600 Subject: [openib-general] APM support in openib stack In-Reply-To: Message-ID: Sorry, I find the function 'ib_sa_path_rec_get()' in kernel code. 
Then here is my question: Is there any way (instruction) to fill in struct 'ib_sa_path_rec' inside struct 'ib_cm_req_param' without using librdmacm.so ? Thank you very much. --CQ > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Tang, > Changqing > Sent: Tuesday, January 02, 2007 3:46 PM > To: somenath; Sean Hefty > Cc: openib-general at openib.org > Subject: Re: [openib-general] APM support in openib stack > > > Hi, > I don't see function 'ib_sa_path_rec_get()' anywhere in > the trunk, Is this a new function to be checked in ? > > Thanks. > > --CQ > > > > > > thanks for ur reply. > > I use > > ib_sa_path_rec_get( > > device, > > HCA_PRM_PORT, /* first port > =1, second > > port=2 */ > > &path_rec, > > IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | > > IB_SA_PATH_REC_NUMB_PATH > > | IB_SA_PATH_REC_PKEY, > > 5000, > > GFP_KERNEL, > > func_completion, > > context, > > &query) > > > > to get the primary path, and make the same call with > HCA_ALT_PORT (=2) > > to get the alternate path. > > > > primary path has the source and destination gid for the HCA port 1, > > alternate path the source and destination gid for the HCA port 2. > > > > using these two paths, I send the REQ, otherwise gets the REQ... > > (I can dump the primary and alternate path received in req > handler to > > check everything is ok, will try that next..) > > > > do you remember when you checked in the working code? > > I am wondering if the RHEL4 U4 binary distrition of redhat has your > > changes. > > > > thanks, som. 
> > > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From sashak at voltaire.com Tue Jan 2 14:35:01 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 3 Jan 2007 00:35:01 +0200 Subject: [openib-general] [PATCH] opensm: switch pointer field for osm_node_t structure Message-ID: <20070102223501.GG32677@sashak.voltaire.com> This introduces new field for osm_node_t structure. For switch nodes this field keeps pointer to corresponded osm_switch_t object, for non switch nodes it is initialized as NULL. This new field is useful for fast switch object access and for node type detection. 
Signed-off-by: Sasha Khapyorsky --- osm/include/opensm/osm_node.h | 20 +++++++++---- osm/opensm/osm_drop_mgr.c | 23 ++------------- osm/opensm/osm_lid_mgr.c | 51 ++++++--------------------------- osm/opensm/osm_link_mgr.c | 6 +-- osm/opensm/osm_mcast_mgr.c | 9 ++---- osm/opensm/osm_node_info_rcv.c | 9 +---- osm/opensm/osm_pkey_mgr.c | 17 ++--------- osm/opensm/osm_port.c | 17 +---------- osm/opensm/osm_port_info_rcv.c | 34 ++++++---------------- osm/opensm/osm_qos.c | 15 +++------ osm/opensm/osm_sa_multipath_record.c | 46 ++++-------------------------- osm/opensm/osm_sa_path_record.c | 48 ++++--------------------------- osm/opensm/osm_sa_portinfo_record.c | 11 ++----- osm/opensm/osm_sw_info_rcv.c | 12 +++---- osm/opensm/osm_ucast_mgr.c | 25 ++-------------- osm/opensm/osm_ucast_updn.c | 19 ++++-------- 16 files changed, 85 insertions(+), 277 deletions(-) diff --git a/osm/include/opensm/osm_node.h b/osm/include/opensm/osm_node.h index d595abb..8417f10 100644 --- a/osm/include/opensm/osm_node.h +++ b/osm/include/opensm/osm_node.h @@ -65,6 +65,8 @@ BEGIN_C_DECLS +struct _osm_switch; + /****h* OpenSM/Node * NAME * Node @@ -99,18 +101,24 @@ BEGIN_C_DECLS */ typedef struct _osm_node { - cl_map_item_t map_item; - ib_node_info_t node_info; - ib_node_desc_t node_desc; - uint32_t discovery_count; - uint32_t physp_tbl_size; - osm_physp_t physp_table[1]; + cl_map_item_t map_item; + struct _osm_switch *sw; + ib_node_info_t node_info; + ib_node_desc_t node_desc; + uint32_t discovery_count; + uint32_t physp_tbl_size; + osm_physp_t physp_table[1]; } osm_node_t; /* * FIELDS * map_item * Linkage structure for cl_qmap. MUST BE FIRST MEMBER! * +* sw +* For switch node contains pointer to appropriate osm_switch +* structure. NULL for non-switch nodes. Can be used for fast +* access to switch object and for simple node type detection +* * node_info * The IBA defined NodeInfo data for this node. 
* diff --git a/osm/opensm/osm_drop_mgr.c b/osm/opensm/osm_drop_mgr.c index 5031f3b..8f605a7 100644 --- a/osm/opensm/osm_drop_mgr.c +++ b/osm/opensm/osm_drop_mgr.c @@ -387,6 +387,7 @@ __osm_drop_mgr_remove_switch( } else { + p_node->sw = NULL; osm_switch_delete( &p_sw ); } @@ -440,24 +441,8 @@ __osm_drop_mgr_process_node( return_val = TRUE; - switch( osm_node_get_type( p_node ) ) - { - case IB_NODE_TYPE_CA: - case IB_NODE_TYPE_ROUTER: - break; - - case IB_NODE_TYPE_SWITCH: + if (p_node->sw) __osm_drop_mgr_remove_switch( p_mgr, p_node ); - break; - - default: - osm_log( p_mgr->p_log, OSM_LOG_ERROR, - "__osm_drop_mgr_process_node: ERR 0104: " - "Node 0x%016" PRIx64 " unknown node type: %u\n", - cl_ntoh64( osm_node_get_node_guid( p_node ) ), - osm_node_get_type( p_node ) ); - break; - } p_node_guid_tbl = &p_mgr->p_subn->node_guid_tbl; p_node_check = (osm_node_t*)cl_qmap_remove( p_node_guid_tbl, @@ -484,7 +469,6 @@ __osm_drop_mgr_check_node( IN const osm_drop_mgr_t* const p_mgr, IN osm_node_t* p_node ) { - osm_switch_t *p_sw; ib_net64_t node_guid; osm_physp_t *p_physp; osm_port_t *p_port; @@ -505,8 +489,7 @@ __osm_drop_mgr_check_node( } /* Make sure we have a switch object for this node */ - p_sw = osm_get_switch_by_guid( p_mgr->p_subn, node_guid ); - if (!p_sw) + if (!p_node->sw) { /* We do not have switch info for this node */ osm_log( p_mgr->p_log, OSM_LOG_VERBOSE, diff --git a/osm/opensm/osm_lid_mgr.c b/osm/opensm/osm_lid_mgr.c index 3f0b131..aba243a 100644 --- a/osm/opensm/osm_lid_mgr.c +++ b/osm/opensm/osm_lid_mgr.c @@ -361,7 +361,6 @@ __osm_lid_mgr_init_sweep( osm_lid_mgr_range_t *p_range = NULL; osm_port_t *p_port; cl_qmap_t *p_port_guid_tbl; - osm_switch_t *p_sw; uint8_t lmc_num_lids = (uint8_t)(1 << p_mgr->p_subn->opt.lmc); uint16_t lmc_mask; uint16_t req_lid, num_lids; @@ -445,19 +444,11 @@ __osm_lid_mgr_init_sweep( cl_ntoh64(osm_port_get_guid(p_port)), &db_min_lid, &db_max_lid)) { - if ( osm_node_get_type( osm_port_get_parent_node( p_port ) ) != - 
IB_NODE_TYPE_SWITCH ) + if ( !p_port->p_node->sw || + osm_switch_sp0_is_lmc_capable(p_port->p_node->sw, p_mgr->p_subn)) num_lids = lmc_num_lids; else - { - /* Determine if enhanced switch port 0 */ - p_sw = osm_get_switch_by_guid(p_mgr->p_subn, - osm_node_get_node_guid(osm_port_get_parent_node(p_port))); - if (osm_switch_sp0_is_lmc_capable(p_sw, p_mgr->p_subn)) - num_lids = lmc_num_lids; - else - num_lids = 1; - } + num_lids = 1; if ((num_lids != 1) && (((db_min_lid & lmc_mask) != db_min_lid) || @@ -552,27 +543,14 @@ __osm_lid_mgr_init_sweep( /* get the lid range of that port, and the required number of lids we are about to assign to it */ osm_port_get_lid_range_ho(p_port, &disc_min_lid, &disc_max_lid); - if ( osm_node_get_type( osm_port_get_parent_node( p_port ) ) != - IB_NODE_TYPE_SWITCH ) + if ( !p_port->p_node->sw || + osm_switch_sp0_is_lmc_capable(p_port->p_node->sw, p_mgr->p_subn)) { disc_max_lid = disc_min_lid + lmc_num_lids - 1; num_lids = lmc_num_lids; } else - { - /* Determine if enhanced switch port 0 */ - p_sw = osm_get_switch_by_guid(p_mgr->p_subn, - osm_node_get_node_guid(osm_port_get_parent_node(p_port))); - if (osm_switch_sp0_is_lmc_capable(p_sw, p_mgr->p_subn)) - { - disc_max_lid = disc_min_lid + lmc_num_lids - 1; - num_lids = lmc_num_lids; - } - else - { - num_lids = 1; - } - } + num_lids = 1; /* Make sure the lid is aligned */ if ((num_lids != 1) && ((disc_min_lid & lmc_mask) != disc_min_lid)) @@ -832,7 +810,6 @@ __osm_lid_mgr_get_port_lid( uint8_t num_lids = (1 << p_mgr->p_subn->opt.lmc); int lid_changed = 0; uint16_t lmc_mask; - osm_switch_t *p_sw; OSM_LOG_ENTER( p_mgr->p_log, __osm_lid_mgr_get_port_lid ); @@ -845,15 +822,9 @@ __osm_lid_mgr_get_port_lid( guid = cl_ntoh64( osm_port_get_guid( p_port ) ); /* if the port is a base switch port 0 then we only need one lid */ - if( osm_node_get_type( osm_port_get_parent_node( p_port ) ) == - IB_NODE_TYPE_SWITCH ) - { - /* Determine if base switch port 0 */ - p_sw = 
osm_get_switch_by_guid(p_mgr->p_subn, - osm_node_get_node_guid(osm_port_get_parent_node(p_port))); - if (!osm_switch_sp0_is_lmc_capable(p_sw, p_mgr->p_subn)) - num_lids = 1; - } + if( p_port->p_node->sw && + !osm_switch_sp0_is_lmc_capable(p_port->p_node->sw, p_mgr->p_subn)) + num_lids = 1; /* if the port matches the guid2lid */ if (!osm_db_guid2lid_get( p_mgr->p_g2l, guid, &min_lid, &max_lid)) @@ -995,7 +966,6 @@ __osm_lid_mgr_set_physp_pi( const ib_port_info_t* p_old_pi; osm_madw_context_t context; osm_node_t* p_node; - osm_switch_t* p_sw; ib_api_status_t status; uint8_t mtu; uint8_t op_vls; @@ -1236,8 +1206,7 @@ __osm_lid_mgr_set_physp_pi( ib_port_info_get_neighbor_mtu( p_pi ) ); /* Determine if enhanced switch port 0 and if so set LMC */ - p_sw = osm_get_switch_by_guid( p_mgr->p_subn, p_node->node_info.node_guid ); - if (osm_switch_sp0_is_lmc_capable(p_sw, p_mgr->p_subn)) + if (osm_switch_sp0_is_lmc_capable(p_node->sw, p_mgr->p_subn)) { /* M_KeyProtectBits are always zero */ p_pi->mkey_lmc = p_mgr->p_subn->opt.lmc; diff --git a/osm/opensm/osm_link_mgr.c b/osm/opensm/osm_link_mgr.c index b696d91..0f15f2b 100644 --- a/osm/opensm/osm_link_mgr.c +++ b/osm/opensm/osm_link_mgr.c @@ -140,7 +140,6 @@ __osm_link_mgr_set_physp_pi( if( port_num == 0 ) { - osm_switch_t *p_switch; ib_switch_info_t* p_sw_info; /* @@ -148,8 +147,7 @@ __osm_link_mgr_set_physp_pi( we need to check if this is enhanced port 0 or base port 0. For base port 0 the following parameters are not valid. (p824, table 145) */ - p_switch = osm_get_switch_by_guid( p_mgr->p_subn, p_node->node_info.node_guid ); - if (! p_switch ) + if (!p_node->sw) { osm_log( p_mgr->p_log, OSM_LOG_ERROR, "__osm_link_mgr_set_physp_pi: ERR 4201: " @@ -158,7 +156,7 @@ __osm_link_mgr_set_physp_pi( goto Exit; } - p_sw_info = osm_switch_get_si_ptr( p_switch ); + p_sw_info = osm_switch_get_si_ptr(p_node->sw); if (ib_switch_info_is_enhanced_port0( p_sw_info ) == FALSE) { /* This means the switch doesn't support enhanced port zero. 
diff --git a/osm/opensm/osm_mcast_mgr.c b/osm/opensm/osm_mcast_mgr.c index aab9c01..d5aa021 100644 --- a/osm/opensm/osm_mcast_mgr.c +++ b/osm/opensm/osm_mcast_mgr.c @@ -789,7 +789,6 @@ __osm_mcast_mgr_branch( const osm_physp_t *p_remote_physp; const osm_node_t *p_node; const osm_node_t *p_remote_node; - osm_switch_t *p_remote_sw; p_port_list = &list_array[i]; @@ -832,9 +831,7 @@ __osm_mcast_mgr_branch( /* Acquire a pointer to the remote switch then recurse. */ - p_remote_sw = osm_get_switch_by_guid(p_mgr->p_subn, - osm_node_get_node_guid( p_remote_node ) ); - CL_ASSERT( p_remote_sw ); + CL_ASSERT( p_remote_node->sw ); p_physp = osm_node_get_physp_ptr( p_node, i ); CL_ASSERT( p_physp ); @@ -845,7 +842,7 @@ __osm_mcast_mgr_branch( CL_ASSERT( osm_physp_is_valid( p_remote_physp ) ); p_mtn->child_array[i] = __osm_mcast_mgr_branch( - p_mgr, p_mgrp, p_remote_sw, + p_mgr, p_mgrp, p_remote_node->sw, p_port_list, depth, osm_physp_get_port_num( p_remote_physp), p_max_depth ); @@ -1209,7 +1206,7 @@ osm_mcast_mgr_process_single( goto Exit; } - p_sw = osm_get_switch_by_guid( p_mgr->p_subn, sw_guid ); + p_sw = p_remote_node->sw; if( !p_sw ) { osm_log( p_mgr->p_log, OSM_LOG_ERROR, diff --git a/osm/opensm/osm_node_info_rcv.c b/osm/opensm/osm_node_info_rcv.c index ebec592..23c3b18 100644 --- a/osm/opensm/osm_node_info_rcv.c +++ b/osm/opensm/osm_node_info_rcv.c @@ -641,9 +641,6 @@ __osm_ni_rcv_process_existing_switch( IN osm_node_t* const p_node, IN const osm_madw_t* const p_madw ) { - ib_net64_t node_guid; - osm_switch_t *p_sw; - OSM_LOG_ENTER( p_rcv->p_log, __osm_ni_rcv_process_existing_switch ); /* @@ -659,15 +656,13 @@ __osm_ni_rcv_process_existing_switch( else { /* Make sure we have SwitchInfo on this node */ - node_guid = osm_node_get_node_guid( p_node ); - p_sw = osm_get_switch_by_guid( p_rcv->p_subn, node_guid ); - if( !p_sw || osm_switch_discovery_count_get( p_sw ) == 0 ) + if( !p_node->sw || osm_switch_discovery_count_get( p_node->sw ) == 0 ) { /* we don't have the 
SwitchInfo - retry to get it */ osm_log( p_rcv->p_log, OSM_LOG_DEBUG, "__osm_ni_rcv_process_existing_switch: " "Retry to get SwitchInfo on node GUID:0x%" - PRIx64 "\n", cl_ntoh64(node_guid) ); + PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node)) ); __osm_ni_rcv_process_switch( p_rcv, p_node, p_madw ); } } diff --git a/osm/opensm/osm_pkey_mgr.c b/osm/opensm/osm_pkey_mgr.c index 7502933..9e3e35a 100644 --- a/osm/opensm/osm_pkey_mgr.c +++ b/osm/opensm/osm_pkey_mgr.c @@ -72,18 +72,13 @@ pkey_mgr_get_physp_max_blocks( IN const osm_physp_t *p_physp ) { osm_node_t *p_node = osm_physp_get_node_ptr( p_physp ); - osm_switch_t *p_sw; uint16_t num_pkeys = 0; - if ( ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) || + if ( !p_node->sw || ( osm_physp_get_port_num( p_physp ) == 0 ) ) num_pkeys = cl_ntoh16( p_node->node_info.partition_cap ); else - { - p_sw = osm_get_switch_by_guid( p_subn, osm_node_get_node_guid( p_node ) ); - if (p_sw) - num_pkeys = cl_ntoh16( p_sw->switch_info.enforce_cap ); - } + num_pkeys = cl_ntoh16( p_node->sw->switch_info.enforce_cap ); return((num_pkeys + 31) / 32); } @@ -455,8 +450,6 @@ pkey_mgr_update_peer_port( ib_pkey_table_t *block, *peer_block; const osm_pkey_tbl_t *p_pkey_tbl; osm_pkey_tbl_t *p_peer_pkey_tbl; - osm_switch_t *p_sw; - ib_switch_info_t *p_si; uint16_t block_index; uint16_t num_of_blocks; uint16_t peer_max_blocks; @@ -474,11 +467,7 @@ pkey_mgr_update_peer_port( if ( !peer || !osm_physp_is_valid( peer ) ) return FALSE; p_node = osm_physp_get_node_ptr( peer ); - if ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) - return FALSE; - - p_sw = osm_get_switch_by_guid( p_subn, osm_node_get_node_guid( p_node ) ); - if (!p_sw || !(p_si = osm_switch_get_si_ptr( p_sw )) || !p_si->enforce_cap) + if ( !p_node->sw || !p_node->sw->switch_info.enforce_cap ) return FALSE; p_pkey_tbl = osm_physp_get_pkey_tbl( p_physp ); diff --git a/osm/opensm/osm_port.c b/osm/opensm/osm_port.c index 4f51e9d..2f427d2 100644 --- a/osm/opensm/osm_port.c 
+++ b/osm/opensm/osm_port.c @@ -891,7 +891,6 @@ osm_physp_set_pkey_tbl( IN uint16_t block_num ) { uint16_t max_blocks; - osm_switch_t* p_switch; CL_ASSERT( p_pkey_tbl ); CL_ASSERT( osm_physp_is_valid( p_physp ) ); @@ -900,8 +899,7 @@ osm_physp_set_pkey_tbl( limited by the size of the P_Key table specified by the PartitionCap on the node. */ - if (osm_node_get_type( p_physp->p_node ) != IB_NODE_TYPE_SWITCH || - p_physp->port_num == 0 ) + if (!p_physp->p_node->sw || p_physp->port_num == 0 ) { /* The maximum blocks is defined in the node info: partition cap for CA, @@ -917,19 +915,8 @@ osm_physp_set_pkey_tbl( This is a switch, and not a management port. The maximum blocks is defined in the switch info: partition enforcement cap. */ - p_switch = - osm_get_switch_by_guid(p_subn, p_physp->p_node->node_info.node_guid); - if (! p_switch) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_physp_set_pkey_tbl: ERR 4107: " - "Cannot find switch by guid: %" PRIx64 "\n", - cl_ntoh64(p_physp->p_node->node_info.node_guid) ); - return; - } - max_blocks = - (cl_ntoh16(p_switch->switch_info.enforce_cap) + + (cl_ntoh16(p_physp->p_node->sw->switch_info.enforce_cap) + IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; } diff --git a/osm/opensm/osm_port_info_rcv.c b/osm/opensm/osm_port_info_rcv.c index 2b273d2..ced745d 100644 --- a/osm/opensm/osm_port_info_rcv.c +++ b/osm/opensm/osm_port_info_rcv.c @@ -238,8 +238,6 @@ __osm_pi_rcv_process_switch_port( uint8_t port_num; uint8_t remote_port_num; osm_dr_path_t path; - osm_switch_t *p_sw; - ib_switch_info_t *p_si; OSM_LOG_ENTER( p_rcv->p_log, __osm_pi_rcv_process_switch_port ); @@ -352,10 +350,8 @@ __osm_pi_rcv_process_switch_port( "Invalid base LID 0x%x corrected\n", cl_ntoh16( orig_lid ) ); /* Determine if base switch port 0 */ - p_sw = osm_get_switch_by_guid(p_rcv->p_subn, - osm_node_get_node_guid( p_node )); - if (p_sw && (p_si = osm_switch_get_si_ptr(p_sw)) && - !ib_switch_info_is_enhanced_port0(p_si)) + if (p_node->sw && + 
!ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)) { /* PortState is not used on BSP0 but just in case it is DOWN */ p_physp->port_info = *p_pi; @@ -441,7 +437,6 @@ void osm_pkey_get_tables( uint8_t port_num; uint16_t block_num, max_blocks; uint32_t attr_mod_ho; - osm_switch_t* p_switch; OSM_LOG_ENTER( p_log, osm_pkey_get_tables ); @@ -455,8 +450,7 @@ void osm_pkey_get_tables( port_num = p_physp->port_num; - if (osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH || - port_num == 0 ) + if (!p_node->sw || port_num == 0) { /* The maximum blocks is defined by the node info partition cap for CA, router, and switch management ports. */ @@ -465,29 +459,19 @@ void osm_pkey_get_tables( } else { - /* This is a switch, and not a management port. The maximum blocks is defined - in the switch info partition enforcement cap. */ - p_switch = osm_get_switch_by_guid(p_subn, p_node->node_info.node_guid); - - if (! p_switch) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_physp_has_pkey: ERR 0F11: " - "Cannot find switch by guid: %" PRIx64 "\n", - cl_ntoh64(p_node->node_info.node_guid) ); - goto Exit; - } + /* This is a switch, and not a management port. The maximum blocks + is defined in the switch info partition enforcement cap. 
*/ /* Check for IBM eHCA firmware defect in reporting partition enforcement cap */ if (cl_ntoh32(ib_node_info_get_vendor_id(&p_node->node_info)) == IBM_VENDOR_ID) - p_switch->switch_info.enforce_cap = 0; + p_node->sw->switch_info.enforce_cap = 0; /* Bail out if this is a switch with no partition enforcement capability */ - if (cl_ntoh16(p_switch->switch_info.enforce_cap) == 0) + if (cl_ntoh16(p_node->sw->switch_info.enforce_cap) == 0) goto Exit; - max_blocks = (cl_ntoh16(p_switch->switch_info.enforce_cap)+IB_NUM_PKEY_ELEMENTS_IN_BLOCK -1) - / IB_NUM_PKEY_ELEMENTS_IN_BLOCK ; + max_blocks = (cl_ntoh16(p_node->sw->switch_info.enforce_cap) + + IB_NUM_PKEY_ELEMENTS_IN_BLOCK -1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK ; } for (block_num = 0 ; block_num < max_blocks ; block_num++) diff --git a/osm/opensm/osm_qos.c b/osm/opensm/osm_qos.c index fceb304..0dd5dc7 100644 --- a/osm/opensm/osm_qos.c +++ b/osm/opensm/osm_qos.c @@ -318,14 +318,12 @@ osm_signal_t osm_qos_setup(osm_opensm_t * p_osm) { struct qos_config ca_config, sw0_config, swe_config, rtr_config; struct qos_config *cfg; - osm_switch_t *p_sw; - ib_switch_info_t *p_si; cl_qmap_t *p_tbl; cl_map_item_t *p_next; osm_port_t *p_port; uint32_t num_physp; osm_physp_t *p_physp; - uint8_t node_type; + osm_node_t *p_node; ib_api_status_t status; uint8_t i; @@ -351,8 +349,8 @@ osm_signal_t osm_qos_setup(osm_opensm_t * p_osm) p_port = (osm_port_t *) p_next; p_next = cl_qmap_next(p_next); - node_type = osm_node_get_type(osm_port_get_parent_node(p_port)); - if (node_type == IB_NODE_TYPE_SWITCH) { + p_node = p_port->p_node; + if (p_node->sw) { num_physp = osm_port_get_num_physp(p_port); for (i = 1; i < num_physp; i++) { p_physp = osm_port_get_phys_ptr(p_port, i); @@ -363,14 +361,11 @@ osm_signal_t osm_qos_setup(osm_opensm_t * p_osm) p_port, p_physp, i, &swe_config); } /* skip base port 0 */ - p_sw = osm_get_switch_by_guid(&p_osm->subn, - osm_port_get_guid(p_port)); - if (!p_sw || !(p_si = osm_switch_get_si_ptr(p_sw)) || - 
!ib_switch_info_is_enhanced_port0(p_si)) + if (!ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)) continue; cfg = &sw0_config; - } else if (node_type == IB_NODE_TYPE_ROUTER) + } else if (osm_node_get_type(p_node) == IB_NODE_TYPE_ROUTER) cfg = &rtr_config; else cfg = &ca_config; diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 1d2d203..60a4930 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ b/osm/opensm/osm_sa_multipath_record.c @@ -231,11 +231,9 @@ __osm_mpr_rcv_get_path_parms( IN const ib_net64_t comp_mask, OUT osm_path_parms_t* const p_parms ) { - ib_net64_t node_guid; const osm_node_t* p_node; const osm_physp_t* p_physp; const osm_physp_t* p_dest_physp; - const osm_switch_t* p_sw; const osm_prtn_t* p_prtn; const ib_port_info_t* p_pi; ib_slvl_table_t* p_slvl_tbl; @@ -349,21 +347,14 @@ __osm_mpr_rcv_get_path_parms( p_node = osm_physp_get_node_ptr( p_physp ); - if ( osm_node_get_type( p_node ) == IB_NODE_TYPE_SWITCH ) + if ( p_node->sw ) { - p_sw = osm_get_switch_by_guid( p_rcv->p_subn, - osm_node_get_node_guid( p_node ) ); - if( !p_sw ) - { - status = IB_ERROR; - goto Exit; - } /* * If the dest_lid_ho is equal to the lid of the switch pointed by * p_sw then p_physp will be the physical port of the switch port zero. 
*/ - p_physp = osm_switch_get_route_by_lid( p_sw, cl_ntoh16( dest_lid_ho ) ); + p_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); if ( p_physp == 0 ) { osm_log( p_rcv->p_log, OSM_LOG_ERROR, @@ -381,17 +372,10 @@ __osm_mpr_rcv_get_path_parms( */ p_node = osm_physp_get_node_ptr( p_dest_physp ); - if ( osm_node_get_type( p_node ) == IB_NODE_TYPE_SWITCH ) + if ( p_node->sw ) { - p_sw = osm_get_switch_by_guid( p_rcv->p_subn, - osm_node_get_node_guid( p_node ) ); - if( !p_sw ) - { - status = IB_ERROR; - goto Exit; - } - p_dest_physp = osm_switch_get_route_by_lid( p_sw, cl_ntoh16( dest_lid_ho ) ); + p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); if ( p_dest_physp == 0 ) { @@ -431,7 +415,7 @@ __osm_mpr_rcv_get_path_parms( p_node = osm_physp_get_node_ptr( p_physp ); - if ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) + if ( !p_node->sw ) { /* There is some sort of problem in the subnet object! @@ -445,22 +429,6 @@ __osm_mpr_rcv_get_path_parms( goto Exit; } - node_guid = osm_node_get_node_guid( p_node ); - p_sw = osm_get_switch_by_guid( p_rcv->p_subn, node_guid ); - - if( !p_sw ) - { - /* - There is some sort of problem in the subnet object! - */ - osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4504: " - "Internal error, no switch for GUID 0x%016" PRIx64 "\n", - cl_ntoh64( node_guid ) ); - status = IB_ERROR; - goto Exit; - } - /* Check parameters for the ingress port in this switch. */ @@ -499,7 +467,7 @@ __osm_mpr_rcv_get_path_parms( /* Continue with the egress port on this switch. 
*/ - p_physp = osm_switch_get_route_by_lid( p_sw, dest_lid ); + p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid ); if ( p_physp == 0 ) { @@ -507,7 +475,7 @@ __osm_mpr_rcv_get_path_parms( "__osm_mpr_rcv_get_path_parms: ERR 4516: " "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, - cl_ntoh64( node_guid ) ); + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); status = IB_ERROR; goto Exit; } diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index 0500a74..11058ab 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -233,11 +233,9 @@ __osm_pr_rcv_get_path_parms( IN const ib_net64_t comp_mask, OUT osm_path_parms_t* const p_parms ) { - ib_net64_t node_guid; const osm_node_t* p_node; const osm_physp_t* p_physp; const osm_physp_t* p_dest_physp; - const osm_switch_t* p_sw; const osm_prtn_t* p_prtn; const ib_port_info_t* p_pi; ib_api_status_t status = IB_SUCCESS; @@ -289,21 +287,13 @@ __osm_pr_rcv_get_path_parms( p_node = osm_physp_get_node_ptr( p_physp ); - if( osm_node_get_type( p_node ) == IB_NODE_TYPE_SWITCH ) + if( p_node->sw ) { - p_sw = osm_get_switch_by_guid(p_rcv->p_subn, - osm_node_get_node_guid( p_node ) ); - if( !p_sw ) - { - status = IB_ERROR; - goto Exit; - } - /* * If the dest_lid_ho is equal to the lid of the switch pointed by * p_sw then p_physp will be the physical port of the switch port zero. 
*/ - p_physp = osm_switch_get_route_by_lid(p_sw, cl_ntoh16( dest_lid_ho ) ); + p_physp = osm_switch_get_route_by_lid(p_node->sw, cl_ntoh16( dest_lid_ho ) ); if ( p_physp == 0 ) { osm_log( p_rcv->p_log, OSM_LOG_ERROR, @@ -321,17 +311,9 @@ __osm_pr_rcv_get_path_parms( */ p_node = osm_physp_get_node_ptr( p_dest_physp ); - if( osm_node_get_type( p_node ) == IB_NODE_TYPE_SWITCH ) + if( p_node->sw ) { - p_sw = osm_get_switch_by_guid(p_rcv->p_subn, - osm_node_get_node_guid( p_node ) ); - if( !p_sw ) - { - status = IB_ERROR; - goto Exit; - } - - p_dest_physp = osm_switch_get_route_by_lid( p_sw, cl_ntoh16( dest_lid_ho ) ); + p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); if ( p_dest_physp == 0 ) { @@ -369,7 +351,7 @@ __osm_pr_rcv_get_path_parms( p_node = osm_physp_get_node_ptr( p_physp ); - if( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) + if( !p_node->sw ) { /* There is some sort of problem in the subnet object! @@ -383,22 +365,6 @@ __osm_pr_rcv_get_path_parms( goto Exit; } - node_guid = osm_node_get_node_guid( p_node ); - p_sw = osm_get_switch_by_guid(p_rcv->p_subn, node_guid ); - - if( !p_sw ) - { - /* - There is some sort of problem in the subnet object! - */ - osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F04: " - "Internal error, no switch for GUID 0x%016" PRIx64 "\n", - cl_ntoh64( node_guid ) ); - status = IB_ERROR; - goto Exit; - } - /* Check parameters for the ingress port in this switch. */ @@ -437,7 +403,7 @@ __osm_pr_rcv_get_path_parms( /* Continue with the egress port on this switch. 
*/ - p_physp = osm_switch_get_route_by_lid( p_sw, dest_lid ); + p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid ); if ( p_physp == 0 ) { @@ -445,7 +411,7 @@ __osm_pr_rcv_get_path_parms( "__osm_pr_rcv_get_path_parms: ERR 1F07: " "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, - cl_ntoh64( node_guid ) ); + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); status = IB_ERROR; goto Exit; } diff --git a/osm/opensm/osm_sa_portinfo_record.c b/osm/opensm/osm_sa_portinfo_record.c index d904945..054ff1f 100644 --- a/osm/opensm/osm_sa_portinfo_record.c +++ b/osm/opensm/osm_sa_portinfo_record.c @@ -198,20 +198,15 @@ __osm_sa_pir_create( uint16_t base_lid_ho; uint16_t match_lid_ho; osm_physp_t *p_node_physp; - osm_switch_t *p_sw; OSM_LOG_ENTER( p_rcv->p_log, __osm_sa_pir_create ); - if (p_physp->p_node->node_info.node_type == IB_NODE_TYPE_SWITCH) + if (p_physp->p_node->sw) { p_node_physp = osm_node_get_physp_ptr( p_physp->p_node, 0 ); base_lid_ho = cl_ntoh16( osm_physp_get_base_lid( p_node_physp ) ); - p_sw = osm_get_switch_by_guid( p_rcv->p_subn, - osm_physp_get_port_guid( p_node_physp ) ); - if (osm_switch_sp0_is_lmc_capable(p_sw, p_rcv->p_subn)) - lmc = osm_physp_get_lmc( p_node_physp ); - else - lmc = 0; + lmc = osm_switch_sp0_is_lmc_capable(p_physp->p_node->sw, p_rcv->p_subn) ? + osm_physp_get_lmc( p_node_physp ) : 0; } else { diff --git a/osm/opensm/osm_sw_info_rcv.c b/osm/opensm/osm_sw_info_rcv.c index 136fc05..6647d16 100644 --- a/osm/opensm/osm_sw_info_rcv.c +++ b/osm/opensm/osm_sw_info_rcv.c @@ -386,6 +386,8 @@ __osm_si_rcv_process_new( goto Exit; } + p_node->sw = p_sw; + /* Update the switch info according to the info we just received. 
@@ -428,9 +430,9 @@ static boolean_t __osm_si_rcv_process_existing( IN const osm_si_rcv_t* const p_rcv, IN osm_node_t* const p_node, - IN osm_switch_t* const p_sw, IN const osm_madw_t* const p_madw ) { + osm_switch_t *p_sw = p_node->sw; ib_switch_info_t *p_si; osm_si_context_t *p_si_context; ib_smp_t *p_smp; @@ -438,8 +440,6 @@ __osm_si_rcv_process_existing( OSM_LOG_ENTER( p_rcv->p_log, __osm_si_rcv_process_existing ); - CL_ASSERT( p_node ); - CL_ASSERT( p_sw ); CL_ASSERT( p_madw ); p_smp = osm_madw_get_smp_ptr( p_madw ); @@ -588,7 +588,6 @@ osm_si_rcv_process( ib_switch_info_t *p_si; ib_smp_t *p_smp; osm_node_t *p_node; - osm_switch_t *p_sw; ib_net64_t node_guid; osm_si_context_t *p_context; @@ -654,8 +653,7 @@ osm_si_rcv_process( /* Acquire the switch object for this switch. */ - p_sw = osm_get_switch_by_guid( p_rcv->p_subn, node_guid ); - if( !p_sw ) + if( !p_node->sw ) { __osm_si_rcv_process_new( p_rcv, p_node, p_madw ); /* @@ -667,7 +665,7 @@ osm_si_rcv_process( else { /* we might get back a request for signaling change was detected */ - if (__osm_si_rcv_process_existing( p_rcv, p_node, p_sw, p_madw )) + if (__osm_si_rcv_process_existing( p_rcv, p_node, p_madw )) { CL_PLOCK_RELEASE( p_rcv->p_lock ); osm_state_mgr_process( p_rcv->p_state_mgr, diff --git a/osm/opensm/osm_ucast_mgr.c b/osm/opensm/osm_ucast_mgr.c index f546c5f..c1e530e 100644 --- a/osm/opensm/osm_ucast_mgr.c +++ b/osm/opensm/osm_ucast_mgr.c @@ -1078,8 +1078,6 @@ __osm_ucast_mgr_process_neighbors( osm_ucast_mgr_t* const p_mgr = (osm_ucast_mgr_t*)context; osm_node_t *p_node; osm_node_t *p_remote_node; - ib_net64_t remote_node_guid; - osm_switch_t *p_remote_sw; uint32_t port_num; uint8_t remote_port_num; uint32_t num_ports; @@ -1111,32 +1109,15 @@ __osm_ucast_mgr_process_neighbors( (uint8_t)port_num, &remote_port_num ); if( p_remote_node && (p_remote_node != p_node ) - && (osm_node_get_type( p_remote_node ) == IB_NODE_TYPE_SWITCH ) ) + && p_remote_node->sw ) { /* make sure the link is healthy. 
If it is not - don't propagate through it. */ p_physp = osm_node_get_physp_ptr( p_node, port_num ); if (!osm_link_is_healthy( p_physp ) ) continue; - remote_node_guid = osm_node_get_node_guid( p_remote_node ); - - p_remote_sw = osm_get_switch_by_guid( p_mgr->p_subn, remote_node_guid ); - if( !p_remote_sw ) - { - osm_log( p_mgr->p_log, OSM_LOG_ERROR, - "__osm_ucast_mgr_process_neighbors: ERR 3A07: " - "No switch object for Node GUID 0x%" PRIx64 "\n", - cl_ntoh64( remote_node_guid ) ); - } - else - { - __osm_ucast_mgr_process_neighbor( - p_mgr, - p_sw, - p_remote_sw, - (uint8_t)port_num, - remote_port_num ); - } + __osm_ucast_mgr_process_neighbor(p_mgr, p_sw, p_remote_node->sw, + (uint8_t)port_num, remote_port_num ); } } diff --git a/osm/opensm/osm_ucast_updn.c b/osm/opensm/osm_ucast_updn.c index d6c5dbc..c860427 100644 --- a/osm/opensm/osm_ucast_updn.c +++ b/osm/opensm/osm_ucast_updn.c @@ -225,11 +225,10 @@ static int __updn_bfs_by_node( IN updn_t *p_updn, IN osm_subn_t *p_subn, - IN ib_net64_t guid_index, + IN osm_port_t *p_port, IN cl_qmap_t *p_guid_rank_tbl ) { /* Init local vars */ - osm_port_t *p_port; osm_switch_t *p_self_node = NULL; uint8_t pn, pn_rem; osm_physp_t *p_physp, *p_remote_physp; @@ -247,8 +246,6 @@ __updn_bfs_by_node( cl_list_init( p_nextList, 10 ); p_currList = p_nextList; - p_port = (osm_port_t*) cl_qmap_get(&(p_subn->port_guid_tbl),guid_index); - /* TODO : check if p_port is not NULL */ p_physp = osm_port_get_default_phys_ptr(p_port); /* Check valid pointer */ if (!p_physp || !osm_physp_is_valid(p_physp )) @@ -263,9 +260,9 @@ __updn_bfs_by_node( "__updn_bfs_by_node:" "Starting lid : 0x%x \n", root_lid ); - if (osm_node_get_type( p_port->p_node ) == IB_NODE_TYPE_SWITCH) + if (p_port->p_node->sw) { - p_self_node = osm_get_switch_by_guid(p_subn, guid_index); + p_self_node = p_port->p_node->sw; /* Update its Min Hop Table */ osm_log( p_log, OSM_LOG_DEBUG, "__updn_bfs_by_node:" @@ -286,7 +283,7 @@ __updn_bfs_by_node( { /* Check if the remote port 
is a switch, if it is update root_lid, Min Hop Table */ - if (osm_node_get_type(p_remote_physp->p_node) != IB_NODE_TYPE_SWITCH) + if (!p_remote_physp->p_node->sw) { osm_log( p_log, OSM_LOG_ERROR, "__updn_bfs_by_node: ERR AA07: " @@ -296,9 +293,7 @@ __updn_bfs_by_node( } else { - p_self_node = osm_get_switch_by_guid(p_subn, - osm_physp_get_port_guid - (p_remote_physp)); + p_self_node = p_remote_physp->p_node->sw; max_sw_lid = osm_switch_get_max_lid_ho(p_self_node); if ((1 <= root_lid) && (root_lid <= max_sw_lid)) /* Update its Min Hop Table */ @@ -396,7 +391,7 @@ __updn_bfs_by_node( remote_guid = osm_node_get_node_guid(p_remote_node); /* printf ("-V- Current guid : 0x%" PRIx64 " Remote guid : 0x%" PRIx64 "\n", */ /* cl_ntoh64(current_guid), cl_ntoh64(remote_guid)); */ - p_remote_sw = osm_get_switch_by_guid(p_subn, remote_guid); + p_remote_sw = p_remote_node->sw; p_rem_rank = (updn_rank_t*)cl_qmap_get(p_guid_rank_tbl, remote_guid); p_cur_rank = (updn_rank_t*)cl_qmap_get(p_guid_rank_tbl, current_guid); /* Decide which direction to mark it (UP/DOWN) */ @@ -901,7 +896,7 @@ __osm_subn_set_up_down_min_hop_table( "__osm_subn_set_up_down_min_hop_table: " "BFS through port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid) ); - if(__updn_bfs_by_node(p_updn, p_subn, port_guid, + if(__updn_bfs_by_node(p_updn, p_subn, p_port, &p_updn->guid_rank_tbl)) { OSM_LOG_EXIT( p_log ); -- 1.4.4.2.gfc82d From mshefty at ichips.intel.com Tue Jan 2 14:43:08 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 02 Jan 2007 14:43:08 -0800 Subject: [openib-general] outstanding patches In-Reply-To: <20070102200523.GA10451@mellanox.co.il> References: <20070102200523.GA10451@mellanox.co.il> Message-ID: <459ADFFC.2030605@ichips.intel.com> > sean_cm_limit_mra_timeout.patch I either missed or didn't see a response to this patch after I posted it. Can someone please confirm that the patch worked for them? If so, I can repost it. 
There's also a patch to fix a struct ucma_event leak that was requested to go into 2.6.20, but is waiting for Roland's response when he can get to it. - Sean From mshefty at ichips.intel.com Tue Jan 2 14:45:42 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 02 Jan 2007 14:45:42 -0800 Subject: [openib-general] APM support in openib stack In-Reply-To: References: Message-ID: <459AE096.5080307@ichips.intel.com> Tang, Changqing wrote: > Sorry, I find the function 'ib_sa_path_rec_get()' in kernel code. Then > here is my question: > > Is there any way (instruction) to fill in struct 'ib_sa_path_rec' inside > struct 'ib_cm_req_param' without using librdmacm.so ? Not easily at this time. The only alternative is to interface directly to the libibmad interface and exchange MADs with the SA directly. - Sean From gshipman at lanl.gov Tue Jan 2 14:49:13 2007 From: gshipman at lanl.gov (Galen Shipman) Date: Tue, 2 Jan 2007 15:49:13 -0700 Subject: [openib-general] Specifying subnet prefix to OpenSM? Message-ID: I have 2 test machines, each with two HCAs. One HCA on each node is plugged into a switch, the other two HCAs are connected back to back. The goal is to get two different subnets, one for each pair of HCAs. Currently I am using "opensm --guid" in order to run OpenSM on the each HCA/Port 0. The problem is that I also want these two subnets to appear distinct in some fashion. I am querying the subnet via: ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid); subnet = ntoh64(gid.global.subnet_prefix); Not sure if this is the correct thing to do, but this is what I am doing ;-). So, is there a way to specify the subnet prefix to OpenSM? Is there some other method that I can use to determine that the subnets are distinct? 
Thanks, Galen From jsquyres at cisco.com Tue Jan 2 15:00:07 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 2 Jan 2007 18:00:07 -0500 Subject: [openib-general] ibv_get_device_list() question Message-ID: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> We're debating internally in the OMPI group about providing access to specific NICs/HCAs, especially on NUMA machines where different NICs/ HCAs are local to different busses / processors. So I have the following questions: 1. Does the ibv API provide any information about devices' physical location with regards to busses / processors? (I doubt it, but I'm just asking to be sure) 2. Even if we get this information externally (e.g., user provides a config file to OMPI), does the information returned via ibv_get_device_list() come back in a guaranteed order? I.e., if I have multiple HCAs in a host, will the entries in the list always be returned in the same order such that I could have a host-specific OMPI config file saying "processes on cores 0,1,2,3 use ibv_device 0, processes on cores 4,5,6,7 use device 1", or something similar? Thanks. -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From jgunthorpe at obsidianresearch.com Tue Jan 2 15:00:27 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 2 Jan 2007 16:00:27 -0700 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167774142.4596.99507.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> Message-ID: <20070102230027.GB13404@obsidianresearch.com> On Tue, Jan 02, 2007 at 04:42:23PM -0500, Hal Rosenstock wrote: > > I propose changing the routine name to ib_gid_is_default_prefix so as > > not to be misleading. Comments ? Patch to follow for this. > > What is confusing is the link local unicast GID definition in Figure 39. 
> It says 54 bits of and is green (which I think means 0 as in Figure 40) > on p. 144-145. I think you are right about the 0s (see definition of GID Prefix). Based on the definition of GID Prefix (and the IPv6 addressing architecture this section is clearly derived from) I'd also change ib_gid_is_link_local to check only the scope bits (ie compare a /10 not a /64..) BTW, I think ib_gid_is_link_local is a fine name for the function as is and matches the various specifications. The test you described: > > > Shouldn't it be either the default subnet prefix or the one supplied in > > > PortInfo:GidPrefix (which might not be the default one) ? Would be better described as ib_gid_is_on_link. on-link being a term used to refer to an address where a routing table says it is present on the local link rather than reachable through a router. It looks like you are making alot of progress here on router support - I'm looking forward to trying it! Jason From halr at voltaire.com Tue Jan 2 15:24:15 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 18:24:15 -0500 Subject: [openib-general] Specifying subnet prefix to OpenSM? In-Reply-To: References: Message-ID: <1167780255.4596.105292.camel@hal.voltaire.com> Hi Galen, On Tue, 2007-01-02 at 17:49, Galen Shipman wrote: > I have 2 test machines, each with two HCAs. One HCA on each node is > plugged into a switch, the other two HCAs are connected back to back. > The goal is to get two different subnets, one for each pair of HCAs. > Currently I am using "opensm --guid" in order to run OpenSM on the > each HCA/Port 0. The problem is that I also want these two subnets to > appear distinct in some fashion. I am querying the subnet via: > > ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid); > subnet = ntoh64(gid.global.subnet_prefix); > > > Not sure if this is the correct thing to do, but this is what I am > doing ;-). > > So, is there a way to specify the subnet prefix to OpenSM? 
Yes, by supplying subnet_prefix in the opensm.opts file. If both opensms run on the same machine, you will need 2 different OSM_CACHE_DIRs to have different options. > Is there > some other method that I can use to determine that the subnets are > distinct? Not that I'm aware of. -- Hal > > Thanks, > > Galen > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From halr at voltaire.com Tue Jan 2 15:39:21 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 18:39:21 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <20070102230027.GB13404@obsidianresearch.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> Message-ID: <1167781157.4596.106104.camel@hal.voltaire.com> Hi Jason, On Tue, 2007-01-02 at 18:00, Jason Gunthorpe wrote: > On Tue, Jan 02, 2007 at 04:42:23PM -0500, Hal Rosenstock wrote: > > > > I propose changing the routine name to ib_gid_is_default_prefix so as > > > not to be misleading. Comments ? Patch to follow for this. > > > > What is confusing is the link local unicast GID definition in Figure 39. > > It says 54 bits of and is green (which I think means 0 as in Figure 40) > > on p. 144-145. > > I think you are right about the 0s (see definition of GID Prefix). > Based on the definition of GID Prefix (and the IPv6 addressing > architecture this section is clearly derived from) I'd also change > ib_gid_is_link_local to check only the scope bits (ie compare a /10 > not a /64..) I was wondering about that too and was about to go there. So you are saying that any link local scope GID is fine (and doesn't need complete 64 bit matching but only the first 10 bits), right ? 
> BTW, I think ib_gid_is_link_local is a fine name for the function as > is and matches the various specifications. The test you described: Agreed. I came back around to that conclusion once I became unconfused... > > > > Shouldn't it be either the default subnet prefix or the one supplied in > > > > PortInfo:GidPrefix (which might not be the default one) ? > > Would be better described as ib_gid_is_on_link. on-link being a term > used to refer to an address where a routing table says it is present > on the local link rather than reachable through a router. Yes, that is better terminology but I can't use this yet as there is no routing table (at least yet)... > It looks like you are making alot of progress here on router support - > I'm looking forward to trying it! Thanks and I look forward to getting feedback from real routers :-) -- Hal > Jason From halr at voltaire.com Tue Jan 2 15:46:39 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 18:46:39 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167781157.4596.106104.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> Message-ID: <1167781598.4596.106504.camel@hal.voltaire.com> On Tue, 2007-01-02 at 18:39, Hal Rosenstock wrote: > Hi Jason, > > On Tue, 2007-01-02 at 18:00, Jason Gunthorpe wrote: > > On Tue, Jan 02, 2007 at 04:42:23PM -0500, Hal Rosenstock wrote: > > > > > > I propose changing the routine name to ib_gid_is_default_prefix so as > > > > not to be misleading. Comments ? Patch to follow for this. > > > > > > What is confusing is the link local unicast GID definition in Figure 39. > > > It says 54 bits of and is green (which I think means 0 as in Figure 40) > > > on p. 144-145. > > > > I think you are right about the 0s (see definition of GID Prefix). 
> > Based on the definition of GID Prefix (and the IPv6 addressing > > architecture this section is clearly derived from) I'd also change > > ib_gid_is_link_local to check only the scope bits (ie compare a /10 > > not a /64..) > > I was wondering about that too and was about to go there. So you are > saying that any link local scope GID is fine (and doesn't need complete > 64 bit matching but only the first 10 bits), right ? How about the following (untested as yet): diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 573b294..0f710e1 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -2168,7 +2168,8 @@ static inline boolean_t OSM_API ib_gid_is_link_local( IN const ib_gid_t* const p_gid ) { - return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); + return( ( ib_gid_get_subnet_prefix( p_gid ) & + CL_HTON64( 0xFFE0000000000000ULL ) ) == IB_DEFAULT_SUBNET_PREFIX } /* * PARAMETERS > > > BTW, I think ib_gid_is_link_local is a fine name for the function as > > is and matches the various specifications. The test you described: > > Agreed. I came back around to that conclusion once I became > unconfused... > > > > > > Shouldn't it be either the default subnet prefix or the one supplied in > > > > > PortInfo:GidPrefix (which might not be the default one) ? > > > > Would be better described as ib_gid_is_on_link. on-link being a term > > used to refer to an address where a routing table says it is present > > on the local link rather than reachable through a router. > > Yes, that is better terminology but I can't use this yet as there is no > routing table (at least yet)... > > > It looks like you are making alot of progress here on router support - > > I'm looking forward to trying it! 
> > Thanks and I look forward to getting feedback from real routers :-) > > -- Hal > > > Jason > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rjwalsh at pathscale.com Tue Jan 2 15:57:55 2007 From: rjwalsh at pathscale.com (Robert Walsh) Date: Tue, 02 Jan 2007 15:57:55 -0800 Subject: [openib-general] ibv_get_device_list() question In-Reply-To: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> References: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> Message-ID: <459AF183.2070909@pathscale.com> Jeff Squyres wrote: > We're debating internally in the OMPI group about providing access to > specific NICs/HCAs, especially on NUMA machines where different NICs/ > HCAs are local to different busses / processors. So I have the > following questions: > > 1. Does the ibv API provide any information about devices' physical > location with regards to busses / processors? (I doubt it, but I'm > just asking to be sure) Don't know whether you can get this directly, but you can get it indirectly by looking in /sys/class/infiniband//device. bus points at the bus. local_cpus contains the nearby cpu mask. This what you're looking for? 
From halr at voltaire.com Tue Jan 2 16:03:42 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 19:03:42 -0500 Subject: [openib-general] [PATCH] OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default subnet prefix to determine is link local Message-ID: <1167782620.4596.107442.camel@hal.voltaire.com> OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default subnet prefix to determine is link local Signed-off-by: Hal Rosenstock diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 573b294..0f710e1 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -2168,7 +2168,8 @@ static inline boolean_t OSM_API ib_gid_is_link_local( IN const ib_gid_t* const p_gid ) { - return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); + return( ( ib_gid_get_subnet_prefix( p_gid ) & + CL_HTON64( 0xFFE0000000000000ULL ) ) == IB_DEFAULT_SUBNET_PREFIX ); } /* * PARAMETERS From jgunthorpe at obsidianresearch.com Tue Jan 2 17:24:41 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 2 Jan 2007 18:24:41 -0700 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167781157.4596.106104.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> Message-ID: <20070103012440.GC13404@obsidianresearch.com> On Tue, Jan 02, 2007 at 06:39:21PM -0500, Hal Rosenstock wrote: > > I think you are right about the 0s (see definition of GID Prefix). > > Based on the definition of GID Prefix (and the IPv6 addressing > > architecture this section is clearly derived from) I'd also change > > ib_gid_is_link_local to check only the scope bits (ie compare a /10 > > not a /64..) > > I was wondering about that too and was about to go there. 
So you are > saying that any link local scope GID is fine (and doesn't need complete > 64 bit matching but only the first 10 bits), right ? Well, there is a quibble here that is worth noting. I would expect a function like ib_gid_is_link_local to behave similarly to the POSIX IN6_IS_ADDR_LINKLOCAL function, in that it tests the scope of the address for the link local property. For this purpose /10 is correct. However, the current usage of ib_gid_is_link_local is really a placeholder for a routing lookup. Since IBA defines the link local addresses to be fe80::xxxx/64 it must test the full /64 (like ethernet does). > > > > > Shouldn't it be either the default subnet prefix or the one supplied in > > > > > PortInfo:GidPrefix (which might not be the default one) ? > > > > Would be better described as ib_gid_is_on_link. on-link being a term > > used to refer to an address where a routing table says it is present > > on the local link rather than reachable through a router. > > Yes, that is better terminology but I can't use this yet as there is no > routing table (at least yet)... Well, I'd say you have a hard wired routing table with 3 entries: fe80::/64 onlink DEFAULT_PREFIX::/64 onlink default unreachable Which is what the tests in the code accomplish. One thought would be to implement the route lookup function with a useable signature and move the hard-coded tests into it as a placeholder for an alterable routing table. 
Jason From jgunthorpe at obsidianresearch.com Tue Jan 2 17:25:55 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 2 Jan 2007 18:25:55 -0700 Subject: [openib-general] [PATCH] OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default subnet prefix to determine is link local In-Reply-To: <1167782620.4596.107442.camel@hal.voltaire.com> References: <1167782620.4596.107442.camel@hal.voltaire.com> Message-ID: <20070103012555.GD13404@obsidianresearch.com> On Tue, Jan 02, 2007 at 07:03:42PM -0500, Hal Rosenstock wrote: > - return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); > + return( ( ib_gid_get_subnet_prefix( p_gid ) & > + CL_HTON64( 0xFFE0000000000000ULL ) ) == IB_DEFAULT_SUBNET_PREFIX ); Should be FFC, ie: #define IN6_IS_ADDR_LINKLOCAL(a) \ ((((__const uint32_t *) (a))[0] & htonl (0xffc00000)) \ == htonl (0xfe800000)) Jason From jsquyres at cisco.com Tue Jan 2 17:38:35 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 2 Jan 2007 20:38:35 -0500 Subject: [openib-general] ibv_get_device_list() question In-Reply-To: <459AF183.2070909@pathscale.com> References: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> <459AF183.2070909@pathscale.com> Message-ID: <772ED536-8D0E-4FD5-92AC-B7012B733BF7@cisco.com> Ah! That *would* be perfect, but I guess the RHEL4 kernel doesn't have this stuff (there's no "bus" or "local_cpus" entries in the device/ subdir). Failing that, is the device name guaranteed to be the same across reboots, and/or the info returned by ibv_get_device_info() guaranteed to be in the same order? 
(I suppose that I can use ibv_get_device_name() to match potentially unordered info returned by ibv_get_device_info(), if necessary -- as long as the name is guaranteed to be the same across reboots) On Jan 2, 2007, at 6:57 PM, Robert Walsh wrote: > Jeff Squyres wrote: >> We're debating internally in the OMPI group about providing access >> to specific NICs/HCAs, especially on NUMA machines where >> different NICs/ HCAs are local to different busses / processors. >> So I have the following questions: >> 1. Does the ibv API provide any information about devices' >> physical location with regards to busses / processors? (I doubt >> it, but I'm just asking to be sure) > > Don't know whether you can get this directly, but you can get it > indirectly by looking in /sys/class/infiniband/<device-name>/ > device. bus points at the bus. local_cpus contains the nearby cpu > mask. This what you're looking for? -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From jsquyres at cisco.com Tue Jan 2 17:47:59 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 2 Jan 2007 20:47:59 -0500 Subject: [openib-general] SVN deprecation In-Reply-To: <4593A31D.50808@voltaire.com> References: <4593A31D.50808@voltaire.com> Message-ID: On Dec 28, 2006, at 5:57 AM, Or Gerlitz wrote: > Lets leave these sources with a readme stating they are > unmaintained along with the gen2/branches/1.1 sources at least for > the dev/release cycle of OFED 1.2 Do you have a reason for this? I ask because even if sources are "svn rm"ed, they are still available in the SVN history for anyone who really needs them. It just makes the process one step more difficult to get to the old sources (which is kinda the point -- we want people to get them from git, not accidentally get them from SVN and wonder why everything is so old / they can't find what people are talking about on the mailing lists / or worse, not realize that they're old, etc.). 
My thought was that if people accidentally check out of SVN, there should be [almost] nothing at the HEAD except a README stating that everything has moved to git (and instructions if you need to get into the SVN history) so that it's blatantly obvious that there's nothing new in SVN anymore. If all you do is put a README file at the HEAD without removing anything, it's just too easy to miss (this has already happened several times) -- people will peruse the source and think that it's the most current stuff. That's my $0.02... -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From gshipman at lanl.gov Tue Jan 2 18:02:51 2007 From: gshipman at lanl.gov (Galen Shipman) Date: Tue, 2 Jan 2007 19:02:51 -0700 Subject: [openib-general] Specifying subnet prefix to OpenSM? In-Reply-To: <1167780255.4596.105292.camel@hal.voltaire.com> References: <1167780255.4596.105292.camel@hal.voltaire.com> Message-ID: Worked like a charm,.. Thanks On Jan 2, 2007, at 4:24 PM, Hal Rosenstock wrote: > Hi Galen, > > On Tue, 2007-01-02 at 17:49, Galen Shipman wrote: >> I have 2 test machines, each with two HCAs. One HCA on each node is >> plugged into a switch, the other two HCAs are connected back to back. >> The goal is to get two different subnets, one for each pair of HCAs. >> Currently I am using "opensm --guid" in order to run OpenSM on the >> each HCA/Port 0. The problem is that I also want these two subnets to >> appear distinct in some fashion. I am querying the subnet via: >> >> ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid); >> subnet = ntoh64(gid.global.subnet_prefix); >> >> >> Not sure if this is the correct thing to do, but this is what I am >> doing ;-). >> >> So, is there a way to specify the subnet prefix to OpenSM? > > Yes, by supplying subnet_prefix in the opensm.opts file. > > If both opensms run on the same machine, you will need 2 different > OSM_CACHE_DIRs to have different options. 
> >> Is there >> some other method that I can use to determine that the subnets are >> distinct? > > Not that I'm aware of. > > -- Hal > >> >> Thanks, >> >> Galen >> >> _______________________________________________ >> openib-general mailing list >> openib-general at openib.org >> http://openib.org/mailman/listinfo/openib-general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/ >> openib-general >> > From rjwalsh at pathscale.com Tue Jan 2 18:27:23 2007 From: rjwalsh at pathscale.com (Robert Walsh) Date: Tue, 02 Jan 2007 18:27:23 -0800 Subject: [openib-general] ibv_get_device_list() question In-Reply-To: <772ED536-8D0E-4FD5-92AC-B7012B733BF7@cisco.com> References: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> <459AF183.2070909@pathscale.com> <772ED536-8D0E-4FD5-92AC-B7012B733BF7@cisco.com> Message-ID: <459B148B.6090907@pathscale.com> Jeff Squyres wrote: > Ah! That *would* be perfect, but I guess the RHEL4 kernel doesn't have > this stuff (there's no "bus" or "local_cpus" entries in the device/ > subdir). So it doesn't. Special. I don't see any obvious way of inferring this either. > Failing that, is guaranteed to be the same across reboots, > and/or the info returned by ibv_get_device_info() guaranteed to be in > the same order? (I suppose that I can use ibv_get_device_name() to > match potentially unordered info returned by ibv_get_device_info(), if > necessary -- as long as the name is guaranteed to be the same across > reboots) I don't know: my suspicion is that none of this ordering or naming is guaranteed. You probably want to do this based on GUID and not device name. Roland probably knows for sure. Regards, Robert. From venkatesh.babu at 3leafnetworks.com Tue Jan 2 18:49:25 2007 From: venkatesh.babu at 3leafnetworks.com (Venkatesh Babu) Date: Tue, 02 Jan 2007 18:49:25 -0800 Subject: [openib-general] APM: SM port failover Message-ID: <459B19B5.8000807@3leafnetworks.com> Let us say there are two nodes A and B. 
NodeA (passive side) passively listens for RC QP connection establishment requests and NodeB (active side) initiates the RC QP connection request with ib_send_cm_req(). When a port failure occurs on NodeA (passive side), it gets the event IB_EVENT_PORT_ERR locally. So it can call ib_modify_qp() for the RC QP to change the path_mig_state to IB_MIG_MIGRATED to use the alternate path. No problem here. But NodeB has to register with the OpenSM for the port failure event on NodeA, so that it can call ib_modify_qp() on the active side. This is working fine by using the interface ib_sa_serv_notice_hdlr() described in bug#159 (https://staging.openfabrics.org/bugzilla/show_bug.cgi?id=159). Now the question is - what if NodeA is running OpenSM with port 1 (sm_port=1), and that port fails (say cable disconnect). Then Node B can not receive any notification of the port failure even though it has registered for the notice handler with ib_sa_serv_notice_hdlr(), because sm_port is down. How can we handle the port failover in this scenario ? VBabu From halr at voltaire.com Tue Jan 2 20:08:14 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 23:08:14 -0500 Subject: [openib-general] [PATCH] OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default subnet prefix to determine is link local In-Reply-To: <20070103012555.GD13404@obsidianresearch.com> References: <1167782620.4596.107442.camel@hal.voltaire.com> <20070103012555.GD13404@obsidianresearch.com> Message-ID: <1167797293.4596.120902.camel@hal.voltaire.com> On Tue, 2007-01-02 at 20:25, Jason Gunthorpe wrote: > On Tue, Jan 02, 2007 at 07:03:42PM -0500, Hal Rosenstock wrote: > > - return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); > > + return( ( ib_gid_get_subnet_prefix( p_gid ) & > > + CL_HTON64( 0xFFE0000000000000ULL ) ) == IB_DEFAULT_SUBNET_PREFIX ); > > Should be FFC, ie: Missed that. Thanks. I'll reissue the patch. 
-- Hal > #define IN6_IS_ADDR_LINKLOCAL(a) \ > ((((__const uint32_t *) (a))[0] & htonl (0xffc00000)) \ > == htonl (0xfe800000)) > > Jason From halr at voltaire.com Tue Jan 2 20:13:32 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 23:13:32 -0500 Subject: [openib-general] [PATCHv2] OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default subnet prefix to determine is link local Message-ID: <1167797611.4596.121166.camel@hal.voltaire.com> OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default subnet prefix to determine is link local Signed-off-by: Hal Rosenstock diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 573b294..e4ef7e3 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -2168,7 +2168,8 @@ static inline boolean_t OSM_API ib_gid_is_link_local( IN const ib_gid_t* const p_gid ) { - return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); + return( ( ib_gid_get_subnet_prefix( p_gid ) & + CL_HTON64( 0xFFC0000000000000ULL ) ) == IB_DEFAULT_SUBNET_PREFIX ); } /* * PARAMETERS From halr at voltaire.com Tue Jan 2 20:22:40 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 02 Jan 2007 23:22:40 -0500 Subject: [openib-general] APM: SM port failover In-Reply-To: <459B19B5.8000807@3leafnetworks.com> References: <459B19B5.8000807@3leafnetworks.com> Message-ID: <1167798160.4596.121699.camel@hal.voltaire.com> On Tue, 2007-01-02 at 21:49, Venkatesh Babu wrote: > Let us say there are two nodes A and B. NodeA (passive side) passively > listens for RC QP connection establishment requests and NodeB (active > side) initiates the RC QP connection request with ib_send_cm_req(). When > a port failure occurs on NodeA (passive side), it gets the event > IB_EVENT_PORT_ERR locally. So it can call ib_modify_qp() for the RC QP > to change the path_mig_state to IB_MIG_MIGRATED to use the alternate > path. No problem here. 
But NodeB has to register with the OpenSM for the > port failure event on NodeA, so that it can call ib_modify_qp() on the > active side. > > This is working fine by using the interface ib_sa_serv_notice_hdlr() > described in bug#159 > (https://staging.openfabrics.org/bugzilla/show_bug.cgi?id=159). > > Now the question is - what if NodeA is running OpenSM with port 1 > (sm_port=1), and that port fails (say cable disconnect). Then Node B can > not receive any notification of the port failure even though it has > registered for the notice handler with ib_sa_serv_notice_hdlr(), because > sm_port is down. > > How can we handle the port failover in this scenario ? The subnet is essentially running without an SM when that port is disconnected. How about a backup SM for the subnet ? -- Hal > VBabu From jgunthorpe at obsidianresearch.com Tue Jan 2 20:54:00 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 2 Jan 2007 21:54:00 -0700 Subject: [openib-general] ibv_get_device_list() question In-Reply-To: <772ED536-8D0E-4FD5-92AC-B7012B733BF7@cisco.com> References: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> <459AF183.2070909@pathscale.com> <772ED536-8D0E-4FD5-92AC-B7012B733BF7@cisco.com> Message-ID: <20070103045400.GE13404@obsidianresearch.com> On Tue, Jan 02, 2007 at 08:38:35PM -0500, Jeff Squyres wrote: > Failing that, is guaranteed to be the same across > reboots, and/or the info returned by ibv_get_device_info() guaranteed > to be in the same order? (I suppose that I can use Linux has a history of changing PCI discovery order from kernel version to kernel version so without something enforcing order it shouldn't be relied upon.. It is best to use something semi-fixed like the port GUID or regex match of the pci id.. I guess ideally this would be done on boot by a script, if enough information was exported in sysfs .. local_cpus doesn't sound quite right either since I expect that local_cpus will cover only some of the CPUs on a big machine. 
You still need a distance weighting scheme to map out the rest :| Jason From mst at mellanox.co.il Tue Jan 2 21:03:04 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 07:03:04 +0200 Subject: [openib-general] ibv_get_device_list() question In-Reply-To: <20070103045400.GE13404@obsidianresearch.com> References: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> <459AF183.2070909@pathscale.com> <772ED536-8D0E-4FD5-92AC-B7012B733BF7@cisco.com> <20070103045400.GE13404@obsidianresearch.com> Message-ID: <20070103050304.GB20259@mellanox.co.il> > > Failing that, is guaranteed to be the same across > > reboots, and/or the info returned by ibv_get_device_info() guaranteed > > to be in the same order? (I suppose that I can use > > Linux has a history of changing PCI discovery order from kernel > version to kernel version so without something enforcing order it > shouldn't be relied upon.. Hotplug and catastrophic events could also change the naming. -- MST From eitan at sw053.yok.mtl.com Tue Jan 2 21:03:02 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Wed, 3 Jan 2007 07:03:02 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-03:normal completion Message-ID: <200701030503.l03532bQ014965@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Tue_Jan_2_14:42:50_2007 fb3365 ibutils rev = Wed_Dec_27_23:39:30_2006 60aebe Total=297 Pass=296 Fail=1 Pass: 33 Stability IS1-16.topo 33 OsmTest IS1-16.topo 33 OsmStress IS1-16.topo 33 Multicast IS1-16.topo 33 LidMgr IS1-16.topo 32 Pkey IS1-16.topo 11 Stability IS3-loop.topo 11 Stability IS3-128.topo 11 Pkey IS3-128.topo 11 OsmTest IS3-loop.topo 11 OsmTest IS3-128.topo 11 OsmStress IS3-128.topo 11 Multicast IS3-loop.topo 11 Multicast IS3-128.topo 11 LidMgr IS3-128.topo Failures: 1 Pkey IS1-16.topo From k_mahesh85 at yahoo.co.in Tue Jan 2 21:22:04 2007 From: k_mahesh85 at yahoo.co.in (keshetti mahesh) Date: Wed, 3 Jan 2007 05:22:04 +0000 (GMT) Subject: [openib-general] 
[query]maximum no. of connections per session in iser initiator Message-ID: <8737.93378.qm@web8318.mail.in.yahoo.com> hi all, recently i had gone through the iser initiator code in OFED and saw one thing, the maximum no. of connections allowed per session is 8. what is the constraint for this limit. And in the another iscsi transport i.e TCP this variable is only one. whether this limit is imposed by iSCSI or by transport layer(TCP or iSER) ? BTW is there any such limit for the no. of sessions allowed at atime in this initiator? thanks and regards, Mahesh. Send free SMS to your Friends on Mobile from your Yahoo! Messenger. Download Now! http://messenger.yahoo.com/download.php -------------- next part -------------- An HTML attachment was scrubbed... URL: From k_mahesh85 at yahoo.co.in Tue Jan 2 21:26:19 2007 From: k_mahesh85 at yahoo.co.in (keshetti mahesh) Date: Wed, 3 Jan 2007 05:26:19 +0000 (GMT) Subject: [openib-general] [query]maximum no. of connections per session in iser initiator Message-ID: <20070103052619.7519.qmail@web8315.mail.in.yahoo.com> Please CC me the responses to this mail ID thanks and regards, Mahesh. keshetti mahesh wrote: hi all, recently i had gone through the iser initiator code in OFED and saw one thing, the maximum no. of connections allowed per session is 8. what is the constraint for this limit. And in the another iscsi transport i.e TCP this variable is only one. whether this limit is imposed by iSCSI or by transport layer(TCP or iSER) ? BTW is there any such limit for the no. of sessions allowed at atime in this initiator? thanks and regards, Mahesh. Send free SMS to your Friends on Mobile from your Yahoo! Messenger. Download Now! http://messenger.yahoo.com/download.php Send free SMS to your Friends on Mobile from your Yahoo! Messenger. Download Now! http://messenger.yahoo.com/download.php -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From eitan at mellanox.co.il Tue Jan 2 22:21:49 2007 From: eitan at mellanox.co.il (Eitan Zahavi) Date: Wed, 03 Jan 2007 08:21:49 +0200 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167751520.4596.77827.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> Message-ID: <459B4B7D.2030902@mellanox.co.il> Hal Rosenstock wrote: > Hi Eitan, > > I started to look at changes for IB routers and the following in > ib_types.h doesn't look quite right to me: > > static inline boolean_t OSM_API > ib_gid_is_link_local( > IN const ib_gid_t* const p_gid ) > { > return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); > } > > Shouldn't it be either the default subnet prefix or the one supplied in > PortInfo:GidPrefix (which might not be the default one) ? > Hi Hal, I think the function name means : "is the provided GID prefix the NonRoutableSubnetPrefix.?" Maybe it is used incorrectly as if it means "is the given GID is of the local subnet?" The header of the function say: * DESCRIPTION * Returns TRUE if the unicast GID scoping indicates link local, * FALSE otherwise. So it is really a matter of semantics clarity.I see no current usage of the function so you are probably free to redefine it. 
EZ > -- Hal > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From eitan at mellanox.co.il Tue Jan 2 22:23:43 2007 From: eitan at mellanox.co.il (Eitan Zahavi) Date: Wed, 03 Jan 2007 08:23:43 +0200 Subject: [openib-general] [PATCHv2] OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default subnet prefix to determine is link local In-Reply-To: <1167797611.4596.121166.camel@hal.voltaire.com> References: <1167797611.4596.121166.camel@hal.voltaire.com> Message-ID: <459B4BEF.8080805@mellanox.co.il> Hi Hal, I think I misunderstood your previous mail. This patch is correcting the bug you indicated. EZ Hal Rosenstock wrote: > OpenSM/ib_types.h: In ib_gid_is_link_local, only compare /10 to default > subnet prefix to determine is link local > > Signed-off-by: Hal Rosenstock > > diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h > index 573b294..e4ef7e3 100644 > --- a/osm/include/iba/ib_types.h > +++ b/osm/include/iba/ib_types.h > @@ -2168,7 +2168,8 @@ static inline boolean_t OSM_API > ib_gid_is_link_local( > IN const ib_gid_t* const p_gid ) > { > - return( ib_gid_get_subnet_prefix( p_gid ) == IB_DEFAULT_SUBNET_PREFIX ); > + return( ( ib_gid_get_subnet_prefix( p_gid ) & > + CL_HTON64( 0xFFC0000000000000ULL ) ) == IB_DEFAULT_SUBNET_PREFIX ); > } > /* > * PARAMETERS > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From ogerlitz at voltaire.com Wed Jan 3 03:22:09 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 03 Jan 2007 13:22:09 +0200 Subject: [openib-general] SVN deprecation In-Reply-To: References: <4593A31D.50808@voltaire.com>
Message-ID: <459B91E1.7000605@voltaire.com> Jeff Squyres wrote: > My thought was that if people accidentally check out of SVN, there > should be [almost] nothing at the HEAD except a README stating that > everything has moved to git (and instructions if you need to get into > the SVN history) so that it's blatantly obvious that there's nothing new > in SVN anymore. OK, it makes sense, but before doing so, can you educate me what is the procedure to see (ie the equivalent of svn ls) the structure of a deleted SVN tree (ie subset of the repository) and what is the procedure to get/diff between revisions/etc those deleted sources? Or. From kliteyn at dev.mellanox.co.il Wed Jan 3 04:12:30 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 03 Jan 2007 14:12:30 +0200 Subject: [openib-general] [PATCH] osm: TRIVIAL - fixing data types Message-ID: <459B9DAE.3080705@dev.mellanox.co.il> Hi Hal. Some trivial data type fixes Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_ucast_ftree.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c index eb7cf96..e937982 100644 --- a/osm/opensm/osm_ucast_ftree.c +++ b/osm/opensm/osm_ucast_ftree.c @@ -209,7 +209,7 @@ typedef struct ftree_fabric_t_ cl_qmap_t hca_tbl; cl_qmap_t sw_tbl; cl_qmap_t sw_by_tuple_tbl; - uint16_t tree_rank; + uint8_t tree_rank; ftree_sw_t ** leaf_switches; uint32_t leaf_switches_num; uint16_t max_hcas_per_leaf; @@ -1026,7 +1026,7 @@ __osm_ftree_fabric_destroy(ftree_fabric_ /***************************************************/ static void -__osm_ftree_fabric_set_rank(ftree_fabric_t * p_ftree, uint16_t rank) +__osm_ftree_fabric_set_rank(ftree_fabric_t * p_ftree, uint8_t rank) { if (rank > p_ftree->tree_rank) p_ftree->tree_rank = rank; @@ -1034,7 +1034,7 @@ __osm_ftree_fabric_set_rank(ftree_fabric /***************************************************/ -static uint16_t +static uint8_t
__osm_ftree_fabric_get_rank(ftree_fabric_t * p_ftree) { return p_ftree->tree_rank; -- 1.4.4.1.GIT From halr at voltaire.com Wed Jan 3 04:25:36 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 03 Jan 2007 07:25:36 -0500 Subject: [openib-general] [PATCH] osm: TRIVIAL - fixing data types In-Reply-To: <459B9DAE.3080705@dev.mellanox.co.il> References: <459B9DAE.3080705@dev.mellanox.co.il> Message-ID: <1167827135.4596.148531.camel@hal.voltaire.com> On Wed, 2007-01-03 at 07:12, Yevgeny Kliteynik wrote: > Hi Hal. > > Some trivial data type fixes > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. -- Hal From mst at mellanox.co.il Wed Jan 3 04:41:59 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 14:41:59 +0200 Subject: [openib-general] [PATCH 0 of 5] IB/mthca: memory registration fixes In-Reply-To: <20061213180916.GA1689@mellanox.co.il> References: <20061212151039.GJ26613@mellanox.co.il> <20061213180916.GA1689@mellanox.co.il> Message-ID: <20070103124159.GB6019@mellanox.co.il> This is a patch series with various fixes in mthca memory registration. Please consider for 2.6.20. Patches 2 and 3 are what has been posted previously. Patch list 1. mthca_0_fmr_page_fix.patch - fix off-by-one in FMR handling on memfree hardware 2. mthca_1_merge_mr_fmr_on_64bit.patch - merge MR and FMR on Tavor for 64 bit not a fix properly, but a minor patch 3. mthca_2_fast_registration.patch - always write MTTs from CPU. This is an enabler for 4, but also gives a speed improvement for registration 4. mthca_3_alloc_consistent.patch - fix non-cache-coherent architectures by adding dma_sync points and allocating MPTs from cache coherent memory 5. 
mthca_4_dma_align_reserved_mtts.patch - finishes up 4 by making sure reserved MTTs are in cache line of their own -- MST From jsquyres at cisco.com Wed Jan 3 04:46:28 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 3 Jan 2007 07:46:28 -0500 Subject: [openib-general] SVN deprecation In-Reply-To: <459B91E1.7000605@voltaire.com> References: <4593A31D.50808@voltaire.com> <459B91E1.7000605@voltaire.com> Message-ID: <0B3B409D-1B50-4D18-8374-DEF85FBD2534@cisco.com> On Jan 3, 2007, at 6:22 AM, Or Gerlitz wrote: >> My thought was that if people accidentally check out of SVN, there >> should be [almost] nothing at the HEAD except a README stating >> that everything has moved to git (and instructions if you need to >> get into the SVN history) so that it's blatantly obvious that >> there's nothing new in SVN anymore. > > OK, it makes sense, but before doing so, can you educate me what is > the procedure to see (ie the equivalent of svn ls) the structure of > a deleted SVN tree (ie subset of the repositor) and what is the > procedure to get/diff between revsions/etc those deleted sources? Sure. The canonical SVN reference is the SVN book, which you can find here: http://svnbook.red-bean.com/ "svn help ls" and "svn help diff" both show that these commands take - r arguments. For example, you can ask for the ls of a specific revision of the repository. "svn diff" takes a *range* on the -r argument, so you can supply any arbitrary r numbers and it will do the diff between those versions. Additionally, "svn co" also takes the -r argument, so you can checkout any revision you want (and not have to use "svn ls" extensively). See "svn help " and the SVN book for more information. -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mst at mellanox.co.il Wed Jan 3 04:46:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 3 Jan 2007 14:46:30 +0200 Subject: [openib-general] [PATCH 1 of 5] IB/mthca: fix off-by-one in FMR handling on memfree Message-ID: <20070103124630.GC6019@mellanox.co.il> There's an off-by-one in FMR handling on memfree hardware. Example: assume we have 2 entries in scatterlist, 4K each, offset is 4K. current code will return first entry + 4K, where we really want the second entry. Signed-off-by: Michael S. Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_memfree.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -232,7 +232,7 @@ void *mthca_table_find(struct mthca_icm_ list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { - if (chunk->mem[i].length >= offset) { + if (chunk->mem[i].length > offset) { page = chunk->mem[i].page; goto out; } -- MST From mst at mellanox.co.il Wed Jan 3 04:48:44 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 14:48:44 +0200 Subject: [openib-general] [PATCH 2 of 5] IB/mthca: merge MR and FMR space on 64 bit Message-ID: <20070103124844.GD6019@mellanox.co.il> For Tavor, we currently reserve separate MPT and MTT space for FMRs so avoid abusing the vmalloc space on 32 bit kernels. No such problem exists on 64 bit kernels so let's not do it there. This way we have a shared pool for MR and FMR resources, used on demand. This will also make it possible to write MTTs for regular regions directly from driver. Signed-off-by: Michael S. 
Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_mr.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c @@ -761,7 +761,7 @@ void mthca_arbel_fmr_unmap(struct mthca_ int mthca_init_mr_table(struct mthca_dev *dev) { unsigned long addr; - int err, i; + int mpts, mtts, err, i; err = mthca_alloc_init(&dev->mr_table.mpt_alloc, dev->limits.num_mpts, @@ -795,13 +795,21 @@ int mthca_init_mr_table(struct mthca_dev err = -EINVAL; goto err_fmr_mpt; } + mpts = mtts = 1 << i; + } else { + mpts = dev->limits.num_mtt_segs; + mtts = dev->limits.num_mpts; + } + + if (!mthca_is_memfree(dev) && + (dev->mthca_flags & MTHCA_FLAG_FMR)) { addr = pci_resource_start(dev->pdev, 4) + ((pci_resource_len(dev->pdev, 4) - 1) & dev->mr_table.mpt_base); dev->mr_table.tavor_fmr.mpt_base = - ioremap(addr, (1 << i) * sizeof(struct mthca_mpt_entry)); + ioremap(addr, mpts * sizeof(struct mthca_mpt_entry)); if (!dev->mr_table.tavor_fmr.mpt_base) { mthca_warn(dev, "MPT ioremap for FMR failed.\n"); @@ -814,19 +822,21 @@ int mthca_init_mr_table(struct mthca_dev dev->mr_table.mtt_base); dev->mr_table.tavor_fmr.mtt_base = - ioremap(addr, (1 << i) * MTHCA_MTT_SEG_SIZE); + ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE); if (!dev->mr_table.tavor_fmr.mtt_base) { mthca_warn(dev, "MTT ioremap for FMR failed.\n"); err = -ENOMEM; goto err_fmr_mtt; } + } - err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i); + if (dev->limits.fmr_reserved_mtts) { + err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1)); if (err) goto err_fmr_mtt_buddy; /* Prevent regular MRs from using FMR keys */ - err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i); + err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1)); if (err) goto err_reserve_fmr; Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_profile.c 
=================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_profile.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_profile.c @@ -277,7 +277,7 @@ u64 mthca_make_profile(struct mthca_dev * out of the MR pool. They don't use additional memory, but * we assign them as part of the HCA profile anyway. */ - if (mthca_is_memfree(dev)) + if (mthca_is_memfree(dev) || BITS_PER_LONG == 64) dev->limits.fmr_reserved_mtts = 0; else dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts; -- MST From mst at mellanox.co.il Wed Jan 3 04:50:22 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 14:50:22 +0200 Subject: [openib-general] [PATCH 3 of 5] IB/mthca: always fill MTTs from CPU Message-ID: <20070103125022.GE6019@mellanox.co.il> Speed up memory registration by filling in MTTs directly. This reduces the number of FW commands needed to register an MR by at least a factor of 2. This applies to all memfree cards, and to tavor mode on 64 bit systems with the patch I posted earlier. Signed-off-by: Michael S. Tsirkin --- This patch, and the previous one, has been posted previously. 
Issues on cache non coherent CPUs are fixed by following patches Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_dev.h =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_dev.h +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_dev.h @@ -464,6 +464,8 @@ void mthca_uar_free(struct mthca_dev *de int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); +int mthca_write_mtt_size(struct mthca_dev *dev); + struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_mr.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c @@ -243,8 +243,8 @@ void mthca_free_mtt(struct mthca_dev *de kfree(mtt); } -int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, - int start_index, u64 *buffer_list, int list_len) +static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) { struct mthca_mailbox *mailbox; __be64 *mtt_entry; @@ -295,6 +295,84 @@ out: return err; } +void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + u64 __iomem *mtts; + u32 mtt_seg; + int i; + + mtt_seg = mtt->first_seg * MTHCA_MTT_SEG_SIZE; + mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg + start_index * sizeof (u64); + for (i = 0; i < list_len; ++i) { + __be64 mtt_entry = cpu_to_be64(buffer_list[i] | + MTHCA_MTT_FLAG_PRESENT); + mthca_write64_raw(mtt_entry, mtts + i); + } +} + +void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int 
list_len) +{ + __be64 *mtts; + int i; + int s = start_index * sizeof (u64); + + /* For Arbel, all MTTs must fit in the same page. */ + BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); + /* Require full segments */ + BUG_ON(s % MTHCA_MTT_SEG_SIZE); + + mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + + s / MTHCA_MTT_SEG_SIZE); + + BUG_ON(!mtts); + + for (i = 0; i < list_len; ++i) + mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT); +} + +int mthca_write_mtt_size(struct mthca_dev *dev) +{ + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + /* + * Be friendly to WRITE_MTT command + * and leave two empty slots for the + * index and reserved fields of the + * mailbox. + */ + return PAGE_SIZE / sizeof (u64) - 2; + + /* For Arbel, all MTTs must fit in the same page. */ + return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff; +} + +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + int size = mthca_write_mtt_size(dev); + int chunk; + + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); + + while (list_len > 0) { + chunk = min(size, list_len); + if (mthca_is_memfree(dev)) + mthca_arbel_write_mtt_seg(dev, mtt, start_index, + buffer_list, chunk); + else + mthca_tavor_write_mtt_seg(dev, mtt, start_index, + buffer_list, chunk); + + list_len -= chunk; + start_index += chunk; + buffer_list += chunk; + } + + return 0; +} + static inline u32 tavor_hw_index_to_key(u32 ind) { return ind; Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_provider.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_provider.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1015,6 +1015,7 @@ static struct ib_mr *mthca_reg_user_mr(s int shift, n, len; int i, j, k; int err = 0; + int 
write_mtt_size; shift = ffs(region->page_size) - 1; @@ -1040,6 +1041,8 @@ static struct ib_mr *mthca_reg_user_mr(s i = n = 0; + write_mtt_size = min(mthca_write_mtt_size(dev), (int)(PAGE_SIZE / sizeof *pages)); + list_for_each_entry(chunk, &region->chunk_list, list) for (j = 0; j < chunk->nmap; ++j) { len = sg_dma_len(&chunk->page_list[j]) >> shift; @@ -1047,14 +1050,11 @@ static struct ib_mr *mthca_reg_user_mr(s pages[i++] = sg_dma_address(&chunk->page_list[j]) + region->page_size * k; /* - * Be friendly to WRITE_MTT command - * and leave two empty slots for the - * index and reserved fields of the - * mailbox. + * Be friendly to write_mtt and pass it chunks + * of appropriate size. */ - if (i == PAGE_SIZE / sizeof (u64) - 2) { - err = mthca_write_mtt(dev, mr->mtt, - n, pages, i); + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); if (err) goto mtt_done; n += i; -- MST From mst at mellanox.co.il Wed Jan 3 04:50:50 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 14:50:50 +0200 Subject: [openib-general] [PATCH 4 of 5] IB/mthca: fix non-cache-coherent CPUs with memfree Message-ID: <20070103125050.GF6019@mellanox.co.il> Fix non-cache-coherent CPUs with memfree HCAs. We allocate the MTT table with alloc_pages() and then do pci_map_sg(), so we must call pci_dma_sync_sg after the CPU writes to the MTT table (this works since device never writes the MTTs on memfree). For MPTs, both the device and CPU might write there, so we must allocate dma coherent memory for these. Signed-off-by: Michael S.
Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_memfree.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -35,6 +35,8 @@ */ #include +#include +#include #include "mthca_memfree.h" #include "mthca_dev.h" @@ -58,22 +60,31 @@ struct mthca_user_db_table { } page[0]; }; -void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) +void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent) { struct mthca_icm_chunk *chunk, *tmp; + void *buf; int i; if (!icm) return; list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { - if (chunk->nsg > 0) - pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, - PCI_DMA_BIDIRECTIONAL); - - for (i = 0; i < chunk->npages; ++i) - __free_pages(chunk->mem[i].page, - get_order(chunk->mem[i].length)); + if (coherent) + for (i = 0; i < chunk->npages; ++i) { + buf = lowmem_page_address(chunk->mem[i].page); + dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, + buf, sg_dma_address(&chunk->mem[i])); + } + else { + if (chunk->nsg > 0) + pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + for (i = 0; i < chunk->npages; ++i) + __free_pages(chunk->mem[i].page, + get_order(chunk->mem[i].length)); + } kfree(chunk); } @@ -81,12 +92,41 @@ void mthca_free_icm(struct mthca_dev *de kfree(icm); } +static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) +{ + mem->page = alloc_pages(gfp_mask, order); + if (!mem->page) + return -ENOMEM; + + mem->length = PAGE_SIZE << order; + mem->offset = 0; + return 0; +} + +static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, + int order, gfp_t gfp_mask) +{ + void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem), + gfp_mask); + if (!buf) + return -ENOMEM; + + sg_set_buf(mem, buf, PAGE_SIZE << order); + BUG_ON(mem->offset); + 
sg_dma_len(mem) = PAGE_SIZE << order; + return 0; +} + struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, - gfp_t gfp_mask) + gfp_t gfp_mask, int coherent) { struct mthca_icm *icm; struct mthca_icm_chunk *chunk = NULL; int cur_order; + int ret; + + /* We use sg_set_buf for coherent allocs, which assumes low memory */ + BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); if (!icm) @@ -112,21 +152,28 @@ struct mthca_icm *mthca_alloc_icm(struct while (1 << cur_order > npages) --cur_order; - chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order); - if (chunk->mem[chunk->npages].page) { - chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order; - chunk->mem[chunk->npages].offset = 0; + if (coherent) + ret = mthca_alloc_icm_coherent(&dev->pdev->dev, + &chunk->mem[chunk->npages], + cur_order, gfp_mask); + else + ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages], + cur_order, gfp_mask); - if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) { + if (!ret) { + ++chunk->npages; + + if (!coherent && chunk->npages == MTHCA_ICM_CHUNK_LEN) { chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); if (chunk->nsg <= 0) goto fail; + } + if (chunk->npages == MTHCA_ICM_CHUNK_LEN) chunk = NULL; - } npages -= 1 << cur_order; } else { @@ -136,7 +183,7 @@ struct mthca_icm *mthca_alloc_icm(struct } } - if (chunk) { + if (!coherent && chunk) { chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); @@ -148,7 +195,7 @@ struct mthca_icm *mthca_alloc_icm(struct return icm; fail: - mthca_free_icm(dev, icm); + mthca_free_icm(dev, icm, coherent); return NULL; } @@ -167,7 +214,7 @@ int mthca_table_get(struct mthca_dev *de table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | - __GFP_NOWARN); + __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; goto out; @@ -175,7 +222,7 @@ int mthca_table_get(struct mthca_dev *de if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE, &status) || status) { - mthca_free_icm(dev, table->icm[i]); + mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; ret = -ENOMEM; goto out; @@ -204,16 +251,16 @@ void mthca_table_put(struct mthca_dev *d mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, &status); - mthca_free_icm(dev, table->icm[i]); + mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; } mutex_unlock(&table->mutex); } -void *mthca_table_find(struct mthca_icm_table *table, int obj) +void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle) { - int idx, offset, i; + int idx, offset, dma_offset, i; struct mthca_icm_chunk *chunk; struct mthca_icm *icm; struct page *page = NULL; @@ -225,13 +272,22 @@ void *mthca_table_find(struct mthca_icm_ idx = (obj & (table->num_obj - 1)) * table->obj_size; icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE]; - offset = idx % MTHCA_TABLE_CHUNK_SIZE; + dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE; if (!icm) goto out; list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { + if (dma_handle && dma_offset >= 0) { + if (sg_dma_len(&chunk->mem[i]) > dma_offset) + *dma_handle = sg_dma_address(&chunk->mem[i]) + + dma_offset; + dma_offset -= sg_dma_len(&chunk->mem[i]); + } + /* DMA mapping can merge pages but not split them, + * so if we found the page, dma_handle has already + * been assigned to. 
*/ if (chunk->mem[i].length > offset) { page = chunk->mem[i].page; goto out; @@ -283,7 +339,7 @@ void mthca_table_put_range(struct mthca_ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, u64 virt, int obj_size, int nobj, int reserved, - int use_lowmem) + int use_lowmem, int use_coherent) { struct mthca_icm_table *table; int num_icm; @@ -302,6 +358,7 @@ struct mthca_icm_table *mthca_alloc_icm_ table->num_obj = nobj; table->obj_size = obj_size; table->lowmem = use_lowmem; + table->coherent = use_coherent; mutex_init(&table->mutex); for (i = 0; i < num_icm; ++i) @@ -314,12 +371,12 @@ struct mthca_icm_table *mthca_alloc_icm_ table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | - __GFP_NOWARN); + __GFP_NOWARN, use_coherent); if (!table->icm[i]) goto err; if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE, &status) || status) { - mthca_free_icm(dev, table->icm[i]); + mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; goto err; } @@ -339,7 +396,7 @@ err: mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, &status); - mthca_free_icm(dev, table->icm[i]); + mthca_free_icm(dev, table->icm[i], table->coherent); } kfree(table); @@ -357,7 +414,7 @@ void mthca_free_icm_table(struct mthca_d mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, &status); - mthca_free_icm(dev, table->icm[i]); + mthca_free_icm(dev, table->icm[i], table->coherent); } kfree(table); Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_main.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c @@ -379,7 +379,7 @@ static int mthca_load_fw(struct mthca_de mdev->fw.arbel.fw_icm = mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages, - GFP_HIGHUSER | __GFP_NOWARN); + 
GFP_HIGHUSER | __GFP_NOWARN, 0); if (!mdev->fw.arbel.fw_icm) { mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); return -ENOMEM; @@ -412,7 +412,7 @@ err_unmap_fa: mthca_UNMAP_FA(mdev, &status); err_free: - mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); return err; } @@ -441,7 +441,7 @@ static int mthca_init_icm(struct mthca_d (unsigned long long) aux_pages << 2); mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages, - GFP_HIGHUSER | __GFP_NOWARN); + GFP_HIGHUSER | __GFP_NOWARN, 0); if (!mdev->fw.arbel.aux_icm) { mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n"); return -ENOMEM; @@ -467,7 +467,8 @@ static int mthca_init_icm(struct mthca_d mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, MTHCA_MTT_SEG_SIZE, mdev->limits.num_mtt_segs, - mdev->limits.reserved_mtts, 1); + mdev->limits.reserved_mtts, + 1, 0); if (!mdev->mr_table.mtt_table) { mthca_err(mdev, "Failed to map MTT context memory, aborting.\n"); err = -ENOMEM; @@ -477,7 +478,8 @@ static int mthca_init_icm(struct mthca_d mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base, dev_lim->mpt_entry_sz, mdev->limits.num_mpts, - mdev->limits.reserved_mrws, 1); + mdev->limits.reserved_mrws, + 1, 1); if (!mdev->mr_table.mpt_table) { mthca_err(mdev, "Failed to map MPT context memory, aborting.\n"); err = -ENOMEM; @@ -487,7 +489,8 @@ static int mthca_init_icm(struct mthca_d mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base, dev_lim->qpc_entry_sz, mdev->limits.num_qps, - mdev->limits.reserved_qps, 0); + mdev->limits.reserved_qps, + 0, 0); if (!mdev->qp_table.qp_table) { mthca_err(mdev, "Failed to map QP context memory, aborting.\n"); err = -ENOMEM; @@ -497,7 +500,8 @@ static int mthca_init_icm(struct mthca_d mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base, dev_lim->eqpc_entry_sz, mdev->limits.num_qps, - mdev->limits.reserved_qps, 0); + 
mdev->limits.reserved_qps, + 0, 0); if (!mdev->qp_table.eqp_table) { mthca_err(mdev, "Failed to map EQP context memory, aborting.\n"); err = -ENOMEM; @@ -507,7 +511,7 @@ static int mthca_init_icm(struct mthca_d mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base, MTHCA_RDB_ENTRY_SIZE, mdev->limits.num_qps << - mdev->qp_table.rdb_shift, + mdev->qp_table.rdb_shift, 0, 0, 0); if (!mdev->qp_table.rdb_table) { mthca_err(mdev, "Failed to map RDB context memory, aborting\n"); @@ -518,7 +522,8 @@ static int mthca_init_icm(struct mthca_d mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, dev_lim->cqc_entry_sz, mdev->limits.num_cqs, - mdev->limits.reserved_cqs, 0); + mdev->limits.reserved_cqs, + 0, 0); if (!mdev->cq_table.table) { mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); err = -ENOMEM; @@ -530,7 +535,8 @@ static int mthca_init_icm(struct mthca_d mthca_alloc_icm_table(mdev, init_hca->srqc_base, dev_lim->srq_entry_sz, mdev->limits.num_srqs, - mdev->limits.reserved_srqs, 0); + mdev->limits.reserved_srqs, + 0, 0); if (!mdev->srq_table.table) { mthca_err(mdev, "Failed to map SRQ context memory, " "aborting.\n"); @@ -550,7 +556,7 @@ static int mthca_init_icm(struct mthca_d mdev->limits.num_amgms, mdev->limits.num_mgms + mdev->limits.num_amgms, - 0); + 0, 0); if (!mdev->mcg_table.table) { mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); err = -ENOMEM; @@ -588,7 +594,7 @@ err_unmap_aux: mthca_UNMAP_ICM_AUX(mdev, &status); err_free_aux: - mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); return err; } @@ -609,7 +615,7 @@ static void mthca_free_icms(struct mthca mthca_unmap_eq_icm(mdev); mthca_UNMAP_ICM_AUX(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); } static int mthca_init_arbel(struct mthca_dev *mdev) @@ -693,7 +699,7 @@ err_free_icm: err_stop_fw: mthca_UNMAP_FA(mdev, &status); - 
mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); err_disable: if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) @@ -712,7 +718,7 @@ static void mthca_close_hca(struct mthca mthca_free_icms(mdev); mthca_UNMAP_FA(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) mthca_DISABLE_LAM(mdev, &status); Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.h =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_memfree.h +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -69,6 +69,7 @@ struct mthca_icm_table { int num_obj; int obj_size; int lowmem; + int coherent; struct mutex mutex; struct mthca_icm *icm[0]; }; @@ -82,17 +83,17 @@ struct mthca_icm_iter { struct mthca_dev; struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, - gfp_t gfp_mask); -void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm); + gfp_t gfp_mask, int coherent); +void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent); struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, u64 virt, int obj_size, int nobj, int reserved, - int use_lowmem); + int use_lowmem, int use_coherent); void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table); int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); -void *mthca_table_find(struct mthca_icm_table *table, int obj); +void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle); int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table, int start, int end); void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c 
=================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_mr.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c @@ -315,6 +315,7 @@ void mthca_arbel_write_mtt_seg(struct mt int start_index, u64 *buffer_list, int list_len) { __be64 *mtts; + dma_addr_t dma_handle; int i; int s = start_index * sizeof (u64); @@ -324,12 +325,14 @@ void mthca_arbel_write_mtt_seg(struct mt BUG_ON(s % MTHCA_MTT_SEG_SIZE); mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + - s / MTHCA_MTT_SEG_SIZE); + s / MTHCA_MTT_SEG_SIZE, &dma_handle); BUG_ON(!mtts); for (i = 0; i < list_len; ++i) mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT); + + dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof(u64), DMA_TO_DEVICE); } int mthca_write_mtt_size(struct mthca_dev *dev) @@ -602,7 +605,7 @@ int mthca_fmr_alloc(struct mthca_dev *de if (err) goto err_out_mpt_free; - mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key); + mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL); BUG_ON(!mr->mem.arbel.mpt); } else mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + @@ -616,7 +619,8 @@ int mthca_fmr_alloc(struct mthca_dev *de if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, - mr->mtt->first_seg); + mr->mtt->first_seg, + &mr->mem.arbel.dma_handle); BUG_ON(!mr->mem.arbel.mtts); } else mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; @@ -790,6 +794,9 @@ int mthca_arbel_map_phys_fmr(struct ib_f fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] | MTHCA_MTT_FLAG_PRESENT); + dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle, + list_len * sizeof(u64), DMA_TO_DEVICE); + fmr->mem.arbel.mpt->key = cpu_to_be32(key); fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_provider.h 
=================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_provider.h +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_provider.h @@ -89,6 +89,7 @@ struct mthca_fmr { struct { struct mthca_mpt_entry *mpt; __be64 *mtts; + dma_addr_t dma_handle; } arbel; } mem; }; -- MST From mst at mellanox.co.il Wed Jan 3 04:51:19 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 14:51:19 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line Message-ID: <20070103125119.GG6019@mellanox.co.il> MTTs are allocated in non-cache-coherent memory, so we must give reserved MTTs their own cache line, to prevent both device and CPU from writing into the same cache line at the same time. Signed-off-by: Michael S. Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_main.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c @@ -464,6 +464,10 @@ static int mthca_init_icm(struct mthca_d goto err_unmap_aux; } + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ + mdev->limits.reserved_mtts = max(dma_get_cache_alignment() / (int)sizeof(u64), + mdev->limits.reserved_mtts); + mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, MTHCA_MTT_SEG_SIZE, mdev->limits.num_mtt_segs, -- MST From mst at mellanox.co.il Wed Jan 3 04:53:28 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 14:53:28 +0200 Subject: [openib-general] [PATCH cosmetic] IB/verbs: include linux/kref directly Message-ID: <20070103125328.GI6019@mellanox.co.il> ib_verbs uses struct kref, so it should include linux/kref.h directly. Signed-off-by: Michael S. 
Tsirkin --- Index: latest/include/rdma/ib_verbs.h =================================================================== --- latest.orig/include/rdma/ib_verbs.h 2006-06-18 09:04:34.000000000 +0300 +++ latest/include/rdma/ib_verbs.h 2006-06-18 17:03:10.423216096 +0300 @@ -47,6 +47,7 @@ #include #include #include +#include union ib_gid { u8 raw[16]; -- MST From swise at opengridcomputing.com Wed Jan 3 06:25:48 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 03 Jan 2007 08:25:48 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <20061224084925.GD15106@mellanox.co.il> References: <20061214135233.21159.78613.stgit@dell3.ogc.int> <20061214135303.21159.61880.stgit@dell3.ogc.int> <20061224084925.GD15106@mellanox.co.il> Message-ID: <1167834348.4187.3.camel@stevo-desktop> > > @@ -1373,7 +1374,7 @@ int ib_peek_cq(struct ib_cq *cq, int wc_ > > static inline int ib_req_notify_cq(struct ib_cq *cq, > > enum ib_cq_notify cq_notify) > > { > > - return cq->device->req_notify_cq(cq, cq_notify); > > + return cq->device->req_notify_cq(cq, cq_notify, NULL); > > } > > > > /** > > Can't say I like this adding overhead in data path operations (and note this > can't be optimized out). And kernel consumers work without passing it in, so it > hurts kernel code even for Chelsio. Granted, the cost is small here, but these > things do tend to add up. > > It seems all Chelsio needs is to pass in a consumer index - so, how about a new > entry point? Something like void set_cq_udata(struct ib_cq *cq, struct ib_udata *udata)? > Adding a new entry point would hurt chelsio's user mode performance if if then requires 2 kernel transitions to rearm the cq. Passing in user data is sort of SOP for these sorts of verbs. How much does passing one more param cost for kernel users? From mst at mellanox.co.il Wed Jan 3 06:41:53 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 3 Jan 2007 16:41:53 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167834348.4187.3.camel@stevo-desktop> References: <1167834348.4187.3.camel@stevo-desktop> Message-ID: <20070103144153.GN6019@mellanox.co.il> > > > @@ -1373,7 +1374,7 @@ int ib_peek_cq(struct ib_cq *cq, int wc_ > > > static inline int ib_req_notify_cq(struct ib_cq *cq, > > > enum ib_cq_notify cq_notify) > > > { > > > - return cq->device->req_notify_cq(cq, cq_notify); > > > + return cq->device->req_notify_cq(cq, cq_notify, NULL); > > > } > > > > > > /** > > > > Can't say I like this adding overhead in data path operations (and note this > > can't be optimized out). And kernel consumers work without passing it in, so it > > hurts kernel code even for Chelsio. Granted, the cost is small here, but these > > things do tend to add up. > > > > It seems all Chelsio needs is to pass in a consumer index - so, how about a new > > entry point? Something like void set_cq_udata(struct ib_cq *cq, struct ib_udata *udata)? > > > > Adding a new entry point would hurt chelsio's user mode performance if > if then requires 2 kernel transitions to rearm the cq. No, it won't need 2 transitions - just an extra function call, so it won't hurt performance - it would improve performance. ib_uverbs_req_notify_cq would call ib_uverbs_req_notify_cq() { ib_set_cq_udata(cq, udata) ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); } This way kernel consumers don't incur any overhead, and in userspace users extra function call is dwarfed by system call overhead. > Passing in user data is sort of SOP for these sorts of verbs. I don't see other examples. Where we did pass extra user data is in non-data pass verbs such as create QP. This is most inner tight loop in many ULPs, so we should be very careful about adding code there - these things do add up. See recent IRQ API update in kernel. 
> How much does passing one more param cost for kernel users? Dunno. I just reviewed the code. It really should be up to patch submitter to check the performance effect of his patch, if there might be any. -- MST From swise at opengridcomputing.com Wed Jan 3 06:56:12 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 03 Jan 2007 08:56:12 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <20070103144153.GN6019@mellanox.co.il> References: <1167834348.4187.3.camel@stevo-desktop> <20070103144153.GN6019@mellanox.co.il> Message-ID: <1167836172.4187.9.camel@stevo-desktop> > > > > > > It seems all Chelsio needs is to pass in a consumer index - so, how about a new > > > entry point? Something like void set_cq_udata(struct ib_cq *cq, struct ib_udata *udata)? > > > > > > > Adding a new entry point would hurt chelsio's user mode performance if > > it then requires 2 kernel transitions to rearm the cq. > > No, it won't need 2 transitions - just an extra function call, > so it won't hurt performance - it would improve performance. > > ib_uverbs_req_notify_cq would call > > ib_uverbs_req_notify_cq() > { > ib_set_cq_udata(cq, udata) > ib_req_notify_cq(cq, cmd.solicited_only ? > IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); > } > ib_set_cq_udata() would transition into the kernel to pass in the consumer's index. In addition, ib_req_notify_cq would also transition into the kernel since it's not a bypass function for chelsio. > This way kernel consumers don't incur any overhead, > and in userspace users extra function call is dwarfed > by system call overhead. > > > Passing in user data is sort of SOP for these sorts of verbs. > > I don't see other examples. Where we did pass extra user data > is in non-data path verbs such as create QP. > > This is most inner tight loop in many ULPs, so we should be very careful > about adding code there - these things do add up. > See recent IRQ API update in kernel. Roland, do you have any comments on this?
You previously indicated these patches were good to go once chelsio's ethernet driver gets pulled in. > > How much does passing one more param cost for kernel users? > > Donnu. I just reviewed the code. > It really should be up to patch submitter to check the performance > effect of his patch, if there might be any. I've run this code with mthca and didn't notice any performance degradation, but I wasn't specifically measuring cq_poll overhead in a tight loop... From mst at mellanox.co.il Wed Jan 3 07:00:13 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 17:00:13 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167836172.4187.9.camel@stevo-desktop> References: <1167836172.4187.9.camel@stevo-desktop> Message-ID: <20070103150013.GO6019@mellanox.co.il> > > > > No, it won't need 2 transitions - just an extra function call, > > so it won't hurt performance - it would improve performance. > > > > ib_uverbs_req_notify_cq would call > > > > ib_uverbs_req_notify_cq() > > { > > ib_set_cq_udata(cq, udata) > > ib_req_notify_cq(cq, cmd.solicited_only ? > > IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); > > } > > > > ib_set_cq_udata() would transition into the kernel to pass in the > consumer's index. In addition, ib_req_notify_cq would also transition > into the kernel since its not a bypass function for chelsio. We misunderstand each other. ib_uverbs_req_notify_cq is in drivers/infiniband/core/uverbs_cmd.c - all this code runs inside the IB_USER_VERBS_CMD_REQ_NOTIFY_CQ command, so there is a single user to kernel transition. -- MST From mst at mellanox.co.il Wed Jan 3 07:02:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 3 Jan 2007 17:02:30 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167836172.4187.9.camel@stevo-desktop> References: <1167836172.4187.9.camel@stevo-desktop> Message-ID: <20070103150230.GP6019@mellanox.co.il> > I've run this code with mthca and didn't notice any performance > degradation, but I wasn't specifically measuring cq_poll overhead in a > tight loop... We were speaking about ib_req_notify_cq here, actually, not cq poll. So what was tested? -- MST From swise at opengridcomputing.com Wed Jan 3 07:06:02 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 03 Jan 2007 09:06:02 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <20070103150230.GP6019@mellanox.co.il> References: <1167836172.4187.9.camel@stevo-desktop> <20070103150230.GP6019@mellanox.co.il> Message-ID: <1167836762.4187.15.camel@stevo-desktop> On Wed, 2007-01-03 at 17:02 +0200, Michael S. Tsirkin wrote: > > I've run this code with mthca and didn't notice any performance > > degradation, but I wasn't specifically measuring cq_poll overhead in a > > tight loop... > > We were speaking about ib_req_notify_cq here, actually, not cq poll. > So what was tested? > Sorry, I meant req_notify. I didn't specifically measure the cost of req_notify before and after this change. I've been running the user mode perftest programs mainly. From swise at opengridcomputing.com Wed Jan 3 07:07:21 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 03 Jan 2007 09:07:21 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <20070103150013.GO6019@mellanox.co.il> References: <1167836172.4187.9.camel@stevo-desktop> <20070103150013.GO6019@mellanox.co.il> Message-ID: <1167836841.4187.18.camel@stevo-desktop> On Wed, 2007-01-03 at 17:00 +0200, Michael S. 
Tsirkin wrote: > > > > > > No, it won't need 2 transitions - just an extra function call, > > > so it won't hurt performance - it would improve performance. > > > > > > ib_uverbs_req_notify_cq would call > > > > > > ib_uverbs_req_notify_cq() > > > { > > > ib_set_cq_udata(cq, udata) > > > ib_req_notify_cq(cq, cmd.solicited_only ? > > > IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); > > > } > > > > > > > ib_set_cq_udata() would transition into the kernel to pass in the > > consumer's index. In addition, ib_req_notify_cq would also transition > > into the kernel since its not a bypass function for chelsio. > > We misunderstand each other. > > ib_uverbs_req_notify_cq is in drivers/infiniband/core/uverbs_cmd.c - > all this code runs inside the IB_USER_VERBS_CMD_REQ_NOTIFY_CQ command, > so there is a single user to kernel transition. > Oh I see. This seems like a lot of extra code to avoid passing one extra arg to the driver's req_notify_cq verb. I'd appreciate other folk's input on how important they think this is. If you insist, then I'll run some tests specifically in kernel mode and see how this affects mthca's req_notify performance. Steve. From mst at mellanox.co.il Wed Jan 3 07:10:16 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 17:10:16 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167836762.4187.15.camel@stevo-desktop> References: <1167836762.4187.15.camel@stevo-desktop> Message-ID: <20070103151015.GQ6019@mellanox.co.il> > > > I've run this code with mthca and didn't notice any performance > > > degradation, but I wasn't specifically measuring cq_poll overhead in a > > > tight loop... > > > > We were speaking about ib_req_notify_cq here, actually, not cq poll. > > So what was tested? > > > > Sorry, I meant req_notify. I didn't specifically measure the cost of > req_notify before and after this change. > > I've been running the user mode perftest programs mainly. 
So, it's not really activated a lot there. You want something like IPoIB BW test. -- MST From mst at mellanox.co.il Wed Jan 3 07:18:23 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 17:18:23 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167836841.4187.18.camel@stevo-desktop> References: <1167836172.4187.9.camel@stevo-desktop> <20070103150013.GO6019@mellanox.co.il> <1167836841.4187.18.camel@stevo-desktop> Message-ID: <20070103151823.GR6019@mellanox.co.il> > > > > No, it won't need 2 transitions - just an extra function call, > > > > so it won't hurt performance - it would improve performance. > > > > > > > > ib_uverbs_req_notify_cq would call > > > > > > > > ib_uverbs_req_notify_cq() > > > > { > > > > ib_set_cq_udata(cq, udata) > > > > ib_req_notify_cq(cq, cmd.solicited_only ? > > > > IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); > > > > } > > > > > > > > > > ib_set_cq_udata() would transition into the kernel to pass in the > > > consumer's index. In addition, ib_req_notify_cq would also transition > > > into the kernel since its not a bypass function for chelsio. > > > > We misunderstand each other. > > > > ib_uverbs_req_notify_cq is in drivers/infiniband/core/uverbs_cmd.c - > > all this code runs inside the IB_USER_VERBS_CMD_REQ_NOTIFY_CQ command, > > so there is a single user to kernel transition. > > > > Oh I see. > > This seems like a lot of extra code to avoid passing one extra arg to > the driver's req_notify_cq verb. I'd appreciate other folk's input on > how important they think this is. > > If you insist, then I'll run some tests specifically in kernel mode and > see how this affects mthca's req_notify performance. This might be an interesting datapoint. 
-- MST From kliteyn at dev.mellanox.co.il Wed Jan 3 07:35:34 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 03 Jan 2007 17:35:34 +0200 Subject: [openib-general] [PATCH] osm: Fixing bug in fat-tree routing error path Message-ID: <459BCD46.5040805@dev.mellanox.co.il> Hi Hal. Fixing a bug in fat-tree: Unlike UPDN or File engines, the second stage of fat-tree engine (fdb tables) can't be run if the first stage of the algorithm (ranking and indexing) failed. Adding flag to let the routing know that the first stage has failed. Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_ucast_ftree.c | 10 +++++++++- 1 files changed, 9 insertions(+), 1 deletions(-) diff --git a/osm/opensm/osm_ucast_ftree.c b/osm/opensm/osm_ucast_ftree.c index e937982..cb40ab6 100644 --- a/osm/opensm/osm_ucast_ftree.c +++ b/osm/opensm/osm_ucast_ftree.c @@ -215,6 +215,7 @@ typedef struct ftree_fabric_t_ uint16_t max_hcas_per_leaf; cl_pool_t sw_fwd_tbl_pool; uint16_t lft_max_lid_ho; + boolean_t fabric_built; } ftree_fabric_t; /*************************************************** @@ -1008,6 +1009,7 @@ __osm_ftree_fabric_clear(ftree_fabric_t p_ftree->leaf_switches_num = 0; p_ftree->leaf_switches = NULL; + p_ftree->fabric_built = FALSE; } /* __osm_ftree_fabric_destroy() */ @@ -3051,6 +3053,8 @@ __osm_ftree_construct_fabric( "Clearing FatTree Fabric data structures\n"); __osm_ftree_fabric_clear(p_ftree); } + else + p_ftree->fabric_built = TRUE; osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "__osm_ftree_construct_fabric: \n" @@ -3061,7 +3065,7 @@ __osm_ftree_construct_fabric( OSM_LOG_EXIT(&p_ftree->p_osm->log); return status; -} +} /* __osm_ftree_construct_fabric() */ /*************************************************** ***************************************************/ @@ -3074,6 +3078,9 @@ __osm_ftree_do_routing( OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_do_routing); + if (!p_ftree->fabric_built) + goto Exit; + osm_log(&p_ftree->p_osm->log, 
OSM_LOG_VERBOSE,"__osm_ftree_do_routing: " "Starting FatTree routing\n"); @@ -3094,6 +3101,7 @@ __osm_ftree_do_routing( osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,"__osm_ftree_do_routing: " "FatTree routing is done\n"); + Exit: OSM_LOG_EXIT(&p_ftree->p_osm->log); return 0; } -- 1.4.4.1.GIT From monis at voltaire.com Wed Jan 3 07:42:10 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 03 Jan 2007 17:42:10 +0200 Subject: [openib-general] CQ error handling in IPoIB In-Reply-To: <4FB1BCCAE6CAED44A1DC005B1DE06119171041@EPEXCH2.qlogic.org> References: <4FB1BCCAE6CAED44A1DC005B1DE06119171041@EPEXCH2.qlogic.org> Message-ID: <459BCED2.90006@voltaire.com> Todd Rimmer wrote: >>From: Moni Shoua >>Sent: Tuesday, January 02, 2007 11:31 AM >>To: openib-general at openib.org >>Subject: [openib-general] CQ error handling in IPoIB >> >>Hi, >>I have a question regarding error handling in IPoIB. >> >>The spec says... >> >>When a CQ encounters an error, in order to be able to use the CQ > > again, > >>the consumer should: >>* Destroy all the QPs that are attached to the CQ >>* Destroy the CQ >>* Recreate the CQ through the Create Completion Queue verb >> >>While (at least one part of) the code does... >> >>static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc >>*wc) >>{ >> ... >> ... >> ... >> if (wc->status != IB_WC_SUCCESS && >> wc->status != IB_WC_WR_FLUSH_ERR) >> ipoib_warn(priv, "failed send event " >> "(status=%d, wrid=%d vend_err %x)\n", >> wc->status, wr_id, wc->vendor_err); >>} >> > > > In this context the spec is referring to CQ errors, not work request > errors. For example, CQ overflow is considered a CQ error and would > require the procedure you describe above (destroy QPs, CQ, etc). > > However a work request error is a WQE or QP error. As such the CQ does > not need to be destroyed. Rather the recovery will be limited to QP > level actions. 
Typically the QP has moved to the error state and the QP > must be reset and moved back to RTS to resume operation (or the QP must > be destroyed and recreated). > > If you check section 10.10.3.4 of IBTA 1.2 you will see a list of > possible errors on a UD QP. Notice that the errors all involve Local > Protection or Operation errors. Hence they cannot be caused by a remote > node. Rather, they are only caused by invalid local requests (by IPoIB > in this case) or possibly by hardware or OS problems (memory stomps, > multi-bit undetected memory or bus errors, HCA hardware problem, etc). > > As you indicate, when such error occur, the driver should recreate or > reset the QP. > > Todd Rimmer > Thanks for the detailed answer. I see my mistake in bringing that quote from the spec but the question was answered even though. From ishai at dev.mellanox.co.il Wed Jan 3 07:57:42 2007 From: ishai at dev.mellanox.co.il (ishai) Date: Wed, 03 Jan 2007 17:57:42 +0200 Subject: [openib-general] [PATCH 1 of 5] IB/mthca: fix off-by-one in FMR handling on memfree In-Reply-To: <20070103124630.GC6019@mellanox.co.il> References: <20070103124630.GC6019@mellanox.co.il> Message-ID: <459BD276.3030307@dev.mellanox.co.il> Hi MST and Roland, There is a chance that this is the core of the 32-bit OS on 64-bit platforms bug we had. I run a certain test (copy 1G size file to the SRP disk) with and without this patch. With the patch I got a panic, and without it the test passed. Ishai Michael S. Tsirkin wrote: > There's an off-by-one in FMR handling on memfree hardware. > > Example: assume we have 2 entries in scatterlist, 4K each, offset > is 4K. current code will return first entry + 4K, where we > really want the second entry. > > Signed-off-by: Michael S. 
Tsirkin > > --- > > Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.c > =================================================================== > --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_memfree.c > +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_memfree.c > @@ -232,7 +232,7 @@ void *mthca_table_find(struct mthca_icm_ > > list_for_each_entry(chunk, &icm->chunk_list, list) { > for (i = 0; i < chunk->npages; ++i) { > - if (chunk->mem[i].length >= offset) { > + if (chunk->mem[i].length > offset) { > page = chunk->mem[i].page; > goto out; > } > > From mst at mellanox.co.il Wed Jan 3 08:13:26 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 18:13:26 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20061231190942.GB32485@mellanox.co.il> References: <20061231190942.GB32485@mellanox.co.il> Message-ID: <20070103161326.GS6019@mellanox.co.il> > In IPoIB CM code, I have a common CQ shared by multiple QPs. > To track connection usage, I need a way to get at some per-QP context > upon the completion, and I would like to avoid allocating > context object per work request just to stick a QP pointer into it. > With this code, I can just use wc->qp->qp_context. BTW, I just pushed this IPoIB CM code out to staging. I'll post an updated patch RSN for -mm inclusion, but meanwhile you can look at an example here: git://git.openfabrics.org/git/~mst/linux-2.6/.git ipoib_cm_branch_2_6_20 Look under drivers/infiniband/ulp/ipoib/ipoib_cm.c -- MST From mst at mellanox.co.il Wed Jan 3 08:18:52 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 18:18:52 +0200 Subject: [openib-general] DNS changes In-Reply-To: References: Message-ID: <20070103161852.GT6019@mellanox.co.il> > Also note that the name "staging.openfabrics.org" will eventually go > away -- at some point after all the new names are in place and the > dust has settled. 
There will be adequate warning before this occurs > (so that you can get new git checkouts, etc.), so consider this an > early warning. I actually like the "staging" name. This is kind of synonymous with "preparation", "testing", and this fits the fact that code there is under development. And, it's shorter to say "I pushed this to staging" than "I pushed this to openfabrics.org". Can we keep the staging indefinitely, name too? -- MST From halr at voltaire.com Wed Jan 3 08:38:17 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 03 Jan 2007 11:38:17 -0500 Subject: [openib-general] [PATCH] osm: Fixing bug in fat-tree routing error path In-Reply-To: <459BCD46.5040805@dev.mellanox.co.il> References: <459BCD46.5040805@dev.mellanox.co.il> Message-ID: <1167842296.4564.3753.camel@hal.voltaire.com> On Wed, 2007-01-03 at 10:35, Yevgeny Kliteynik wrote: > Hi Hal. > > Fixing a bug in fat-tree: > > Unlike UPDN or File engines, the second stage of fat-tree engine > (fdb tables) can't be run if the first stage of the algorithm > (ranking and indexing) failed. Adding flag to let the routing > know that the first stage has failed. > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. -- Hal From mshefty at ichips.intel.com Wed Jan 3 09:19:44 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 03 Jan 2007 09:19:44 -0800 Subject: [openib-general] APM: SM port failover In-Reply-To: <459B19B5.8000807@3leafnetworks.com> References: <459B19B5.8000807@3leafnetworks.com> Message-ID: <459BE5B0.1060407@ichips.intel.com> Venkatesh Babu wrote: > Let us say there are two nodes A and B. NodeA (passive side) passively > listens for RC QP connection establishment requests and NodeB (active > side) initiates the RC QP connection request with ib_send_cm_req(). When > a port failure occurs on NodeA (passive side), it gets the event > IB_EVENT_PORT_ERR locally. So it can call ib_modify_qp() for the RC QP > to change the path_mig_state to IB_MIG_MIGRATED to use the alternate > path. 
No problem here. But NodeB has to register with the OpenSM for the > port failure event on NodeA, so that it can call ib_modify_qp() on the > active side. Note that path migration will occur without either calling ib_modify_qp() to force the migration. ib_modify_qp() can speed up how quickly failover occurs, but it is not necessary. > This is working fine by using the interface ib_sa_serv_notice_hdlr() > described in bug#159 > (https://staging.openfabrics.org/bugzilla/show_bug.cgi?id=159). InformInfo support has also been added through my rdma-dev git tree. - Sean From jsquyres at cisco.com Wed Jan 3 09:34:03 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 3 Jan 2007 12:34:03 -0500 Subject: [openib-general] DNS changes In-Reply-To: <20070103161852.GT6019@mellanox.co.il> References: <20070103161852.GT6019@mellanox.co.il> Message-ID: <7E44C2E9-2FDF-4B62-80E4-D7589DF71D53@cisco.com> On Jan 3, 2007, at 11:18 AM, Michael S. Tsirkin wrote: >> Also note that the name "staging.openfabrics.org" will eventually go >> away -- at some point after all the new names are in place and the >> dust has settled. There will be adequate warning before this occurs >> (so that you can get new git checkouts, etc.), so consider this an >> early warning. > > I actually like the "staging" name. > This is kind of synonymous with "preparation", "testing", > and this fits the fact that code there is under development. > > And, its shorter to say "I pushed this to staging" than > "I pushed this to openfabrics.org". I think that statement ("I pushed this to staging") only has meaning if it really is a *staging* repository -- as opposed to, say, a separate production repository. Otherwise, the name "staging" applied in this context is somewhat of a misnomer. We really intended the DNS name "staging" in the context of the new server vs. the old server. Not in terms of git repositories. FWIW, git.openfabrics.org is shorter than staging.openfabrics.org. 
> Can we keep the staging indefinitely, name too? We can, sure -- but I guess it would be worth clarifying exactly what it is *for*. My proposal for the OFA DNS has a specific meaning and purpose for each name. -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From swise at opengridcomputing.com Wed Jan 3 11:17:19 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 03 Jan 2007 13:17:19 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <20070103151823.GR6019@mellanox.co.il> References: <1167836172.4187.9.camel@stevo-desktop> <20070103150013.GO6019@mellanox.co.il> <1167836841.4187.18.camel@stevo-desktop> <20070103151823.GR6019@mellanox.co.il> Message-ID: <1167851839.4187.36.camel@stevo-desktop> > > > > ib_set_cq_udata() would transition into the kernel to pass in the > > > > consumer's index. In addition, ib_req_notify_cq would also transition > > > > into the kernel since its not a bypass function for chelsio. > > > > > > We misunderstand each other. > > > > > > ib_uverbs_req_notify_cq is in drivers/infiniband/core/uverbs_cmd.c - > > > all this code runs inside the IB_USER_VERBS_CMD_REQ_NOTIFY_CQ command, > > > so there is a single user to kernel transition. > > > > > > > Oh I see. > > > > This seems like a lot of extra code to avoid passing one extra arg to > > the driver's req_notify_cq verb. I'd appreciate other folk's input on > > how important they think this is. > > > > If you insist, then I'll run some tests specifically in kernel mode and > > see how this affects mthca's req_notify performance. > > This might be an interesting datapoint. 
> Here's what I measured: Without extra param (1000 iterations in cycles): ave 101.283 min 91 max 247 With extra param (1000 iterations in cycles): ave 103.311 min 91 max 221 Convert cycles to ns (3466.727 MHz CPU): Without: 101.283 / 3466.727 = .02922us == 29.22ns With: 103.311 / 3466.727 = .02980us == 29.80ns So I measure a .58ns average increase for passing in the additional parameter. Here is a snipit of the test: spin_lock_irq(&lock); do_gettimeofday(&start_tv); for (i=0; i<1000; i++) { cycles_start[i] = get_cycles(); ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); cycles_stop[i] = get_cycles(); } do_gettimeofday(&stop_tv); spin_unlock_irq(&lock); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < 1000; i++) { cycles_t v = cycles_stop[i] - cycles_start[i]; sum += v; if (v > max) max = v; if (min == 0 || v < min) min = v; } printk(KERN_ERR PFX "FOO delta sec %lu usec %lu sum %llu min %llu max %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, (unsigned long long)sum, (unsigned long long)min, (unsigned long long)max); From mst at mellanox.co.il Wed Jan 3 11:33:24 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 3 Jan 2007 21:33:24 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167851839.4187.36.camel@stevo-desktop> References: <1167851839.4187.36.camel@stevo-desktop> Message-ID: <20070103193324.GD29003@mellanox.co.il> > Without extra param (1000 iterations in cycles): > ave 101.283 min 91 max 247 > With extra param (1000 iterations in cycles): > ave 103.311 min 91 max 221 A 2% hit then. Not huge, but 0 either. > Convert cycles to ns (3466.727 MHz CPU): > > Without: 101.283 / 3466.727 = .02922us == 29.22ns > With: 103.311 / 3466.727 = .02980us == 29.80ns > > So I measure a .58ns average increase for passing in the additional > parameter. That depends on CPU speed though. Percentage is likely to be more universal. 
> Here is a snipit of the test: > > spin_lock_irq(&lock); > do_gettimeofday(&start_tv); > for (i=0; i<1000; i++) { > cycles_start[i] = get_cycles(); > ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); > cycles_stop[i] = get_cycles(); > } > do_gettimeofday(&stop_tv); > spin_unlock_irq(&lock); > > if (stop_tv.tv_usec < start_tv.tv_usec) { > stop_tv.tv_usec += 1000000; > stop_tv.tv_sec -= 1; > } > > for (i=0; i < 1000; i++) { > cycles_t v = cycles_stop[i] - cycles_start[i]; > sum += v; > if (v > max) > max = v; > if (min == 0 || v < min) > min = v; > } > > printk(KERN_ERR PFX "FOO delta sec %lu usec %lu sum %llu min %llu max %llu\n", > stop_tv.tv_sec - start_tv.tv_sec, > stop_tv.tv_usec - start_tv.tv_usec, > (unsigned long long)sum, (unsigned long long)min, > (unsigned long long)max); Good job, the test looks good, thanks. So what does this tell you? To me it looks like there's a measurable speed difference, and so we should find a way (e.g. what I proposed) to enable chelsio userspace without adding overhead to other low level drivers or indeed chelsio kernel level code. What do you think? Roland? -- MST From swise at opengridcomputing.com Wed Jan 3 12:20:18 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 03 Jan 2007 14:20:18 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <20070103193324.GD29003@mellanox.co.il> References: <1167851839.4187.36.camel@stevo-desktop> <20070103193324.GD29003@mellanox.co.il> Message-ID: <1167855618.4187.65.camel@stevo-desktop> On Wed, 2007-01-03 at 21:33 +0200, Michael S. Tsirkin wrote: > > Without extra param (1000 iterations in cycles): > > ave 101.283 min 91 max 247 > > With extra param (1000 iterations in cycles): > > ave 103.311 min 91 max 221 > > A 2% hit then. Not huge, but 0 either. 
> > > Convert cycles to ns (3466.727 MHz CPU): > > > > Without: 101.283 / 3466.727 = .02922us == 29.22ns > > With: 103.311 / 3466.727 = .02980us == 29.80ns > > > > So I measure a .58ns average increase for passing in the additional > > parameter. > > That depends on CPU speed though. Percentage is likely to be more universal. > > > Here is a snipit of the test: > > > > spin_lock_irq(&lock); > > do_gettimeofday(&start_tv); > > for (i=0; i<1000; i++) { > > cycles_start[i] = get_cycles(); > > ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); > > cycles_stop[i] = get_cycles(); > > } > > do_gettimeofday(&stop_tv); > > spin_unlock_irq(&lock); > > > > if (stop_tv.tv_usec < start_tv.tv_usec) { > > stop_tv.tv_usec += 1000000; > > stop_tv.tv_sec -= 1; > > } > > > > for (i=0; i < 1000; i++) { > > cycles_t v = cycles_stop[i] - cycles_start[i]; > > sum += v; > > if (v > max) > > max = v; > > if (min == 0 || v < min) > > min = v; > > } > > > > printk(KERN_ERR PFX "FOO delta sec %lu usec %lu sum %llu min %llu max %llu\n", > > stop_tv.tv_sec - start_tv.tv_sec, > > stop_tv.tv_usec - start_tv.tv_usec, > > (unsigned long long)sum, (unsigned long long)min, > > (unsigned long long)max); > > Good job, the test looks good, thanks. > > So what does this tell you? > To me it looks like there's a measurable speed difference, > and so we should find a way (e.g. what I proposed) to enable chelsio userspace > without adding overhead to other low level drivers or indeed chelsio kernel level code. > > What do you think? Roland? > I think having a 2nd function to set the udata seems onerous. 
From swise at opengridcomputing.com Wed Jan 3 13:22:00 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 03 Jan 2007 15:22:00 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167855618.4187.65.camel@stevo-desktop> References: <1167851839.4187.36.camel@stevo-desktop> <20070103193324.GD29003@mellanox.co.il> <1167855618.4187.65.camel@stevo-desktop> Message-ID: <1167859320.4187.81.camel@stevo-desktop> > > > > So what does this tell you? > > To me it looks like there's a measurable speed difference, > > and so we should find a way (e.g. what I proposed) to enable chelsio userspace > > without adding overhead to other low level drivers or indeed chelsio kernel level code. > > > > What do you think? Roland? > > > > I think having a 2nd function to set the udata seems onerous. > > Roland, If you think I should not add the udata parameter to the req_notify_cq() provider verb, then I can rework the chelsio driver: 1) at cq creation time, pass the virtual address of the u32 used by the library to track the current cq index. That way the chelsio kernel driver can save the address in its kernel cq context for later use. 2) change chelsio's req_notify_cq() to copy in the current cq index value directly for rearming. This puts all the burden on the chelsio driver, which is apparently the only one that needs this functionality. Lemme know. Steve. 
From halr at voltaire.com Wed Jan 3 14:40:36 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 03 Jan 2007 17:40:36 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <20070103012440.GC13404@obsidianresearch.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> Message-ID: <1167864035.4564.23831.camel@hal.voltaire.com> On Tue, 2007-01-02 at 20:24, Jason Gunthorpe wrote: > On Tue, Jan 02, 2007 at 06:39:21PM -0500, Hal Rosenstock wrote: > > > > I think you are right about the 0s (see definition of GID Prefix). > > > Based on the definition of GID Prefix (and the IPv6 addressing > > > architecture this section is clearly derived from) I'd also change > > > ib_gid_is_link_local to check only the scope bits (ie compare a /10 > > > not a /64..) > > > > I was wondering about that too and was about to go there. So you are > > saying that any link local scope GID is fine (and doesn't need complete > > 64 bit matching but only the first 10 bits), right ? > > Well, there is a quibble here that is worth noting. > > I would expect a function like ib_gid_is_link_local to behave > similarly to the POSIX IN6_IS_ADDR_LINKLOCAL function, in that it > tests the scope of the address for the link local property. For this > purpose /10 is correct. > > However, the current usage of ib_gid_is_link_local is really a > placeholder for a routing lookup. That function could be made more precise with a routing lookup. I presume an invalid link local GID would fail the lookup as opposed to now where it "passes". > Since IBA defines the link local > addresses to be fe80::xxxx/64 it must test the full /64 (like ethernet > does). Why ? I thought that the 54 bits (after the inital 10 bits 0xFE80) were constrained to be 0. 
If they are not, I agree but then I'm confused about the discussion of those 54 bits. > > > > > > Shouldn't it be either the default subnet prefix or the one supplied in > > > > > > PortInfo:GidPrefix (which might not be the default one) ? > > > > > > Would be better described as ib_gid_is_on_link. on-link being a term > > > used to refer to an address where a routing table says it is present > > > on the local link rather than reachable through a router. > > > > Yes, that is better terminology but I can't use this yet as there is no > > routing table (at least yet)... > > Well, I'd say you have a hard wired routing table with 3 entries: > > fe80::/64 onlink > DEFAULT_PREFIX::/64 onlink > default unreachable Perhaps default is assumed "reachable" in experimental router mode. > Which is what the tests in the code accomplish. True. > One thought would be to implement the route lookup function I think there is more than just this function although this is the first one we are focusing on/discussing. > with a useable signature and move the hard-coded tests into it as a > placeholder for an alterable routing table. Makes sense to me. I'll write something up as an RFC on this.
-- Hal > Jason From jgunthorpe at obsidianresearch.com Wed Jan 3 15:14:51 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Wed, 3 Jan 2007 16:14:51 -0700 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167864035.4564.23831.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> Message-ID: <20070103231451.GB3267@obsidianresearch.com> On Wed, Jan 03, 2007 at 05:40:36PM -0500, Hal Rosenstock wrote: > That function could be made more precise with a routing lookup. I > presume an invalid link local GID would fail the lookup as opposed to > now where it "passes". I think it depends how you code it.. In IP-like terms you have two processes going on here, a route lookup and a 'neighbour lookup' (GID to LID/etc conversion) for the link address. A non-existing link local GID should pass the routing lookup, but fail the neighbour lookup since there is no LID. So typically you'd do the routing lookup and end up with an on-link GID. Then you'd do the neighbour lookup and get a link address (IB path). > > Since IBA defines the link local > > addresses to be fe80::xxxx/64 it must test the full /64 (like ethernet > > does). > > Why ? I thought that the 54 bits (after the initial 10 bits 0xFE80) were > constrained to be 0. If they are not, I agree but then I'm confused > about the discussion of those 54 bits. They are constrained to be 0, that is why you have to test them when doing the routing checks (since non-zero values will be rejected by the HCA). But to test if the _scope_ of the address is link local you only look at the first 10 bits.
> > Well, I'd say you have a hard wired routing table with 3 entires: > > > > fe80::/64 onlink > > DEFAULT_PREFIX::/64 onlink > > default unreachable > > Perhaps default is assumed "reachable" in experimental router mode. As an initial start I'd like to see support for an entry like this: default via fe80::17:77ff:feff:5 Where fe80::17:77ff:feff:5 is the GID of a router. A full table for a routing environment might look like: fe80::/64 onlink fe80::/10 unreachable ; IBA requires link local scope addresses never ; to be routed ff00::/8 onlink ; Multicast is 'on link' DEFAULT_PREFIX::/64 onlink default via fe80::17:77ff:feff:5 Matching is always done by longest prefix match. Jason From eitan at sw053.yok.mtl.com Wed Jan 3 21:00:27 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Thu, 4 Jan 2007 07:00:27 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-04:normal completion Message-ID: <200701040500.l0450RPA019726@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Wed_Jan_3_13:51:20_2007 523764 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=328 Pass=323 Fail=5 Pass: 24 Stability IS1-16.topo 24 Pkey IS1-16.topo 24 OsmTest IS1-16.topo 24 OsmStress IS1-16.topo 24 Multicast IS1-16.topo 24 LidMgr IS1-16.topo 8 Stability IS3-loop.topo 8 Stability IS3-128.topo 8 Pkey IS3-128.topo 8 OsmTest IS3-loop.topo 8 OsmTest IS3-128.topo 8 OsmStress IS3-128.topo 8 Multicast IS3-loop.topo 8 Multicast IS3-128.topo 8 LidMgr IS3-128.topo 8 FatTree part-4-ary-3-tree.topo 8 FatTree merge-roots-4-ary-2-tree.topo 8 FatTree merge-root-4-ary-3-tree.topo 8 FatTree merge-root-12-ary-2-tree.topo 8 FatTree half-4-ary-3-tree.topo 8 FatTree 4-ary-4-tree.topo 8 FatTree 4-ary-3-tree.topo 8 FatTree 32nodes-3lvl-is1.topo 8 FatTree 12-ary-2-tree.topo 7 FatTree merge-roots-reorder-4-ary-2-tree.topo 7 FatTree merge-2-ary-4-tree.topo 7 FatTree blend-4-ary-2-tree.topo 7 FatTree 2-ary-4-tree.topo 7 FatTree 12-node-spaced.topo Failures: 1 FatTree 
merge-roots-reorder-4-ary-2-tree.topo 1 FatTree merge-2-ary-4-tree.topo 1 FatTree blend-4-ary-2-tree.topo 1 FatTree 2-ary-4-tree.topo 1 FatTree 12-node-spaced.topo From mst at mellanox.co.il Wed Jan 3 21:07:22 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 4 Jan 2007 07:07:22 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167859320.4187.81.camel@stevo-desktop> References: <1167851839.4187.36.camel@stevo-desktop> <20070103193324.GD29003@mellanox.co.il> <1167855618.4187.65.camel@stevo-desktop> <1167859320.4187.81.camel@stevo-desktop> Message-ID: <20070104050722.GA9900@mellanox.co.il> > If you think I should not add the udata parameter to the req_notify_cq() > provider verb, then I can rework the chelsio driver: > > 1) at cq creation time, pass the virtual address of the u32 used by the > library to track the current cq index. That way the chelsio kernel > driver can save the address in its kernel cq context for later use. > > 2) change chelsio's req_notify_cq() to copy in the current cq index > value directly for rearming. > > This puts all the burden on the chelsio driver, which is apparently the > only one that needs this functionality. Good thinking, I haven't thought of this approach. This way there won't be any API/core changes and no changes to other low level drivers, correct? And for chelsio, there's no overhead as compared to code you posted. Sounds good. -- MST From kliteyn at dev.mellanox.co.il Wed Jan 3 23:03:41 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 04 Jan 2007 09:03:41 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-04:normal completion In-Reply-To: <200701040500.l0450RPA019726@sw053.yok.mtl.com> References: <200701040500.l0450RPA019726@sw053.yok.mtl.com> Message-ID: <459CA6CD.7060007@dev.mellanox.co.il> All 5 failed cases have the following in test.log: ... 
-I- Invoking ibdiagnet -o /tmp/ibmgtsim.7234 -E- ibdiagnet failed with status:do_ypcall: clnt_call: RPC: Timed out YPBINDPROC_DOMAIN: Domain not bound -I- Closing SIM ... ... The ibdiagnet.log files look ok in all 5 cases. Do you know what is causing this failure? Eitan Zahavi wrote: > OSM Simulation Regression Summary > OpenSM rev = Wed_Jan_3_13:51:20_2007 523764 > ibutils rev = Wed_Jan_3_11:42:12_2007 913448 > Total=328 Pass=323 Fail=5 > > Pass: > 24 Stability IS1-16.topo > 24 Pkey IS1-16.topo > 24 OsmTest IS1-16.topo > 24 OsmStress IS1-16.topo > 24 Multicast IS1-16.topo > 24 LidMgr IS1-16.topo > 8 Stability IS3-loop.topo > 8 Stability IS3-128.topo > 8 Pkey IS3-128.topo > 8 OsmTest IS3-loop.topo > 8 OsmTest IS3-128.topo > 8 OsmStress IS3-128.topo > 8 Multicast IS3-loop.topo > 8 Multicast IS3-128.topo > 8 LidMgr IS3-128.topo > 8 FatTree part-4-ary-3-tree.topo > 8 FatTree merge-roots-4-ary-2-tree.topo > 8 FatTree merge-root-4-ary-3-tree.topo > 8 FatTree merge-root-12-ary-2-tree.topo > 8 FatTree half-4-ary-3-tree.topo > 8 FatTree 4-ary-4-tree.topo > 8 FatTree 4-ary-3-tree.topo > 8 FatTree 32nodes-3lvl-is1.topo > 8 FatTree 12-ary-2-tree.topo > 7 FatTree merge-roots-reorder-4-ary-2-tree.topo > 7 FatTree merge-2-ary-4-tree.topo > 7 FatTree blend-4-ary-2-tree.topo > 7 FatTree 2-ary-4-tree.topo > 7 FatTree 12-node-spaced.topo > > Failures: > 1 FatTree merge-roots-reorder-4-ary-2-tree.topo > 1 FatTree merge-2-ary-4-tree.topo > 1 FatTree blend-4-ary-2-tree.topo > 1 FatTree 2-ary-4-tree.topo > 1 FatTree 12-node-spaced.topo > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From ogerlitz at voltaire.com Thu Jan 4 03:20:43 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 04 Jan 2007 13:20:43 +0200 Subject: [openib-general] SVN deprecation In-Reply-To:
<0B3B409D-1B50-4D18-8374-DEF85FBD2534@cisco.com> References: <4593A31D.50808@voltaire.com> <459B91E1.7000605@voltaire.com> <0B3B409D-1B50-4D18-8374-DEF85FBD2534@cisco.com> Message-ID: <459CE30B.3040503@voltaire.com> Jeff Squyres wrote: > On Jan 3, 2007, at 6:22 AM, Or Gerlitz wrote: >> OK, it makes sense, but before doing so, can you educate me what is >> the procedure to see (ie the equivalent of svn ls) the structure of a >> deleted SVN tree (ie subset of the repositor) and what is the >> procedure to get/diff between revsions/etc those deleted sources? > "svn help ls" and "svn help diff" both show that these commands take -r > arguments. For example, you can ask for the ls of a specific revision > of the repository. > See "svn help " and the SVN book for more information. Considering myself as being located somewhere within the ~medium range of the SVN users spectrum I truly think that my questions are not of RTFM type. So let me see i can do it pro actively: assuming you have used the command $ svn delete some-sub-tree-of-a-repository ***and*** left a README at the ancestor directory of this subtree which says that you have just deleted sub-trees xxx,yyy,zzz etc my work flow to get a checkout of xxx would look like: $ svn co url/of/path/to/ancestor/directory $ cd /path/to/ancestor/directory $ read README, learn there was an xxx directory with content XXX $ svn ls xxx -R or $ svn co -r HEAD xxx then $ cd xxx and that point i have a (non...) fresh checkout of xxx snapshoting the time you have deleted it, which i can start doing over svn log/diff etc. ??? Or. 
From halr at voltaire.com Thu Jan 4 03:49:53 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 06:49:53 -0500 Subject: [openib-general] nightly osm_sim report 2007-01-03:normal completion In-Reply-To: <200701030503.l03532bQ014965@sw053.yok.mtl.com> References: <200701030503.l03532bQ014965@sw053.yok.mtl.com> Message-ID: <1167911342.4564.64336.camel@hal.voltaire.com> On Wed, 2007-01-03 at 00:03, Eitan Zahavi wrote: > OSM Simulation Regression Summary > OpenSM rev = Tue_Jan_2_14:42:50_2007 fb3365 > ibutils rev = Wed_Dec_27_23:39:30_2006 60aebe > Total=297 Pass=296 Fail=1 > > Pass: > 33 Stability IS1-16.topo > 33 OsmTest IS1-16.topo > 33 OsmStress IS1-16.topo > 33 Multicast IS1-16.topo > 33 LidMgr IS1-16.topo > 32 Pkey IS1-16.topo > 11 Stability IS3-loop.topo > 11 Stability IS3-128.topo > 11 Pkey IS3-128.topo > 11 OsmTest IS3-loop.topo > 11 OsmTest IS3-128.topo > 11 OsmStress IS3-128.topo > 11 Multicast IS3-loop.topo > 11 Multicast IS3-128.topo > 11 LidMgr IS3-128.topo > > Failures: > 1 Pkey IS1-16.topo Any idea on this failure from the 1/3 run ? -- Hal From halr at voltaire.com Thu Jan 4 04:04:19 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 07:04:19 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <20070103231451.GB3267@obsidianresearch.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> Message-ID: <1167912258.4564.65120.camel@hal.voltaire.com> On Wed, 2007-01-03 at 18:14, Jason Gunthorpe wrote: > On Wed, Jan 03, 2007 at 05:40:36PM -0500, Hal Rosenstock wrote: > > > That function could be made more precise with a routing lookup. 
I > > presume an invalid link local GID would fail the lookup as opposed to > > now where it "passes". > > I think it depends how you code it.. In IP-like terms you have two > processes going on here, a route lookup and a 'neighbour lookup' (GID > to LID/etc conversion) for the link address. > > An non-existing link local GID should pass the routing lookup, but fail the > neighbour lookup since there is no LID. > > So typically you'd do the routing lookup and end up with an on-link > GID. Then you'd do the neighbour lookup and get a link address (IB path). I think you are talking here about the end node and router function rather than SM/SA, right ? And in terms of IB path lookup, is the path just local with a destination of the neighbor (router) or global to the end point ? > > > Since IBA defines the link local > > > addresses to be fe80::xxxx/64 it must test the full /64 (like ethernet > > > does). > > > > Why ? I thought that the 54 bits (after the inital 10 bits 0xFE80) were > > constrained to be 0. If they are not, I agree but then I'm confused > > about the discussion of those 54 bits. > > They are constrained to be 0, that is why you have to test them when > doing the routing checks (since non-zero values will be rejected by > the HCA). > > But to test if the _scope_ of the address is link local you only look > at the first 10 bits. Right. > > > Well, I'd say you have a hard wired routing table with 3 entires: > > > > > > fe80::/64 onlink > > > DEFAULT_PREFIX::/64 onlink > > > default unreachable > > > > Perhaps default is assumed "reachable" in experimental router mode. > > As an initial start I'd like to see support for an entry like this: > > default via fe80::17:77ff:feff:5 > > Where fe80::17:77ff:feff:5 is the GID of a router. 
A full table for a > routing environment might look like: > > fe80::/64 onlink > fe80::/10 unreachable ; IBA requires link local scope addresses never > ; to be routed > ff00::/8 onlink ; Multicast is 'on link' Why is all multicast scoped onlink ? I guess this is a starting point but not what IPoIB needs to run across IB subnets. > DEFAULT_PREFIX::/64 onlink > default via fe80::17:77ff:feff:5 > > Matching is always done by longest prefix match. Yes. -- Hal > Jason From halr at voltaire.com Thu Jan 4 05:52:25 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 08:52:25 -0500 Subject: [openib-general] [PATCH] opensm: switch pointer field for osm_node_t structure In-Reply-To: <20070102223501.GG32677@sashak.voltaire.com> References: <20070102223501.GG32677@sashak.voltaire.com> Message-ID: <1167918744.4564.70580.camel@hal.voltaire.com> On Tue, 2007-01-02 at 17:35, Sasha Khapyorsky wrote: > This introduces new field for osm_node_t structure. For switch nodes > this field keeps pointer to corresponded osm_switch_t object, for non > switch nodes it is initialized as NULL. This new field is useful for > fast switch object access and for node type detection. > > Signed-off-by: Sasha Khapyorsky Thanks! Applied. -- Hal From jsquyres at cisco.com Thu Jan 4 05:57:08 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 4 Jan 2007 08:57:08 -0500 Subject: [openib-general] SVN deprecation In-Reply-To: <459CE30B.3040503@voltaire.com> References: <4593A31D.50808@voltaire.com> <459B91E1.7000605@voltaire.com> <0B3B409D-1B50-4D18-8374-DEF85FBD2534@cisco.com> <459CE30B.3040503@voltaire.com> Message-ID: On Jan 4, 2007, at 6:20 AM, Or Gerlitz wrote: >> See "svn help " and the SVN book for more information. > > Considering myself as being located somewhere within the ~medium > range of the SVN users spectrum I truly think that my questions are > not of RTFM type. Sounds like you just don't want to read the fine documentation. 
;-) There's a nice overview of browsing a repository's history in the SVN book here: http://svnbook.red-bean.com/en/1.2/svn-book.html#svn.tour.history > So let me see i can do it pro actively: > > assuming you have used the command > > $ svn delete some-sub-tree-of-a-repository Actually, it would be: svn rm some-tree svn ci some-tree That would commit the removal of some-tree at the HEAD at repository number N. Of course, one of the goals of SVN is to maintain an accurate history. So you can always examine the repository at r(N-1) (or any prior values of N) to see what subtree looked like at exactly those points in time. Quoting the SVN book: "Of course, nothing is ever totally deleted from the repository—just from the HEAD of the repository. You can get back anything you delete by checking out (or updating your working copy) a revision earlier than the one in which you deleted it." > ***and*** left a README at the ancestor directory of this subtree > which Note that leaving a README in the directory has nothing to do with the semantics of how SVN works. I think that this is probably obvious, but I just wanted to be precise. :-) > says that you have just deleted sub-trees xxx,yyy,zzz etc my work > flow to get a checkout of xxx would look like: > > $ svn co url/of/path/to/ancestor/directory > $ cd /path/to/ancestor/directory > $ read README, learn there was an xxx directory with content XXX > > $ svn ls xxx -R > or > $ svn co -r HEAD xxx > then > $ cd xxx It's actually simpler than that: $ svn co url/of/path/to/ancestor/directory $ cd /path/to/ancestor/directory $ read README, learn there was an xxx directory with content XXX $ svn up -r N Which updates your current checkout to reflect the state of the repository (at that tree) at repository number N (where N is assumedly less than the value of N at the HEAD, so you're reverting back to an earlier state of the repository, before the stuff that you want was deleted). 
When navigating the history via "svn up -r N", there's a few complex corner cases where the update can fail (by design), but they aren't common and can be avoided by doing a fresh checkout of the r number that you want: $ svn co -r N url/of/path/to/ancestor/directory -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From swise at opengridcomputing.com Thu Jan 4 06:07:10 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 04 Jan 2007 08:07:10 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <20070104050722.GA9900@mellanox.co.il> References: <1167851839.4187.36.camel@stevo-desktop> <20070103193324.GD29003@mellanox.co.il> <1167855618.4187.65.camel@stevo-desktop> <1167859320.4187.81.camel@stevo-desktop> <20070104050722.GA9900@mellanox.co.il> Message-ID: <1167919630.3071.8.camel@stevo-desktop> On Thu, 2007-01-04 at 07:07 +0200, Michael S. Tsirkin wrote: > > If you think I should not add the udata parameter to the req_notify_cq() > > provider verb, then I can rework the chelsio driver: > > > > 1) at cq creation time, pass the virtual address of the u32 used by the > > library to track the current cq index. That way the chelsio kernel > > driver can save the address in its kernel cq context for later use. > > > > 2) change chelsio's req_notify_cq() to copy in the current cq index > > value directly for rearming. > > > > This puts all the burden on the chelsio driver, which is apparently the > > only one that needs this functionality. > > Good thinking, I haven't thought of this approach. > > This way there won't be any API/core changes and no changes to > other low level drivers, correct? And for chelsio, there's no overhead > as compared to code you posted. > > Sounds good. > I still want to hear from Roland on this before I go to the effort of reworking all this... Steve. 
From kliteyn at dev.mellanox.co.il Thu Jan 4 06:23:39 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 04 Jan 2007 16:23:39 +0200 Subject: [openib-general] [PATCH 1/4] osm: TRIVIAL - making comparison functions static (fat-tree routing) In-Reply-To: <20070102165436.GD7265@sashak.voltaire.com> References: <45990D05.60905@dev.mellanox.co.il> <20070102161746.GB7265@sashak.voltaire.com> <459A85DC.2060003@dev.mellanox.co.il> <20070102165436.GD7265@sashak.voltaire.com> Message-ID: <459D0DEB.90606@dev.mellanox.co.il> Sasha Khapyorsky wrote: > On 18:18 Tue 02 Jan , Yevgeny Kliteynik wrote: >> Sasha Khapyorsky wrote: >>> Hi Yevgeny, >>> >>> On 15:30 Mon 01 Jan , Yevgeny Kliteynik wrote: >>>> Making two comparison functions in ftree static. >>> Good. >>> >>> Any news about OSM_CDECL/__cdecl emilination? >> Here's what I know by now: >> __cdecl is default on windows, but osm is compiled with a flag >> that makes __stdcall default (there are some problems linking >> osm to other libraries without this flag), > > Which problems? Isn't it better to solve those problems instead of such > strange workaround as putting __cdecl/__stdcall attribution in the common > code? IB access layer on windows is compiled with __stdcall as default, and there were problems linking opensm unless it also had the same default. I don't think I should invest too much time trying to figure out why IBAL should be compiled this way - I suppose there's a reason for this. Anyway, I'll check if the __stdcall can be removed (I'm sure it can, but need to check anyway) -- Yevgeny. >> so OSM_CDECL is gonna >> stay. One thing I do need to check is removing the __stdcall >> and trying to recompile everything to see if the __stdcall >> is still needed. > > Thanks. 
> > Sasha > From ogerlitz at voltaire.com Thu Jan 4 06:30:21 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 04 Jan 2007 16:30:21 +0200 Subject: [openib-general] APM support in openib stack In-Reply-To: References: Message-ID: <459D0F7D.1060902@voltaire.com> Tang, Changqing wrote: > Sorry, I find the function 'ib_sa_path_rec_get()' in kernel code. Then > here is my question: > > Is there any way (instruction) to fill in struct 'ib_sa_path_rec' inside > struct 'ib_cm_req_param' without using librdmacm.so ? Hi CQ, I understand that you considering to go on an approach which does not involve librdmacm, so you would probably like to + use IB PORT GIDs at your initial mpi init exchange + issue IB SA Path query via libibsa (which does not exist) + establish IB RC connection (listen/connect/accept) via libibcm Please note that such an approach is possible even with the non existence of libibsa (similarly to what IB MPIs do today use IB PORT LIDs [note you would need both GIDs & LIDs to have the CM working fine] at your ranks pre exchange and hardcode the other IB PATH params such as MTU, PKEY and SL you set later into the IB RC QP. However, a no rdma cm approach means you need to apply hacks to "guess" the correct pkey, mtu and sl and some more limitations that eventually you would face when coming to advanced IB deployment environments. All you have to do to use the rdma cm to require a functional IPoIB NIC on each of the active IB PORTs, which is a trivial requirement from the users in the Ethernet world, so why not apply it here as well ??? On the other hand moving to use the IB CM instead of emulating it via TCP is some progress... Or. 
From eeb at bartonsoftware.com Thu Jan 4 06:29:18 2007 From: eeb at bartonsoftware.com (Eric Barton) Date: Thu, 4 Jan 2007 14:29:18 -0000 Subject: [openib-general] IB_CM_REJ_INVALID_SERVICE_ID In-Reply-To: <4592CE89.2060005@ichips.intel.com> Message-ID: <01ec01c7300c$b840be20$0182a8c0@eblap> Sean, > Eric Barton wrote: > > Can an rdma_connect be rejected with IB_CM_REJ_INVALID_SERVICE_ID > > for any other reason than the peer isn't listening with the > > correct service number? > > This should only occur if the remote peer isn't listening. This > reject code is automatically sent by the ib_cm when a request does > not find a corresponding listen. > > >>We are testing 1.6b5 for a InfiniBand cluster with RHEL 4. We use > >>the binaries provides by CFS and use OFED 1.1 as the IB stack. > >> > >>At several times some of the clients hang during fs mount or when > >>an OST is added (see log). Error:LustreError: > >>1776:0:(o2iblnd_cb.c:2314:kiblnd_rejected()) > >>10.0.90.8 at o2ib rejected: reason 8, size 148 > > Is this event = 8 and status = 8? yes > >>from OFED: enum ib_cm_rej_reason { > >> IB_CM_REJ_INVALID_SERVICE_ID = 8, > >> > >>Once an IPoIB ping is started to the corresponding OST the client > >>continues. Afterwards it is quite stable. > > > > > > ...which seems to be saying that just doing an IPoIB ping to the > > server was enough to make rdma_connect() work OK. > > I can't explain the relationship between the ping and the connect > starting to work. some more from the customer... > We have removed the two Mellanox cards from the OSS and put a single > Voltaire card in it. This seems to work. I could connect 60 nodes > without errors. We will further investigate this to better > understand the cause of the original problem. ...I hadn't realised they had > 1 HCA. We bind the listen ID to a specific IP address - could that have some bearing? AFAICS from the customer's debug logs, they are listening on the correct HCA... 
-- Cheers, Eric From halr at voltaire.com Thu Jan 4 06:54:47 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 09:54:47 -0500 Subject: [openib-general] APM support in openib stack In-Reply-To: <459D0F7D.1060902@voltaire.com> References: <459D0F7D.1060902@voltaire.com> Message-ID: <1167922486.4564.73764.camel@hal.voltaire.com> On Thu, 2007-01-04 at 09:30, Or Gerlitz wrote: > Tang, Changqing wrote: > > Sorry, I find the function 'ib_sa_path_rec_get()' in kernel code. Then > > here is my question: > > > > Is there any way (instruction) to fill in struct 'ib_sa_path_rec' inside > > struct 'ib_cm_req_param' without using librdmacm.so ? > > Hi CQ, > > I understand that you considering to go on an approach which does not > involve librdmacm, so you would probably like to > > + use IB PORT GIDs at your initial mpi init exchange > + issue IB SA Path query via libibsa (which does not exist) likely multipath query if you want APM > + establish IB RC connection (listen/connect/accept) via libibcm > > Please note that such an approach is possible even with the non > existence of libibsa (similarly to what IB MPIs do today use IB PORT > LIDs [note you would need both GIDs & LIDs to have the CM working fine] There are other possibilities here too using inband (IB) exchanges. libibsa is not that far away either. What is your time frame ? > at your ranks pre exchange and hardcode the other IB PATH params such as > MTU, PKEY and SL you set later into the IB RC QP. > > However, a no rdma cm approach means you need to apply hacks to "guess" > the correct pkey, mtu and sl and some more limitations that eventually > you would face when coming to advanced IB deployment environments. Also, you would make more work if iWARP support is of interest as well. It comes "for free" or at least "for cheap" if the RDMA CM approach is taken. 
-- Hal > All you have to do to use the rdma cm to require a functional IPoIB NIC > on each of the active IB PORTs, which is a trivial requirement from the > users in the Ethernet world, so why not apply it here as well ??? > > On the other hand moving to use the IB CM instead of emulating it via > TCP is some progress... > > Or. > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From kliteyn at dev.mellanox.co.il Thu Jan 4 07:03:38 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 04 Jan 2007 17:03:38 +0200 Subject: [openib-general] [PATCH 2/4] osm: LMC > 0 is not supported by fat-tree routing. In-Reply-To: <1167751380.4596.77691.camel@hal.voltaire.com> References: <45990D1D.6090305@dev.mellanox.co.il> <1167669160.4596.1360.camel@hal.voltaire.com> <459A7328.9090305@dev.mellanox.co.il> <1167751380.4596.77691.camel@hal.voltaire.com> Message-ID: <459D174A.8010608@dev.mellanox.co.il> Hal Rosenstock wrote: > On Tue, 2007-01-02 at 09:58, Yevgeny Kliteynik wrote: >> Hal Rosenstock wrote: >>> On Mon, 2007-01-01 at 08:31, Yevgeny Kliteynik wrote: >>>> LMC > 0 is not supported by fat-tree routing. >>> Might this be different in the future ? Can LMC > 0 be supported with >>> fat tree routing ? >> Sure. We just have to figure out two things: >> 1. Figure out what should communication pattern look like >> when LMC is not 0. > >> 2. How to make fat-tree routing choose different paths for >> different lids of the same CA, because otherwise using LMC>0 >> is pointless. > > Not sure what you mean by this. How is this different for fat tree > routing than any other routing algorithm ? Maybe this is an issue for > all of them. I agree - this is an issue for any routing. 
I guess it wouldn't take too much to add a naive support for LMC>0, (in which case routes to LIDs of the same port will always be routed through the same path). But then we come back to the first question that we need to think about: this routing algorithm is optimized for shift pattern. How will the communication pattern look with LMC>0? --Yevgeny > Doesn't the PR/MPR request preselect the LID or the response determines > the LID to use ? The only issue I see is whether there needs to be a > separate GID for each possible LID. > > -- Hal > >> --Yevgeny. >> >>>> Removing all the related code and adding check to >>>> inform the user in case LMC is set. >>>> >>>> Signed-off-by: Yevgeny Kliteynik >>> Thanks. Applied. >>> >>> I'll also add a note to this effect to the documentation shortly. >>> >>> -- Hal >>> > From ogerlitz at voltaire.com Thu Jan 4 07:17:24 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 04 Jan 2007 17:17:24 +0200 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> Message-ID: <459D1A84.9060609@voltaire.com> Sean Hefty wrote: > Updates the librdmacm to work with ABI version 3, which is the proposed > kernel changes for inclusion in 2.6.20. 
> @@ -929,74 +926,102 @@ int rdma_join_multicast(struct rdma_cm_i > void *context) > { > struct ucma_abi_join_mcast *cmd; > + struct ucma_abi_create_id_resp *resp; > struct cma_id_private *id_priv; > + struct cma_multicast *mc, **pos; > void *msg; > int ret, size, addrlen; > > + id_priv = container_of(id, struct cma_id_private, id); > addrlen = ucma_addrlen(addr); > if (!addrlen) > return -EINVAL; > > - CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_JOIN_MCAST, size); > - id_priv = container_of(id, struct cma_id_private, id); > + mc = malloc(sizeof *mc); > + if (!mc) > + return -ENOMEM; > + > + memset(mc, 0, sizeof *mc); > + mc->context = context; > + mc->id_priv = id_priv; > + memcpy(&mc->addr, addr, addrlen); > + if (pthread_cond_init(&id_priv->cond, NULL)) { > + ret = -1; > + goto err1; > + } > + > + pthread_mutex_lock(&id_priv->mut); > + mc->next = id_priv->mc_list; > + id_priv->mc_list = mc; > + pthread_mutex_unlock(&id_priv->mut); > + > + CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_JOIN_MCAST, size); > cmd->id = id_priv->handle; > memcpy(&cmd->addr, addr, addrlen); > - cmd->uid = (uintptr_t) context; > + cmd->uid = (uintptr_t) mc; > > ret = write(id->channel->fd, msg, size); > - if (ret != size) > - return (ret > 0) ? -ENODATA : ret; > + if (ret != size) { > + ret = (ret > 0) ? 
-ENODATA : ret; > + goto err2; > + } > > + mc->handle = resp->id; > return 0; > +err2: > + pthread_mutex_lock(&id_priv->mut); > + for (pos = &id_priv->mc_list; *pos != mc; pos = &(*pos)->next) > + ; > + *pos = mc->next; > + pthread_mutex_unlock(&id_priv->mut); > +err1: > + free(mc); > + return ret; > } > > int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) > { > - struct ucma_abi_leave_mcast *cmd; > + struct ucma_abi_destroy_id *cmd; > + struct ucma_abi_destroy_id_resp *resp; > struct cma_id_private *id_priv; > + struct cma_multicast *mc, **pos; > void *msg; > int ret, size, addrlen; > - struct ibv_ah_attr ah_attr; > - uint32_t qp_info; > > addrlen = ucma_addrlen(addr); > if (!addrlen) > return -EINVAL; > > - CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_LEAVE_MCAST, size); > id_priv = container_of(id, struct cma_id_private, id); > - cmd->id = id_priv->handle; > - memcpy(&cmd->addr, addr, addrlen); > + pthread_mutex_lock(&id_priv->mut); > + for (pos = &id_priv->mc_list; *pos; pos = &(*pos)->next) > + if (!memcmp(&(*pos)->addr, addr, addrlen)) > + break; > > - if (id->qp) { > - ret = rdma_get_dst_attr(id, addr, &ah_attr, &qp_info, > &qp_info); > - if (ret) > - goto out; > + mc = *pos; > + if (*pos) > + *pos = mc->next; > + pthread_mutex_unlock(&id_priv->mut); > + if (!mc) > + return -EADDRNOTAVAIL; > > - ret = ibv_detach_mcast(id->qp, &ah_attr.grh.dgid, > ah_attr.dlid); > - if (ret) > - goto out; > - } > + if (id->qp) > + ibv_detach_mcast(id->qp, &mc->mgid, mc->mlid); > > + CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_LEAVE_MCAST, size); > + cmd->id = mc->handle; > + > ret = write(id->channel->fd, msg, size); > if (ret != size) > ret = (ret > 0) ? 
-ENODATA : ret; > -out: > - return ret; > -} > > -static void ucma_copy_event_from_kern(struct rdma_cm_event *dst, > - struct ucma_abi_event_resp *src) > -{ > - dst->event = src->event; > - dst->status = src->status; > - dst->private_data_len = src->private_data_len; > - if (src->private_data_len) { > - dst->private_data = dst + 1; > - memcpy(dst->private_data, src->private_data, > - src->private_data_len); > - } else > - dst->private_data = NULL; > + pthread_mutex_lock(&id_priv->mut); > + while (mc->events_completed < resp->events_reported) > + pthread_cond_wait(&mc->cond, &id_priv->mut); > + pthread_mutex_unlock(&id_priv->mut); > + > + free(mc); > + return ret; > } Sean, I just noticed that once i apply the patch, the last + lines (that is pthread_mutex_lock, while loop doing pthread_cond_wait and then pthread_mutex_unlock) become part of rdma_leave_multicast which seems to me strictly buggy as no one is going to wake up this code. Looking in your librdmacm git on openfabrics @ http://www2.openfabrics.org/git/?p=~shefty/librdmacm.git;a=blob;f=src/cma.c this indeed seems to be code of librdmacm/src/cma.c Can you clarify that? Or. From changquing.tang at hp.com Thu Jan 4 07:49:08 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 4 Jan 2007 09:49:08 -0600 Subject: [openib-general] APM support in openib stack In-Reply-To: <459D0F7D.1060902@voltaire.com> Message-ID: We are currently happy with Verbs API to wire the IB connection, without libibcm.so and librdmacm.so, the drawback of this method is that it Requires an alltoall exchange of QP number among all process, it is OK to static MPI world. When we come to dynamic process, there are two groups of MPI processes, within each group, IB connection has already established, we want To establish IB connection between the two groups, and the size of each group is dynamic. 
We can use the current method, but it requires Several rounds of message exchange(simulate alltoall), so we hope to have a connect/accept style method to establish IB connection. If we require system to have IPoIB on each port, and if there are two cards( two ports each), plus an ethernet, then 5 IP addresses must be Configured on a node. We want to have as little requirements as possible, and performance consideration(I asked you before which method is faster to setup IB connection), So what I am thinking of is: 1. for static MPI job, don't use libibcm.so and librdmacm.so 2. for dynamic MPI job, add to use libibcm.so only, no IPoIB is required. If we come to iWARP, it is another story. --CQ > -----Original Message----- > From: Or Gerlitz [mailto:ogerlitz at voltaire.com] > Sent: Thursday, January 04, 2007 8:30 AM > To: Tang, Changqing > Cc: Sean Hefty; openib-general at openib.org > Subject: Re: [openib-general] APM support in openib stack > > Tang, Changqing wrote: > > Sorry, I find the function 'ib_sa_path_rec_get()' in kernel code. > > Then here is my question: > > > > Is there any way (instruction) to fill in struct 'ib_sa_path_rec' > > inside struct 'ib_cm_req_param' without using librdmacm.so ? > > Hi CQ, > > I understand that you considering to go on an approach which > does not involve librdmacm, so you would probably like to > > + use IB PORT GIDs at your initial mpi init exchange > + issue IB SA Path query via libibsa (which does not exist) > + establish IB RC connection (listen/connect/accept) via libibcm > > Please note that such an approach is possible even with the > non existence of libibsa (similarly to what IB MPIs do today > use IB PORT LIDs [note you would need both GIDs & LIDs to > have the CM working fine] at your ranks pre exchange and > hardcode the other IB PATH params such as MTU, PKEY and SL > you set later into the IB RC QP. 
> > However, a no rdma cm approach means you need to apply hacks > to "guess" > the correct pkey, mtu and sl and some more limitations that > eventually you would face when coming to advanced IB > deployment environments. > > All you have to do to use the rdma cm to require a functional > IPoIB NIC on each of the active IB PORTs, which is a trivial > requirement from the users in the Ethernet world, so why not > apply it here as well ??? > > On the other hand moving to use the IB CM instead of > emulating it via TCP is some progress... > > Or. > > From changquing.tang at hp.com Thu Jan 4 08:19:14 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 4 Jan 2007 10:19:14 -0600 Subject: [openib-general] APM support in openib stack In-Reply-To: <1167922486.4564.73764.camel@hal.voltaire.com> Message-ID: > > I understand that you considering to go on an approach > which does not > > involve librdmacm, so you would probably like to > > > > + use IB PORT GIDs at your initial mpi init exchange > > + issue IB SA Path query via libibsa (which does not exist) > > likely multipath query if you want APM Yes, we want to have APM as well. > > > + establish IB RC connection (listen/connect/accept) via libibcm > > > > Please note that such an approach is possible even with the non > > existence of libibsa (similarly to what IB MPIs do today > use IB PORT > > LIDs [note you would need both GIDs & LIDs to have the CM working > > fine] > > There are other possibilities here too using inband (IB) exchanges. > libibsa is not that far away either. What is your time frame ? Is it possible to have it in OFED 1.2 ? > > > at your ranks pre exchange and hardcode the other IB PATH > params such > > as MTU, PKEY and SL you set later into the IB RC QP. > > > > However, a no rdma cm approach means you need to apply > hacks to "guess" > > the correct pkey, mtu and sl and some more limitations that > eventually > > you would face when coming to advanced IB deployment environments. 
> > Also, you would make more work if iWARP support is of > interest as well. > It comes "for free" or at least "for cheap" if the RDMA CM > approach is taken. We understand that, but we hope to have a connect/accept style IB connection setup, without IPoIB involved, like HP-UX IT-API(similar to uDAPL without underlying IP support), it works with multiple cards. Configure 4-5 IP addresses on a single node is kind of silly. --CQ > > -- Hal > > > All you have to do to use the rdma cm to require a functional IPoIB > > NIC on each of the active IB PORTs, which is a trivial requirement > > from the users in the Ethernet world, so why not apply it > here as well ??? > > > > On the other hand moving to use the IB CM instead of > emulating it via > > TCP is some progress... > > > > Or. > > > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > From halr at voltaire.com Thu Jan 4 08:28:01 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 11:28:01 -0500 Subject: [openib-general] APM support in openib stack In-Reply-To: References: Message-ID: <1167928079.4564.78654.camel@hal.voltaire.com> On Thu, 2007-01-04 at 11:19, Tang, Changqing wrote: > > > I understand that you considering to go on an approach > > which does not > > > involve librdmacm, so you would probably like to > > > > > > + use IB PORT GIDs at your initial mpi init exchange > > > + issue IB SA Path query via libibsa (which does not exist) > > > > likely multipath query if you want APM > > Yes, we want to have APM as well. 
> > > > > > > + establish IB RC connection (listen/connect/accept) via libibcm > > > > > > Please note that such an approach is possible even with the non > > > existence of libibsa (similarly to what IB MPIs do today > > use IB PORT > > > LIDs [note you would need both GIDs & LIDs to have the CM working > > > fine] > > > > There are other possibilities here too using inband (IB) exchanges. > > libibsa is not that far away either. What is your time frame ? > > Is it possible to have it in OFED 1.2 ? What libibsa capabilities are you looking for ? Is it just PathRecord related as above ? -- Hal > > > > > > at your ranks pre exchange and hardcode the other IB PATH > > params such > > > as MTU, PKEY and SL you set later into the IB RC QP. > > > > > > However, a no rdma cm approach means you need to apply > > hacks to "guess" > > > the correct pkey, mtu and sl and some more limitations that > > eventually > > > you would face when coming to advanced IB deployment environments. > > > > Also, you would make more work if iWARP support is of > > interest as well. > > It comes "for free" or at least "for cheap" if the RDMA CM > > approach is taken. > > We understand that, but we hope to have a connect/accept style IB > connection setup, without IPoIB involved, > like HP-UX IT-API(similar to uDAPL without underlying IP support), it > works with multiple cards. > > Configure 4-5 IP addresses on a single node is kind of silly. > > > > --CQ > > > > > > -- Hal > > > > > All you have to do to use the rdma cm to require a functional IPoIB > > > NIC on each of the active IB PORTs, which is a trivial requirement > > > from the users in the Ethernet world, so why not apply it > > here as well ??? > > > > > > On the other hand moving to use the IB CM instead of > > emulating it via > > > TCP is some progress... > > > > > > Or. 
> > > > > > > > > _______________________________________________ > > > openib-general mailing list > > > openib-general at openib.org > > > http://openib.org/mailman/listinfo/openib-general > > > > > > To unsubscribe, please visit > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > From or.gerlitz at gmail.com Thu Jan 4 08:31:45 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Thu, 4 Jan 2007 18:31:45 +0200 Subject: [openib-general] APM support in openib stack In-Reply-To: References: <1167922486.4564.73764.camel@hal.voltaire.com> Message-ID: <15ddcffd0701040831w53e89503u1f94647657316087@mail.gmail.com> On 1/4/07, Tang, Changqing wrote: > > > + use IB PORT GIDs at your initial mpi init exchange > > > + issue IB SA Path query via libibsa (which does not exist) > > > > likely multipath query if you want APM > Yes, we want to have APM as well. The RDMA CM is about to be enhanced to support IB APM where for now you would need to provide the alternate path from some "out of rdma cm" band means. But in the future an enhancemnt of the IB SM would allow the RDMA CM to provide you with the alternate path as well. > > There are other possibilities here too using inband (IB) exchanges. > > libibsa is not that far away either. What is your time frame ? > Is it possible to have it in OFED 1.2 ? I don't see any much advantge for you with libibsa over hard coding a path, ie use dynamic GIDs/LIDs and static or hacked setting for the other path record fields. > > Also, you would make more work if iWARP support is of > > interest as well. > > It comes "for free" or at least "for cheap" if the RDMA CM > > approach is taken. > We understand that, but we hope to have a connect/accept style IB > connection setup, without IPoIB involved, > like HP-UX IT-API(similar to uDAPL without underlying IP support), it > works with multiple cards. CQ, Please note that to have iWARP support you **must** use the rdmacm to establish the RDMA connection. 
This is b/c unlike IB, an iWARP connection can't be hard-wired manually. First a TCP connection is established and then this connection is moved to RDMA. Since you can't hard-wire a TCP connection you must use the iWARP CM etc for that. So bottom line, using the RDMA CM would give you IB and iWARP support and you can't support iWARP without it... Or. From changquing.tang at hp.com Thu Jan 4 08:36:31 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 4 Jan 2007 10:36:31 -0600 Subject: [openib-general] APM support in openib stack In-Reply-To: <1167928079.4564.78654.camel@hal.voltaire.com> Message-ID: > > > > > > There are other possibilities here too using inband (IB) > exchanges. > > > libibsa is not that far away either. What is your time frame ? > > > > Is it possible to have it in OFED 1.2 ? > > What libibsa capabilities are you looking for ? Is it just > PathRecord related as above ? Whatever functions let us do connect/accept IB setup. Can't we combine them into libibcm.so? Why use a separate library ? --CQ > > -- Hal > > > > > > > > > > at your ranks pre exchange and hardcode the other IB PATH > > > params such > > > > as MTU, PKEY and SL you set later into the IB RC QP. > > > > > > > > However, a no rdma cm approach means you need to apply > > > hacks to "guess" > > > > the correct pkey, mtu and sl and some more limitations that > > > eventually > > > > you would face when coming to advanced IB deployment > environments. > > > > > > Also, you would make more work if iWARP support is of interest as > > > well. > > > It comes "for free" or at least "for cheap" if the RDMA > CM approach > > > is taken. > > > > We understand that, but we hope to have a connect/accept style IB > > connection setup, without IPoIB involved, like HP-UX > IT-API(similar to > > uDAPL without underlying IP support), it works with multiple cards. > > > > Configure 4-5 IP addresses on a single node is kind of silly.
> > > > > > > > --CQ > > > > > > > > > > -- Hal > > > > > > > All you have to do to use the rdma cm to require a functional > > > > IPoIB NIC on each of the active IB PORTs, which is a trivial > > > > requirement from the users in the Ethernet world, so > why not apply > > > > it > > > here as well ??? > > > > > > > > On the other hand moving to use the IB CM instead of > > > emulating it via > > > > TCP is some progress... > > > > > > > > Or. > > > > > > > > > > > > _______________________________________________ > > > > openib-general mailing list > > > > openib-general at openib.org > > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > > To unsubscribe, please visit > > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > > > > > > From halr at voltaire.com Thu Jan 4 08:39:08 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 11:39:08 -0500 Subject: [openib-general] APM support in openib stack In-Reply-To: <15ddcffd0701040831w53e89503u1f94647657316087@mail.gmail.com> References: <1167922486.4564.73764.camel@hal.voltaire.com> <15ddcffd0701040831w53e89503u1f94647657316087@mail.gmail.com> Message-ID: <1167928747.4564.79244.camel@hal.voltaire.com> On Thu, 2007-01-04 at 11:31, Or Gerlitz wrote: > On 1/4/07, Tang, Changqing wrote: > > > > > + use IB PORT GIDs at your initial mpi init exchange > > > > + issue IB SA Path query via libibsa (which does not exist) > > > > > > likely multipath query if you want APM > > > Yes, we want to have APM as well. > > The RDMA CM is about to be enhanced to support IB APM where for now > you would need to provide the alternate path from some "out of rdma > cm" band means. But in the future an enhancemnt of the IB SM would > allow the RDMA CM to provide you with the alternate path as well. What enhancement are you referring to ? I think there is some APM support available today in some SMs. > > > There are other possibilities here too using inband (IB) exchanges. 
> > > libibsa is not that far away either. What is your time frame ? > > > Is it possible to have it in OFED 1.2 ? > > I don't see any much advantge for you with libibsa over hard coding a > path, ie use dynamic GIDs/LIDs and static or hacked setting for the > other path record fields. Other than perhaps not needing another network to transfer these parameters. > > > Also, you would make more work if iWARP support is of > > > interest as well. > > > It comes "for free" or at least "for cheap" if the RDMA CM > > > approach is taken. > > > We understand that, but we hope to have a connect/accept style IB > > connection setup, without IPoIB involved, > > like HP-UX IT-API(similar to uDAPL without underlying IP support), it > > works with multiple cards. > > CQ, > > Please note that to have iWARP support you **must** use the rdmacm to > establish the RDMA connection. > > This is b/c unlike IB, iWARP connection can't be hard wire manually. currently. There was a thread on perhaps changing this but it did seem to terminate without conclusion. -- Hal > First a TCP connection is established and then this connection is > moved to RDMA. Since you cann't hard wire a TCP connection you must > use the IWARP CM etc for that. So bottom line, using the RDMA CM would > give you IB and iWARP support and you can't support iWARP without > it... > > Or. From rjwalsh at pathscale.com Thu Jan 4 08:43:22 2007 From: rjwalsh at pathscale.com (Robert Walsh) Date: Thu, 04 Jan 2007 08:43:22 -0800 Subject: [openib-general] git question Message-ID: <459D2EAA.9040204@pathscale.com> Hi all, I was away for all of December, so imagine my surprise when I came back and saw git up and running, svn deprecated, etc. :-) Anyway, I poked around looking for some information on where stuff had moved to. I found this page: https://wiki.openfabrics.org/tiki-index.php?page=Downloading+Code+From+the+OFA+git+Repositories This tells me where the userspace and kernel components have moved to. 
Is there a similar document telling me where things like the trunk/contrib directory are? For that matter, is there a mapping at all between https://openib.org/svn/x/y/z and git:/something/something/something? BTW: I know that this could be somewhere in the month-long backlog of email I'm still wading through, but I haven't noticed yet and it probably should be added to that wiki page in any case. Regards, Robert. From mst at mellanox.co.il Thu Jan 4 08:41:21 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 4 Jan 2007 18:41:21 +0200 Subject: [openib-general] APM support in openib stack In-Reply-To: <15ddcffd0701040831w53e89503u1f94647657316087@mail.gmail.com> References: <1167922486.4564.73764.camel@hal.voltaire.com> <15ddcffd0701040831w53e89503u1f94647657316087@mail.gmail.com> Message-ID: <20070104164121.GA1793@mellanox.co.il> > > > > + use IB PORT GIDs at your initial mpi init exchange > > > > + issue IB SA Path query via libibsa (which does not exist) > > > > > > likely multipath query if you want APM > > > Yes, we want to have APM as well. > > The RDMA CM is about to be enhanced to support IB APM where for now > you would need to provide the alternate path from some "out of rdma > cm" band means. Are you working on this then? Cool. > But in the future an enhancemnt of the IB SM would > allow the RDMA CM to provide you with the alternate path as well. Wouldn't we be able to do the necessary queries with existing SM? I think the SM enhancement is only needed as an optimization, to reduce the number of queries.
-- MST From or.gerlitz at gmail.com Thu Jan 4 08:43:40 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Thu, 4 Jan 2007 18:43:40 +0200 Subject: [openib-general] SVN deprecation In-Reply-To: References: <4593A31D.50808@voltaire.com> <459B91E1.7000605@voltaire.com> <0B3B409D-1B50-4D18-8374-DEF85FBD2534@cisco.com> <459CE30B.3040503@voltaire.com> Message-ID: <15ddcffd0701040843te5979c2tbc7de1a82a33f16@mail.gmail.com> On 1/4/07, Jeff Squyres wrote: > On Jan 4, 2007, at 6:20 AM, Or Gerlitz wrote: > > >> See "svn help " and the SVN book for more information. > > > > Considering myself as being located somewhere within the ~medium > > range of the SVN users spectrum I truly think that my questions are > > not of RTFM type. > > Sounds like you just don't want to read the fine documentation. ;-) I do want to read but (and forgive me if this seems like making you work to explain me the manual) I also do want to double check that the person that leads this process knows 100% what he is doing and for that purpose i found it appropriate to ask you to review my suggested work flow, anyway > > There's a nice overview of browsing a repository's history in the SVN > book here: > > http://svnbook.red-bean.com/en/1.2/svn-book.html#svn.tour.history > > > So let me see i can do it pro actively: > > > > assuming you have used the command > > > > $ svn delete some-sub-tree-of-a-repository > > Actually, it would be: > > svn rm some-tree > svn ci some-tree > > That would commit the removal of some-tree at the HEAD at repository > number N. Of course, one of the goals of SVN is to maintain an > accurate history. So you can always examine the repository at r(N-1) > (or any prior values of N) to see what subtree looked like at exactly > those points in time. > > Quoting the SVN book: > > "Of course, nothing is ever totally deleted from the repository—just > from the HEAD of the repository. 
You can get back anything you delete > by checking out (or updating your working copy) a revision earlier > than the one in which you deleted it." > > > ***and*** left a README at the ancestor directory of this subtree > > which > > Note that leaving a README in the directory has nothing to do with > the semantics of how SVN works. I think that this is probably > obvious, but I just wanted to be precise. :-) It has to do with telling the user who just checked out the HEAD of the repoistory at revision N what was the sub-tree/s **structure** at the revision (which would NOT be N-1 if you don't do all the delete at once and even if you do all of them at once i am not sure it would be N-1) which these trees where deleted. > > says that you have just deleted sub-trees xxx,yyy,zzz etc my work > > flow to get a checkout of xxx would look like: > > > > $ svn co url/of/path/to/ancestor/directory > > $ cd /path/to/ancestor/directory > > $ read README, learn there was an xxx directory with content XXX > > > > $ svn ls xxx -R > > or > > $ svn co -r HEAD xxx > > then > > $ cd xxx > > It's actually simpler than that: > > $ svn co url/of/path/to/ancestor/directory > $ cd /path/to/ancestor/directory > $ read README, learn there was an xxx directory with content XXX > $ svn up -r N > > Which updates your current checkout to reflect the state of the > repository (at that tree) at repository number N (where N is > assumedly less than the value of N at the HEAD, so you're reverting > back to an earlier state of the repository, before the stuff that you > want was deleted). > > When navigating the history via "svn up -r N", there's a few complex > corner cases where the update can fail (by design), but they aren't > common and can be avoided by doing a fresh checkout of the r number > that you want: > > $ svn co -r N url/of/path/to/ancestor/directory thanks for all the clarifications, the process you suggest seems fine to me. Or. 
From changquing.tang at hp.com Thu Jan 4 08:50:41 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 4 Jan 2007 10:50:41 -0600 Subject: [openib-general] APM support in openib stack In-Reply-To: <1167928747.4564.79244.camel@hal.voltaire.com> Message-ID: > > > > There are other possibilities here too using inband > (IB) exchanges. > > > > libibsa is not that far away either. What is your time frame ? > > > > > Is it possible to have it in OFED 1.2 ? > > > > I don't see any much advantage for you with libibsa over > hard coding a > > path, ie use dynamic GIDs/LIDs and static or hacked setting for the > > other path record fields. > > Other than perhaps not needing another network to transfer > these parameters. This is the most useful function! > > > > Please note that to have iWARP support you **must** use the > rdmacm to > > establish the RDMA connection. We understand. Let's ignore iWARP in this discussion, because the iWARP performance is far behind. --CQ > > > > This is b/c unlike IB, iWARP connection can't be hard wire manually. > > currently. There was a thread on perhaps changing this but it > did seem to terminate without conclusion. > > -- Hal > > > First a TCP connection is established and then this connection is > > moved to RDMA. Since you can't hard wire a TCP connection you must > > use the IWARP CM etc for that. So bottom line, using the > RDMA CM would > > give you IB and iWARP support and you can't support iWARP without > > it... > > > > Or. 
> > From jsquyres at cisco.com Thu Jan 4 08:53:43 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 4 Jan 2007 11:53:43 -0500 Subject: [openib-general] SVN deprecation In-Reply-To: <15ddcffd0701040843te5979c2tbc7de1a82a33f16@mail.gmail.com> References: <4593A31D.50808@voltaire.com> <459B91E1.7000605@voltaire.com> <0B3B409D-1B50-4D18-8374-DEF85FBD2534@cisco.com> <459CE30B.3040503@voltaire.com> <15ddcffd0701040843te5979c2tbc7de1a82a33f16@mail.gmail.com> Message-ID: <78BF8D4E-B381-4626-919D-29BE2B47982A@cisco.com> On Jan 4, 2007, at 11:43 AM, Or Gerlitz wrote: >> Sounds like you just don't want to read the fine documentation. ;-) > > I do want to read but (and forgive me if this seems like making you > work to explain me the manual) I also do want to double check that the > person that leads this process knows 100% what he is doing and for > that purpose i found it appropriate to ask you to review my suggested > work flow, anyway No worries -- I'm being needlessly nit-picky; sorry. Plus, isn't it the weekend for you? It's perfectly understandable if you didn't want to RTFM on the weekend. :-) >> [snipped] >> When navigating the history via "svn up -r N", there's a few complex >> corner cases where the update can fail (by design), but they aren't >> common and can be avoided by doing a fresh checkout of the r number >> that you want: >> >> $ svn co -r N url/of/path/to/ancestor/directory > > thanks for all the clarifications, the process you suggest seems > fine to me. Ok. I'll work up a proposed README -- per your point, you're right, the last "good" r number (i.e., the last N value before the deletions occurred) may not be (r_number_of_HEAD - 1). I'll include the last "good" r number in the README and some examples of how to peruse the history. 
-- Jeff Squyres Server Virtualization Business Unit Cisco Systems From halr at voltaire.com Thu Jan 4 08:26:47 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 11:26:47 -0500 Subject: [openib-general] APM support in openib stack In-Reply-To: References: Message-ID: <1167928004.4564.78587.camel@hal.voltaire.com> On Thu, 2007-01-04 at 11:19, Tang, Changqing wrote: > > > I understand that you considering to go on an approach > > which does not > > > involve librdmacm, so you would probably like to > > > > > > + use IB PORT GIDs at your initial mpi init exchange > > > + issue IB SA Path query via libibsa (which does not exist) > > > > likely multipath query if you want APM > > Yes, we want to have APM as well. > > > > > > > + establish IB RC connection (listen/connect/accept) via libibcm > > > > > > Please note that such an approach is possible even with the non > > > existence of libibsa (similarly to what IB MPIs do today > > use IB PORT > > > LIDs [note you would need both GIDs & LIDs to have the CM working > > > fine] > > > > There are other possibilities here too using inband (IB) exchanges. > > libibsa is not that far away either. What is your time frame ? > > Is it possible to have it in OFED 1.2 ? I think it's possible but may require some relaxation of the 1/31 functionality freeze for this but I'm not sure yet... -- Hal > > > > > > at your ranks pre exchange and hardcode the other IB PATH > > params such > > > as MTU, PKEY and SL you set later into the IB RC QP. > > > > > > However, a no rdma cm approach means you need to apply > > hacks to "guess" > > > the correct pkey, mtu and sl and some more limitations that > > eventually > > > you would face when coming to advanced IB deployment environments. > > > > Also, you would make more work if iWARP support is of > > interest as well. > > It comes "for free" or at least "for cheap" if the RDMA CM > > approach is taken. 
> > We understand that, but we hope to have a connect/accept style IB > connection setup, without IPoIB involved, > like HP-UX IT-API(similar to uDAPL without underlying IP support), it > works with multiple cards. > > Configure 4-5 IP addresses on a single node is kind of silly. > > > > --CQ > > > > > > -- Hal > > > > > All you have to do to use the rdma cm to require a functional IPoIB > > > NIC on each of the active IB PORTs, which is a trivial requirement > > > from the users in the Ethernet world, so why not apply it > > here as well ??? > > > > > > On the other hand moving to use the IB CM instead of > > emulating it via > > > TCP is some progress... > > > > > > Or. > > > > > > > > > _______________________________________________ > > > openib-general mailing list > > > openib-general at openib.org > > > http://openib.org/mailman/listinfo/openib-general > > > > > > To unsubscribe, please visit > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > From mshefty at ichips.intel.com Thu Jan 4 09:35:07 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 04 Jan 2007 09:35:07 -0800 Subject: [openib-general] IB_CM_REJ_INVALID_SERVICE_ID In-Reply-To: <01ec01c7300c$b840be20$0182a8c0@eblap> References: <01ec01c7300c$b840be20$0182a8c0@eblap> Message-ID: <459D3ACB.2060106@ichips.intel.com> > ...I hadn't realised they had > 1 HCA. We bind the listen ID to a > specific IP address - could that have some bearing? AFAICS from the > customer's debug logs, they are listening on the correct HCA... A listen bound to a specific address would be bound to a specific device. If the CM REQ comes in on a different device, there wouldn't be a match. But even if this were the case (which you suggest it's not), then doing a ping shouldn't have any affect on whether a future CM REQ finds a match. How are the IP addresses on the HCA ports configured? Are the connections made to IP addresses, or is name resolution used at all? 
- Sean From eeb at bartonsoftware.com Thu Jan 4 09:39:32 2007 From: eeb at bartonsoftware.com (Eric Barton) Date: Thu, 4 Jan 2007 17:39:32 -0000 Subject: [openib-general] IB_CM_REJ_INVALID_SERVICE_ID In-Reply-To: <459D3ACB.2060106@ichips.intel.com> Message-ID: <021f01c73027$4b16ed40$0182a8c0@eblap> > > ...I hadn't realised they had > 1 HCA. We bind the listen ID to a > > specific IP address - could that have some bearing? AFAICS from the > > customer's debug logs, they are listening on the correct HCA... > > A listen bound to a specific address would be bound to a specific device. If > the CM REQ comes in on a different device, there wouldn't be a match. But even > if this were the case (which you suggest it's not), then doing a ping shouldn't > have any affect on whether a future CM REQ finds a match. Indeed! > How are the IP addresses on the HCA ports configured? I'll have to find out from the customer... > Are the connections made to IP addresses, or is name resolution > used at all? No name resolution at all - just IP addresses. Cheers, Eric From mshefty at ichips.intel.com Thu Jan 4 09:55:34 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 04 Jan 2007 09:55:34 -0800 Subject: [openib-general] APM support in openib stack In-Reply-To: References: Message-ID: <459D3F96.20909@ichips.intel.com> > We want to have as little requirements as possible, and performance > consideration(I asked you before which method is faster to setup IB > connection), > So what I am thinking of is: > 1. for static MPI job, don't use libibcm.so and librdmacm.so > 2. for dynamic MPI job, add to use libibcm.so only, no IPoIB is > required. I usually think of IB connections in terms of 3 steps: 1. Identify the remote node. How do you identify the remote node, and how is that information obtained? 2. Obtain a path record between the local and remote node. Today, there are two libraries capable of providing this, libibmad and librdmacm. 
The userspace MAD library gives greater control, and would likely do so even if a libibsa were created. 3. Establish a connection. IMO, out of band connections should should be prohibited by the IB stack, but this would likely break a lot of existing code. The IB CM is the only agent capable of detecting stale and duplicate connections between nodes. Without it, applications are more susceptible to data corruption. - Sean From mst at mellanox.co.il Thu Jan 4 10:03:45 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 4 Jan 2007 20:03:45 +0200 Subject: [openib-general] APM support in openib stack In-Reply-To: <459D3F96.20909@ichips.intel.com> References: <459D3F96.20909@ichips.intel.com> Message-ID: <20070104180345.GB3430@mellanox.co.il> > IMO, out of band connections should should be prohibited by the IB stack, but > this would likely break a lot of existing code. The IB CM is the only agent > capable of detecting stale and duplicate connections between nodes. Without it, > applications are more susceptible to data corruption. Hmm, I think we agreed the sensible thing for stale connections is to prevent QPN reuse for timewait period below the verb interface? I didn't get to this yet, but you didn't change your mind, did you? -- MST From changquing.tang at hp.com Thu Jan 4 10:37:19 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 4 Jan 2007 12:37:19 -0600 Subject: [openib-general] APM support in openib stack In-Reply-To: <459D3F96.20909@ichips.intel.com> Message-ID: > > I usually think of IB connections in terms of 3 steps: > > 1. Identify the remote node. > How do you identify the remote node, and how is that > information obtained? We can either use IP address or LID of the port. They always need a way to transfer such information from the Passive side to active side (internally you translate IP to GID/LID, right? ), or just exchange them. 
So either you give the IP on command line(you transfer it manually), or use other channel to transfer it(this is what MPI does, and we use ethernet network to do it, for MPI job with hundreds of nodes, we must use a different network(even if it is IPoIB) to transfer such ID info in order to wire IB connection). > > 2. Obtain a path record between the local and remote node. > Today, there are two libraries capable of providing this, > libibmad and librdmacm. The userspace MAD library gives > greater control, and would likely do so even if a libibsa > were created. We assume there must be at least one path between every process-pair. For APM, it would be nice to query multiple path. > > 3. Establish a connection. > > IMO, out of band connections should be prohibited by What do you mean 'out of band connections' ? We need a way to pass IP or LID from one process to another, a third channel is always needed (if you type the server's IP on client's command line, you can think you use a manual channel to transfer the IP) --CQ > the IB stack, but this would likely break a lot of existing > code. The IB CM is the only agent capable of detecting stale > and duplicate connections between nodes. Without it, > applications are more susceptible to data corruption. > > - Sean > From sean.hefty at intel.com Thu Jan 4 10:42:39 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 4 Jan 2007 10:42:39 -0800 Subject: [openib-general] APM support in openib stack In-Reply-To: Message-ID: <000101c73030$1c9be250$8698070a@amr.corp.intel.com> >> IMO, out of band connections should be prohibited by > >What do you mean 'out of band connections' ? > >We need a way to pass IP or LID from one process to another, a third >channel is always needed (if you type the server's IP on client's >command line, you can think you use a manual channel to transfer the IP) By out of band, I mean connections that are not established through the IB CM. 
For example, connections established by exchanging QP information over a TCP connection. - Sean From jgunthorpe at obsidianresearch.com Thu Jan 4 10:42:48 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Thu, 4 Jan 2007 11:42:48 -0700 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167912258.4564.65120.camel@hal.voltaire.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> <1167912258.4564.65120.camel@hal.voltaire.com> Message-ID: <20070104184248.GC3267@obsidianresearch.com> On Thu, Jan 04, 2007 at 07:04:19AM -0500, Hal Rosenstock wrote: > > I think it depends how you code it.. In IP-like terms you have two > > processes going on here, a route lookup and a 'neighbour lookup' (GID > > to LID/etc conversion) for the link address. > > > > An non-existing link local GID should pass the routing lookup, but fail the > > neighbour lookup since there is no LID. > > > > So typically you'd do the routing lookup and end up with an on-link > > GID. Then you'd do the neighbour lookup and get a link address (IB path). > > I think you are talking here about the end node and router function > rather than SM/SA, right ? The way I was hoping to start out is by putting this in the SA and the routers, not in the end nodes. > And in terms of IB path lookup, is the path just local with a > destination of the neighbor (router) or global to the end point ? With this kind of model the IB path lookup would return a LID/SL/etc so it is a local path but might terminate on a router and might require a GRH so it can be forwarded. 
> > As an initial start I'd like to see support for an entry like this: > > > > default via fe80::17:77ff:feff:5 > > > > Where fe80::17:77ff:feff:5 is the GID of a router. A full table for a > > routing environment might look like: > > > > fe80::/64 onlink > > fe80::/10 unreachable ; IBA requires link local scope addresses never > > ; to be routed > > ff00::/8 onlink ; Multicast is 'on link' > > Why is all multicast scoped onlink ? I guess this is a starting point > but not what IPoIB needs to run across IB subnets. I'm thinking to start it would be OK to just force routers to be subscribed to all MLIDs until we have a better idea how this needs to work. This would work for IPoIB and is why I put it as onlink. If the SM is doing the inter-subnet spanning tree then it probably needs more routing entries.. If it is staying out of the picture then this is probably enough. Jason From sean.hefty at intel.com Thu Jan 4 10:47:31 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 4 Jan 2007 10:47:31 -0800 Subject: [openib-general] APM support in openib stack In-Reply-To: <20070104180345.GB3430@mellanox.co.il> Message-ID: <000201c73030$ca4f8140$8698070a@amr.corp.intel.com> >Hmm, I think we agreed the sensible thing for stale connections is to prevent >QPN reuse for timewait period below the verb interface? > >I didn't get to this yet, but you didn't change your mind, did you? I didn't change my mind about QP reuse. I was referring to the case where: node A crashes, comes back up, and tries to reconnect to node B using the same QP. - Sean From mshefty at ichips.intel.com Thu Jan 4 12:11:16 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 04 Jan 2007 12:11:16 -0800 Subject: [openib-general] RFC fix for userspace rdma cm crashes Message-ID: <459D5F64.8090505@ichips.intel.com> There's a problem with how rdma cm events are reported to userspace that can lead to application crashes. 
When a new connection request arrives, a context for the connection is allocated in the kernel. The connection event is then reported to userspace. The userspace library retrieves the event and allocates its own context for the connection. The userspace context is associated with the kernel's context when accepting. This allows the kernel to give userspace context with other events. A problem occurs if a second event for the same connection occurs before the user has had a chance to call accept. The userspace context has not yet been set, which causes the librdmacm to crash. (This has been seen when the app takes too long to call accept, resulting in the remote side timing out and rejecting the connection.) I can think of a couple possible fixes for this, but wanted to get input. I believe that this can be fixed in either the kernel or userspace code. A kernel fix could queue events until the context has been set. A userspace fix could store its contexts in a map, and lookup the correct one if it is not given. - Sean From swise at opengridcomputing.com Thu Jan 4 13:04:25 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 04 Jan 2007 15:04:25 -0600 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <459D5F64.8090505@ichips.intel.com> References: <459D5F64.8090505@ichips.intel.com> Message-ID: <1167944665.3071.48.camel@stevo-desktop> On Thu, 2007-01-04 at 12:11 -0800, Sean Hefty wrote: > There's a problem with how rdma cm events are reported to userspace that can > lead to application crashes. > > When a new connection request arrives, a context for the connection is allocated > in the kernel. The connection event is then reported to userspace. The > userspace library retrieves the event and allocates its own context for the > connection. The userspace context is associated with the kernel's context when > accepting. This allows the kernel to give userspace context with other events. 
> > A problem occurs if a second event for the same connection occurs before the > user has had a chance to call accept. The userspace context has not yet been > set, which causes the librdmacm to crash. (This has been seen when the app > takes too long to call accept, resulting in the remote side timing out and > rejecting the connection.) > > I can think of a couple possible fixes for this, but wanted to get input. I > believe that this can be fixed in either the kernel or userspace code. A kernel > fix could queue events until the context has been set. A userspace fix could > store its contexts in a map, and lookup the correct one if it is not given. > Does this affect kernel rdma cm users too in some way? If not, then perhaps the place to fix it is in userspace. From mst at mellanox.co.il Thu Jan 4 13:21:54 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 4 Jan 2007 23:21:54 +0200 Subject: [openib-general] APM support in openib stack In-Reply-To: <000201c73030$ca4f8140$8698070a@amr.corp.intel.com> References: <000201c73030$ca4f8140$8698070a@amr.corp.intel.com> Message-ID: <20070104212154.GB4155@mellanox.co.il> > >Hmm, I think we agreed the sensible thing for stale connections is to prevent > >QPN reuse for timewait period below the verb interface? > > > >I didn't get to this yet, but you didn't change your mind, did you? > > I didn't change my mind about QP reuse. > > I was referring to the case where: node A crashes, comes back up, and tries to > reconnect to node B using the same QP. But it will get a new QP in node B, won't it, even if sockets are used for CM? 
-- MST From rdreier at cisco.com Thu Jan 4 13:34:27 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:34:27 -0800 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <1167859320.4187.81.camel@stevo-desktop> (Steve Wise's message of "Wed, 03 Jan 2007 15:22:00 -0600") References: <1167851839.4187.36.camel@stevo-desktop> <20070103193324.GD29003@mellanox.co.il> <1167855618.4187.65.camel@stevo-desktop> <1167859320.4187.81.camel@stevo-desktop> Message-ID: OK, I'm back from vacation today. Anyway I don't have a definitive statement on this right now. I guess I agree that I don't like having an extra parameter to a function that should be pretty fast (although req notify isn't quite as hot as something like posting a send request or polling a cq), given that it adds measurable overhead. (And I am surprised that the overhead is measurable, since 3 arguments still fit in registers, but OK). I also agree that adding an extra entry point just to pass in the user data is ugly, and also racy. Giving the kernel driver a pointer it can read seems OK I guess, although it's a little ugly to have a backdoor channel like that. I'm somewhat surprised the driver has to go into the kernel to rearm a CQ -- what makes the operation need kernel privileges? (Sorry for not reading the code) From caitlinb at broadcom.com Thu Jan 4 13:36:42 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Thu, 4 Jan 2007 13:36:42 -0800 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <459D5F64.8090505@ichips.intel.com> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1E12A22@NT-SJCA-0751.brcm.ad.broadcom.com> openib-general-bounces at openib.org wrote: > There's a problem with how rdma cm events are reported to > userspace that can lead to application crashes. > > When a new connection request arrives, a context for the > connection is allocated in the kernel. The connection event > is then reported to userspace. 
The userspace library > retrieves the event and allocates its own context for the > connection. The userspace context is associated with the > kernel's context when accepting. This allows the kernel to > give userspace context with other events. > > A problem occurs if a second event for the same connection > occurs before the user has had a chance to call accept. The > userspace context has not yet been > set, which causes the librdmacm to crash. (This has been > seen when the app > takes too long to call accept, resulting in the remote side > timing out and rejecting the connection.) > I'm missing something here. How does a second event "for the same connection" occur before the connection does? That is, if the user has not accepted the connection request, how can a later packet be related to the "same connection"? If this is related strictly to retransmitted connection requests, then should userspace be involved? Should that be handled by the kernel connection managers and/or the device specific verbs? From rdreier at cisco.com Thu Jan 4 13:37:29 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:37:29 -0800 Subject: [openib-general] ibv_get_device_list() question In-Reply-To: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> (Jeff Squyres's message of "Tue, 2 Jan 2007 18:00:07 -0500") References: <975EF225-2658-4E9D-AFFC-EF0FF294F398@cisco.com> Message-ID: > 1. Does the ibv API provide any information about devices' physical > location with regards to busses / processors? (I doubt it, but I'm > just asking to be sure) Not directly through the API, and I would hate to try and design an API to encapsulate all that info. I think chasing down stuff through sysfs as was already suggested is your best bet. If the sysfs files exist you're happy and can try to optimize automatically, otherwise you have to rely on what the user feeds you manually. > 2. 
Even if we get this information externally (e.g., user provides a > config file to OMPI), does the information returned via > ibv_get_device_list() come back in a guaranteed order? I.e., if I > have multiple HCAs in a host, will the entries in the list always be > returned in the same order such that I could have a host-specific > OMPI config file saying "processes on cores 0,1,2,3 use ibv_device 0, > processes on cores 4,5,6,7 use device 1", or something similar? No, in the presence of hotplug, differences in PCI bus scanning between different kernels, all of that can change. I think the sane way to refer to IB devices is with GUIDs, since those are supposed to be pretty persistent. - R. From rdreier at cisco.com Thu Jan 4 13:39:32 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:39:32 -0800 Subject: [openib-general] CQ error handling in IPoIB In-Reply-To: <459A88C1.6010100@voltaire.com> (Moni Shoua's message of "Tue, 02 Jan 2007 18:30:57 +0200") References: <459A88C1.6010100@voltaire.com> Message-ID: > When a CQ encounters an error, in order to be able to use the CQ again, > the consumer should: > * Destroy all the QPs that are attached to the CQ > * Destroy the CQ > * Recreate the CQ through the Create Completion Queue verb Yes, true... > While (at least one part of) the code does... > > static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) > { > ... > ... > ... > if (wc->status != IB_WC_SUCCESS && > wc->status != IB_WC_WR_FLUSH_ERR) but these aren't CQ errors, they are work request errors. It is true that the IPoIB driver currently does not handle a QP going to the error state very gracefully, but the only way it can happen is because of a SW or HW bug so that recovery hasn't been a high priority. - R. 
From rdreier at cisco.com Thu Jan 4 13:39:57 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:39:57 -0800 Subject: [openib-general] the valgrind support of libibverbs/libmthca is not full In-Reply-To: <1987.85.65.223.184.1167733398.squirrel@dev.mellanox.co.il> (dotanb@dev.mellanox.co.il's message of "Tue, 2 Jan 2007 12:23:18 +0200 (IST)") References: <1987.85.65.223.184.1167733398.squirrel@dev.mellanox.co.il> Message-ID: > For the following verbs (and I'm sure that this is not the full list) > there is a memory warning from valgrind: > ibv_query_device > ibv_dealloc_pd > ibv_create_cq Can you send the full warnings you get? Thanks From rdreier at cisco.com Thu Jan 4 13:44:03 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:44:03 -0800 Subject: [openib-general] does the libibverbs support static linkage? In-Reply-To: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> (dotanb@dev.mellanox.co.il's message of "Tue, 2 Jan 2007 11:11:10 +0200 (IST)") References: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> Message-ID: > Driver Version : gen2_devel-20070101-1730 What version of libibverbs does this correspond to? > load_driver (so_path=0x5e65f0 "/usr/local//lib64/infiniband/mthca.so") at > src/init.c:61 (func: load_driver) This makes me think it is a version older than 1.0.4, since load_driver() is at a completely different line number in 1.0.4. - R. From rdreier at cisco.com Thu Jan 4 13:45:59 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:45:59 -0800 Subject: [openib-general] one vs. 
two drivers for an iWARP-capable Ethernet NIC In-Reply-To: <78C9135A3D2ECE4B8162EBDCE82CAD77010FB436@nekter> (Leonid Grossman's message of "Thu, 28 Dec 2006 16:31:13 -0500") References: <78C9135A3D2ECE4B8162EBDCE82CAD77010FB436@nekter> Message-ID: > Jeff/Roland/all, > What is the preferred submission driver model for an iWARP-capable > Ethernet NIC - two separate drivers (Ethernet and OpenFabrics) that > interact with each other, or a single driver that supports both > OpenFabrics and Ethernet interfaces? Let's not use the term "OpenFabrics interface." Let's just call the two interfaces RDMA and ethernet (or L2 NIC if you like). Anyway my preference would be for the cleanest possible driver. If your driver is not naturally divided into two separate parts then it's fine to have a unified driver. We already have examples of both: amso1100 is unified and the Chelsio T3 driver is split. - R. From swise at opengridcomputing.com Thu Jan 4 13:49:31 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 04 Jan 2007 15:49:31 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: References: <1167851839.4187.36.camel@stevo-desktop> <20070103193324.GD29003@mellanox.co.il> <1167855618.4187.65.camel@stevo-desktop> <1167859320.4187.81.camel@stevo-desktop> Message-ID: <1167947371.3071.59.camel@stevo-desktop> On Thu, 2007-01-04 at 13:34 -0800, Roland Dreier wrote: > OK, I'm back from vacation today. > > Anyway I don't have a definitive statement on this right now. I guess > I agree that I don't like having an extra parameter to a function that > should be pretty fast (although req notify isn't quite as hot as > something like posting a send request or polling a cq), given that it > adds measurable overhead. (And I am surprised that the overhead is > measurable, since 3 arguments still fit in registers, but OK). > > I also agree that adding an extra entry point just to pass in the user > data is ugly, and also racy. 
> > Giving the kernel driver a pointer it can read seems OK I guess, > although it's a little ugly to have a backdoor channel like that. > > I'm somewhat surprised the driver has to go into the kernel to rearm a > CQ -- what makes the operation need kernel privileges? (Sorry for not > reading the code) > - Rearming the CQ requires reading and writing to a global adapter register that is shared and thus needs to be protected. They didn't architect the rearm to be a direct user operation. Steve. From rdreier at cisco.com Thu Jan 4 13:49:03 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:49:03 -0800 Subject: [openib-general] [PATCH/RFC] libibverbs: Improve driver loading In-Reply-To: <20061226085144.GA4325@mellanox.co.il> (Michael S. Tsirkin's message of "Tue, 26 Dec 2006 10:51:44 +0200") References: <20061225141849.GI842@mellanox.co.il> <20061226085144.GA4325@mellanox.co.il> Message-ID: > Wrt static linking: I see this warning when I link with -static: > : warning: Using 'dlopen' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking > And it actually crashes inside dlopen on some platforms. Which platform? Is it crashing in dlopen(NULL) (my heuristic for detecting a static application). > Would it be possible to add a configuration option to avoid using dlopen > for static apps? Or, maybe, it makes more sense to make an empty stub for libdl, > and ask apps to link with that? The ideal solution would be to build the library twice, once for static and once for dynamic, and pass in a different preprocessor flag so we can only use dlopen() when it has a chance at working. But it's hard to do that with libtool/automake, and I don't feel like hand-crafting a build system right now. OTOH static linking is such a corner case that I don't want to waste too much time on it. - R. 
From rdreier at cisco.com Thu Jan 4 13:50:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 13:50:04 -0800 Subject: [openib-general] ib_dma_addr_t In-Reply-To: <20061225230051.GG17469@mellanox.co.il> (Michael S. Tsirkin's message of "Tue, 26 Dec 2006 01:00:51 +0200") References: <20061225230051.GG17469@mellanox.co.il> Message-ID: > I'd like to propose that we introduce ib_dma_addr_t. > The idea is to add some type safety (via sparse checker) > that we lost when all addresses were converted to u64. How would that work? sparse isn't going to warn about implicit casts to/from ib_dma_addr_t, is it? We can't make ib_dma_addr_t a bitwise type, because arithmetic is allowed on bus addresses. - R. From mshefty at ichips.intel.com Thu Jan 4 13:53:35 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 04 Jan 2007 13:53:35 -0800 Subject: [openib-general] APM support in openib stack In-Reply-To: <20070104212154.GB4155@mellanox.co.il> References: <000201c73030$ca4f8140$8698070a@amr.corp.intel.com> <20070104212154.GB4155@mellanox.co.il> Message-ID: <459D775F.9020705@ichips.intel.com> > But it will get a new QP in node B, won't it, even if sockets > are used for CM? The app on node A can get the same QP as a previous connection. The app on node B will get a different QP. But this leaves the situation where two QPs on node B may be configured to the same QP on node A. The QP configuration on node A should handle this, but it's wasting resources on node B, since the initial QP is unusable. - Sean From mshefty at ichips.intel.com Thu Jan 4 14:06:29 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 04 Jan 2007 14:06:29 -0800 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <54AD0F12E08D1541B826BE97C98F99F1E12A22@NT-SJCA-0751.brcm.ad.broadcom.com> References: <54AD0F12E08D1541B826BE97C98F99F1E12A22@NT-SJCA-0751.brcm.ad.broadcom.com> Message-ID: <459D7A65.3020204@ichips.intel.com> > I'm missing something here. 
How does a second event "for the > same connection" occur before the connection does? > > That is, if the user has not accepted the connection request, > how can a later packet be related to the "same connection"? > > If this is related strictly to retransmitted connection requests, > then should userspace be involved? Should that be handled by > the kernel connection managers and/or the device specific verbs? Here's the actual problem that we see that hopefully explains the issue better: Node A sends an IB CM REQ to node B This results in new connection objects (one in the kernel, and one in userspace) and a connect event. After several IB CM REQ retries, node A tires of waiting for node B to respond to the REQ and gives up Node A sends an IB CM REJ to node B to fail the connection The reject is part of the IB CM protocol, and the event is exposed to the user. If the userspace application retrieves the reject event before calling rdma_accept, it ends up dereferencing a NULL pointer because it hasn't set the context for the new connection yet. It currently expects a valid context passed up from the kernel in order to locate its connection object. - Sean From mshefty at ichips.intel.com Thu Jan 4 14:13:41 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 04 Jan 2007 14:13:41 -0800 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <1167944665.3071.48.camel@stevo-desktop> References: <459D5F64.8090505@ichips.intel.com> <1167944665.3071.48.camel@stevo-desktop> Message-ID: <459D7C15.3020307@ichips.intel.com> > Does this affect kernel rdma cm users too in some way? If not, then > perhaps the place to fix it is in userspace. This doesn't affect the kernel rdma_cm. The fix can either go into the rdma_ucm (not rdma_cm), or librdmacm, either of which limit the impact to userspace clients only. 
Here's one issue with putting the fix in the librdmacm: there's no guarantee that a multi-threaded client will process the event for a new connection before the reject. This makes me lean toward implementing the fix in the kernel, where the events are serialized. - Sean From halr at voltaire.com Thu Jan 4 14:13:52 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 04 Jan 2007 17:13:52 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <20070104184248.GC3267@obsidianresearch.com> References: <1167751520.4596.77827.camel@hal.voltaire.com> <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> <1167912258.4564.65120.camel@hal.voltaire.com> <20070104184248.GC3267@obsidianresearch.com> Message-ID: <1167948831.4564.97385.camel@hal.voltaire.com> On Thu, 2007-01-04 at 13:42, Jason Gunthorpe wrote: > On Thu, Jan 04, 2007 at 07:04:19AM -0500, Hal Rosenstock wrote: > > > > I think it depends how you code it.. In IP-like terms you have two > > > processes going on here, a route lookup and a 'neighbour lookup' (GID > > > to LID/etc conversion) for the link address. > > > > > > An non-existing link local GID should pass the routing lookup, but fail the > > > neighbour lookup since there is no LID. > > > > > > So typically you'd do the routing lookup and end up with an on-link > > > GID. Then you'd do the neighbour lookup and get a link address (IB path). > > > > I think you are talking here about the end node and router function > > rather than SM/SA, right ? > > The way I was hoping to start out is by putting this in the SA and the > routers, not in the end nodes. We can start there but this is a very fundamental question. I have heard people weigh in on both sides... 
> > And in terms of IB path lookup, is the path just local with a > > destination of the neighbor (router) or global to the end point ? > > With this kind of model the IB path lookup would return a LID/SL/etc Map S/DGID and perhaps TClass to LID/SL/MTU ? > so it is a local path but might terminate on a router and might > require a GRH so it can be forwarded. depending on whether the DGID is on or off link. > > > As an initial start I'd like to see support for an entry like this: > > > > > > default via fe80::17:77ff:feff:5 > > > > > > Where fe80::17:77ff:feff:5 is the GID of a router. A full table for a > > > routing environment might look like: > > > > > > fe80::/64 onlink > > > fe80::/10 unreachable ; IBA requires link local scope addresses never > > > ; to be routed > > > ff00::/8 onlink ; Multicast is 'on link' > > > > Why is all multicast scoped onlink ? I guess this is a starting point > > but not what IPoIB needs to run across IB subnets. > > I'm thinking to start it would be OK to just force routers to be > subscribed to all MLIDs all known MLIDs on that subnet rather than all possible MLIDs ? It's really the MGIDs that are of interest rather than the MLIDs. The router needs to subscribe to traps 66/67 multicast groups in and out of existence. MLIDs on each side of the router may not be the same for a non link local MGID. > until we have a better idea how this needs to > work. This would work for IPoIB and is why I put it as onlink. By onlink, are you saying these wouldn't be forwarded ? > If the SM is doing the inter-subnet spanning tree then it probably > needs more routing entries.. Are you referring to running a spanning tree for multicast ? In any case, I think it will be a while before the routing protocols come into the picture and whether the SM is involved or not is another piece of some of the fundamental routing questions/devisions to be made. > If it is staying out of the picture then > this is probably enough. 
I think it will be out of the picture for a while. -- Hal > Jason From swise at opengridcomputing.com Thu Jan 4 14:15:47 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 04 Jan 2007 16:15:47 -0600 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <459D7C15.3020307@ichips.intel.com> References: <459D5F64.8090505@ichips.intel.com> <1167944665.3071.48.camel@stevo-desktop> <459D7C15.3020307@ichips.intel.com> Message-ID: <1167948947.3071.65.camel@stevo-desktop> On Thu, 2007-01-04 at 14:13 -0800, Sean Hefty wrote: > > Does this affect kernel rdma cm users too in some way? If not, then > > perhaps the place to fix it is in userspace. > > This doesn't affect the kernel rdma_cm. The fix can either go into the rdma_ucm > (not rdma_cm), or librdmacm, either of which limit the impact to userspace > clients only. > > Here's one issue with putting the fix in the librdmacm: there's no guarantee > that a multi-threaded client will process the event for a new connection before > the reject. This makes me lean toward implementing the fix in the kernel, where > the events are serialized. That sounds reasonable. From swise at opengridcomputing.com Thu Jan 4 14:18:48 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 04 Jan 2007 16:18:48 -0600 Subject: [openib-general] open fabrics wiki error Message-ID: <1167949128.3071.68.camel@stevo-desktop> Hey all, Who owns the wiki server? It seems the Chelsio T3 howto got lost somehow? I cannot open this URL which is from the "how to" section of the OFA wiki: https://wiki.openfabrics.org/tiki-index.php?page=Install+OpenIB+for+Chelsio+T3 I assume the file doesn't exist anymore for some reason? I wanted to update this howto to reflect the new git locations... Steve. 
From jsquyres at cisco.com Thu Jan 4 14:23:58 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 4 Jan 2007 17:23:58 -0500 Subject: [openib-general] open fabrics wiki error In-Reply-To: <1167949128.3071.68.camel@stevo-desktop> References: <1167949128.3071.68.camel@stevo-desktop> Message-ID: Steve -- Michael Lee from Sandia was doing lots of work on the new web server today. It's possible that it got accidentally lost or you caught it at an in-between time. Michael -- can you figure out what happened? On Jan 4, 2007, at 5:18 PM, Steve Wise wrote: > Hey all, > > Who owns the wiki server? > > It seems the Chelsio T3 howto got lost somehow? I cannot open this > URL > which is from the "how to" section of the OFA wiki: > > https://wiki.openfabrics.org/tiki-index.php?page=Install+OpenIB+for > +Chelsio+T3 > > I assume the file doesn't exist anymore for some reason? I wanted to > update this howto to reflect the new git locations... > > > Steve. > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From caitlinb at broadcom.com Thu Jan 4 14:28:34 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Thu, 4 Jan 2007 14:28:34 -0800 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <459D7C15.3020307@ichips.intel.com> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1E12A40@NT-SJCA-0751.brcm.ad.broadcom.com> openib-general-bounces at openib.org wrote: >> Does this affect kernel rdma cm users too in some way? If not, then >> perhaps the place to fix it is in userspace. > > This doesn't affect the kernel rdma_cm. The fix can either > go into the rdma_ucm (not rdma_cm), or librdmacm, either of > which limit the impact to userspace clients only. 
> > Here's one issue with putting the fix in the librdmacm: > there's no guarantee that a multi-threaded client will > process the event for a new connection before the reject. > This makes me lean toward implementing the fix in the kernel, > where the events are serialized. > With transport neutral semantics that also makes sense. The reason that the passive side user is involved in connection setup is to approve the connection (and specify the QP). The extra events you cited do not in fact require any user-mode response with the transport neutral logic. The kernel can simply flag the connection request as being "already aborted" and simply wait for the consumer to accept/reject. There is no real need to interrupt the consumer to tell them about this problem NOW (and the fact that the consumer is taking time to answer might indicate that the Consumer is already quite busy). I'm not as familiar with all the corner cases in IB-specific connection establishment, but if there is no user context is there really a need to forward the event to userspace as long as the reject is not forgotten? From mst at mellanox.co.il Thu Jan 4 14:34:35 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 00:34:35 +0200 Subject: [openib-general] ib_dma_addr_t In-Reply-To: References: Message-ID: <20070104223435.GD4155@mellanox.co.il> > > I'd like to propose that we introduce ib_dma_addr_t. > > The idea is to add some type safety (via sparse checker) > > that we lost when all addresses were converted to u64. > > How would that work? sparse isn't going to warn about implicit casts > to/from ib_dma_addr_t, is it? We can't make ib_dma_addr_t a bitwise > type, because arithmetic is allowed on bus addresses. How about the address_space attribute? Something like how __iomem is defined? 
-- MST From rdreier at cisco.com Thu Jan 4 14:38:02 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 14:38:02 -0800 Subject: [openib-general] ib_dma_addr_t In-Reply-To: <20070104223435.GD4155@mellanox.co.il> (Michael S. Tsirkin's message of "Fri, 5 Jan 2007 00:34:35 +0200") References: <20070104223435.GD4155@mellanox.co.il> Message-ID: > How about the address_space attribute? > Something like how __iomem is defined? I didn't try it and I'm not very up-to-date on sparse, but isn't address_space for pointer types, not integer types? - R. From mst at mellanox.co.il Thu Jan 4 14:41:59 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 00:41:59 +0200 Subject: [openib-general] APM support in openib stack In-Reply-To: <459D775F.9020705@ichips.intel.com> References: <459D775F.9020705@ichips.intel.com> Message-ID: <20070104224159.GF4155@mellanox.co.il> > The app on node A can get the same QP as a previous connection. The app on node > B will get a different QP. But this leaves the situation where two QPs on node > B may be configured to the same QP on node A. The QP configuration on node A > should handle this, but it's wasting resources on node B, since the initial QP > is unusable. So is the issue one of wasting resources or some potential data corruption? If the former, frankly, I don't see the case where node A happens to get the same QPN as worth solving in isolation (chances seem remote) so each protocol needs its own ways to detect stale connections. For example for IPoIB CM I decided to kill a passive connection if it's unused for 30 seconds.
-- MST From mshefty at ichips.intel.com Thu Jan 4 14:42:01 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 04 Jan 2007 14:42:01 -0800 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <54AD0F12E08D1541B826BE97C98F99F1E12A40@NT-SJCA-0751.brcm.ad.broadcom.com> References: <54AD0F12E08D1541B826BE97C98F99F1E12A40@NT-SJCA-0751.brcm.ad.broadcom.com> Message-ID: <459D82B9.3070505@ichips.intel.com> > I'm not as familiar with all the corner cases in IB-specific > connection establishment, but if there is no user context > is there really a need to forward the event to userspace > as long as the reject is not forgotten? In this specific case, probably not. And that's our current work-around for DAPL / Intel MPI, but mainly because it was easy to implement. I just hesitate to discard events, but if problem doesn't pertain to iWarp, and only occurs in a very specific case for IB, the simple solution may be the best. - Sean From mst at mellanox.co.il Thu Jan 4 14:43:08 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 00:43:08 +0200 Subject: [openib-general] ib_dma_addr_t In-Reply-To: References: Message-ID: <20070104224308.GG4155@mellanox.co.il> > > How about the address_space attribute? > > Something like how __iomem is defined? > > I didn't try it and I'm not very up-to-date on sparse, but isn't > address_space for pointer types, not integer types? I didn't think there might be a limitation, but I didn't try it. I'll try to look at it sometime later. -- MST From mst at mellanox.co.il Thu Jan 4 14:51:15 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Fri, 5 Jan 2007 00:51:15 +0200 Subject: [openib-general] [PATCH/RFC] libibverbs: Improve driver loading In-Reply-To: References: <20061225141849.GI842@mellanox.co.il> <20061226085144.GA4325@mellanox.co.il> Message-ID: <20070104225115.GH4155@mellanox.co.il> > > Wrt static linking: I see this warning when I link with -static: > > : warning: Using 'dlopen' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking > > And it actually crashes inside dlopen on some platforms. > > Which platform? Is it crashing in dlopen(NULL) (my heuristic for > detecting a static application). SLES10. > > Would it be possible to add a configuration option to avoid using dlopen > > for static apps? Or, maybe, it makes more sense to make an empty stub for libdl, > > and ask apps to link with that? > > The ideal solution would be to build the library twice, once for > static and once for dynamic, and pass in a different preprocessor flag > so we can only use dlopen() when it has a chance at working. But it's > hard to do that with libtool/automake, and I don't feel like > hand-crafting a build system right now. > > OTOH static linking is such a corner case that I don't want to waste > too much time on it. I can think of some other solutions to this. For example have a stub libdl.a implementation. Interested in a patch? I might give it a try. 
-- MST From swise at opengridcomputing.com Thu Jan 4 14:59:57 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 04 Jan 2007 16:59:57 -0600 Subject: [openib-general] RFC fix for userspace rdma cm crashes In-Reply-To: <459D82B9.3070505@ichips.intel.com> References: <54AD0F12E08D1541B826BE97C98F99F1E12A40@NT-SJCA-0751.brcm.ad.broadcom.com> <459D82B9.3070505@ichips.intel.com> Message-ID: <1167951597.3071.74.camel@stevo-desktop> On Thu, 2007-01-04 at 14:42 -0800, Sean Hefty wrote: > > I'm not as familiar with all the corner cases in IB-specific > > connection establishment, but if there is no user context > > is there really a need to forward the event to userspace > > as long as the reject is not forgotten? > > In this specific case, probably not. And that's our current work-around for > DAPL / Intel MPI, but mainly because it was easy to implement. I just hesitate > to discard events, but if problem doesn't pertain to iWarp, and only occurs in a > very specific case for IB, the simple solution may be the best. > This can happen in iWARP too I think. Discarding the event and failing the subsequent rdma_accept() is ok IMO. From rdreier at cisco.com Thu Jan 4 15:27:05 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 15:27:05 -0800 Subject: [openib-general] [PATCH/RFC] libibverbs: Improve driver loading In-Reply-To: <20070104225115.GH4155@mellanox.co.il> (Michael S. Tsirkin's message of "Fri, 5 Jan 2007 00:51:15 +0200") References: <20061225141849.GI842@mellanox.co.il> <20061226085144.GA4325@mellanox.co.il> <20070104225115.GH4155@mellanox.co.il> Message-ID: > I can think of some other solutions to this. For example have a stub > libdl.a implementation. > Interested in a patch? I might give it a try. Depends how horrible it is of course -- but see what you can come up with. I'm not against improving the current situation, just my appetite for banging my head against static linking is limited. 
From jgunthorpe at obsidianresearch.com Thu Jan 4 15:58:54 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Thu, 4 Jan 2007 16:58:54 -0700 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1167948831.4564.97385.camel@hal.voltaire.com> References: <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> <1167912258.4564.65120.camel@hal.voltaire.com> <20070104184248.GC3267@obsidianresearch.com> <1167948831.4564.97385.camel@hal.voltaire.com> Message-ID: <20070104235854.GA12386@obsidianresearch.com> On Thu, Jan 04, 2007 at 05:13:52PM -0500, Hal Rosenstock wrote: > > The way I was hoping to start out is by putting this in the SA and the > > routers, not in the end nodes. > > We can start there but this is a very fundamental question. I have heard > people weigh in on both sides... Yes, but fortunately the two methods can co-exist and we can prototype the expected router support in opensm and get some experience there. > > With this kind of model the IB path lookup would return a LID/SL/etc > > Map S/DGID and perhaps TClass to LID/SL/MTU ? Yeah, I think so. > all known MLIDs on that subnet rather than all possible MLIDs ? It's > really the MGIDs that are of interest rather than the MLIDs. The router > needs to subscribe to traps 66/67 multicast groups in and out of > existence. MLIDs on each side of the router may not be the same for a > non link local MGID. Yes, we can definitely do that. However, it might be smart to have opensm consider the routers to be a send-only member for every MLID.. > By onlink, are you saying these wouldn't be forwarded ? Not necessarily, the resulting MLID could still end up going to a router..
A onlink line routing table just terminates the routing lookup. 'unreachable' is another termination. A via line changes the next hop GID and creates more lookups until an onlink is reached. I honestly don't have a good idea how routed multicast can work on IB without a lot of ugly overhead. What do you do if you route between 4 1000 node clusters with IPv6? How can you avoid registering 4000 multicast groups with each SM and still have IPv6 SNM work correctly? > Are you referring to running a spanning tree for multicast ? In any > case, I think it will be a while before the routing protocols come into > the picture and whether the SM is involved or not is another piece of > some of the fundamental routing questions/decisions to be made. Yes, but in this case I don't think multicast routing can be pushed to the host. It is either the router or some combination of the router and the SM. Jason From rdreier at cisco.com Thu Jan 4 19:48:09 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 19:48:09 -0800 Subject: [openib-general] [PATCH 1 of 5] IB/mthca: fix off-by-one in FMR handling on memfree In-Reply-To: <20070103124630.GC6019@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 3 Jan 2007 14:46:30 +0200") References: <20070103124630.GC6019@mellanox.co.il> Message-ID: Thanks, good catch. Applied (and I'll send this to stable@ as well) From rdreier at cisco.com Thu Jan 4 19:49:17 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 19:49:17 -0800 Subject: [openib-general] [PATCH 1 of 5] IB/mthca: fix off-by-one in FMR handling on memfree In-Reply-To: <459BD276.3030307@dev.mellanox.co.il> ( ishai@dev.mellanox.co.il's message of "Wed, 03 Jan 2007 17:57:42 +0200") References: <20070103124630.GC6019@mellanox.co.il> <459BD276.3030307@dev.mellanox.co.il> Message-ID: > There is a chance that this is the core of the 32-bit OS on 64-bit > platforms bug we had.
> I run a certain test (copy 1G size file to the SRP disk) with and > without this patch. > With the patch I got a panic, and without it the test passed. It's good to hear that this patch fixes the bug. However I wonder why I never got a bug report for this problem. And I also wonder why this bug would only affect 32-bit OS on a 64-bit platform.... From rdreier at cisco.com Thu Jan 4 19:54:28 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 04 Jan 2007 19:54:28 -0800 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: <20070103125119.GG6019@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 3 Jan 2007 14:51:19 +0200") References: <20070103125119.GG6019@mellanox.co.il> Message-ID: > + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ > + mdev->limits.reserved_mtts = max(dma_get_cache_alignment() / (int)sizeof(u64), > + mdev->limits.reserved_mtts); I don't follow this -- first of all, what guarantee is there that the reserved MTTs end on a cacheline boundary just because they take up more than a single cacheline? It seems this should really be using ALIGN() somehow. Second, isn't reserved_mtts in units of 64 bytes, not sizeof(u64)? 
From eitan at sw053.yok.mtl.com Thu Jan 4 21:38:31 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Fri, 5 Jan 2007 07:38:31 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-05:normal completion Message-ID: <200701050538.l055cVmc026097@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Thu_Jan_4_08:48:46_2007 fdd07c ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=369 Pass=364 Fail=5 Pass: 27 Stability IS1-16.topo 27 Pkey IS1-16.topo 27 OsmTest IS1-16.topo 27 OsmStress IS1-16.topo 27 Multicast IS1-16.topo 27 LidMgr IS1-16.topo 9 Stability IS3-loop.topo 9 Stability IS3-128.topo 9 Pkey IS3-128.topo 9 OsmTest IS3-loop.topo 9 OsmTest IS3-128.topo 9 OsmStress IS3-128.topo 9 Multicast IS3-loop.topo 9 Multicast IS3-128.topo 9 LidMgr IS3-128.topo 9 FatTree part-4-ary-3-tree.topo 9 FatTree merge-roots-4-ary-2-tree.topo 9 FatTree merge-root-4-ary-3-tree.topo 9 FatTree merge-root-12-ary-2-tree.topo 9 FatTree half-4-ary-3-tree.topo 9 FatTree 4-ary-4-tree.topo 9 FatTree 4-ary-3-tree.topo 9 FatTree 12-node-spaced.topo 9 FatTree 12-ary-2-tree.topo 8 FatTree merge-roots-reorder-4-ary-2-tree.topo 8 FatTree merge-2-ary-4-tree.topo 8 FatTree blend-4-ary-2-tree.topo 8 FatTree 32nodes-3lvl-is1.topo 8 FatTree 2-ary-4-tree.topo Failures: 1 FatTree merge-roots-reorder-4-ary-2-tree.topo 1 FatTree merge-2-ary-4-tree.topo 1 FatTree blend-4-ary-2-tree.topo 1 FatTree 32nodes-3lvl-is1.topo 1 FatTree 2-ary-4-tree.topo From mst at mellanox.co.il Thu Jan 4 23:53:27 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 09:53:27 +0200 Subject: [openib-general] [PATCH/RFC] libibverbs: Improve driver loading In-Reply-To: <20061225141849.GI842@mellanox.co.il> References: <20061225141849.GI842@mellanox.co.il> Message-ID: <20070105075244.GK4155@mellanox.co.il> > > OK, at last here is a patch that implements the improvements to > > libibverbs driver loading that we discussed back in October. 
> > > > With this patch, instead of trying all the .so files in the > > $(libdir)/infiniband directory as libibverbs 1.0 does, libibverbs > > instead builds a list of drivers to load and dlopen() exactly that > > list of libraries. It uses relative paths rather than absolute paths, > > so the linker uses the normal search path to find driver libraries. > > > > (To get a list of drivers, libibverbs parses all the config files it > > finds in $(sysconfdir)/libibverbs.d and also looks at the environment > > variables RDMAV_DRIVERS and IBV_DRIVERS) > > > > Then, instead of calling a specific entry point in the driver, > > libibverbs assumes the driver will call ibv_register_driver() from an > > __attribute__((constructor)) function. > > > > This has a number of benefits: > > - multiple drivers can be linked statically into an executable > > - LD_LIBRARY_PATH can be used to manage which drivers to load > > - different versions of the driver can be selected automagically at > > runtime (eg i686/cmov on i386 distros) > > > > I will post a libmthca patch to illustrate how driver libraries need > > to change to work with this new libibverbs method. > > I think this looked good, and probably best to do before the next > major release. > > Do you plan to merge this? BTW, the question still stands. If I start trying to play with static linking issues, I'd like to do this based on this patch, not what's in master currently. -- MST From mst at mellanox.co.il Fri Jan 5 00:02:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 10:02:48 +0200 Subject: [openib-general] [PATCH 1 of 5] IB/mthca: fix off-by-one in FMR handling on memfree In-Reply-To: References: <20070103124630.GC6019@mellanox.co.il> <459BD276.3030307@dev.mellanox.co.il> Message-ID: <20070105080248.GL4155@mellanox.co.il> > > There is a chance that this is the core of the 32-bit OS on 64-bit > > platforms bug we had. 
> > I run a certain test (copy 1G size file to the SRP disk) with and > > without this patch. > > With the patch I got a panic, and without it the test passed. > > It's good to hear that this patch fixes the bug. However I wonder why > I never got a bug report for this problem. And I also wonder why this > bug would only affect 32-bit OS on a 64-bit platform.... I have a theory for this: for the bug to trigger, you need your low memory to be fragmented so that there are several entries in the chunk scatterlist array. I guess this is more likely on a 32 bit kernel machine with high memory present than on a 64 bit kernel where all memory is low. Whereas native 32 bit machines are low end so they likely either have PCI-X (thus no memfree) or <1G RAM (thus no high memory). -- MST From mst at mellanox.co.il Fri Jan 5 00:08:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 10:08:48 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: References: <20070103125119.GG6019@mellanox.co.il> Message-ID: <20070105080848.GM4155@mellanox.co.il> > > + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ > > + mdev->limits.reserved_mtts = max(dma_get_cache_alignment() / (int)sizeof(u64), > > + mdev->limits.reserved_mtts); > > I don't follow this -- first of all, what guarantee is there that the > reserved MTTs end on a cacheline boundary just because they take up > more than a single cacheline? It seems this should really be using > ALIGN() somehow. I thought our buddy allocator algorithm guarantees size alignment. It seems to me we rely on this in other places, no? > Second, isn't reserved_mtts in units of 64 bytes, not sizeof(u64)? I thought this is just the number of reserved MTTs, and each MTT is 64 bit of course. what's wrong here? -- MST From mst at mellanox.co.il Fri Jan 5 00:26:30 2007 From: mst at mellanox.co.il (Michael S.
Tsirkin) Date: Fri, 5 Jan 2007 10:26:30 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: References: Message-ID: <20070105082630.GB13052@mellanox.co.il> > > + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ > > + mdev->limits.reserved_mtts = max(dma_get_cache_alignment() / (int)sizeof(u64), > > + mdev->limits.reserved_mtts); > > I don't follow this -- first of all, what guarantee is there that the > reserved MTTs end on a cacheline boundary just because they take up > more than a single cacheline? It seems this should really be using > ALIGN() somehow. OK, I finally see what you mean. We must also make sure that mtt_base is cache line aligned, I forgot this might not be the case. > Second, isn't reserved_mtts in units of 64 bytes, not sizeof(u64)? Aha, you are right of course. This line does it: mthca_cmd.c: dev_lim->reserved_mtts = 1 << (field >> 4); Will fix next week. Thanks for the comments! -- MST From mst at mellanox.co.il Fri Jan 5 00:30:59 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 10:30:59 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: <20070105082630.GB13052@mellanox.co.il> References: <20070105082630.GB13052@mellanox.co.il> Message-ID: <20070105083059.GC13052@mellanox.co.il> > > > + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ > > > + mdev->limits.reserved_mtts = max(dma_get_cache_alignment() / (int)sizeof(u64), > > > + mdev->limits.reserved_mtts); > > > > I don't follow this -- first of all, what guarantee is there that the > > reserved MTTs end on a cacheline boundary just because they take up > > more than a single cacheline? It seems this should really be using > > ALIGN() somehow. > > OK, I finally see what you mean. We must also make sure that mtt_base is > cache line aligned, I forgot this might not be the case. Or might it? 
Sorry for thinking aloud, but won't other things break if memfree allocator base isn't page aligned? For example we are allocating full pages of ICM space in each table, are we not? -- MST From halr at voltaire.com Fri Jan 5 05:37:32 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 05 Jan 2007 08:37:32 -0500 Subject: [openib-general] [PATCH1/2] OpenSM/ib_types.h: Add ib_mgid_get/set_scope routines Message-ID: <1168004251.4564.146473.camel@hal.voltaire.com> OpenSM/ib_types.h: Add ib_mgid_get/set_scope routines Signed-off-by: Hal Rosenstock diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 5dd4753..acf9141 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -2100,6 +2100,40 @@ ib_gid_is_multicast( return( p_gid->raw[0] == 0xFF ); } +/****f* IBA Base: Types/ib_gid_get_scope +* NAME +* ib_gid_get_scope +* +* DESCRIPTION +* Returns scope of (assumed) multicast GID. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_mgid_get_scope( + IN const ib_gid_t* p_gid ) +{ + return( p_gid->raw[1] & 0x0F ); +} + +/****f* IBA Base: Types/ib_gid_set_scope +* NAME +* ib_gid_set_scope +* +* DESCRIPTION +* Sets scope of (assumed) multicast GID. 
+* +* SYNOPSIS +*/ +static inline void OSM_API +ib_mgid_set_scope( + IN ib_gid_t* p_gid, + IN const uint8_t scope ) +{ + p_gid->raw[1] &= 0xF0; + p_gid->raw[1] |= scope & 0x0F; +} + /****f* IBA Base: Types/ib_gid_set_default * NAME * ib_gid_set_default From halr at voltaire.com Fri Jan 5 05:40:31 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 05 Jan 2007 08:40:31 -0500 Subject: [openib-general] [PATCH 2/2] OpenSM/osm_prtn.c: In osm_prtn_add_mcgroup, use scope from MGID for MCMemberRecord Message-ID: <1168004253.4564.146475.camel@hal.voltaire.com> OpenSM/osm_prtn.c: In osm_prtn_add_mcgroup, use scope from MGID for MCMemberRecord In osm_prtn_add_mcgroup, use scope from MGID for MCMemberRecord rather than assume link local scope Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_prtn.c b/osm/opensm/osm_prtn.c index b905416..a10eae9 100644 --- a/osm/opensm/osm_prtn.c +++ b/osm/opensm/osm_prtn.c @@ -201,6 +201,7 @@ ib_api_status_t osm_prtn_add_mcgroup(osm osm_mgrp_t *p_mgrp = NULL; osm_sa_t *p_sa = &p_subn->p_osm->sa; ib_api_status_t status = IB_SUCCESS; + uint8_t scope; pkey = p->pkey | cl_hton16(0x8000); @@ -216,8 +217,9 @@ ib_api_status_t osm_prtn_add_mcgroup(osm mc_rec.rate = (rate ? 
rate : OSM_DEFAULT_MGRP_RATE) | (2 << 6); /* 10Gb/sec */ mc_rec.pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; mc_rec.sl_flow_hop = ib_member_set_sl_flow_hop(p->sl, 0, 0); - /* Note: scope needs to be consistent with MGID */ - mc_rec.scope_state = 0x21; + /* Scope in MCMemberRecord needs to be consistent with MGID */ + scope = ib_mgid_get_scope(&osm_ipoib_mgid); /* get scope from MGID */ + mc_rec.scope_state = ib_member_set_scope_state(scope, 1); /* full member */ /* don't update rate, mtu */ comp_mask = IB_MCR_COMPMASK_MTU | IB_MCR_COMPMASK_MTU_SEL From rdreier at cisco.com Fri Jan 5 06:19:39 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 05 Jan 2007 06:19:39 -0800 Subject: [openib-general] [PATCH/RFC] libibverbs: Improve driver loading In-Reply-To: <20070105075244.GK4155@mellanox.co.il> (Michael S. Tsirkin's message of "Fri, 5 Jan 2007 09:53:27 +0200") References: <20061225141849.GI842@mellanox.co.il> <20070105075244.GK4155@mellanox.co.il> Message-ID: > BTW, the question still stands. If I start trying to play with > static linking issues, I'd like to do this based on this patch, > not what's in master currently. Yes, I had hoped to push it out sooner but I wanted to fix all the driver libraries first. I didn't get a chance to finish that up before my vacation, but I will do that soon and post patches for driver libraries when I change libibverbs. From swise at opengridcomputing.com Fri Jan 5 06:22:25 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 05 Jan 2007 08:22:25 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: References: <1167851839.4187.36.camel@stevo-desktop> <20070103193324.GD29003@mellanox.co.il> <1167855618.4187.65.camel@stevo-desktop> <1167859320.4187.81.camel@stevo-desktop> Message-ID: <1168006945.10259.17.camel@stevo-desktop> On Thu, 2007-01-04 at 13:34 -0800, Roland Dreier wrote: > OK, I'm back from vacation today. > > Anyway I don't have a definitive statement on this right now. I guess
I guess > I agree that I don't like having an extra parameter to a function that > should be pretty fast (although req notify isn't quite as hot as > something like posting a send request or polling a cq), given that it > adds measurable overhead. (And I am surprised that the overhead is > measurable, since 3 arguments still fit in registers, but OK). > > I also agree that adding an extra entry point just to pass in the user > data is ugly, and also racy. > > Giving the kernel driver a pointer it can read seems OK I guess, > although it's a little ugly to have a backdoor channel like that. > Another alternative is for the cq-index u32 memory to be allocated by the kernel and mapped into the user process. So the lib can read/write it, and the kernel can read it directly. This is the fastest way perfwise, but I didn't want to do it because of the page granularity of mapping. IE it would require a page of address space (and backing memory I guess) just for 1 u32. The CQ element array memory is already allocated this way (and its DMA coherent too), but I didn't want to overload that memory with this extra variable either. Mapping just seemed ugly and wasteful to me. So given 3 approaches: 1) allow user data to be passed into ib_req_notify_cq() via the standard uverbs mechanisms. 2) hide this in the chelsio driver and have the driver copyin the info directly. 3) allocate the memory for this in the kernel and map it to the user process. I chose 1 because it seemed the cleanest from an architecture point of view and I didn't think it would impact performance much. Steve. From mst at mellanox.co.il Fri Jan 5 06:25:39 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 5 Jan 2007 16:25:39 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: References: Message-ID: <20070105142539.GB26997@mellanox.co.il> > Second, isn't reserved_mtts in units of 64 bytes, not sizeof(u64)? 
Let's focus on this comment first. We have at offset 28 bytes: Log (base 2) of the number of MTT segments reserved for firmware use And we have MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); dev_lim->reserved_mtts = 1 << (field >> 4); So it seems reserved_mtts is the number of reserved MTTs and each MTT is 64 bit. Correct? -- MST From rdreier at cisco.com Fri Jan 5 07:53:16 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 05 Jan 2007 07:53:16 -0800 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: <20070105142539.GB26997@mellanox.co.il> (Michael S. Tsirkin's message of "Fri, 5 Jan 2007 16:25:39 +0200") References: <20070105142539.GB26997@mellanox.co.il> Message-ID: > Let's focus on this comment first. > We have at offset 28 bytes: > Log (base 2) of the number of MTT segments reserved for firmware use > > And we have > MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); > dev_lim->reserved_mtts = 1 << (field >> 4); > > So it seems reserved_mtts is the number of reserved MTTs and each > MTT is 64 bit. Correct? Actually it seems we have a problem somewhere. The usage of the field in the query_dev_lim response seems different between mem-free and tavor mode: the tavor PRM says it is the log of # of MTT segments while the memfree PRM says it is the log of the # of MTT entries. Anyway I need to trace this through more carefully... - R. From brian at clusterfs.com Fri Jan 5 07:57:26 2007 From: brian at clusterfs.com (Brian J. Murrell) Date: Fri, 05 Jan 2007 10:57:26 -0500 Subject: [openib-general] how to specify compiler during modules build Message-ID: <1168012646.18576.57.camel@pc.ilinx> Hi All, I have successfully integrated the building of the openib RPMs into our kernel build process with one exception... specification of which compiler to use to build the modules in kernel-ib. 
For many reasons, we cannot simply use the default "gcc" compiler on many O/Ses as we have our own kernel patches that prevent the use of gcc4 (for example), so we must force the building of our kernel with gcc32 (for example). There does not seem to be any provision though to specify which kernel the openib build process uses. I had to hack it into the build process with a patch in the RPM: --- Makefile.orig 2007-01-05 15:01:26.000000000 +0000 +++ Makefile 2007-01-05 14:54:29.000000000 +0000 @@ -47,7 +47,7 @@ env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ -I$(CWD)/drivers/infiniband/ulp/ipoib \ -I$(CWD)/drivers/infiniband/debug" \ - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ + $(MAKE) -C $(KSRC) CC=gcc32 SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ Of course, I have hardcoded in my gcc32, but a more elegant, complete solution would allow that to be specified by the caller. In fact it seems like a good idea for the linux kernel in general, to cache the value of the CC that was used to build a tree when it has targets built in the tree -- that way any other module building process external to the tree would use the same compiler. Probably this cache would be cleared with either a clean or distclean (mrclean still?). But this is a topic for another ML. :-) What are your thoughts on devising some way of passing the CC to build the kernel modules in through the RPM spec file? Cheers, b. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 189 bytes Desc: This is a digitally signed message part URL: From halr at voltaire.com Fri Jan 5 08:44:49 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 05 Jan 2007 11:44:49 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <20070104235854.GA12386@obsidianresearch.com> References: <1167757414.4596.83346.camel@hal.voltaire.com> <1167774142.4596.99507.camel@hal.voltaire.com> <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> <1167912258.4564.65120.camel@hal.voltaire.com> <20070104184248.GC3267@obsidianresearch.com> <1167948831.4564.97385.camel@hal.voltaire.com> <20070104235854.GA12386@obsidianresearch.com> Message-ID: <1168015486.4564.156860.camel@hal.voltaire.com> On Thu, 2007-01-04 at 18:58, Jason Gunthorpe wrote: > On Thu, Jan 04, 2007 at 05:13:52PM -0500, Hal Rosenstock wrote: > > > > The way I was hoping to start out is by putting this in the SA and the > > > routers, not in the end nodes. > > > > We can start there but this is a very fundamental question. I have heard > > people weigh in on both sides... > > Yes, but fortunately the two methods can co-exist and we can prototype > the expected router support in opensm and get some experience > there. True. > > > With this kind of model the IB path lookup would return a LID/SL/etc > > > > Map S/DGID and perhaps TClass to LID/SL/MTU ? > > Yeah, I think so. > > > all known MLIDs on that subnet rather than all possible MLIDs ? It's > > really the MGIDs that are of interest rather than the MLIDs. The router > > needs to subscribe to traps 66/67 multicast groups in and out of > > existence. MLIDs on each side of the router may not be the same for a > > non link local MGID. > > Yes, we can definately do that. 
> > However, it might be smart to have opensm consider the routers to be a > send-only member for every MLID.. Do you mean non-member rather than send-only member ? Routers need to receive as well as send, right ? Or are you worried about some other issue here ? Also, I'm still not sure about a couple of aspects of "every MLID": 1. Wouldn't the router only want to be full member of link local scoped MGIDs (that it was interested in locally) ? Are you saying any local scoped MGIDs not of interest would just get dropped anyhow ? If that is the point here, that would work but isn't there a performance impact of doing so ? 2. Similarly for any other (non local scope) MGRPs which do not match across any router ports, isn't there a performance impact of receiving and then having to drop/filter these packets ? > > By onlink, are you saying these wouldn't be forwarded ? > > Not necessarily, the resulting MLID could still end up going to a > router.. Right, the router is some sort of member on the MGRPs "of interest". I think you are trying to make that list of MGRPs "of interest" simpler and utilize filtering where not needed (as I mentioned above), but I may be mistaken here. > A onlink line routing table just terminates the routing > lookup. 'unreachable' is another termination. A via line changes the > next hop GID and creates more lookups until an onlink is reached. So is the specification of all multicast as onlink a short term thing then ? Also, with using onlink for all multicast, is there some forwarding determination made somewhere in the router stack ? > I honestly don't have a good idea how routed multicast can work on IB > without a lot of ugly overhead. What do you do if you route between 4 > 1000 node clusters with IPv6? How can you avoid registering 4000 > multicast groups with each SM and still have IPv6 SNM work correctly? Yes, I have no idea how IPv6 will work with large inter subnet clusters either.
We had a thread on this a while ago and I think it died out at that point. To state the obvious, I think some changes need to be made for IPv6 to work well with current IB hardware or perhaps some configuration restrictions ? > > Are you referring to running a spanning tree for multicast ? In any > > case, I think it will be a while before the routing protocols come into > > the picture and whether the SM is involved or not is another piece of > > some of the fundamental routing questions/decisions to be made. > > Yes, but in this case I don't think multicast routing can be pushed to > the host. It is either the router or some combination of the router > and the SM. I'm not quite following you on this yet. Why/how is host multicast routing any different (than unicast) ? -- Hal > Jason From Steve.Greenwood at hp.com Fri Jan 5 09:30:33 2007 From: Steve.Greenwood at hp.com (Greenwood, Steve) Date: Fri, 5 Jan 2007 12:30:33 -0500 Subject: [openib-general] best way to get ibv_get_cq_event to return Message-ID: <892B927D8643E94287F16185036A571CF40921@tayexc19.americas.cpqcorp.net> I have a thread that uses ibv_get_cq_event to handle completing RDMA requests. This call seems to have roughly the same capabilities as the Mellanox vapi call EVAPI_poll_cq_block with one important exception. The open-fabrics routine does not have a timeout argument. Beyond restructuring my application, is there a simple way to make ibv_get_cq_event return without a message arriving? Steve Greenwood HP High Performance Computing Group -------------- next part -------------- An HTML attachment was scrubbed...
URL: From felix at chelsio.com Fri Jan 5 09:32:19 2007 From: felix at chelsio.com (Felix Marti) Date: Fri, 5 Jan 2007 09:32:19 -0800 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes Message-ID: <8A71B368A89016469F72CD08050AD334F3BE50@maui.asicdesigners.com> > -----Original Message----- > From: openib-general-bounces at openib.org [mailto:openib-general- > bounces at openib.org] On Behalf Of Steve Wise > Sent: Friday, January 05, 2007 6:22 AM > To: Roland Dreier > Cc: linux-kernel at vger.kernel.org; openib-general at openib.org; > netdev at vger.kernel.org > Subject: Re: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes > > On Thu, 2007-01-04 at 13:34 -0800, Roland Dreier wrote: > > OK, I'm back from vacation today. > > > > Anyway I don't have a definitive statement on this right now. I guess > > I agree that I don't like having an extra parameter to a function that > > should be pretty fast (although req notify isn't quite as hot as > > something like posting a send request or polling a cq), given that it > > adds measurable overhead. (And I am surprised that the overhead is > > measurable, since 3 arguments still fit in registers, but OK). > > > > I also agree that adding an extra entry point just to pass in the user > > data is ugly, and also racy. > > > > Giving the kernel driver a pointer it can read seems OK I guess, > > although it's a little ugly to have a backdoor channel like that. > > > > Another alternative is for the cq-index u32 memory to be allocated by > the kernel and mapped into the user process. So the lib can read/write > it, and the kernel can read it directly. This is the fastest way > perfwise, but I didn't want to do it because of the page granularity of > mapping. IE it would require a page of address space (and backing > memory I guess) just for 1 u32. The CQ element array memory is already > allocated this way (and its DMA coherent too), but I didn't want to > overload that memory with this extra variable either. 
Mapping just > seemed ugly and wasteful to me. > > So given 3 approaches: > > 1) allow user data to be passed into ib_req_notify_cq() via the standard > uverbs mechanisms. > > 2) hide this in the chelsio driver and have the driver copyin the info > directly. > > 3) allocate the memory for this in the kernel and map it to the user > process. > > I chose 1 because it seemed the cleanest from an architecture point of > view and I didn't think it would impact performance much. [Felix Marti] In addition, is arming the CQ really in the performance path? - Don't apps poll the CQ as long as there are pending CQEs and only arm the CQ for notification once there is nothing left to do? If this is the case, it would mean that we waste a few cycles 'idle' cycles. Steve, next to the micro benchmark that you did, did you run any performance benchmark that actually moves traffic? If so, did you see a difference in performance? > > > Steve. > > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Fri Jan 5 10:59:59 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 05 Jan 2007 12:59:59 -0600 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <8A71B368A89016469F72CD08050AD334F3BE50@maui.asicdesigners.com> References: <8A71B368A89016469F72CD08050AD334F3BE50@maui.asicdesigners.com> Message-ID: <1168023599.14028.38.camel@stevo-desktop> On Fri, 2007-01-05 at 09:32 -0800, Felix Marti wrote: > > > -----Original Message----- > > From: openib-general-bounces at openib.org [mailto:openib-general- > > bounces at openib.org] On Behalf Of Steve Wise > > Sent: Friday, January 05, 2007 6:22 AM > > To: Roland Dreier > > Cc: linux-kernel at vger.kernel.org; openib-general at openib.org; > > netdev at vger.kernel.org > > 
Subject: Re: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes > > > > On Thu, 2007-01-04 at 13:34 -0800, Roland Dreier wrote: > > > OK, I'm back from vacation today. > > > > > > Anyway I don't have a definitive statement on this right now. I > guess > > > I agree that I don't like having an extra parameter to a function > that > > > should be pretty fast (although req notify isn't quite as hot as > > > something like posting a send request or polling a cq), given that > it > > > adds measurable overhead. (And I am surprised that the overhead is > > > measurable, since 3 arguments still fit in registers, but OK). > > > > > > I also agree that adding an extra entry point just to pass in the > user > > > data is ugly, and also racy. > > > > > > Giving the kernel driver a pointer it can read seems OK I guess, > > > although it's a little ugly to have a backdoor channel like that. > > > > > > > Another alternative is for the cq-index u32 memory to be allocated by > > the kernel and mapped into the user process. So the lib can > read/write > > it, and the kernel can read it directly. This is the fastest way > > perfwise, but I didn't want to do it because of the page granularity > of > > mapping. IE it would require a page of address space (and backing > > memory I guess) just for 1 u32. The CQ element array memory is > already > > allocated this way (and its DMA coherent too), but I didn't want to > > overload that memory with this extra variable either. Mapping just > > seemed ugly and wasteful to me. > > > > So given 3 approaches: > > > > 1) allow user data to be passed into ib_req_notify_cq() via the > standard > > uverbs mechanisms. > > > > 2) hide this in the chelsio driver and have the driver copyin the info > > directly. > > > > 3) allocate the memory for this in the kernel and map it to the user > > process. > > > > I chose 1 because it seemed the cleanest from an architecture point of > > view and I didn't think it would impact performance much. 
> > [Felix Marti] In addition, is arming the CQ really in the performance > path? - Don't apps poll the CQ as long as there are pending CQEs and > only arm the CQ for notification once there is nothing left to do? If > this is the case, it would mean that we waste a few cycles 'idle' > cycles. I tend to agree. This shouldn't be the hot perf path like post_send/recv and poll. > Steve, next to the micro benchmark that you did, did you run any > performance benchmark that actually moves traffic? If so, did you see a > difference in performance? > No. But I didn't explicitly measure with and without this one single change. From halr at voltaire.com Fri Jan 5 12:09:54 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 05 Jan 2007 15:09:54 -0500 Subject: [openib-general] [PATCH][TRIVIAL] OpenSM/osm_sa_mcmember_record.(h c): Use definitions rather than hard coded constants for MC group scope Message-ID: <1168027793.4564.167973.camel@hal.voltaire.com> OpenSM/osm_sa_mcmember_record.(h c): Use definitions rather than hard coded constants for MC group scope Signed-off-by: Hal Rosenstock diff --git a/osm/include/opensm/osm_sa_mcmember_record.h b/osm/include/opensm/osm_sa_mcmember_record.h index eac30ae..802f3e8 100644 --- a/osm/include/opensm/osm_sa_mcmember_record.h +++ b/osm/include/opensm/osm_sa_mcmember_record.h @@ -392,7 +392,25 @@ osm_mcmr_rcv_find_or_create_new_mgrp( #define OSM_DEFAULT_MGRP_RATE 0x03 /***********/ -/* These Component Mask fields comply with IB Spec 1.2 */ +/* Scope component definitions from IBA 1.2 (Table 3 p. 
146) */ +#define MC_SCOPE_LINK_LOCAL 0x2 +#define MC_SCOPE_SITE_LOCAL 0x5 +#define MC_SCOPE_ORG_LOCAL 0x8 +#define MC_SCOPE_GLOBAL 0xE + +/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_SCOPE +* Name +* OSM_DEFAULT_MGRP_SCOPE +* +* DESCRIPTION +* Default SCOPE used for new MGRP creation (link local) +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MGRP_SCOPE MC_SCOPE_LINK_LOCAL +/***********/ + +/* JoinState definitions from IBA 1.2 */ #define MC_FULL_MEMBER 0x1 #define MC_NON_MEMBER 0x2 #define MC_SENDONLY_NON_MEMBER 0x4 diff --git a/osm/opensm/osm_sa_mcmember_record.c b/osm/opensm/osm_sa_mcmember_record.c index 04718aa..dcc6c5a 100644 --- a/osm/opensm/osm_sa_mcmember_record.c +++ b/osm/opensm/osm_sa_mcmember_record.c @@ -1039,7 +1039,7 @@ __validate_requested_mgid(IN osm_mcmr_re /* 2 - now what if the link local format 0xA01B is used - the scope should not be link local */ if ( ( signature == 0xA01B ) && - ((p_mcm_rec->mgid.multicast.header[1] & 0x0F) == 0x02) ) { + ((p_mcm_rec->mgid.multicast.header[1] & 0x0F) == MC_SCOPE_LINK_LOCAL) ) { osm_log( p_rcv->p_log, OSM_LOG_ERROR, "__validate_requested_mgid: ERR 1B24: " "MGID uses 0xA01B signature but with link-local scope\n" ); @@ -1285,7 +1285,7 @@ osm_mcmr_rcv_create_new_mgrp( else { /* to guarantee no collision with other subnets use local scope! */ - scope = 0x02; /* link-local scope */ + scope = MC_SCOPE_LINK_LOCAL; } p_mgid = &(mcm_rec.mgid); From sean.hefty at intel.com Fri Jan 5 12:35:15 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 5 Jan 2007 12:35:15 -0800 Subject: [openib-general] [PATCH] 2.6.20 rdma_ucm: fix reporting events with invalid user context In-Reply-To: <1167951597.3071.74.camel@stevo-desktop> Message-ID: <000f01c73109$0183ccb0$8698070a@amr.corp.intel.com> There's a problem with how rdma cm events are reported to userspace that can lead to application crashes. When a new connection request arrives, a context for the connection is allocated in the kernel. 
The connection event is then reported to userspace. The userspace library retrieves the event and allocates its own context for the connection. The userspace context is associated with the kernel's context when accepting. This allows the kernel to give userspace context with other events. A problem occurs if a second event for the same connection occurs before the user has had a chance to call accept. The userspace context has not yet been set, which causes the librdmacm to crash. (This has been seen when the app takes too long to call accept, resulting in the remote side timing out and rejecting the connection.) Fix this by ignoring events for new connections until userspace has set their context. This can only happen if an error occurs on a new connection before the user accepts it. This is okay, since the accept will just fail later. Signed-off-by: Sean Hefty --- If everyone agrees with this approach, I'd like to try to push it into 2.6.20. It will definitely be needed for OFED, as we hit into this problem when scaling up with DAPL. diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 9f30f9b..e2e8d32 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -213,7 +213,17 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, goto out; } ctx->backlog--; + } else if (!ctx->uid) { + /* + * We ignore events for new connections until userspace has set + * their context. This can only happen if an error occurs on a + * new connection before the user accepts it. This is okay, + * since the accept will just fail later. 
+ */ + kfree(uevent); + goto out; } + list_add_tail(&uevent->list, &ctx->file->event_list); wake_up_interruptible(&ctx->file->poll_wait); out: From halr at voltaire.com Fri Jan 5 13:15:35 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 05 Jan 2007 16:15:35 -0500 Subject: [openib-general] {PATCH] OpenSM: Support scope for IPoIB multicast groups in partition config Message-ID: <1168031734.4564.171433.camel@hal.voltaire.com> OpenSM: Support scope for IPoIB multicast groups in partition config Signed-off-by: Hal Rosenstock diff --git a/osm/doc/partition-config.txt b/osm/doc/partition-config.txt index 2306d4c..0841c53 100644 --- a/osm/doc/partition-config.txt +++ b/osm/doc/partition-config.txt @@ -42,14 +42,16 @@ flag - used to indicate IPoIB c Currently recognized flags are: -ipoib - indicates that this partition may be used for IPoIB, as - result IPoIB capable MC group will be created. -rate= - specifies rate for this IPoIB MC group (default is 3 (10GBps)) -mtu= - specifies MTU for this IPoIB MC group (default is 4 (2048)) -sl= - specifies SL for this IPoIB MC group (default is 0) - -Note that values for 'rate' and 'mtu' should be specified as defined in -IBTA specification (for example mtu=4 for 2048). +ipoib - indicates that this partition may be used for IPoIB, as + result IPoIB capable MC group will be created. +rate= - specifies rate for this IPoIB MC group (default is 3 (10GBps)) +mtu= - specifies MTU for this IPoIB MC group (default is 4 (2048)) +sl= - specifies SL for this IPoIB MC group (default is 0) +scope= - specifies scope for this IPoIB MC group (default is 2 (link +local)) + +Note that values for 'rate', 'mtu'. and 'scope' should be specified as defined +in the IBTA specification (for example mtu=4 for 2048). 
PortGUIDs list: diff --git a/osm/man/opensm.8 b/osm/man/opensm.8 index 973b3f1..8ea7b91 100644 --- a/osm/man/opensm.8 +++ b/osm/man/opensm.8 @@ -1,4 +1,4 @@ -.TH OPENSM 8 "January 2, 2007" "OpenIB" "OpenIB Management" +.TH OPENSM 8 "January 5, 2007" "OpenIB" "OpenIB Management" .SH NAME opensm \- InfiniBand subnet manager and administration (SM/SA) @@ -286,17 +286,19 @@ Partition Definition: Currently recognized flags are: - ipoib - indicates that this partition may be used for IPoIB, as - result IPoIB capable MC group will be created. - rate= - specifies rate for this IPoIB MC group - (default is 3 (10GBps)) - mtu= - specifies MTU for this IPoIB MC group - (default is 4 (2048)) - sl= - specifies SL for this IPoIB MC group - (default is 0) - -Note that values for 'rate' and 'mtu' should be specified as defined in -IBTA specification (for example, mtu=4 for 2048). + ipoib - indicates that this partition may be used for IPoIB, as + result IPoIB capable MC group will be created. + rate= - specifies rate for this IPoIB MC group + (default is 3 (10GBps)) + mtu= - specifies MTU for this IPoIB MC group + (default is 4 (2048)) + sl= - specifies SL for this IPoIB MC group + (default is 0) + scope= - specifies scope for this IPoIB MC group + (default is 2 (link local)) + +Note that values for rate, mtu, and scope should be specified as +defined in the IBTA specification (for example, mtu=4 for 2048). 
PortGUIDs list: diff --git a/osm/opensm/osm_prtn.c b/osm/opensm/osm_prtn.c index 1a4fc52..43c242b 100644 --- a/osm/opensm/osm_prtn.c +++ b/osm/opensm/osm_prtn.c @@ -165,7 +165,7 @@ ib_api_status_t osm_prtn_add_all(osm_log return status; } -static const ib_gid_t osm_ipoib_mgid = { +static ib_gid_t osm_ipoib_mgid = { { 0xff, /* multicast field */ 0x12, /* non-permanent bit, link local scope */ @@ -193,7 +193,8 @@ static const ib_gid_t osm_ts_ipoib_mgid ib_api_status_t osm_prtn_add_mcgroup(osm_log_t *p_log, osm_subn_t *p_subn, osm_prtn_t *p, - unsigned is_ipoib, uint8_t rate, uint8_t mtu) + unsigned is_ipoib, uint8_t rate, + uint8_t mtu, uint8_t scope) { ib_member_rec_t mc_rec; ib_net64_t comp_mask; @@ -201,7 +202,7 @@ ib_api_status_t osm_prtn_add_mcgroup(osm osm_mgrp_t *p_mgrp = NULL; osm_sa_t *p_sa = &p_subn->p_osm->sa; ib_api_status_t status = IB_SUCCESS; - uint8_t scope; + uint8_t ts_scope; pkey = p->pkey | cl_hton16(0x8000); @@ -218,12 +219,13 @@ ib_api_status_t osm_prtn_add_mcgroup(osm mc_rec.pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; mc_rec.sl_flow_hop = ib_member_set_sl_flow_hop(p->sl, 0, 0); /* Scope in MCMemberRecord needs to be consistent with MGID */ - scope = ib_mgid_get_scope(&osm_ipoib_mgid); /* get scope from MGID */ - mc_rec.scope_state = ib_member_set_scope_state(scope, MC_FULL_MEMBER); + mc_rec.scope_state = ib_member_set_scope_state(scope ? scope : OSM_DEFAULT_MGRP_SCOPE, MC_FULL_MEMBER); + ib_mgid_set_scope(&mc_rec.mgid, scope ? 
scope : OSM_DEFAULT_MGRP_SCOPE); - /* don't update rate, mtu */ + /* don't update rate, mtu, scope */ comp_mask = IB_MCR_COMPMASK_MTU | IB_MCR_COMPMASK_MTU_SEL - | IB_MCR_COMPMASK_RATE | IB_MCR_COMPMASK_RATE_SEL; + | IB_MCR_COMPMASK_RATE | IB_MCR_COMPMASK_RATE_SEL + | IB_MCR_COMPMASK_SCOPE; status = osm_mcmr_rcv_find_or_create_new_mgrp(&p_sa->mcmr_rcv, comp_mask, &mc_rec, &p_mgrp); if (!p_mgrp || status != IB_SUCCESS) @@ -238,6 +240,9 @@ ib_api_status_t osm_prtn_add_mcgroup(osm /* FIXME: remove this upon TS fixes */ mc_rec.mgid = osm_ts_ipoib_mgid; memcpy(&mc_rec.mgid.raw[4], &pkey, sizeof(pkey)); + /* Scope in MCMemberRecord needs to be consistent with MGID */ + ts_scope = ib_mgid_get_scope(&osm_ts_ipoib_mgid); /* get scope from MGID */ + mc_rec.scope_state = ib_member_set_scope_state(ts_scope, MC_FULL_MEMBER); status = osm_mcmr_rcv_find_or_create_new_mgrp(&p_sa->mcmr_rcv, comp_mask, &mc_rec, &p_mgrp); if (p_mgrp) @@ -331,7 +336,7 @@ static ib_api_status_t osm_prtn_make_def status = osm_prtn_add_port(p_log, p_subn, p, p_subn->sm_port_guid, TRUE); if (no_config) - osm_prtn_add_mcgroup(p_log, p_subn, p, 1, 0, 0); + osm_prtn_add_mcgroup(p_log, p_subn, p, 1, 0, 0, 0); _err: return status; diff --git a/osm/opensm/osm_prtn_config.c b/osm/opensm/osm_prtn_config.c index b9a88c3..dbce497 100644 --- a/osm/opensm/osm_prtn_config.c +++ b/osm/opensm/osm_prtn_config.c @@ -69,7 +69,7 @@ struct part_conf { osm_log_t *p_log; osm_subn_t *p_subn; osm_prtn_t *p_prtn; - unsigned is_ipoib, mtu, rate, sl; + unsigned is_ipoib, mtu, rate, sl, scope; }; extern osm_prtn_t *osm_prtn_make_new(osm_log_t *p_log, osm_subn_t *p_subn, @@ -83,7 +83,7 @@ extern ib_api_status_t osm_prtn_add_port extern ib_api_status_t osm_prtn_add_mcgroup(osm_log_t *p_log, osm_subn_t *p_subn, osm_prtn_t *p, unsigned is_ipoib, uint8_t rate, - uint8_t mtu); + uint8_t mtu, uint8_t scope); static int partition_create(unsigned lineno, struct part_conf *conf, char *name, char *id, char *flag, char *flag_val) @@ -122,7 
+122,7 @@ static int partition_create(unsigned lin if (conf->is_ipoib) osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, conf->is_ipoib, (uint8_t)conf->rate, - (uint8_t)conf->mtu); + (uint8_t)conf->mtu, (uint8_t)conf->scope); return 0; } @@ -145,6 +145,12 @@ static int partition_add_flag(unsigned l "PARSE WARN: line %d: " "flag \'rate\' requires valid value" " - skipped\n", lineno); + } else if (!strncmp(flag, "scope", len)) { + if (!val || (conf->scope = strtoul(val, NULL, 0)) == 0) + osm_log(conf->p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'scope\' requires valid value" + " - skipped\n", lineno); } else if (!strncmp(flag, "sl", len)) { unsigned sl; char *end; @@ -206,7 +212,6 @@ static int partition_add_port(unsigned l return 0; } - /* conf file parser */ #define STRIP_HEAD_SPACES(p) while (*(p) == ' ' || *(p) == '\t' || \ From swise at opengridcomputing.com Fri Jan 5 15:46:46 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 05 Jan 2007 17:46:46 -0600 Subject: [openib-general] OFED 1.2 features update In-Reply-To: <45759B8C.8010408@dev.mellanox.co.il> References: <45759B8C.8010408@dev.mellanox.co.il> Message-ID: <1168040806.14028.108.camel@stevo-desktop> On Tue, 2006-12-05 at 18:17 +0200, Tziporet Koren wrote: > Hi, > In the OFED meeting yesterday the following decisions were taken: > > 1. We agreed to have two types of features > > * Must have features - will delay the release if not ready > * Desirable features - will be included only if they are ready on > time according to OFED requirements. > > 2. The following features are added to OFED 1.2 as desired: > > 1. iWARP - someone from iWARP company should be the owner > 2. VNIC - Madhue > > OFED 1.2 plan was updated on the Wiki: > https://openib.org/tiki/tiki-index.php?page=OFED+1.2+release+plan+and+features > Hey Tziporet, Where is this document now? The URL is dead. Thanks, Steve. 
From jsquyres at cisco.com Fri Jan 5 16:12:22 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Fri, 5 Jan 2007 19:12:22 -0500 Subject: [openib-general] OFED 1.2 features update In-Reply-To: <1168040806.14028.108.camel@stevo-desktop> References: <45759B8C.8010408@dev.mellanox.co.il> <1168040806.14028.108.camel@stevo-desktop> Message-ID: <7723B577-D67D-4DA8-889B-627F689C6BDA@cisco.com> It looks like the redirect on the old server is wrong -- it's redirecting to tiki.openfabrics.org when it should be redirecting to wiki.openfabrics.org. The proper IP address for the DNS name wiki.openfabrics.org may take throughout the weekend to get to you -- we started the propagation this morning. Michael: can you fix the tiki redirect on the old server? On Jan 5, 2007, at 6:46 PM, Steve Wise wrote: > On Tue, 2006-12-05 at 18:17 +0200, Tziporet Koren wrote: >> Hi, >> In the OFED meeting yesterday the following decisions were taken: >> >> 1. We agreed to have two types of features >> >> * Must have features - will delay the release if not ready >> * Desirable features - will be included only if they are ready on >> time according to OFED requirements. >> >> 2. The following features are added to OFED 1.2 as desired: >> >> 1. iWARP - someone from iWARP company should be the owner >> 2. VNIC - Madhue >> >> OFED 1.2 plan was updated on the Wiki: >> https://openib.org/tiki/tiki-index.php?page=OFED+1.2+release+plan >> +and+features >> > > Hey Tziporet, > > Where is this document now? The URL is dead. > > > Thanks, > > Steve. 
> > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From halr at voltaire.com Fri Jan 5 16:12:44 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 05 Jan 2007 19:12:44 -0500 Subject: [openib-general] OFED 1.2 features update In-Reply-To: <1168040806.14028.108.camel@stevo-desktop> References: <45759B8C.8010408@dev.mellanox.co.il> <1168040806.14028.108.camel@stevo-desktop> Message-ID: <1168042345.4564.180829.camel@hal.voltaire.com> On Fri, 2007-01-05 at 18:46, Steve Wise wrote: > On Tue, 2006-12-05 at 18:17 +0200, Tziporet Koren wrote: > > Hi, > > In the OFED meeting yesterday the following decisions were taken: > > > > 1. We agreed to have two types of features > > > > * Must have features - will delay the release if not ready > > * Desirable features - will be included only if they are ready on > > time according to OFED requirements. > > > > 2. The following features are added to OFED 1.2 as desired: > > > > 1. iWARP - someone from iWARP company should be the owner > > 2. VNIC - Madhue > > > > OFED 1.2 plan was updated on the Wiki: > > https://openib.org/tiki/tiki-index.php?page=OFED+1.2+release+plan+and+features > > > > Hey Tziporet, > > Where is this document now? The URL is dead. https://wiki.openfabrics.org/tiki-index.php?page=OFED+release+procedure -- Hal > > Thanks, > > Steve. 
> > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From eitan at sw053.yok.mtl.com Fri Jan 5 21:18:23 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sat, 6 Jan 2007 07:18:23 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-06:normal completion Message-ID: <200701060518.l065IN6q026554@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Thu_Jan_4_17:15:43_2007 0987ee ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=369 Pass=368 Fail=1 Pass: 27 Stability IS1-16.topo 27 Pkey IS1-16.topo 27 OsmTest IS1-16.topo 27 OsmStress IS1-16.topo 27 Multicast IS1-16.topo 27 LidMgr IS1-16.topo 9 Stability IS3-loop.topo 9 Stability IS3-128.topo 9 Pkey IS3-128.topo 9 OsmTest IS3-loop.topo 9 OsmTest IS3-128.topo 9 OsmStress IS3-128.topo 9 Multicast IS3-loop.topo 9 Multicast IS3-128.topo 9 LidMgr IS3-128.topo 9 FatTree part-4-ary-3-tree.topo 9 FatTree merge-roots-reorder-4-ary-2-tree.topo 9 FatTree merge-roots-4-ary-2-tree.topo 9 FatTree merge-root-4-ary-3-tree.topo 9 FatTree merge-root-12-ary-2-tree.topo 9 FatTree merge-2-ary-4-tree.topo 9 FatTree blend-4-ary-2-tree.topo 9 FatTree 4-ary-4-tree.topo 9 FatTree 4-ary-3-tree.topo 9 FatTree 32nodes-3lvl-is1.topo 9 FatTree 2-ary-4-tree.topo 9 FatTree 12-node-spaced.topo 9 FatTree 12-ary-2-tree.topo 8 FatTree half-4-ary-3-tree.topo Failures: 1 FatTree half-4-ary-3-tree.topo From mplee at sandia.gov Fri Jan 5 21:28:06 2007 From: mplee at sandia.gov (Lee, Michael Paichi) Date: Fri, 5 Jan 2007 22:28:06 -0700 Subject: [openib-general] OFED 1.2 features update References: <45759B8C.8010408@dev.mellanox.co.il> <1168040806.14028.108.camel@stevo-desktop> <7723B577-D67D-4DA8-889B-627F689C6BDA@cisco.com> Message-ID: <3D84A59A1AD3584DA02AEAD240E8863F036693E2@ES22SNLNT.srn.sandia.gov> Done -----Original 
Message----- From: Jeff Squyres [mailto:jsquyres at cisco.com] Sent: Fri 1/5/2007 4:12 PM To: Steve Wise Cc: Tziporet Koren; openib-general; Lee, Michael Paichi Subject: Re: [openib-general] OFED 1.2 features update It looks like the redirect on the old server is wrong -- it's redirecting to tiki.openfabrics.org when it should be redirecting to wiki.openfabrics.org. The proper IP address for the DNS name wiki.openfabrics.org may take throughout the weekend to get to you -- we started the propagation this morning. Michael: can you fix the tiki redirect on the old server? On Jan 5, 2007, at 6:46 PM, Steve Wise wrote: > On Tue, 2006-12-05 at 18:17 +0200, Tziporet Koren wrote: >> Hi, >> In the OFED meeting yesterday the following decisions were taken: >> >> 1. We agreed to have two types of features >> >> * Must have features - will delay the release if not ready >> * Desirable features - will be included only if they are ready on >> time according to OFED requirements. >> >> 2. The following features are added to OFED 1.2 as desired: >> >> 1. iWARP - someone from iWARP company should be the owner >> 2. VNIC - Madhue >> >> OFED 1.2 plan was updated on the Wiki: >> https://openib.org/tiki/tiki-index.php?page=OFED+1.2+release+plan >> +and+features >> > > Hey Tziporet, > > Where is this document now? The URL is dead. > > > Thanks, > > Steve. > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From swise at opengridcomputing.com Sat Jan 6 08:56:56 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Sat, 06 Jan 2007 10:56:56 -0600 Subject: [openib-general] OFED 1.2 features update In-Reply-To: <1168042345.4564.180829.camel@hal.voltaire.com> References: <45759B8C.8010408@dev.mellanox.co.il> <1168040806.14028.108.camel@stevo-desktop> <1168042345.4564.180829.camel@hal.voltaire.com> Message-ID: <1168102616.28089.8.camel@stevo-desktop> > https://wiki.openfabrics.org/tiki-index.php?page=OFED+release+procedure > > -- Hal > Thanks. FYI: I'm going to work next week on integrating the chelsio T3 drivers/libs into OFED 1.2 in order to make the feature freeze by 1/31. I haven't worked with OFED releases to date, so I have some catching up to do. Any pointers would be greatly appreciated! Question: If the Chelsio drivers miss 2.6.20 but are pulled into Roland's and Jeff Garzik's git trees and targeted for 2.6.21, can they still be included in OFED 1.2? Steve. From mst at mellanox.co.il Sat Jan 6 09:34:40 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sat, 6 Jan 2007 19:34:40 +0200 Subject: [openib-general] [PATCH v4 01/13] Linux RDMA Core Changes In-Reply-To: <8A71B368A89016469F72CD08050AD334F3BE50@maui.asicdesigners.com> References: <8A71B368A89016469F72CD08050AD334F3BE50@maui.asicdesigners.com> Message-ID: <20070106173439.GC26997@mellanox.co.il> > [Felix Marti] In addition, is arming the CQ really in the performance > path? - Don't apps poll the CQ as long as there are pending CQEs and > only arm the CQ for notification once there is nothing left to do? If > this is the case, it would mean that we waste a few cycles 'idle' > cycles. Applications such as IPoIB might queue up packets, then ARM the CQ, and only then they are processed by the upper layers in the stack. So arming the CQ is on hot datapath. -- MST From mst at mellanox.co.il Sat Jan 6 11:58:37 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Sat, 6 Jan 2007 21:58:37 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: References: Message-ID: <20070106195837.GB5195@mellanox.co.il> > > + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ > > + mdev->limits.reserved_mtts = max(dma_get_cache_alignment() / (int)sizeof(u64), > > + mdev->limits.reserved_mtts); > > I don't follow this -- first of all, what guarantee is there that the > reserved MTTs end on a cacheline boundary just because they take up > more than a single cacheline? It seems this should really be using > ALIGN() somehow. Actually, I think that we really must have each of the tables start at ICM-page aligned addresses. I think this happened to work fine so far as the profile was hard-coded, but with the new module option code this might not be the case anymore. Since we access some of them from the CPU and some from hardware, we really need these different tables to be in separate DMA cache lines too. ICM-page alignment probably gives this to us for free, but the following patch makes this assumption explicit. Please review. Warning: untested patch. Signed-off-by: Michael S. 
Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_profile.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_profile.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_profile.c @@ -148,6 +148,10 @@ u64 mthca_make_profile(struct mthca_dev for (i = 0; i < MTHCA_RES_NUM; ++i) { if (profile[i].size) { profile[i].start = mem_base + total_size; + if (mthca_is_memfree(mdev)) + profile[i].start = ALIGN(profile[i].start, + max(MTHCA_ICM_PAGE_SIZE, + dma_get_cache_alignment())); total_size += profile[i].size; } if (total_size > mem_avail) { -- MST From kliteyn at dev.mellanox.co.il Sat Jan 6 14:09:05 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 07 Jan 2007 00:09:05 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-06:normal completion In-Reply-To: <200701060518.l065IN6q026554@sw053.yok.mtl.com> References: <200701060518.l065IN6q026554@sw053.yok.mtl.com> Message-ID: <45A01E01.7070807@dev.mellanox.co.il> Eitan, I found this in the simulation log: -E- ibdiagnet failed with status: do_ypcall: clnt_call: RPC: Timed out YPBINDPROC_DOMAIN: Domain not bound All the logs look ok, including the ibdiagnet.log -- Yevgeny Eitan Zahavi wrote: > OSM Simulation Regression Summary > OpenSM rev = Thu_Jan_4_17:15:43_2007 0987ee > ibutils rev = Wed_Jan_3_11:42:12_2007 913448 > Total=369 Pass=368 Fail=1 > > Pass: > 27 Stability IS1-16.topo > 27 Pkey IS1-16.topo > 27 OsmTest IS1-16.topo > 27 OsmStress IS1-16.topo > 27 Multicast IS1-16.topo > 27 LidMgr IS1-16.topo > 9 Stability IS3-loop.topo > 9 Stability IS3-128.topo > 9 Pkey IS3-128.topo > 9 OsmTest IS3-loop.topo > 9 OsmTest IS3-128.topo > 9 OsmStress IS3-128.topo > 9 Multicast IS3-loop.topo > 9 Multicast IS3-128.topo > 9 LidMgr IS3-128.topo > 9 FatTree part-4-ary-3-tree.topo > 9 FatTree merge-roots-reorder-4-ary-2-tree.topo > 9 FatTree merge-roots-4-ary-2-tree.topo > 9 FatTree merge-root-4-ary-3-tree.topo > 9 FatTree 
merge-root-12-ary-2-tree.topo > 9 FatTree merge-2-ary-4-tree.topo > 9 FatTree blend-4-ary-2-tree.topo > 9 FatTree 4-ary-4-tree.topo > 9 FatTree 4-ary-3-tree.topo > 9 FatTree 32nodes-3lvl-is1.topo > 9 FatTree 2-ary-4-tree.topo > 9 FatTree 12-node-spaced.topo > 9 FatTree 12-ary-2-tree.topo > 8 FatTree half-4-ary-3-tree.topo > > Failures: > 1 FatTree half-4-ary-3-tree.topo > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From eitan at sw053.yok.mtl.com Sat Jan 6 21:15:27 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sun, 7 Jan 2007 07:15:27 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-07:normal completion Message-ID: <200701070515.l075FRD6001811@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Sat_Jan_6_06:44:34_2007 6c8647 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=369 Pass=366 Fail=3 Pass: 27 Stability IS1-16.topo 27 Pkey IS1-16.topo 27 OsmTest IS1-16.topo 27 OsmStress IS1-16.topo 27 Multicast IS1-16.topo 27 LidMgr IS1-16.topo 9 Stability IS3-loop.topo 9 Stability IS3-128.topo 9 Pkey IS3-128.topo 9 OsmTest IS3-loop.topo 9 OsmTest IS3-128.topo 9 OsmStress IS3-128.topo 9 Multicast IS3-loop.topo 9 Multicast IS3-128.topo 9 FatTree part-4-ary-3-tree.topo 9 FatTree merge-roots-reorder-4-ary-2-tree.topo 9 FatTree merge-roots-4-ary-2-tree.topo 9 FatTree merge-root-4-ary-3-tree.topo 9 FatTree merge-root-12-ary-2-tree.topo 9 FatTree half-4-ary-3-tree.topo 9 FatTree blend-4-ary-2-tree.topo 9 FatTree 4-ary-4-tree.topo 9 FatTree 4-ary-3-tree.topo 9 FatTree 32nodes-3lvl-is1.topo 9 FatTree 2-ary-4-tree.topo 9 FatTree 12-ary-2-tree.topo 8 LidMgr IS3-128.topo 8 FatTree merge-2-ary-4-tree.topo 8 FatTree 12-node-spaced.topo Failures: 1 LidMgr IS3-128.topo 1 FatTree merge-2-ary-4-tree.topo 1 FatTree 12-node-spaced.topo From dotanb at 
dev.mellanox.co.il Sat Jan 6 23:26:38 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Sun, 07 Jan 2007 09:26:38 +0200 Subject: [openib-general] the valgrind support of libibverbs/libmthca is not full In-Reply-To: References: <1987.85.65.223.184.1167733398.squirrel@dev.mellanox.co.il> Message-ID: <45A0A0AE.3050603@dev.mellanox.co.il> Hi Roland and welcome back .... Roland Dreier wrote: > > For the following verbs (and I'm sure that this is not the full list) > > there is a memory warning from valgrind: > > ibv_query_device > > ibv_dealloc_pd > > ibv_create_cq > > Can you send the full warnings you get? > > Thanks > Here are some of the messages that i got when i executed gen2_basic under valgrind: ==10826== Use of uninitialised value of size 8 ==10826== at 0x39A843E79A: _itoa_word (in /lib64/tls/libc-2.3.4.so) ==10826== by 0x39A844196A: vfprintf (in /lib64/tls/libc-2.3.4.so) ==10826== by 0x39A8462433: vsnprintf (in /lib64/tls/libc-2.3.4.so) ==10826== by 0x39A84481E0: snprintf (in /lib64/tls/libc-2.3.4.so) ==10826== by 0x4C3DA3C: mthca_query_device (verbs.c:63) ==10826== by 0x404363: hca_4 (test_hca.c:192) ==10826== by 0x40571C: test_hca (test_hca.c:368) ==10826== by 0x403111: main (main.c:135) ==10853== 72 bytes in 1 blocks are definitely lost in loss record 8 of 13 ==10853== at 0x4904B7E: malloc (vg_replace_malloc.c:149) ==10853== by 0x4C3DA82: mthca_alloc_pd (verbs.c:83) ==10853== by 0x4C3BC15: mthca_alloc_context (mthca.c:179) ==10853== by 0x4A32536: ibv_open_device (device.c:126) ==10853== by 0x401FF4: open_hca (main.c:214) ==10853== by 0x40E4E7: cq_7 (test_cq.c:529) ==10853== by 0x4108A0: test_cq (test_cq.c:790) ==10853== by 0x403196: main (main.c:147) ==10855== Syscall param write(buf) points to uninitialised byte(s) ==10855== at 0x39A84B9302: __write_nocancel (in /lib64/tls/libc-2.3.4.so) ==10855== by 0x4A30B2E: ibv_cmd_reg_mr (cmd.c:245) ==10855== by 0x4C3DBA3: __mthca_reg_mr (verbs.c:134) ==10855== by 0x4C3DCE9: mthca_create_cq (mthca.h:259) 
==10855== by 0x4A34174: ibv_create_cq (verbs.c:247) ==10855== by 0x40E513: cq_7 (test_cq.c:533) ==10855== by 0x4108A0: test_cq (test_cq.c:790) ==10855== by 0x403196: main (main.c:147) ==10855== Address 0x7FF0006A8 is on thread 1's stack ==10855== Syscall param write(buf) points to uninitialised byte(s) ==10855== at 0x39A84B9302: __write_nocancel (in /lib64/tls/libc-2.3.4.so) ==10855== by 0x4A30D43: ibv_cmd_create_cq (cmd.c:315) ==10855== by 0x4C3DDED: mthca_create_cq (verbs.c:233) ==10855== by 0x4A34174: ibv_create_cq (verbs.c:247) ==10855== by 0x40E513: cq_7 (test_cq.c:533) ==10855== by 0x4108A0: test_cq (test_cq.c:790) ==10855== by 0x403196: main (main.c:147) ==10855== Address 0x7FF000748 is on thread 1's stack If you need some more info, just tell me ... thanks Dotan From dotanb at dev.mellanox.co.il Sun Jan 7 00:32:46 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Sun, 07 Jan 2007 10:32:46 +0200 Subject: [openib-general] does the libibverbs support static linkage? In-Reply-To: References: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> Message-ID: <45A0B02E.1020105@dev.mellanox.co.il> Roland Dreier wrote: > > Driver Version : gen2_devel-20070101-1730 > > What version of libibverbs does this correspond to? > > > load_driver (so_path=0x5e65f0 "/usr/local//lib64/infiniband/mthca.so") at > > src/init.c:61 (func: load_driver) > > This makes me think it is a version older than 1.0.4, since > load_driver() is at a completely different line number in 1.0.4. > > - R. > This is version 1.1-pre1. thanks Dotan From erezz at voltaire.com Sun Jan 7 00:48:17 2007 From: erezz at voltaire.com (Erez Zilber) Date: Sun, 07 Jan 2007 10:48:17 +0200 Subject: [openib-general] [query]maximum no. 
of connections per session in iser initiator In-Reply-To: <20070103052619.7519.qmail@web8315.mail.in.yahoo.com> References: <20070103052619.7519.qmail@web8315.mail.in.yahoo.com> Message-ID: <45A0B3D1.2060406@voltaire.com> keshetti mahesh wrote: > Please CC me the responses to this mail ID > > thanks and regards, > Mahesh. > > */keshetti mahesh /* wrote: > > hi all, > > recently i had gone through the iser initiator code in OFED and > saw one thing, the maximum no. of connections allowed per session > is 8. > I guess that you're talking about this piece of code: #define ISCSI_ISER_MAX_CONN 8 #define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ ISER_QP_MAX_REQ_DTOS) * \ ISCSI_ISER_MAX_CONN) It is related to the cq used by iSER. We use it in order to define the max len of the cq (the same cq is used for the sq & rq). It means that per HCA, 8 connections may be opened. > what is the constraint for this > limit. > And in the another iscsi transport i.e TCP this variable is only one. > Where do you see that in iscsi_tcp? > > whether this limit is imposed by iSCSI or by transport layer(TCP > or iSER) ? > > BTW is there any such limit for the no. of sessions allowed at > atime in this initiator? > > thanks and regards, > Mahesh. > Send free SMS to your Friends on Mobile from your Yahoo! > Messenger. Download Now! http://messenger.yahoo.com/download.php > > > Send free SMS to your Friends on Mobile from your Yahoo! Messenger. > Download Now! 
http://messenger.yahoo.com/download.php > > ------------------------------------------------------------------------ > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- ____________________________________________________________ Erez Zilber | 972-9-971-7689 Software Engineer, Storage Team Voltaire – _The Grid Backbone_ __ www.voltaire.com From ogerlitz at voltaire.com Sun Jan 7 01:03:54 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 07 Jan 2007 11:03:54 +0200 Subject: [openib-general] best way to get ibv_get_cq_event to return In-Reply-To: <892B927D8643E94287F16185036A571CF40921@tayexc19.americas.cpqcorp.net> References: <892B927D8643E94287F16185036A571CF40921@tayexc19.americas.cpqcorp.net> Message-ID: <45A0B77A.1070301@voltaire.com> Greenwood, Steve wrote: > I have a thread that uses ibv_get_cq_event to handling completing RDMA > requests. This call seems to have roughly the same capabilities as the > Mellanox vapi call EVAPI_poll_cq_block with one important exception. > The open-fabrics routine does not have a timeout argument. > Beyond restructuring my application, is there a simple way to make get > ibv_get_cq_event return without a message arriving? 
sure, yes, for both questions: the actual object to deliver cq event is struct ibv_comp_channel whose only field is a file descriptor with which you can implement the requirements, use poll/select to set a timeout, or make it non blocking such that you can int rc; struct ibv_comp_channel my_ch; struct pollfd my_pollfd; flags = fcntl(my_ch.fd, F_SETFL) rc = fcntl(my_ch.fd, F_SETFL, flags | O_NONBLOCK) will make ibv_get_event to return immediately if there is no event to consume at the channel my_pollfd.fd = my_ch.fd; my_pollfd.events = POLLIN; my_pollfd.revents = 0; rc = poll(&my_pollfd, 1, ms_timeout); will allow you to either get a timeout if ms milliseconds have elapsed or there is an event waiting for you to consume Or. From ogerlitz at voltaire.com Sun Jan 7 01:04:49 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 07 Jan 2007 11:04:49 +0200 Subject: [openib-general] best way to get ibv_get_cq_event to return In-Reply-To: <45A0B77A.1070301@voltaire.com> References: <892B927D8643E94287F16185036A571CF40921@tayexc19.americas.cpqcorp.net> <45A0B77A.1070301@voltaire.com> Message-ID: <45A0B7B1.7090309@voltaire.com> Or Gerlitz wrote: > flags = fcntl(my_ch.fd, F_SETFL) the above should be F_GETFL > rc = fcntl(my_ch.fd, F_SETFL, flags | O_NONBLOCK) From ogerlitz at voltaire.com Sun Jan 7 02:02:41 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 07 Jan 2007 12:02:41 +0200 Subject: [openib-general] SVN deprecation In-Reply-To: <78BF8D4E-B381-4626-919D-29BE2B47982A@cisco.com> References: <4593A31D.50808@voltaire.com> <459B91E1.7000605@voltaire.com> <0B3B409D-1B50-4D18-8374-DEF85FBD2534@cisco.com> <459CE30B.3040503@voltaire.com> <15ddcffd0701040843te5979c2tbc7de1a82a33f16@mail.gmail.com> <78BF8D4E-B381-4626-919D-29BE2B47982A@cisco.com> Message-ID: <45A0C541.6060505@voltaire.com> Jeff Squyres wrote: > On Jan 4, 2007, at 11:43 AM, Or Gerlitz wrote: >>> When navigating the history via "svn up -r N", there's a few complex >>> corner cases where the update 
can fail (by design), but they aren't >>> common and can be avoided by doing a fresh checkout of the r number >>> that you want: >>> $ svn co -r N url/of/path/to/ancestor/directory >> thanks for all the clarifications, the process you suggest seems >> fine to me. > Ok. I'll work up a proposed README -- per your point, you're right, > the last "good" r number (i.e., the last N value before the deletions > occurred) may not be (r_number_of_HEAD - 1). I'll include the last > "good" r number in the README and some examples of how to peruse the > history. Cool, thanks. Or. From mst at mellanox.co.il Sun Jan 7 02:11:12 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 7 Jan 2007 12:11:12 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: <20070106195837.GB5195@mellanox.co.il> References: <20070106195837.GB5195@mellanox.co.il> Message-ID: <20070107101112.GA26851@mellanox.co.il> > > > + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ > > > + mdev->limits.reserved_mtts = max(dma_get_cache_alignment() / (int)sizeof(u64), > > > + mdev->limits.reserved_mtts); > > > > I don't follow this -- first of all, what guarantee is there that the > > reserved MTTs end on a cacheline boundary just because they take up > > more than a single cacheline? It seems this should really be using > > ALIGN() somehow. > > Actually, I think that we really must have each of the tables start at > ICM-page aligned addresses. I think this happened to work fine so far > as profile was hard-coded, but with new module option code > this might not be the case anymore. > > Since we access some of them from CPU and some from hardware, we really need > them different tables separate dma cache lines too. ICM-page alignment probably > gives this to us for free, but the following patch makes this assumption explicit. > > Pls review. > > Warning: untested patch. > > Signed-off-by: Michael S. 
Tsirkin I see now this is broken. Working on an updated patch. -- MST From erezz at voltaire.com Sun Jan 7 02:28:02 2007 From: erezz at voltaire.com (Erez Zilber) Date: Sun, 07 Jan 2007 12:28:02 +0200 Subject: [openib-general] [PATCH 1/1] IB/iser: return error code when PDUs may not be sent Message-ID: <45A0CB32.1060301@voltaire.com> iSER limits the number of outstanding PDUs to send. When this threshold is reached, it should return an error code (-ENOBUFS) instead of setting the suspend_tx bit (which should be used only by libiscsi). Signed-off-by: Erez Zilber --- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ++-- drivers/infiniband/ulp/iser/iser_initiator.c | 26 ++++++++++++-------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9b2041e..dd221ed 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -177,7 +177,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn * - if yes, the mtask is recycled at iscsi_complete_pdu * - if no, the mtask is recycled at iser_snd_completion */ - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; @@ -241,7 +241,7 @@ iscsi_iser_ctask_xmit(struct iscsi_conn error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); iscsi_iser_ctask_xmit_exit: - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index e73c87b..0a7d1ab 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -304,18 +304,14 @@ int iser_conn_set_full_featured_mode(str static int iser_check_xmit(struct iscsi_conn *conn, void *task) { - int rc = 0; struct iscsi_iser_conn *iser_conn = conn->dd_data; - 
write_lock_bh(conn->recv_lock); if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == ISER_QP_MAX_REQ_DTOS) { - iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); - rc = -EAGAIN; + iser_dbg("%ld can't xmit task %p\n",jiffies,task); + return -ENOBUFS; } - write_unlock_bh(conn->recv_lock); - return rc; + return 0; } @@ -340,7 +336,7 @@ int iser_send_command(struct iscsi_conn return -EPERM; } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; edtl = ntohl(hdr->data_length); @@ -426,7 +422,7 @@ int iser_send_data_out(struct iscsi_conn } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; itt = ntohl(hdr->itt); data_seg_len = ntoh24(hdr->dlength); @@ -498,7 +494,7 @@ int iser_send_control(struct iscsi_conn } if (iser_check_xmit(conn,mtask)) - return -EAGAIN; + return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; @@ -605,6 +601,7 @@ void iser_snd_completion(struct iser_des struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_mgmt_task *mtask; + int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -613,15 +610,16 @@ void iser_snd_completion(struct iser_des if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == + ISER_QP_MAX_REQ_DTOS) + resume_tx = 1; + atomic_dec(&ib_conn->post_send_buf_count); - write_lock(conn->recv_lock); - if (conn->suspend_tx) { + if (resume_tx) { iser_dbg("%ld resuming tx\n",jiffies); - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); scsi_queue_work(conn->session->host, &conn->xmitwork); } - write_unlock(conn->recv_lock); if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ -- 1.4.2 -- ____________________________________________________________ Erez Zilber | 
972-9-971-7689 Software Engineer, Storage Team Voltaire – _The Grid Backbone_ __ www.voltaire.com From dotanb at dev.mellanox.co.il Sun Jan 7 02:27:52 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Sun, 07 Jan 2007 12:27:52 +0200 Subject: [openib-general] best way to get ibv_get_cq_event to return In-Reply-To: <45A0B77A.1070301@voltaire.com> References: <892B927D8643E94287F16185036A571CF40921@tayexc19.americas.cpqcorp.net> <45A0B77A.1070301@voltaire.com> Message-ID: <45A0CB28.6010208@dev.mellanox.co.il> Or Gerlitz wrote: > Greenwood, Steve wrote: > >> I have a thread that uses ibv_get_cq_event to handling completing RDMA >> requests. This call seems to have roughly the same capabilities as the >> Mellanox vapi call EVAPI_poll_cq_block with one important exception. >> The open-fabrics routine does not have a timeout argument. >> > > >> Beyond restructuring my application, is there a simple way to make get >> ibv_get_cq_event return without a message arriving? >> > > sure, yes, for both questions: the actual object to deliver cq event is > struct ibv_comp_channel whose only field is a file descriptor with which > you can implement the requirements, use poll/select to set a timeout, or > make it non blocking such that you can > This is true (and i guess that it will work), but if in the future the implementation of the ibv_comp_channel will be changed, this code will not work .... Dotan From erezz at voltaire.com Sun Jan 7 03:03:00 2007 From: erezz at voltaire.com (Erez Zilber) Date: Sun, 07 Jan 2007 13:03:00 +0200 Subject: [openib-general] [PATCH 1/1] IB/iser: return error code when PDUs may not be sent In-Reply-To: <45A0CB32.1060301@voltaire.com> References: <45A0CB32.1060301@voltaire.com> Message-ID: <45A0D364.9030908@voltaire.com> Erez Zilber wrote: > iSER limits the number of outstanding PDUs to send. 
When this threshold is > reached, it should return an error code (-ENOBUFS) instead of setting the > suspend_tx bit (which should be used only by libiscsi). > > Signed-off-by: Erez Zilber > --- > drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ++-- > drivers/infiniband/ulp/iser/iser_initiator.c | 26 ++++++++++++-------------- > 2 files changed, 14 insertions(+), 16 deletions(-) > > diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c > index 9b2041e..dd221ed 100644 > --- a/drivers/infiniband/ulp/iser/iscsi_iser.c > +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c > @@ -177,7 +177,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn > * - if yes, the mtask is recycled at iscsi_complete_pdu > * - if no, the mtask is recycled at iser_snd_completion > */ > - if (error && error != -EAGAIN) > + if (error && error != -ENOBUFS) > iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); > > return error; > @@ -241,7 +241,7 @@ iscsi_iser_ctask_xmit(struct iscsi_conn > error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); > > iscsi_iser_ctask_xmit_exit: > - if (error && error != -EAGAIN) > + if (error && error != -ENOBUFS) > iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); > return error; > } > diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c > index e73c87b..0a7d1ab 100644 > --- a/drivers/infiniband/ulp/iser/iser_initiator.c > +++ b/drivers/infiniband/ulp/iser/iser_initiator.c > @@ -304,18 +304,14 @@ int iser_conn_set_full_featured_mode(str > static int > iser_check_xmit(struct iscsi_conn *conn, void *task) > { > - int rc = 0; > struct iscsi_iser_conn *iser_conn = conn->dd_data; > > - write_lock_bh(conn->recv_lock); > if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == > ISER_QP_MAX_REQ_DTOS) { > - iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task); > - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); > - rc = -EAGAIN; > + iser_dbg("%ld can't xmit task %p\n",jiffies,task); > 
+ return -ENOBUFS; > } > - write_unlock_bh(conn->recv_lock); > - return rc; > + return 0; > } > > > @@ -340,7 +336,7 @@ int iser_send_command(struct iscsi_conn > return -EPERM; > } > if (iser_check_xmit(conn, ctask)) > - return -EAGAIN; > + return -ENOBUFS; > > edtl = ntohl(hdr->data_length); > > @@ -426,7 +422,7 @@ int iser_send_data_out(struct iscsi_conn > } > > if (iser_check_xmit(conn, ctask)) > - return -EAGAIN; > + return -ENOBUFS; > > itt = ntohl(hdr->itt); > data_seg_len = ntoh24(hdr->dlength); > @@ -498,7 +494,7 @@ int iser_send_control(struct iscsi_conn > } > > if (iser_check_xmit(conn,mtask)) > - return -EAGAIN; > + return -ENOBUFS; > > /* build the tx desc regd header and add it to the tx desc dto */ > mdesc->type = ISCSI_TX_CONTROL; > @@ -605,6 +601,7 @@ void iser_snd_completion(struct iser_des > struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; > struct iscsi_conn *conn = iser_conn->iscsi_conn; > struct iscsi_mgmt_task *mtask; > + int resume_tx = 0; > > iser_dbg("Initiator, Data sent dto=0x%p\n", dto); > > @@ -613,15 +610,16 @@ void iser_snd_completion(struct iser_des > if (tx_desc->type == ISCSI_TX_DATAOUT) > kmem_cache_free(ig.desc_cache, tx_desc); > > + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == > + ISER_QP_MAX_REQ_DTOS) > + resume_tx = 1; > + > atomic_dec(&ib_conn->post_send_buf_count); > > - write_lock(conn->recv_lock); > - if (conn->suspend_tx) { > + if (resume_tx) { > iser_dbg("%ld resuming tx\n",jiffies); > - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); > scsi_queue_work(conn->session->host, &conn->xmitwork); > } > - write_unlock(conn->recv_lock); > > if (tx_desc->type == ISCSI_TX_CONTROL) { > /* this arithmetic is legal by libiscsi dd_data allocation */ > Forgot to say: this patch is a bug fix for 2.6.20. Without it, iSER is broken on 2.6.20. 
Thanks, -- ____________________________________________________________ Erez Zilber | 972-9-971-7689 Software Engineer, Storage Team Voltaire – _The Grid Backbone_ __ www.voltaire.com From mst at mellanox.co.il Sun Jan 7 03:38:18 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 7 Jan 2007 13:38:18 +0200 Subject: [openib-general] [PATCHv5 5 of 5] IB/mthca: reserved MTTs and memory alignment issues In-Reply-To: <20070107113726.GA5556@mellanox.co.il> References: <20070103125119.GG6019@mellanox.co.il> <20070107113726.GA5556@mellanox.co.il> Message-ID: <20070107113818.GB5556@mellanox.co.il> > Subject: [PATCHv5 5 of 5] IB/mthca: reserved MTTs and memory alignment issues This should have been [PATCHv2 5 of 5], but oh well. -- MST From mst at mellanox.co.il Sun Jan 7 03:37:26 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 7 Jan 2007 13:37:26 +0200 Subject: [openib-general] [PATCHv5 5 of 5] IB/mthca: reserved MTTs and memory alignment issues In-Reply-To: <20070103125119.GG6019@mellanox.co.il> References: <20070103125119.GG6019@mellanox.co.il> Message-ID: <20070107113726.GA5556@mellanox.co.il> This fixes several issues related to reserved MTTs and memory alignment. 1. MTTs are allocated in non-cache-coherent memory, so we must give reserved MTTs their own cache line, to prevent both device and CPU from writing into the same cache line at the same time. 2. reserved_mtts field has different meaning in Tavor and Arbel, so we are wasting mtt entries on memfree. Fix the Arbel case to match Tavor semantics. 3. We are allocating separate ICM tables for each resource, so each must be aligned at ICM page boundary (existing code happened to work fine so far as profile was hard-coded with values that are all high powers of 2, but with new module option code this might not be the case anymore). And since we access some of them from CPU and some from hardware, we really want to give different tables separate dma cache lines too. Signed-off-by: Michael S. 
Tsirkin --- This version works for me. Two side notes 1. This replaces the v1 of the patch in the series (which was titled [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line) and addresses the two issues raised. Roland, please let me know if you want this handled differently (e.g. should I repost all patches in the series, or split this one to 3 one-liners?). 2. memfree does not actually have a notion of mtt segments. So it seems we could save some mtt entries by making mtt segment size device-dependent. This is a win if there are lots of small regions, this optimization needs to be looked at separately. Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_main.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c @@ -464,6 +464,10 @@ static int mthca_init_icm(struct mthca_d goto err_unmap_aux; } + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, + dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, MTHCA_MTT_SEG_SIZE, mdev->limits.num_mtt_segs, Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_cmd.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1051,7 +1051,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); dev_lim->max_eqs = 1 << (field & 0x7); MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); - dev_lim->reserved_mtts = 1 << (field >> 4); + if (mthca_is_memfree(dev)) + dev_lim->reserved_mtts = ALIGN(1 << (field >> 4), + MTHCA_MTT_SEG_SIZE / sizeof(u64)); + else + dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, 
QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); dev_lim->max_mrw_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_profile.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_profile.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_profile.c @@ -39,6 +39,7 @@ #include #include "mthca_profile.h" +#include "mthca_memfree.h" enum { MTHCA_RES_QP, @@ -147,6 +148,10 @@ u64 mthca_make_profile(struct mthca_dev for (i = 0; i < MTHCA_RES_NUM; ++i) { if (profile[i].size) { + if (mthca_is_memfree(dev)) + total_size = ALIGN(total_size, max(MTHCA_ICM_PAGE_SIZE, + dma_get_cache_alignment())); + profile[i].start = mem_base + total_size; total_size += profile[i].size; } -- MST From eitan at mellanox.co.il Sun Jan 7 04:42:30 2007 From: eitan at mellanox.co.il (Eitan Zahavi) Date: Sun, 07 Jan 2007 14:42:30 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-06:normal completion In-Reply-To: <45A01E01.7070807@dev.mellanox.co.il> References: <200701060518.l065IN6q026554@sw053.yok.mtl.com> <45A01E01.7070807@dev.mellanox.co.il> Message-ID: <45A0EAB6.8080006@mellanox.co.il> We need to make sure the machine can resolve its own name without need to go to yp. 
For some reason the yp server stops responding EZ Yevgeny Kliteynik wrote: > Eitan, > I found this in the simulation log: > -E- ibdiagnet failed with status: > do_ypcall: clnt_call: RPC: Timed out > YPBINDPROC_DOMAIN: Domain not bound > All the logs look ok, including the ibdiagnet.log > > -- Yevgeny > > Eitan Zahavi wrote: > >> OSM Simulation Regression Summary >> OpenSM rev = Thu_Jan_4_17:15:43_2007 0987ee >> ibutils rev = Wed_Jan_3_11:42:12_2007 913448 >> Total=369 Pass=368 Fail=1 >> >> Pass: >> 27 Stability IS1-16.topo >> 27 Pkey IS1-16.topo >> 27 OsmTest IS1-16.topo >> 27 OsmStress IS1-16.topo >> 27 Multicast IS1-16.topo >> 27 LidMgr IS1-16.topo >> 9 Stability IS3-loop.topo >> 9 Stability IS3-128.topo >> 9 Pkey IS3-128.topo >> 9 OsmTest IS3-loop.topo >> 9 OsmTest IS3-128.topo >> 9 OsmStress IS3-128.topo >> 9 Multicast IS3-loop.topo >> 9 Multicast IS3-128.topo >> 9 LidMgr IS3-128.topo >> 9 FatTree part-4-ary-3-tree.topo >> 9 FatTree merge-roots-reorder-4-ary-2-tree.topo >> 9 FatTree merge-roots-4-ary-2-tree.topo >> 9 FatTree merge-root-4-ary-3-tree.topo >> 9 FatTree merge-root-12-ary-2-tree.topo >> 9 FatTree merge-2-ary-4-tree.topo >> 9 FatTree blend-4-ary-2-tree.topo >> 9 FatTree 4-ary-4-tree.topo >> 9 FatTree 4-ary-3-tree.topo >> 9 FatTree 32nodes-3lvl-is1.topo >> 9 FatTree 2-ary-4-tree.topo >> 9 FatTree 12-node-spaced.topo >> 9 FatTree 12-ary-2-tree.topo >> 8 FatTree half-4-ary-3-tree.topo >> >> Failures: >> 1 FatTree half-4-ary-3-tree.topo >> >> _______________________________________________ >> openib-general mailing list >> openib-general at openib.org >> http://openib.org/mailman/listinfo/openib-general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >> >> > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mst at 
mellanox.co.il Sun Jan 7 06:17:39 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 7 Jan 2007 16:17:39 +0200 Subject: [openib-general] [PATCH] ofed 1.2: fix module compilation erros on ia64 In-Reply-To: <1167214095.27740.13.camel@muscida> References: <1167214095.27740.13.camel@muscida> Message-ID: <20070107141739.GB9190@mellanox.co.il> > Fix compilation errors on ia64 that are caused by the definition of > sg_dma_address and sg_dma_len in asm-ia64/pci.h instead of in > asm/scatterlist.h, as in other architectures. > > tested on: ia64[sles10]; x86_64 [sles10,rh4] > > Signed-off by: Yosef Etigin Looked at this again - what I don't get is this - how come things build fine for us on IA64 on RH4? I think I'll wait till we have a build environment for IA64. -- MST From rdreier at cisco.com Sun Jan 7 10:21:19 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 10:21:19 -0800 Subject: [openib-general] does the libibverbs support static linkage? In-Reply-To: <45A0B02E.1020105@dev.mellanox.co.il> (Dotan Barak's message of "Sun, 07 Jan 2007 10:32:46 +0200") References: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> <45A0B02E.1020105@dev.mellanox.co.il> Message-ID: > This is version 1.1-pre1. OK, I haven't pushed the changes to fix static linking in 1.1-pre because I still need to finish fixing all the drivers for the ABI changes. 
From rdreier at cisco.com Sun Jan 7 10:29:37 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 10:29:37 -0800 Subject: [openib-general] best way to get ibv_get_cq_event to return In-Reply-To: <45A0CB28.6010208@dev.mellanox.co.il> (Dotan Barak's message of "Sun, 07 Jan 2007 12:27:52 +0200") References: <892B927D8643E94287F16185036A571CF40921@tayexc19.americas.cpqcorp.net> <45A0B77A.1070301@voltaire.com> <45A0CB28.6010208@dev.mellanox.co.il> Message-ID: > This is true (and i guess that it will work), but if in the future the > implementation of the ibv_comp_channel will be changed, > this code will not work .... The use of a file descriptor is pretty fundamental, and it was done exactly to permit this sort of stuff (poll(), epoll, SIGIO, etc). So I think it is extremely unlikely to change in a way that would break an app using the file descriptor. - R. From swise at opengridcomputing.com Sun Jan 7 10:51:04 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Sun, 07 Jan 2007 12:51:04 -0600 Subject: [openib-general] OFED 1.2 Questions In-Reply-To: <45802515.20605@mellanox.co.il> References: <45802515.20605@mellanox.co.il> Message-ID: <1168195864.18185.8.camel@linux-q667.site> Tziporet, I'm in the process of adding the Chelsio T3 drivers to the OFED repository and I have a question: The HowTo kernel section you posted on the wiki sez to add the new files to the repos directly via a git commit, but create patches for modifications to existing files and put the patches in the kernel_patches/fixes directory. However, I don't see patches in that directory to modify the core Makefile/Kconfig for SDP or other new modules added for ofed. So should I just modify infiniband/Makefile and Kconfig via the git commit that adds the new Chelsio files, or create a patch file and put it in kernel_patches/fixes? Also, are there machines available with the various ofed supported distros installed that I can do compile testing for the Chelsio user lib? 
Thanks, Steve. From sashak at voltaire.com Sun Jan 7 12:38:19 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 7 Jan 2007 22:38:19 +0200 Subject: [openib-general] [PATCH TRIVIAL] opensm: eliminate some local variable Message-ID: <20070107203819.GE18379@sashak.voltaire.com> This trivially eliminates some local variable. Signed-off-by: Sasha Khapyorsky --- osm/opensm/osm_mcast_mgr.c | 8 +++----- 1 files changed, 3 insertions(+), 5 deletions(-) diff --git a/osm/opensm/osm_mcast_mgr.c b/osm/opensm/osm_mcast_mgr.c index 404e8c4..e893bc0 100644 --- a/osm/opensm/osm_mcast_mgr.c +++ b/osm/opensm/osm_mcast_mgr.c @@ -1104,7 +1104,6 @@ osm_mcast_mgr_process_single( { uint8_t port_num; uint16_t mlid_ho; - osm_switch_t* p_sw; ib_net64_t sw_guid; osm_port_t* p_port; osm_physp_t* p_physp; @@ -1206,8 +1205,7 @@ osm_mcast_mgr_process_single( goto Exit; } - p_sw = p_remote_node->sw; - if( !p_sw ) + if( !p_remote_node->sw ) { osm_log( p_mgr->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_single: ERR 0A12: " @@ -1217,7 +1215,7 @@ osm_mcast_mgr_process_single( goto Exit; } - if( osm_switch_is_in_mcast_tree( p_sw, mlid_ho ) ) + if( osm_switch_is_in_mcast_tree( p_remote_node->sw, mlid_ho ) ) { /* We're in luck. The switch attached to this port @@ -1234,7 +1232,7 @@ osm_mcast_mgr_process_single( port_num = osm_physp_get_port_num( p_remote_physp ); CL_ASSERT( port_num ); - p_mcast_tbl = osm_switch_get_mcast_tbl_ptr( p_sw ); + p_mcast_tbl = osm_switch_get_mcast_tbl_ptr( p_remote_node->sw ); osm_mcast_tbl_set( p_mcast_tbl, mlid_ho, port_num ); } else -- 1.5.0.rc0.g2484-dirty From mst at mellanox.co.il Sun Jan 7 12:33:58 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Sun, 7 Jan 2007 22:33:58 +0200 Subject: [openib-general] OFED 1.2 Questions In-Reply-To: <1168195864.18185.8.camel@linux-q667.site> References: <45802515.20605@mellanox.co.il> <1168195864.18185.8.camel@linux-q667.site> Message-ID: <20070107203358.GB23738@mellanox.co.il> > Tziporet, > > I'm in the process of adding the Chelsio T3 drivers to the OFED > repository and I have a question: > > The HowTo kernel section you posted on the wiki sez to add the new files > to the repos directly via a git commit, but create patches for > modifications to existing files and put the patches in the > kernel_patches/fixes directory. However, I don't see patches in that > directory to modify the core Makefile/Kconfig for SDP or other new > modules added for ofed. So should I just modify infiniband/Makefile and > Kconfig via the git commit that adds the new Chelsio files, or create a > patch file and put it in kernel_patches/fixes? Yes, you can modify the Makefile/Kconfig directly. Reason being, it's always trivial to resolve conflicts there when merging from upstream. > Also, are there machines available with the various ofed supported > distros installed that I can do compile testing for the Chelsio user > lib? -- MST From sashak at voltaire.com Sun Jan 7 13:47:44 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 7 Jan 2007 23:47:44 +0200 Subject: [openib-general] [PATCH TRIVIAL] opensm: remove osm_physp_get_port_info_ptr() checks Message-ID: <20070107214744.GF18379@sashak.voltaire.com> This function returns the address of osm_physp's port_info field, so the result cannot be NULL (as long as physp itself is a valid pointer). No need to check. 
Signed-off-by: Sasha Khapyorsky --- osm/opensm/osm_lid_mgr.c | 9 ++------- osm/opensm/osm_pkey_mgr.c | 13 +------------ osm/opensm/osm_qos.c | 15 ++++----------- osm/opensm/osm_sa_mcmember_record.c | 23 ++++------------------- 4 files changed, 11 insertions(+), 49 deletions(-) diff --git a/osm/opensm/osm_lid_mgr.c b/osm/opensm/osm_lid_mgr.c index aba243a..e6fb893 100644 --- a/osm/opensm/osm_lid_mgr.c +++ b/osm/opensm/osm_lid_mgr.c @@ -937,19 +937,14 @@ __osm_lid_mgr_set_remote_pi_state_to_init( IN osm_lid_mgr_t * const p_mgr, IN osm_physp_t* const p_physp) { - ib_port_info_t *p_pi; osm_physp_t *p_rem_physp = osm_physp_get_remote(p_physp); if ( p_rem_physp == NULL ) return; if (osm_physp_is_valid( p_rem_physp )) - { - p_pi = osm_physp_get_port_info_ptr( p_rem_physp ); - /* but in some rare cases the remote side might be irresponsive */ - if (p_pi) - ib_port_info_set_port_state( p_pi, IB_LINK_INIT ); - } + ib_port_info_set_port_state( osm_physp_get_port_info_ptr( p_rem_physp ), + IB_LINK_INIT ); } /********************************************************************** diff --git a/osm/opensm/osm_pkey_mgr.c b/osm/opensm/osm_pkey_mgr.c index 9e3e35a..5581ce1 100644 --- a/osm/opensm/osm_pkey_mgr.c +++ b/osm/opensm/osm_pkey_mgr.c @@ -219,18 +219,7 @@ pkey_mgr_enforce_partition( ib_port_info_t *p_pi; ib_api_status_t status; - if (!(p_pi = osm_physp_get_port_info_ptr( p_physp ))) - { - osm_log( p_log, OSM_LOG_ERROR, - "pkey_mgr_enforce_partition: ERR 0507: " - "No port info for " - "node 0x%016" PRIx64 " port %u\n", - cl_ntoh64( - osm_node_get_node_guid( - osm_physp_get_node_ptr( p_physp ))), - osm_physp_get_port_num( p_physp ) ); - return FALSE; - } + p_pi = osm_physp_get_port_info_ptr( p_physp ); if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE)) { diff --git a/osm/opensm/osm_qos.c b/osm/opensm/osm_qos.c index 0dd5dc7..92fe6fc 100644 --- a/osm/opensm/osm_qos.c +++ b/osm/opensm/osm_qos.c @@ -80,8 +80,7 @@ static ib_api_status_t vlarb_update_table_block(osm_req_t * 
p_req, ib_port_info_t *p_pi; unsigned vl_mask, i; - if (!(p_pi = osm_physp_get_port_info_ptr(p))) - return IB_ERROR; + p_pi = osm_physp_get_port_info_ptr(p); vl_mask = (1 << (ib_port_info_get_op_vls(p_pi) - 1)) - 1; @@ -115,8 +114,7 @@ static ib_api_status_t vlarb_update(osm_req_t * p_req, ib_port_info_t *p_pi; unsigned len; - if (!(p_pi = osm_physp_get_port_info_ptr(p))) - return IB_ERROR; + p_pi = osm_physp_get_port_info_ptr(p); if (p_pi->vl_arb_low_cap > 0) { len = p_pi->vl_arb_low_cap < IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK ? @@ -166,8 +164,7 @@ static ib_api_status_t sl2vl_update_table(osm_req_t * p_req, uint8_t vl1, vl2; int i; - if (!(p_pi = osm_physp_get_port_info_ptr(p))) - return IB_ERROR; + p_pi = osm_physp_get_port_info_ptr(p); vl_mask = (1 << (ib_port_info_get_op_vls(p_pi) - 1)) - 1; @@ -204,9 +201,6 @@ static ib_api_status_t sl2vl_update(osm_req_t * p_req, osm_port_t * p_port, ib_port_info_t *p_pi = osm_physp_get_port_info_ptr(p); osm_physp_t *p_physp; - if (!p_pi) - return IB_ERROR; - if (osm_node_get_type(osm_physp_get_node_ptr(p)) == IB_NODE_TYPE_SWITCH) { if (ib_port_info_get_vl_cap(p_pi) == 1) { /* Check port 0's capability mask */ @@ -240,8 +234,7 @@ static ib_api_status_t vl_high_limit_update(osm_req_t * p_req, osm_madw_context_t context; ib_port_info_t *p_pi; - if (!(p_pi = osm_physp_get_port_info_ptr(p))) - return IB_ERROR; + p_pi = osm_physp_get_port_info_ptr(p); if (p_pi->vl_high_limit == qcfg->vl_high_limit) return IB_SUCCESS; diff --git a/osm/opensm/osm_sa_mcmember_record.c b/osm/opensm/osm_sa_mcmember_record.c index 90d7300..a3a8bac 100644 --- a/osm/opensm/osm_sa_mcmember_record.c +++ b/osm/opensm/osm_sa_mcmember_record.c @@ -706,23 +706,12 @@ __validate_port_caps( const osm_mgrp_t *p_mgrp, const osm_physp_t *p_physp) { - ib_port_info_t *p_pi; uint8_t mtu_required; uint8_t mtu_mgrp; uint8_t rate_required; uint8_t rate_mgrp; - p_pi = osm_physp_get_port_info_ptr(p_physp); - if (!p_pi) - { - osm_log( p_log, OSM_LOG_DEBUG, - "__validate_port_caps: 
" - "Cannot get Port's 0x%016" PRIx64 " PortInfo\n", - cl_ntoh64( osm_physp_get_port_guid(p_physp) ) ); - return FALSE; - } - - mtu_required = ib_port_info_get_mtu_cap(p_pi); + mtu_required = ib_port_info_get_mtu_cap(&p_physp->port_info); mtu_mgrp = (uint8_t)(p_mgrp->mcmember_rec.mtu & 0x3F); if (mtu_required < mtu_mgrp) { @@ -733,7 +722,7 @@ __validate_port_caps( return FALSE; } - rate_required = ib_port_info_compute_rate(p_pi); + rate_required = ib_port_info_compute_rate(&p_physp->port_info); rate_mgrp = (uint8_t)(p_mgrp->mcmember_rec.rate & 0x3F); if (rate_required < rate_mgrp) { @@ -1075,13 +1064,9 @@ __mgrp_request_is_realizable( uint8_t rate_sel = 2; /* exactly */ uint8_t rate_required, rate, port_rate; osm_log_t *p_log = p_rcv->p_log; - ib_port_info_t *p_pi = NULL; OSM_LOG_ENTER( p_rcv->p_log, __mgrp_request_is_realizable ); - if (p_physp != NULL) - p_pi = osm_physp_get_port_info_ptr(p_physp); - /* * End of o15-0.2.3 specifies: * .... @@ -1093,7 +1078,7 @@ __mgrp_request_is_realizable( * so we might also need to assign RATE/MTU if they are not comp masked in. */ - port_mtu = p_pi ? ib_port_info_get_mtu_cap(p_pi) : 0; + port_mtu = p_physp ? ib_port_info_get_mtu_cap(&p_physp->port_info) : 0; if (!(comp_mask & IB_MCR_COMPMASK_MTU) || !(comp_mask & IB_MCR_COMPMASK_MTU_SEL) || (mtu_sel = (p_mcm_rec->mtu >> 6)) == 3) @@ -1146,7 +1131,7 @@ __mgrp_request_is_realizable( } p_mcm_rec->mtu = (mtu_sel<<6) | mtu; - port_rate = p_pi ? ib_port_info_compute_rate(p_pi) : 0; + port_rate = p_physp ? 
ib_port_info_compute_rate(&p_physp->port_info) : 0; if (!(comp_mask & IB_MCR_COMPMASK_RATE) || !(comp_mask & IB_MCR_COMPMASK_RATE_SEL) || (rate_sel = (p_mcm_rec->rate >> 6)) == 3) -- 1.5.0.rc0.g2484-dirty From sashak at voltaire.com Sun Jan 7 15:01:47 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 8 Jan 2007 01:01:47 +0200 Subject: [openib-general] [PATCH] opensm: eliminate port/switch_info access methods In-Reply-To: <20070107214744.GF18379@sashak.voltaire.com> References: <20070107214744.GF18379@sashak.voltaire.com> Message-ID: <20070107230147.GG18379@sashak.voltaire.com> Following the previous patch ("remove osm_physp_get_port_info_ptr() checks"), this removes the confusing functions osm_physp_get_port_info_ptr() and osm_switch_get_si_ptr(). Signed-off-by: Sasha Khapyorsky --- osm/include/opensm/osm_port.h | 30 ------------------------------ osm/include/opensm/osm_switch.h | 29 ----------------------------- osm/opensm/osm_lid_mgr.c | 5 ++--- osm/opensm/osm_link_mgr.c | 7 ++----- osm/opensm/osm_pkey_mgr.c | 4 ++-- osm/opensm/osm_port.c | 16 ++++------------ osm/opensm/osm_qos.c | 24 +++++++----------------- osm/opensm/osm_sa_guidinfo_record.c | 9 +++------ osm/opensm/osm_sa_multipath_record.c | 8 ++++---- osm/opensm/osm_sa_path_record.c | 8 ++++---- osm/opensm/osm_sa_pkey_record.c | 6 +----- osm/opensm/osm_sa_portinfo_record.c | 4 ++-- osm/opensm/osm_sa_slvl_record.c | 6 +----- osm/opensm/osm_sa_vlarb_record.c | 6 +----- osm/opensm/osm_state_mgr.c | 2 +- osm/opensm/osm_ucast_mgr.c | 2 +- 16 files changed, 35 insertions(+), 131 deletions(-) diff --git a/osm/include/opensm/osm_port.h b/osm/include/opensm/osm_port.h index 060dcff..347ab3b 100644 --- a/osm/include/opensm/osm_port.h +++ b/osm/include/opensm/osm_port.h @@ -1044,36 +1044,6 @@ osm_physp_get_port_num( * SEE ALSO *********/ -/****f* OpenSM: Physical Port/osm_physp_get_port_info_ptr -* NAME -* osm_physp_get_port_info_ptr -* -* DESCRIPTION -* Returns a pointer to the PortInfo attribute for this port. 
-* -* SYNOPSIS -*/ -static inline ib_port_info_t* -osm_physp_get_port_info_ptr( - IN const osm_physp_t* const p_physp ) -{ - CL_ASSERT( p_physp ); - CL_ASSERT( osm_physp_is_valid( p_physp ) ); - return( (ib_port_info_t*)&p_physp->port_info ); -} -/* -* PARAMETERS -* p_physp -* [in] Pointer to an osm_physp_t object. -* -* RETURN VALUES -* Returns a pointer to the PortInfo attribute for this port. -* -* NOTES -* -* SEE ALSO -*********/ - /****f* OpenSM: Physical Port/osm_physp_get_node_ptr * NAME * osm_physp_get_node_ptr diff --git a/osm/include/opensm/osm_switch.h b/osm/include/opensm/osm_switch.h index 3a05dcf..b2bf0db 100644 --- a/osm/include/opensm/osm_switch.h +++ b/osm/include/opensm/osm_switch.h @@ -629,35 +629,6 @@ osm_switch_get_route_by_lid( * Switch object *********/ -/****f* OpenSM: Switch/osm_switch_get_si_ptr -* NAME -* osm_switch_get_si_ptr -* -* DESCRIPTION -* Returns a pointer to the SwitchInfo for this switch. -* -* SYNOPSIS -*/ -static inline ib_switch_info_t* -osm_switch_get_si_ptr( - IN const osm_switch_t* const p_sw ) -{ - return( (ib_switch_info_t*)&p_sw->switch_info ); -} -/* -* PARAMETERS -* p_sw -* [in] Pointer to an osm_switch_t object. -* -* RETURN VALUES -* Returns a pointer to the SwitchInfo for this switch. 
-* -* NOTES -* -* SEE ALSO -* Switch object -*********/ - /****f* OpenSM: Switch/osm_switch_sp0_is_lmc_capable * NAME * osm_switch_sp0_is_lmc_capable diff --git a/osm/opensm/osm_lid_mgr.c b/osm/opensm/osm_lid_mgr.c index e6fb893..f44a0b4 100644 --- a/osm/opensm/osm_lid_mgr.c +++ b/osm/opensm/osm_lid_mgr.c @@ -943,8 +943,7 @@ __osm_lid_mgr_set_remote_pi_state_to_init( return; if (osm_physp_is_valid( p_rem_physp )) - ib_port_info_set_port_state( osm_physp_get_port_info_ptr( p_rem_physp ), - IB_LINK_INIT ); + ib_port_info_set_port_state( &p_rem_physp->port_info, IB_LINK_INIT ); } /********************************************************************** @@ -1002,7 +1001,7 @@ __osm_lid_mgr_set_physp_pi( goto Exit; } - p_old_pi = osm_physp_get_port_info_ptr( p_physp ); + p_old_pi = &p_physp->port_info; /* First, copy existing parameters from the PortInfo attribute we diff --git a/osm/opensm/osm_link_mgr.c b/osm/opensm/osm_link_mgr.c index 0f15f2b..de70368 100644 --- a/osm/opensm/osm_link_mgr.c +++ b/osm/opensm/osm_link_mgr.c @@ -140,8 +140,6 @@ __osm_link_mgr_set_physp_pi( if( port_num == 0 ) { - ib_switch_info_t* p_sw_info; - /* HCA's don't have a port 0, and for switch port 0, we need to check if this is enhanced port 0 or base port 0. @@ -156,8 +154,7 @@ __osm_link_mgr_set_physp_pi( goto Exit; } - p_sw_info = osm_switch_get_si_ptr(p_node->sw); - if (ib_switch_info_is_enhanced_port0( p_sw_info ) == FALSE) + if (ib_switch_info_is_enhanced_port0( &p_node->sw->switch_info ) == FALSE) { /* This means the switch doesn't support enhanced port zero. Can skip it. 
*/ @@ -177,7 +174,7 @@ __osm_link_mgr_set_physp_pi( PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0 */ - p_old_pi = osm_physp_get_port_info_ptr( p_physp ); + p_old_pi = &p_physp->port_info; memset( payload, 0, IB_SMP_DATA_SIZE ); diff --git a/osm/opensm/osm_pkey_mgr.c b/osm/opensm/osm_pkey_mgr.c index 5581ce1..bbbe192 100644 --- a/osm/opensm/osm_pkey_mgr.c +++ b/osm/opensm/osm_pkey_mgr.c @@ -211,7 +211,7 @@ static boolean_t pkey_mgr_enforce_partition( IN osm_log_t *p_log, IN const osm_req_t *p_req, - IN const osm_physp_t *p_physp, + IN osm_physp_t *p_physp, IN const boolean_t enforce) { osm_madw_context_t context; @@ -219,7 +219,7 @@ pkey_mgr_enforce_partition( ib_port_info_t *p_pi; ib_api_status_t status; - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE)) { diff --git a/osm/opensm/osm_port.c b/osm/opensm/osm_port.c index 2f427d2..3b69b75 100644 --- a/osm/opensm/osm_port.c +++ b/osm/opensm/osm_port.c @@ -419,23 +419,19 @@ osm_physp_calc_link_mtu( IN osm_log_t* p_log, IN const osm_physp_t* p_physp ) { - const ib_port_info_t* p_old_pi; - const ib_port_info_t* p_remote_pi; const osm_physp_t* p_remote_physp; uint8_t mtu; uint8_t remote_mtu; OSM_LOG_ENTER( p_log, osm_physp_calc_link_mtu ); - p_old_pi = osm_physp_get_port_info_ptr( p_physp ); /* use the available MTU */ - mtu = ib_port_info_get_mtu_cap( p_old_pi ); + mtu = ib_port_info_get_mtu_cap(&p_physp->port_info); p_remote_physp = osm_physp_get_remote( p_physp ); if( p_remote_physp && osm_physp_is_valid( p_remote_physp ) ) { - p_remote_pi = osm_physp_get_port_info_ptr( p_remote_physp ); - remote_mtu = ib_port_info_get_mtu_cap( p_remote_pi ); + remote_mtu = ib_port_info_get_mtu_cap(&p_remote_physp->port_info); if( osm_log_is_active( p_log, OSM_LOG_DEBUG ) ) { @@ -490,23 +486,19 @@ osm_physp_calc_link_op_vls( IN const osm_subn_t* p_subn, IN const osm_physp_t* p_physp ) { - const ib_port_info_t* p_old_pi; - const 
ib_port_info_t* p_remote_pi; const osm_physp_t* p_remote_physp; uint8_t op_vls; uint8_t remote_op_vls; OSM_LOG_ENTER( p_log, osm_physp_calc_link_op_vls ); - p_old_pi = osm_physp_get_port_info_ptr( p_physp ); /* use the available VL CAP */ - op_vls = ib_port_info_get_vl_cap(p_old_pi); + op_vls = ib_port_info_get_vl_cap(&p_physp->port_info); p_remote_physp = osm_physp_get_remote( p_physp ); if( p_remote_physp && osm_physp_is_valid( p_remote_physp ) ) { - p_remote_pi = osm_physp_get_port_info_ptr( p_remote_physp ); - remote_op_vls = ib_port_info_get_vl_cap(p_remote_pi); + remote_op_vls = ib_port_info_get_vl_cap(&p_remote_physp->port_info); if( osm_log_is_active( p_log, OSM_LOG_DEBUG ) ) { diff --git a/osm/opensm/osm_qos.c b/osm/opensm/osm_qos.c index 92fe6fc..e71c053 100644 --- a/osm/opensm/osm_qos.c +++ b/osm/opensm/osm_qos.c @@ -77,12 +77,9 @@ static ib_api_status_t vlarb_update_table_block(osm_req_t * p_req, ib_vl_arb_table_t block; osm_madw_context_t context; uint32_t attr_mod; - ib_port_info_t *p_pi; unsigned vl_mask, i; - p_pi = osm_physp_get_port_info_ptr(p); - - vl_mask = (1 << (ib_port_info_get_op_vls(p_pi) - 1)) - 1; + vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1; memset(&block, 0, sizeof(block)); memcpy(&block, table_block, @@ -111,11 +108,9 @@ static ib_api_status_t vlarb_update(osm_req_t * p_req, const struct qos_config *qcfg) { ib_api_status_t status = IB_SUCCESS; - ib_port_info_t *p_pi; + ib_port_info_t *p_pi = &p->port_info; unsigned len; - p_pi = osm_physp_get_port_info_ptr(p); - if (p_pi->vl_arb_low_cap > 0) { len = p_pi->vl_arb_low_cap < IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK ? 
p_pi->vl_arb_low_cap : IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; @@ -159,14 +154,11 @@ static ib_api_status_t sl2vl_update_table(osm_req_t * p_req, ib_slvl_table_t tbl, *p_tbl; osm_node_t *p_node = osm_physp_get_node_ptr(p); uint32_t attr_mod; - ib_port_info_t *p_pi; unsigned vl_mask; uint8_t vl1, vl2; int i; - p_pi = osm_physp_get_port_info_ptr(p); - - vl_mask = (1 << (ib_port_info_get_op_vls(p_pi) - 1)) - 1; + vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1; for (i = 0; i < IB_MAX_NUM_VLS / 2; i++) { vl1 = sl2vl_table->raw_vl_by_sl[i] >> 4; @@ -198,20 +190,18 @@ static ib_api_status_t sl2vl_update(osm_req_t * p_req, osm_port_t * p_port, { ib_api_status_t status; uint8_t i, num_ports; - ib_port_info_t *p_pi = osm_physp_get_port_info_ptr(p); osm_physp_t *p_physp; if (osm_node_get_type(osm_physp_get_node_ptr(p)) == IB_NODE_TYPE_SWITCH) { - if (ib_port_info_get_vl_cap(p_pi) == 1) { + if (ib_port_info_get_vl_cap(&p->port_info) == 1) { /* Check port 0's capability mask */ p_physp = osm_port_get_default_phys_ptr(p_port); - p_pi = osm_physp_get_port_info_ptr(p_physp); - if (!(p_pi->capability_mask & IB_PORT_CAP_HAS_SL_MAP)) + if (!(p_physp->port_info.capability_mask & IB_PORT_CAP_HAS_SL_MAP)) return IB_SUCCESS; } num_ports = osm_node_get_num_physp(osm_physp_get_node_ptr(p)); } else { - if (!(p_pi->capability_mask & IB_PORT_CAP_HAS_SL_MAP)) + if (!(p->port_info.capability_mask & IB_PORT_CAP_HAS_SL_MAP)) return IB_SUCCESS; num_ports = 1; } @@ -234,7 +224,7 @@ static ib_api_status_t vl_high_limit_update(osm_req_t * p_req, osm_madw_context_t context; ib_port_info_t *p_pi; - p_pi = osm_physp_get_port_info_ptr(p); + p_pi = &p->port_info; if (p_pi->vl_high_limit == qcfg->vl_high_limit) return IB_SUCCESS; diff --git a/osm/opensm/osm_sa_guidinfo_record.c b/osm/opensm/osm_sa_guidinfo_record.c index 3eee05b..5d7c4ba 100644 --- a/osm/opensm/osm_sa_guidinfo_record.c +++ b/osm/opensm/osm_sa_guidinfo_record.c @@ -204,7 +204,6 @@ __osm_sa_gir_create_gir( ib_net16_t max_lid_ho; 
uint8_t lmc; ib_net64_t port_guid; - const ib_port_info_t* p_pi; uint8_t block_num, start_block_num, end_block_num, num_blocks; OSM_LOG_ENTER( p_rcv->p_log, __osm_sa_gir_create_gir ); @@ -245,8 +244,6 @@ __osm_sa_gir_create_gir( if( match_port_guid && ( port_guid != match_port_guid ) ) continue; - p_pi = osm_physp_get_port_info_ptr( p_physp ); - /* Note: the following check is a temporary workaround Since 1. GUIDCap should never be 0 on ports where this applies @@ -254,11 +251,11 @@ __osm_sa_gir_create_gir( So this should really be a check for whether the port is a switch external port or not! */ - if ( p_pi->guid_cap == 0 ) + if ( p_physp->port_info.guid_cap == 0 ) continue; - num_blocks = p_pi->guid_cap / 8; - if ( p_pi->guid_cap % 8 ) + num_blocks = p_physp->port_info.guid_cap / 8; + if ( p_physp->port_info.guid_cap % 8 ) num_blocks++; if ( match_block_num == 255 ) { diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 60a4930..2f61fb8 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ b/osm/opensm/osm_sa_multipath_record.c @@ -257,7 +257,7 @@ __osm_mpr_rcv_get_path_parms( p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); p_physp = osm_port_get_default_phys_ptr( p_src_port ); - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; mtu = ib_port_info_get_mtu_cap( p_pi ); rate = ib_port_info_compute_rate( p_pi ); @@ -432,7 +432,7 @@ __osm_mpr_rcv_get_path_parms( /* Check parameters for the ingress port in this switch. 
*/ - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; if ( mtu > ib_port_info_get_mtu_cap( p_pi ) ) { @@ -498,7 +498,7 @@ __osm_mpr_rcv_get_path_parms( } } - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; if ( mtu > ib_port_info_get_mtu_cap( p_pi ) ) { @@ -535,7 +535,7 @@ __osm_mpr_rcv_get_path_parms( /* p_physp now points to the destination */ - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; if ( mtu > ib_port_info_get_mtu_cap( p_pi ) ) { diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index 11058ab..a0d4675 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -255,7 +255,7 @@ __osm_pr_rcv_get_path_parms( p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); p_physp = osm_port_get_default_phys_ptr( p_src_port ); - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; mtu = ib_port_info_get_mtu_cap( p_pi ); rate = ib_port_info_compute_rate( p_pi ); @@ -368,7 +368,7 @@ __osm_pr_rcv_get_path_parms( /* Check parameters for the ingress port in this switch. 
*/ - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) { @@ -419,7 +419,7 @@ __osm_pr_rcv_get_path_parms( CL_ASSERT( p_physp ); CL_ASSERT( osm_physp_is_valid( p_physp ) ); - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) { @@ -456,7 +456,7 @@ __osm_pr_rcv_get_path_parms( /* p_physp now points to the destination */ - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) { diff --git a/osm/opensm/osm_sa_pkey_record.c b/osm/opensm/osm_sa_pkey_record.c index 8b6f69f..8e56c6c 100644 --- a/osm/opensm/osm_sa_pkey_record.c +++ b/osm/opensm/osm_sa_pkey_record.c @@ -151,13 +151,9 @@ __osm_sa_pkey_create( } if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) - { - lid = osm_physp_get_port_info_ptr( p_physp )->base_lid; - } + lid = p_physp->port_info.base_lid; else - { lid = osm_node_get_base_lid( p_physp->p_node, 0 ); - } if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) { diff --git a/osm/opensm/osm_sa_portinfo_record.c b/osm/opensm/osm_sa_portinfo_record.c index 054ff1f..300d536 100644 --- a/osm/opensm/osm_sa_portinfo_record.c +++ b/osm/opensm/osm_sa_portinfo_record.c @@ -175,7 +175,7 @@ __osm_pir_rcv_new_pir( memset( &p_rec_item->rec, 0, sizeof( p_rec_item->rec ) ); p_rec_item->rec.lid = lid; - p_rec_item->rec.port_info = *osm_physp_get_port_info_ptr( p_physp ); + p_rec_item->rec.port_info = p_physp->port_info; p_rec_item->rec.port_num = osm_physp_get_port_num( p_physp ); cl_qlist_insert_tail( p_list, (cl_list_item_t*)&p_rec_item->pool_item ); @@ -260,7 +260,7 @@ __osm_sa_pir_check_physp( p_rcvd_rec = p_ctxt->p_rcvd_rec; comp_mask = p_ctxt->comp_mask; p_comp_pi = &p_rcvd_rec->port_info; - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; osm_dump_port_info( p_rcv->p_log, diff --git a/osm/opensm/osm_sa_slvl_record.c 
b/osm/opensm/osm_sa_slvl_record.c index 2476283..b9146b0 100644 --- a/osm/opensm/osm_sa_slvl_record.c +++ b/osm/opensm/osm_sa_slvl_record.c @@ -163,13 +163,9 @@ __osm_sa_slvl_create( } if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) - { - lid = osm_physp_get_port_info_ptr( p_physp )->base_lid; - } + lid = p_physp->port_info.base_lid; else - { lid = osm_node_get_base_lid( p_physp->p_node, 0 ); - } if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) { diff --git a/osm/opensm/osm_sa_vlarb_record.c b/osm/opensm/osm_sa_vlarb_record.c index 8f1de4c..d7f341e 100644 --- a/osm/opensm/osm_sa_vlarb_record.c +++ b/osm/opensm/osm_sa_vlarb_record.c @@ -163,13 +163,9 @@ __osm_sa_vl_arb_create( } if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) - { - lid = osm_physp_get_port_info_ptr( p_physp )->base_lid; - } + lid = p_physp->port_info.base_lid; else - { lid = osm_node_get_base_lid( p_physp->p_node, 0 ); - } if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) { diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c index aa129a9..f91677d 100644 --- a/osm/opensm/osm_state_mgr.c +++ b/osm/opensm/osm_state_mgr.c @@ -1306,7 +1306,7 @@ __osm_state_mgr_report( osm_get_node_type_str_fixed_width( node_type ), port_num ); - p_pi = osm_physp_get_port_info_ptr( p_physp ); + p_pi = &p_physp->port_info; /* * Port state is not defined for switch port 0 diff --git a/osm/opensm/osm_ucast_mgr.c b/osm/opensm/osm_ucast_mgr.c index 214c05e..d91e278 100644 --- a/osm/opensm/osm_ucast_mgr.c +++ b/osm/opensm/osm_ucast_mgr.c @@ -906,7 +906,7 @@ osm_ucast_mgr_set_fwd_table( /* Set the top of the unicast forwarding table. 
*/ - si = *osm_switch_get_si_ptr( p_sw ); + si = p_sw->switch_info; lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) ); if (lin_top != si.lin_top) { -- 1.5.0.rc0.g2484-dirty From rdreier at cisco.com Sun Jan 7 14:57:19 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 14:57:19 -0800 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: <20070106195837.GB5195@mellanox.co.il> (Michael S. Tsirkin's message of "Sat, 6 Jan 2007 21:58:37 +0200") References: <20070106195837.GB5195@mellanox.co.il> Message-ID: > Actually, I think that we really must have each of the tables start at > ICM-page aligned addresses. I think this happened to work fine so far > as profile was hard-coded, but with new module option code > this might not be the case anymore. Yes, this is a good point. However... > + if (mthca_is_memfree(mdev)) > + profile[i].start = ALIGN(profile[i].start, > + max(MTHCA_ICM_PAGE_SIZE, > + dma_get_cache_alignment())); where is the requirement that tables are ICM-page-size-aligned coming from? I thought the only requirement was that each table was aligned to its size? Also this fails to take into account the gap that might be left when changing the start of a table in the total_size variable. And anyway, this is all moot I believe because the code already does: for (i = 0; i < MTHCA_RES_NUM; ++i) { profile[i].type = i; profile[i].log_num = max(ffs(profile[i].num) - 1, 0); profile[i].size *= profile[i].num; if (mthca_is_memfree(dev)) profile[i].size = max(profile[i].size, (u64) PAGE_SIZE); } and I'm comfortable assuming that there is no architecture where dma_get_cache_alignment() is bigger than PAGE_SIZE... - R. 
From michael.arndt at informatik.tu-chemnitz.de Sun Jan 7 16:29:42 2007 From: michael.arndt at informatik.tu-chemnitz.de (Michael Arndt) Date: Mon, 8 Jan 2007 01:29:42 +0100 Subject: [openib-general] ioctl and send_agents Message-ID: <000901c732bc$17eeb9b0$21606d86@one7> Hallo, I have two questions. 1. In the function osmv_transport_init (file : osm_vendor_mlx_ts.c) the device is opened and an ioctl call is made. My question is which module is used to support the open and ioctl calls? I think it should be the user_mad module, but there is no ioctl defined there. And I just find unlocked_ioctl = ib_umad_ioctl and compat_ioctl = ib_umad_ioctl in this module which I think are used because they also register the agents. Is there some transformation from ioctl to unlocked_ioctl or compat_ioctl? 2. Does the SM register send agents? If so where exactly is this done. If no what send agent are used by the SM (ib_umad_write)? Thanks Michael From halr at voltaire.com Sun Jan 7 18:00:30 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 07 Jan 2007 21:00:30 -0500 Subject: [openib-general] ioctl and send_agents In-Reply-To: <000901c732bc$17eeb9b0$21606d86@one7> References: <000901c732bc$17eeb9b0$21606d86@one7> Message-ID: <1168221630.4577.39665.camel@hal.voltaire.com> On Sun, 2007-01-07 at 19:29, Michael Arndt wrote: > Hallo, > > I have two questions. > > 1. In the function osmv_transport_init (file : osm_vendor_mlx_ts.c) the > device is opened and an ioctl call is made. My question is which module is > used to support the open and ioctl calls? I think it should be the user_mad > module, but there is no ioctl defined there. And I just find unlocked_ioctl > = ib_umad_ioctl and compat_ioctl = ib_umad_ioctl in this module which I > think are used because they also register the agents. Is there some > transformation from ioctl to unlocked_ioctl or compat_ioctl? osm_vendor_mlx_ts.c is not included by the OpenIB (aka gen2) build. It is for a gen1 implementation. 
Mellanox is best to comment here. > 2. Does the SM register send agents? If so where exactly is this done. If no > what send agent are used by the SM (ib_umad_write)? OpenSM registers agents in opensm/osm_sm_mad_ctrl.c:osm_sm_mad_ctrl_bind and opensm/osm_sa_mad_ctrl.c:osm_sa_mad_ctrl_bind. osm_sm_mad_ctrl_bind is called from osm_sm.c:osm_sm_bind and osm_sa_mad_ctrl_bind is called from osm_sa.c:osm_sa_bind. Both osm_sm_bind and osm_sa_bind are called from opensm/osm_opensm.c:osm_opensm_bind which is in turn called from main.c during OpenSM startup. That is the vendor independent part. The vendor dependent part is done in the vendor layer. For OpenIB, it is done in osm_vendor_ibumad.c:osm_vendor_bind. -- Hal > Thanks Michael > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Sun Jan 7 18:37:38 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 8 Jan 2007 04:37:38 +0200 Subject: [openib-general] add SIGUSR1 to reopen osm.log In-Reply-To: <1164048220.4381.4161.camel@hal.voltaire.com> References: <20061116175304.668afcab.weiny2@llnl.gov> <1164048220.4381.4161.camel@hal.voltaire.com> Message-ID: <20070108023738.GB19217@sashak.voltaire.com> On 13:43 Mon 20 Nov , Hal Rosenstock wrote: > On Thu, 2006-11-16 at 20:53, Ira Weiny wrote: > > Our sysadmins have been rotating OpenSM's osm.log file and then restarting > > OpenSM. As this is a less than optimal solution if you have jobs running on > > the system, I wrote this patch (against OFED 1.1) which adds a handler for > > SIGUSR1 that reopens OpenSM's log file without a restart. > > This is a good idea to incorporate into OpenSM. > > Any objections if this is bundled with SIGHUP handling rather than > SIGUSR1 which is not backward compatible as Sasha indicated ? 
Actually looking more at this I noticed that SIGUSR* signals have not been used by linuxthreads for a couple of years (since kernel 2.1.xx, where RT signals support was introduced). So I think it is not a real issue today - some #ifdef should be sufficient. I reviewed and tested the original patch (it applies very well) and have some fixes/improvements. Two incremental patches are following. Sasha From sashak at voltaire.com Sun Jan 7 18:42:11 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 8 Jan 2007 04:42:11 +0200 Subject: [openib-general] [PATCH 1/2] opensm: sigusr1: syslog() fixes In-Reply-To: <20070108023738.GB19217@sashak.voltaire.com> References: <20061116175304.668afcab.weiny2@llnl.gov> <1164048220.4381.4161.camel@hal.voltaire.com> <20070108023738.GB19217@sashak.voltaire.com> Message-ID: <20070108024211.GC19217@sashak.voltaire.com> Following Ira's log file reopening patch this fixes potential syslog other apps compatibility issues. Signed-off-by: Sasha Khapyorsky --- osm/complib/cl_log.c | 1 + osm/include/opensm/osm_log.h | 5 ++--- osm/opensm/osm_log.c | 2 +- osm/opensm/osm_subnet.c | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/osm/complib/cl_log.c b/osm/complib/cl_log.c index 87cda06..dd3a4c6 100644 --- a/osm/complib/cl_log.c +++ b/osm/complib/cl_log.c @@ -124,4 +124,5 @@ cl_log_event( { syslog( priority, "%s\n", message ); } + closelog(); } diff --git a/osm/include/opensm/osm_log.h b/osm/include/opensm/osm_log.h index a119d4f..7197439 100644 --- a/osm/include/opensm/osm_log.h +++ b/osm/include/opensm/osm_log.h @@ -51,7 +51,6 @@ #ifndef __WIN__ #include #endif -#include #include #include #include @@ -128,8 +127,8 @@ typedef struct _osm_log unsigned long max_size; boolean_t flush; FILE* out_port; - boolean_t accum_log_file; - char * log_file_name; + boolean_t accum_log_file; + char * log_file_name; } osm_log_t; /*********/ diff --git a/osm/opensm/osm_log.c b/osm/opensm/osm_log.c index 0c6de36..56c6482 100644 --- 
a/osm/opensm/osm_log.c +++ b/osm/opensm/osm_log.c @@ -164,7 +164,7 @@ osm_log( /* this is a call to the syslog */ if (verbosity & OSM_LOG_SYS) { - cl_log_event("OpenSM", LOG_INFO, buffer , NULL, 0); + syslog(LOG_INFO, "%s\n", buffer); /* SYSLOG should go to stdout too */ if (p_log->out_port != stdout) diff --git a/osm/opensm/osm_subnet.c b/osm/opensm/osm_subnet.c index aec4ff2..90802b4 100644 --- a/osm/opensm/osm_subnet.c +++ b/osm/opensm/osm_subnet.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include -- 1.5.0.rc0.g2484-dirty From sashak at voltaire.com Sun Jan 7 18:44:01 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 8 Jan 2007 04:44:01 +0200 Subject: [openib-general] [PATCH 2/2] opensm: sigusr1: synchronous SIGUSR1 processing. In-Reply-To: <20070108023738.GB19217@sashak.voltaire.com> References: <20061116175304.668afcab.weiny2@llnl.gov> <1164048220.4381.4161.camel@hal.voltaire.com> <20070108023738.GB19217@sashak.voltaire.com> Message-ID: <20070108024401.GD19217@sashak.voltaire.com> Process SIGUSR1 synchronously similar to other signals. Move signal handler setup from potentially shared library code. Use osm log locking with file reopening in order to prevent races with osm_log(). 
Signed-off-by: Sasha Khapyorsky --- osm/include/opensm/osm_log.h | 21 +++++++++++++++++++++ osm/opensm/libopensm.map | 3 ++- osm/opensm/main.c | 17 +++++++++++++++++ osm/opensm/osm_log.c | 41 +++++++++++------------------------------ 4 files changed, 51 insertions(+), 31 deletions(-) diff --git a/osm/include/opensm/osm_log.h b/osm/include/opensm/osm_log.h index 7197439..55e996a 100644 --- a/osm/include/opensm/osm_log.h +++ b/osm/include/opensm/osm_log.h @@ -253,6 +253,27 @@ osm_log_init_v2( * osm_log_destroy *********/ +/****f* OpenSM: Log/osm_log_reopen_file +* NAME +* osm_log_reopen_file +* +* DESCRIPTION +* The osm_log_reopen_file function reopens the log file +* +* SYNOPSIS +*/ +int +osm_log_reopen_file( + osm_log_t *p_log); +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object. +* +* RETURN VALUES +* 0 on success or nonzero value otherwise. +*********/ + /****f* OpenSM: Log/osm_log_init * NAME * osm_log_init diff --git a/osm/opensm/libopensm.map b/osm/opensm/libopensm.map index c200384..909b641 100644 --- a/osm/opensm/libopensm.map +++ b/osm/opensm/libopensm.map @@ -1,10 +1,11 @@ -OPENSM_1.4 { +OPENSM_1.5 { global: osm_log; osm_log_printf; osm_is_debug; osm_log_init; osm_log_init_v2; + osm_log_reopen_file; osm_mad_pool_construct; osm_mad_pool_destroy; osm_mad_pool_init; diff --git a/osm/opensm/main.c b/osm/opensm/main.c index 374d323..d63c95c 100644 --- a/osm/opensm/main.c +++ b/osm/opensm/main.c @@ -71,6 +71,7 @@ osm_opensm_t osm; volatile unsigned int osm_exit_flag = 0; static volatile unsigned int osm_hup_flag = 0; +static volatile unsigned int osm_usr1_flag = 0; #define GUID_ARRAY_SIZE 64 #define INVALID_GUID (0xFFFFFFFFFFFFFFFFULL) @@ -91,6 +92,11 @@ static void mark_hup_flag(int signum) osm_hup_flag = 1; } +static void mark_usr1_flag(int signum) +{ + osm_usr1_flag = 1; +} + static sigset_t saved_sigset; static void block_signals() @@ -101,6 +107,9 @@ static void block_signals() sigaddset(&set, SIGINT); sigaddset(&set, SIGTERM); sigaddset(&set, 
SIGHUP); +#ifndef HAVE_OLD_LINUX_THREADS + sigaddset(&set, SIGUSR1); +#endif pthread_sigmask(SIG_SETMASK, &set, &saved_sigset); } @@ -115,6 +124,10 @@ static void setup_signals() sigaction(SIGTERM, &act, NULL); act.sa_handler = mark_hup_flag; sigaction(SIGHUP, &act, NULL); +#ifndef HAVE_OLD_LINUX_THREADS + act.sa_handler = mark_usr1_flag; + sigaction(SIGUSR1, &act, NULL); +#endif pthread_sigmask(SIG_SETMASK, &saved_sigset, NULL); } #endif /* __WIN__ */ @@ -946,6 +959,10 @@ main( else cl_thread_suspend( 10000 ); + if (osm_usr1_flag) { + osm_usr1_flag = 0; + osm_log_reopen_file(&osm.log); + } if (osm_hup_flag) { osm_hup_flag = 0; /* a HUP signal should only start a new heavy sweep */ diff --git a/osm/opensm/osm_log.c b/osm/opensm/osm_log.c index 56c6482..6d9a450 100644 --- a/osm/opensm/osm_log.c +++ b/osm/opensm/osm_log.c @@ -58,7 +58,6 @@ #include #include #include -#include static int log_exit_count = 0; @@ -290,24 +289,16 @@ open_out_port(IN osm_log_t *p_log) return (0); } - -osm_log_t *handler_log = NULL; -void -sigusr1_handler(int signal) -{ - fclose(handler_log->out_port); - open_out_port(handler_log); -} - -static void -setup_sigusr1_handler(osm_log_t *p_log) +int osm_log_reopen_file(osm_log_t *p_log) { - struct sigaction act; - act.sa_handler = sigusr1_handler; - sigemptyset(&(act.sa_mask)); - act.sa_flags = 0; - handler_log = p_log; - sigaction(SIGUSR1, &act, NULL); + int ret; + if (p_log->out_port == stdout || p_log->out_port == stderr) + return 0; + cl_spinlock_acquire(&p_log->lock); + fclose(p_log->out_port); + ret = open_out_port(p_log); + cl_spinlock_release(&p_log->lock); + return ret; } ib_api_status_t @@ -330,21 +321,11 @@ osm_log_init_v2( if (log_file == NULL || !strcmp(log_file, "-") || !strcmp(log_file, "stdout")) - { p_log->out_port = stdout; - } else if (!strcmp(log_file, "stderr")) - { p_log->out_port = stderr; - } - else - { - setup_sigusr1_handler(p_log); - if (open_out_port(p_log)) - { - return (IB_UNKNOWN_ERROR); - } - } + else if 
(open_out_port(p_log)) + return (IB_UNKNOWN_ERROR); if (cl_spinlock_init( &p_log->lock ) == CL_SUCCESS) return IB_SUCCESS; -- 1.5.0.rc0.g2484-dirty From sashak at voltaire.com Sun Jan 7 18:58:48 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 8 Jan 2007 04:58:48 +0200 Subject: [openib-general] [PATCH] opensm: close umad port in osm_vendor_delete In-Reply-To: <20061201141901.GC23574@sashak.voltaire.com> References: <20061201141901.GC23574@sashak.voltaire.com> Message-ID: <20070108025848.GE19217@sashak.voltaire.com> This adds umad_port_close() call in osm_vendor_delete(). So the same process can reinitialize and reuse the vendor layer. Successful test was reported. Signed-off-by: Sasha Khapyorsky --- osm/libvendor/osm_vendor_ibumad.c | 13 +++++++++---- 1 files changed, 9 insertions(+), 4 deletions(-) diff --git a/osm/libvendor/osm_vendor_ibumad.c b/osm/libvendor/osm_vendor_ibumad.c index c2e04c3..35f127a 100644 --- a/osm/libvendor/osm_vendor_ibumad.c +++ b/osm/libvendor/osm_vendor_ibumad.c @@ -544,10 +544,15 @@ osm_vendor_delete( umad_receiver_t *p_ur; int agent_id; - /* unregister UMAD agents */ - for (agent_id = 0; agent_id < UMAD_CA_MAX_AGENTS; agent_id++) - if ( (*pp_vend)->agents[agent_id] ) - umad_unregister( (*pp_vend)->umad_port_id, agent_id ); + if ((*pp_vend)->umad_port_id >= 0) { + /* unregister UMAD agents */ + for (agent_id = 0; agent_id < UMAD_CA_MAX_AGENTS; agent_id++) + if ( (*pp_vend)->agents[agent_id] ) + umad_unregister((*pp_vend)->umad_port_id, + agent_id ); + umad_close_port((*pp_vend)->umad_port_id); + (*pp_vend)->umad_port_id = -1; + } clear_madw( *pp_vend ); /* make sure all ports are closed */ -- 1.5.0.rc0.g2484-dirty From rdreier at cisco.com Sun Jan 7 20:12:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 20:12:30 -0800 Subject: [openib-general] the valgrind support of libibverbs/libmthca is not full In-Reply-To: <45A0A0AE.3050603@dev.mellanox.co.il> (Dotan Barak's message of "Sun, 07 Jan 2007 
09:26:38 +0200") References: <1987.85.65.223.184.1167733398.squirrel@dev.mellanox.co.il> <45A0A0AE.3050603@dev.mellanox.co.il> Message-ID: Some of these are almost definitely due to what the test program is doing -- for example > ==10853== 72 bytes in 1 blocks are definitely lost in loss record 8 of 13 > ==10853== at 0x4904B7E: malloc (vg_replace_malloc.c:149) > ==10853== by 0x4C3DA82: mthca_alloc_pd (verbs.c:83) > ==10853== by 0x4C3BC15: mthca_alloc_context (mthca.c:179) > ==10853== by 0x4A32536: ibv_open_device (device.c:126) > ==10853== by 0x401FF4: open_hca (main.c:214) > ==10853== by 0x40E4E7: cq_7 (test_cq.c:529) > ==10853== by 0x4108A0: test_cq (test_cq.c:790) > ==10853== by 0x403196: main (main.c:147) mthca_free_pd() definitely frees the struct that is allocated in mthca_alloc_pd(), so I think your test must just not be freeing the PD it allocates. others are a little more mysterious. Out of curiosity is your test with a mem-free or tavor mode HCA? Are your test programs available from a git tree anywhere? Thanks, Roland From rdreier at cisco.com Sun Jan 7 20:16:25 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 20:16:25 -0800 Subject: [openib-general] [PATCH] rdma_cm iWARP connection setup timeouts reported as rejects. In-Reply-To: <4594526D.1000309@ichips.intel.com> (Sean Hefty's message of "Thu, 28 Dec 2006 15:25:33 -0800") References: <20061215225017.22628.17881.stgit@dell3.ogc.int> <4594526D.1000309@ichips.intel.com> Message-ID: > Can we pull this into 2.6.20? 
...queued From rdreier at cisco.com Sun Jan 7 20:17:48 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 20:17:48 -0800 Subject: [openib-general] [PATCH] 2.6.20 rdma_ucm: fix struct ucma_event leak In-Reply-To: <000001c71fb5$2cf517b0$8698070a@amr.corp.intel.com> (Sean Hefty's message of "Thu, 14 Dec 2006 11:22:19 -0800") References: <000001c71fb5$2cf517b0$8698070a@amr.corp.intel.com> Message-ID: queued for 2.6.20 From rdreier at cisco.com Sun Jan 7 20:19:00 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 20:19:00 -0800 Subject: [openib-general] [PATCH] 2.6.20 rdma_ucm: fix reporting events with invalid user context In-Reply-To: <000f01c73109$0183ccb0$8698070a@amr.corp.intel.com> (Sean Hefty's message of "Fri, 5 Jan 2007 12:35:15 -0800") References: <000f01c73109$0183ccb0$8698070a@amr.corp.intel.com> Message-ID: looks good to me, and no one has complained. I'll queue this for 2.6.20, so if someone doesn't like it let me know and I'll revert it. 
From rdreier at cisco.com Sun Jan 7 20:26:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 20:26:04 -0800 Subject: [openib-general] [PATCH] mthca: fix PRM compliance problem in atomic-send completions In-Reply-To: <200612250924.52746.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Mon, 25 Dec 2006 09:24:52 +0200") References: <200612250924.52746.jackm@dev.mellanox.co.il> Message-ID: Thanks, queued for 2.6.20 From rdreier at cisco.com Sun Jan 7 20:29:22 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 20:29:22 -0800 Subject: [openib-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus A few small fixes here and there: Erez Zilber (1): IB/iser: Return error code when PDUs may not be sent Jack Morgenstein (1): IB/mthca: Fix PRM compliance problem in atomic-send completions Michael S. 
Tsirkin (1): IB/mthca: Fix off-by-one in FMR handling on memfree Sean Hefty (2): RDMA/ucma: Fix struct ucma_event leak when backlog is full RDMA/ucma: Don't report events with invalid user context Steve Wise (1): RDMA/iwcm: iWARP connection timeouts shouldn't be reported as rejects drivers/infiniband/core/cma.c | 17 ++++++++++++++--- drivers/infiniband/core/ucma.c | 11 +++++++++++ drivers/infiniband/hw/mthca/mthca_cq.c | 8 ++++++-- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 +- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ++-- drivers/infiniband/ulp/iser/iser_initiator.c | 26 ++++++++++++-------------- 6 files changed, 46 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 533193d..9e0ab04 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1088,10 +1088,21 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) *sin = iw_event->local_addr; sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; *sin = iw_event->remote_addr; - if (iw_event->status) - event.event = RDMA_CM_EVENT_REJECTED; - else + switch (iw_event->status) { + case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; + break; + case -ECONNRESET: + case -ECONNREFUSED: + event.event = RDMA_CM_EVENT_REJECTED; + break; + case -ETIMEDOUT: + event.event = RDMA_CM_EVENT_UNREACHABLE; + break; + default: + event.event = RDMA_CM_EVENT_CONNECT_ERROR; + break; + } break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 81a5cdc..e2e8d32 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -209,10 +209,21 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -EDQUOT; + kfree(uevent); goto out; } ctx->backlog--; + } else if (!ctx->uid) { + /* + * We ignore events for new 
connections until userspace has set + * their context. This can only happen if an error occurs on a + * new connection before the user accepts it. This is okay, + * since the accept will just fail later. + */ + kfree(uevent); + goto out; } + list_add_tail(&uevent->list, &ctx->file->event_list); wake_up_interruptible(&ctx->file->poll_wait); out: diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 283d50b..1159c8a 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -54,6 +54,10 @@ enum { MTHCA_CQ_ENTRY_SIZE = 0x20 }; +enum { + MTHCA_ATOMIC_BYTE_LEN = 8 +}; + /* * Must be packed because start is 64 bits but only aligned to 32 bits. */ @@ -599,11 +603,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev, break; case MTHCA_OPCODE_ATOMIC_CS: entry->opcode = IB_WC_COMP_SWAP; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; case MTHCA_OPCODE_ATOMIC_FA: entry->opcode = IB_WC_FETCH_ADD; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; case MTHCA_OPCODE_BIND_MW: entry->opcode = IB_WC_BIND_MW; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 15cc2f6..6b19645 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -232,7 +232,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj) list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { - if (chunk->mem[i].length >= offset) { + if (chunk->mem[i].length > offset) { page = chunk->mem[i].page; goto out; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9b2041e..dd221ed 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -177,7 +177,7 @@ iscsi_iser_mtask_xmit(struct 
iscsi_conn *conn, * - if yes, the mtask is recycled at iscsi_complete_pdu * - if no, the mtask is recycled at iser_snd_completion */ - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; @@ -241,7 +241,7 @@ iscsi_iser_ctask_xmit(struct iscsi_conn *conn, error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); iscsi_iser_ctask_xmit_exit: - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index e73c87b..0a7d1ab 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -304,18 +304,14 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) static int iser_check_xmit(struct iscsi_conn *conn, void *task) { - int rc = 0; struct iscsi_iser_conn *iser_conn = conn->dd_data; - write_lock_bh(conn->recv_lock); if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == ISER_QP_MAX_REQ_DTOS) { - iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); - rc = -EAGAIN; + iser_dbg("%ld can't xmit task %p\n",jiffies,task); + return -ENOBUFS; } - write_unlock_bh(conn->recv_lock); - return rc; + return 0; } @@ -340,7 +336,7 @@ int iser_send_command(struct iscsi_conn *conn, return -EPERM; } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; edtl = ntohl(hdr->data_length); @@ -426,7 +422,7 @@ int iser_send_data_out(struct iscsi_conn *conn, } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; itt = ntohl(hdr->itt); data_seg_len = ntoh24(hdr->dlength); @@ -498,7 +494,7 @@ int iser_send_control(struct iscsi_conn *conn, } if (iser_check_xmit(conn,mtask)) - return -EAGAIN; + return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = 
ISCSI_TX_CONTROL; @@ -605,6 +601,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_mgmt_task *mtask; + int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -613,15 +610,16 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == + ISER_QP_MAX_REQ_DTOS) + resume_tx = 1; + atomic_dec(&ib_conn->post_send_buf_count); - write_lock(conn->recv_lock); - if (conn->suspend_tx) { + if (resume_tx) { iser_dbg("%ld resuming tx\n",jiffies); - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); scsi_queue_work(conn->session->host, &conn->xmitwork); } - write_unlock(conn->recv_lock); if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ From mst at mellanox.co.il Sun Jan 7 20:43:34 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 06:43:34 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: References: Message-ID: <20070108044334.GB25410@mellanox.co.il> > And anyway, this is all moot I believe because the code already does: > > for (i = 0; i < MTHCA_RES_NUM; ++i) { > profile[i].type = i; > profile[i].log_num = max(ffs(profile[i].num) - 1, 0); > profile[i].size *= profile[i].num; > if (mthca_is_memfree(dev)) > profile[i].size = max(profile[i].size, (u64) PAGE_SIZE); > } > > and I'm comfortable assuming that there is no architecture where > dma_get_cache_alignment() is bigger than PAGE_SIZE... OK. Missed that. Good, so there's no issue. we can drop this chunk then. So we are left with this: [PATCHv3 5 of 5] IB/mthca: reserved MTTs issues This fixes several issues related to reserved MTTs 1. 
MTTs are allocated in non-cache-coherent memory, so we must give reserved MTTs their own cache line, to prevent both device and CPU from writing into the same cache line at the same time. 2. reserved_mtts field has different meaning in Tavor and Arbel, so we are wasting mtt entries on memfree. fix the Arbel case to match Tavor semantics. Signed-off-by: Michael S. Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_main.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c @@ -464,6 +464,10 @@ static int mthca_init_icm(struct mthca_d goto err_unmap_aux; } + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, + dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, MTHCA_MTT_SEG_SIZE, mdev->limits.num_mtt_segs, Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_cmd.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1051,7 +1051,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); dev_lim->max_eqs = 1 << (field & 0x7); MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); - dev_lim->reserved_mtts = 1 << (field >> 4); + if (mthca_is_memfree(dev)) + dev_lim->reserved_mtts = ALIGN(1 << (field >> 4), + MTHCA_MTT_SEG_SIZE / sizeof(u64)); + else + dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); dev_lim->max_mrw_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); -- MST From rdreier at cisco.com Sun Jan 7 21:04:20 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 21:04:20 -0800 
Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: <20070108044334.GB25410@mellanox.co.il> (Michael S. Tsirkin's message of "Mon, 8 Jan 2007 06:43:34 +0200") References: <20070108044334.GB25410@mellanox.co.il> Message-ID: > + if (mthca_is_memfree(dev)) > + dev_lim->reserved_mtts = ALIGN(1 << (field >> 4), > + MTHCA_MTT_SEG_SIZE / sizeof(u64)); > + else > + dev_lim->reserved_mtts = 1 << (field >> 4); this still seems screwed up. mem-free is reporting the number of reserved MTT entries, so I think we want (1 << log_rsvd_mtts) / (MTHCA_MTT_SEG_SIZE / sizeof (u64)) in that case (rounded up of course), rather than just aligned to something. - R. From mst at mellanox.co.il Sun Jan 7 21:08:29 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 07:08:29 +0200 Subject: [openib-general] [PATCH] rdma_cm: avoid port reuse after close In-Reply-To: References: Message-ID: <20070108050829.GC25410@mellanox.co.il> > Randomize the starting port number, and avoid re-using port values > immediately after they are closed. Instead, track the last port > value used and increment it every time a new port number is > assigned. > > These changes are in response to Michael's comments from this (old) > thread: > > http://openib.org/pipermail/openib-general/2006-September/025996.html > > Signed-off-by: Sean Hefty > --- > I'm not sure if this is still needed, but I had it on my list of things > to someday try to do. This should apply to 2.6.20-rc2. Yes, this makes RDMA_CM behave more like a TCP/IP socket, and helps applications that want to bind to the same port number for both IB and TCP/IP socket connection. Acked-by: Michael S. Tsirkin -- MST From mst at mellanox.co.il Sun Jan 7 21:20:38 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 8 Jan 2007 07:20:38 +0200 Subject: [openib-general] [PATCH 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: References: <20070108044334.GB25410@mellanox.co.il> Message-ID: <20070108052038.GD25410@mellanox.co.il> > > + if (mthca_is_memfree(dev)) > > + dev_lim->reserved_mtts = ALIGN(1 << (field >> 4), > > + MTHCA_MTT_SEG_SIZE / sizeof(u64)); > > + else > > + dev_lim->reserved_mtts = 1 << (field >> 4); > > this still seems screwed up. mem-free is reporting the number of > reserved MTT entries, so I think we want (1 << log_rsvd_mtts) / > (MTHCA_MTT_SEG_SIZE / sizeof (u64)) in that case (rounded up of > course), rather than just aligned to something. This issue really seems to trigger brain lockups for me. Before I start testing, does the following look correct to you (untested patch)? BTW, the right thing to do I think is to get rid of MTT_SEG_SIZE for memfree completely - as it is we are wasting MTT entries for any region that is not a multiple of segment size. I'm just not sure I'll have the time to fix this before 2.6.20. Thanks, MST [PATCHv4 untested 5 of 5] IB/mthca: reserved MTTs and memory alignment issues This fixes several issues related to reserved MTTs and memory alignment. 1. MTTs are allocated in non-cache-coherent memory, so we must give reserved MTTs their own cache line, to prevent both device and CPU from writing into the same cache line at the same time. 2. reserved_mtts field has different meaning in Tavor and Arbel, so we are wasting mtt entries on memfree. fix the Arbel case to match Tavor semantics. Signed-off-by: Michael S. 
Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_main.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c @@ -464,6 +464,10 @@ static int mthca_init_icm(struct mthca_d goto err_unmap_aux; } + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, + dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, MTHCA_MTT_SEG_SIZE, mdev->limits.num_mtt_segs, Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_cmd.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1051,7 +1051,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); dev_lim->max_eqs = 1 << (field & 0x7); MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); - dev_lim->reserved_mtts = 1 << (field >> 4); + if (mthca_is_memfree(dev)) + dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), + MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE; + else + dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); dev_lim->max_mrw_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); -- MST From eitan at sw053.yok.mtl.com Sun Jan 7 21:26:42 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Mon, 8 Jan 2007 07:26:42 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-08:normal completion Message-ID: <200701080526.l085QgkG011229@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Sat_Jan_6_06:44:34_2007 6c8647 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=409 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey 
IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo 9 Multicast IS3-128.topo Failures: 1 Multicast IS3-128.topo From k_mahesh85 at yahoo.co.in Sun Jan 7 21:30:05 2007 From: k_mahesh85 at yahoo.co.in (keshetti mahesh) Date: Mon, 8 Jan 2007 05:30:05 +0000 (GMT) Subject: [openib-general] [query]maximum no. of connections per session in iser initiator In-Reply-To: <45A0B3D1.2060406@voltaire.com> Message-ID: <864924.13364.qm@web8327.mail.in.yahoo.com> >I guess that you're talking about this piece of code: >#define ISCSI_ISER_MAX_CONN 8 >#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ >ISER_QP_MAX_REQ_DTOS) * \ >ISCSI_ISER_MAX_CONN) >It is related to the cq used by iSER. We use it in order to define the >max len of the cq (the same cq is used for the sq & rq). It means that >per HCA, 8 connections may be opened. Yes, I was talking about this part only. Isn't this variable is equivalent to the maximum no. of connections allowed per the session? what is constraint for chosing this value as 8? >Where do you see that in iscsi_tcp? In the structure iscsi_transport there is a variable max_conn and it is set to 1 (In open-iscsi intiator file name iscsi_tcp.c) > whether this limit is imposed by iSCSI or by transport layer(TCP > or iSER) ? 
thanks and regards, Mahesh Send free SMS to your Friends on Mobile from your Yahoo! Messenger. Download Now! http://messenger.yahoo.com/download.php -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Sun Jan 7 22:16:42 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 08:16:42 +0200 Subject: [openib-general] [PATCHv3 5 of 5] IB/mthca: give reserved MTTs a separate cache line In-Reply-To: <20070108052038.GD25410@mellanox.co.il> References: <20070108044334.GB25410@mellanox.co.il> <20070108052038.GD25410@mellanox.co.il> Message-ID: <20070108061642.GB30652@mellanox.co.il> This fixes several issues related to reserved MTTs. 1. MTTs are allocated in non-cache-coherent memory, so we must give reserved MTTs their own cache line, to prevent both device and CPU from writing into the same cache line at the same time. 2. reserved_mtts field has different meaning in Tavor and Arbel, so we are wasting mtt entries on memfree. fix the Arbel case to match Tavor semantics. Signed-off-by: Michael S. Tsirkin --- This works for me. OK to apply the series now? 
Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_main.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_main.c @@ -464,6 +464,10 @@ static int mthca_init_icm(struct mthca_d goto err_unmap_aux; } + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, + dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, MTHCA_MTT_SEG_SIZE, mdev->limits.num_mtt_segs, Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_cmd.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1051,7 +1051,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); dev_lim->max_eqs = 1 << (field & 0x7); MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); - dev_lim->reserved_mtts = 1 << (field >> 4); + if (mthca_is_memfree(dev)) + dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), + MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE; + else + dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); dev_lim->max_mrw_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); -- MST From rdreier at cisco.com Sun Jan 7 22:23:28 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 07 Jan 2007 22:23:28 -0800 Subject: [openib-general] [PATCH 1/1] IB/iser: return error code when PDUs may not be sent References: <45A0CB32.1060301@voltaire.com> Message-ID: Thanks, queued for 2.6.20. (And thanks for letting me know that this patch should go for 2.6.20) From mst at mellanox.co.il Sun Jan 7 22:31:53 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 8 Jan 2007 08:31:53 +0200 Subject: [openib-general] [PATCH 1/1] IB/iser: return error code when PDUs may not be sent In-Reply-To: References: <45A0CB32.1060301@voltaire.com> Message-ID: <20070108063153.GA1697@mellanox.co.il> > Thanks, queued for 2.6.20. (And thanks for letting me know that this > patch should go for 2.6.20) BTW, there were 3 of Sean's patches that I think we need in 2.6.20 rdma_ucm: fix reporting events with invalid user context rdma_ucm: fix struct ucma_event rdma_cm: avoid port reuse after close I haven't put these into OFED yet but I think they look good. -- MST From mst at mellanox.co.il Sun Jan 7 22:40:59 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 08:40:59 +0200 Subject: [openib-general] 2.6.20: outstanding patches and issues In-Reply-To: <20070102200523.GA10451@mellanox.co.il> References: <20070102200523.GA10451@mellanox.co.il> Message-ID: <20070108064059.GB1697@mellanox.co.il> List of patches in OFED: http://git.openfabrics.org/git/?p=~vlad/ofed_1_2/.git;a=history;f=kernel_patches/fixes/kernel_patches/fixes;hb=HEAD Probably 2.6.20 material - Roland, could you please confirm you got all of these? fix_query_qp_in_reset.patch ib_verbs_h_missing_kref.patch mthca_0_fmr_page_fix.patch set of patches fixing FMR on non-cache-coherent. Patch 5 of 5 is at v3, hope it's all good now. mthca_1_merge_mr_fmr_on_64bit.patch mthca_2_fast_registration.patch mthca_3_alloc_consistent.patch mthca_4_dma_align_reserved_mtts.patch sean_cm_limit_mra_timeout.patch mthca_wrid_swap.patch - very small benefit, but a very small patch too Patches I'm less sure about, please provide feedback srp_1_recreate_at_reconnect.patch - this improves SRP spec compliance. Good idea? ib_wc_qpn_to_qp.patch - currently benefit is for IPoIB CM, but doing API updates early has its benefits. What do you think? 
Need more work ipoib_selector_updated.patch - improves IPoIB compliance There are 3 Sean's patches I think we need rdma_ucm: fix reporting events with invalid user context rdma_ucm: fix struct ucma_event rdma_cm: avoid port reuse after close I haven't put these into OFED yet but I think they look good. Issues: Dotan reported oops with ucma at openib restart. Sean - any luck in reproducing this? -- MST From dotanb at dev.mellanox.co.il Sun Jan 7 23:02:35 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 08 Jan 2007 09:02:35 +0200 Subject: [openib-general] does the libibverbs support static linkage? In-Reply-To: References: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> <45A0B02E.1020105@dev.mellanox.co.il> Message-ID: <45A1EC8B.2050106@dev.mellanox.co.il> Roland Dreier wrote: > > This is version 1.1-pre1. > > OK, I haven't pushed the changes to fix static linking in 1.1-pre > because I still need to finish fixing all the drivers for the ABI changes. > Great, thanks. Can you please update me when you'll add the static linking support to this version? I would like to add this test case to our regression. thanks Dotan From ogerlitz at voltaire.com Sun Jan 7 23:18:06 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 08 Jan 2007 09:18:06 +0200 Subject: [openib-general] using IB on a port without IPoIB running NIC In-Reply-To: References: Message-ID: <45A1F02E.2020604@voltaire.com> Tang, Changqing wrote: > We understand that, but we hope to have a connect/accept style IB > connection setup, without IPoIB involved, > like HP-UX IT-API(similar to uDAPL without underlying IP support), it > works with multiple cards. > Configure 4-5 IP addresses on a single node is kind of silly. CQ, Few more thoughts on your "being able to MPI on an IB PORT without an IPoIB working NIC" requirement... 
Basically, people use IB for both IPC and I/O, where except for SRP, all the IB I/O ULPs (both block based: iSER and file based: Lustre, GPFS, rNFS) use IP addressing and hence are either coded to the RDMA CM or work on top of TCP/IP (iSCSI-TCP, NFS, pFS, etc). So if the user will not configure IPoIB on this IB port, it will not be utilized for I/O. Now, you mention a use case of 4 cards on a node, I believe that typically this would happen on big SMP machines where you **must** use all the active IB links for I/O: eg when most of your MPI work is within the SMP (128 to 512 ranks) and most of the IB work is for I/O. I understand (please check and let me know eg about HP 1U offering) that all/most of today's 1U PCI-EX nodes can have at most **one** PCI-EX card. Combining the above limitation with the fact that these nodes would run at most 16 ranks (eg 8 dual-core CPUs) and that 8 ranks/IB link is a ratio that makes sense, we are left with **2** and not 4-5 NICs to configure. Oh, and one more thing, 4 IB links per node would make an N node cluster to 4N IB end-ports cluster for which you need f(4N) switching IB ports, and the specific f(.) might turn the IB deployment over this cluster into a very expensive one... Or. From dotanb at dev.mellanox.co.il Sun Jan 7 23:51:22 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 08 Jan 2007 09:51:22 +0200 Subject: [openib-general] the valgrind support of libibverbs/libmthca is not full In-Reply-To: References: <1987.85.65.223.184.1167733398.squirrel@dev.mellanox.co.il> <45A0A0AE.3050603@dev.mellanox.co.il> Message-ID: <45A1F7FA.80007@dev.mellanox.co.il> Roland Dreier wrote: > with a mem-free or tavor mode HCA? > > Are your test programs available from a git tree anywhere? > > Thanks, > Roland > Here is the output of the valgrind when executing a "standard" test which comes with the driver: The server was executed on tavor and the client was executed on memfree device. 
on tavor -------- # valgrind --leak-check=yes ibv_rc_pingpong -p=2 ==29141== Memcheck, a memory error detector. ==29141== Copyright (C) 2002-2006, and GNU GPL'd, by Julian Seward et al. ==29141== Using LibVEX rev 1658, a library for dynamic binary translation. ==29141== Copyright (C) 2004-2006, and GNU GPL'd, by OpenWorks LLP. ==29141== Using valgrind-3.2.1, a dynamic binary instrumentation framework. ==29141== Copyright (C) 2000-2006, and GNU GPL'd, by Julian Seward et al. ==29141== For more details, rerun with: -v ==29141== ==29141== Syscall param write(buf) points to uninitialised byte(s) ==29141== at 0x3F8980AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==29141== by 0x4A30B2E: ibv_cmd_reg_mr (cmd.c:245) ==29141== by 0x4C3DBA3: __mthca_reg_mr (verbs.c:134) ==29141== by 0x4A33F75: ibv_reg_mr (verbs.c:159) ==29141== by 0x401BFC: main (rc_pingpong.c:323) ==29141== Address 0x7FF0004C8 is on thread 1's stack ==29141== ==29141== Syscall param write(buf) points to uninitialised byte(s) ==29141== at 0x3F8980AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==29141== by 0x4A30B2E: ibv_cmd_reg_mr (cmd.c:245) ==29141== by 0x4C3DBA3: __mthca_reg_mr (verbs.c:134) ==29141== by 0x4C3DCE9: mthca_create_cq (mthca.h:259) ==29141== by 0x4A34174: ibv_create_cq (verbs.c:247) ==29141== by 0x401C6C: main (rc_pingpong.c:329) ==29141== Address 0x7FF000418 is on thread 1's stack ==29141== ==29141== Syscall param write(buf) points to uninitialised byte(s) ==29141== at 0x3F8980AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==29141== by 0x4A30D43: ibv_cmd_create_cq (cmd.c:315) ==29141== by 0x4C3DDED: mthca_create_cq (verbs.c:233) ==29141== by 0x4A34174: ibv_create_cq (verbs.c:247) ==29141== by 0x401C6C: main (rc_pingpong.c:329) ==29141== Address 0x7FF0004B8 is on thread 1's stack ==29141== ==29141== Syscall param write(buf) points to uninitialised byte(s) ==29141== at 0x3F8980AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==29141== by 0x4A30B2E: 
ibv_cmd_reg_mr (cmd.c:245) ==29141== by 0x4C3DBA3: __mthca_reg_mr (verbs.c:134) ==29141== by 0x4C3E542: mthca_create_qp (verbs.c:523) ==29141== by 0x4A34371: ibv_create_qp (verbs.c:342) ==29141== by 0x401CE5: main (rc_pingpong.c:349) ==29141== Address 0x7FF000428 is on thread 1's stack ==29141== ==29141== Syscall param write(buf) points to uninitialised byte(s) ==29141== at 0x3F8980AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==29141== by 0x4A31633: ibv_cmd_create_qp (cmd.c:603) ==29141== by 0x4C3E5CD: mthca_create_qp (verbs.c:554) ==29141== by 0x4A34371: ibv_create_qp (verbs.c:342) ==29141== by 0x401CE5: main (rc_pingpong.c:349) ==29141== Address 0x7FF0004B8 is on thread 1's stack ==29141== ==29141== Use of uninitialised value of size 8 ==29141== at 0x4C3D262: mthca_store_qp (qp.c:909) ==29141== by 0x4C3E5F2: mthca_create_qp (mthca.h:259) ==29141== by 0x4A34371: ibv_create_qp (verbs.c:342) ==29141== by 0x401CE5: main (rc_pingpong.c:349) ==29141== ==29141== Use of uninitialised value of size 8 ==29141== at 0x4C3D2D8: mthca_store_qp (qp.c:910) ==29141== by 0x4C3E5F2: mthca_create_qp (mthca.h:259) ==29141== by 0x4A34371: ibv_create_qp (verbs.c:342) ==29141== by 0x401CE5: main (rc_pingpong.c:349) ==29141== ==29141== Use of uninitialised value of size 8 ==29141== at 0x4C3D276: mthca_store_qp (qp.c:918) ==29141== by 0x4C3E5F2: mthca_create_qp (mthca.h:259) ==29141== by 0x4A34371: ibv_create_qp (verbs.c:342) ==29141== by 0x401CE5: main (rc_pingpong.c:349) ==29141== ==29141== Use of uninitialised value of size 8 ==29141== at 0x4C3D27C: mthca_store_qp (qp.c:919) ==29141== by 0x4C3E5F2: mthca_create_qp (mthca.h:259) ==29141== by 0x4A34371: ibv_create_qp (verbs.c:342) ==29141== by 0x401CE5: main (rc_pingpong.c:349) ==29141== ==29141== Use of uninitialised value of size 8 ==29141== at 0x4C3D28C: mthca_store_qp (qp.c:919) ==29141== by 0x4C3E5F2: mthca_create_qp (mthca.h:259) ==29141== by 0x4A34371: ibv_create_qp (verbs.c:342) ==29141== by 0x401CE5: main 
(rc_pingpong.c:349) ==29141== ==29141== Syscall param write(buf) points to uninitialised byte(s) ==29141== at 0x3F8980AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==29141== by 0x4A31AF7: ibv_cmd_modify_qp (cmd.c:774) ==29141== by 0x4C3E7E0: mthca_modify_qp (verbs.c:617) ==29141== by 0x4A34463: ibv_modify_qp (verbs.c:381) ==29141== by 0x401D4C: main (rc_pingpong.c:364) ==29141== Address 0x7FF000478 is on thread 1's stack ==29141== ==29141== Conditional jump or move depends on uninitialised value(s) ==29141== at 0x4C3C4F4: mthca_tavor_post_recv (qp.c:91) ==29141== by 0x4016B9: pp_post_recv (verbs.h:965) ==29141== by 0x401D6B: main (rc_pingpong.c:561) ==29141== ==29141== Conditional jump or move depends on uninitialised value(s) ==29141== at 0x4C3C544: mthca_tavor_post_recv (qp.c:369) ==29141== by 0x4016B9: pp_post_recv (verbs.h:965) ==29141== by 0x401D6B: main (rc_pingpong.c:561) ==29141== ==29141== Use of uninitialised value of size 8 ==29141== at 0x4C3C5A1: mthca_tavor_post_recv (qp.c:386) ==29141== by 0x4016B9: pp_post_recv (verbs.h:965) ==29141== by 0x401D6B: main (rc_pingpong.c:561) ==29141== ==29141== Use of uninitialised value of size 8 ==29141== at 0x4C3C50E: mthca_tavor_post_recv (qp.c:360) ==29141== by 0x4016B9: pp_post_recv (verbs.h:965) ==29141== by 0x401D6B: main (rc_pingpong.c:561) on memfree ---------- # valgrind --leak-check=yes ibv_rc_pingpong -p=2 10.4.3.30 ==28102== Memcheck, a memory error detector. ==28102== Copyright (C) 2002-2006, and GNU GPL'd, by Julian Seward et al. ==28102== Using LibVEX rev 1658, a library for dynamic binary translation. ==28102== Copyright (C) 2004-2006, and GNU GPL'd, by OpenWorks LLP. ==28102== Using valgrind-3.2.1, a dynamic binary instrumentation framework. ==28102== Copyright (C) 2000-2006, and GNU GPL'd, by Julian Seward et al. 
==28102== For more details, rerun with: -v ==28102== ==28102== Conditional jump or move depends on uninitialised value(s) ==28102== at 0x4C3BA8B: mthca_alloc_db_tab (memfree.c:186) ==28102== by 0x4C3BCD0: mthca_alloc_context (mthca.c:162) ==28102== by 0x4A32536: ibv_open_device (device.c:126) ==28102== by 0x401B81: main (rc_pingpong.c:301) ==28102== ==28102== Conditional jump or move depends on uninitialised value(s) ==28102== at 0x4C3BAA0: mthca_alloc_db_tab (memfree.c:187) ==28102== by 0x4C3BCD0: mthca_alloc_context (mthca.c:162) ==28102== by 0x4A32536: ibv_open_device (device.c:126) ==28102== by 0x401B81: main (rc_pingpong.c:301) ==28102== ==28102== Syscall param write(buf) points to uninitialised byte(s) ==28102== at 0x382340AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==28102== by 0x4A30B2E: ibv_cmd_reg_mr (cmd.c:245) ==28102== by 0x4C3DBA3: __mthca_reg_mr (verbs.c:134) ==28102== by 0x4A33F75: ibv_reg_mr (verbs.c:159) ==28102== by 0x401BFC: main (rc_pingpong.c:323) ==28102== Address 0x7FF000488 is on thread 1's stack ==28102== ==28102== Syscall param write(buf) points to uninitialised byte(s) ==28102== at 0x382340AF22: __write_nocancel (in /lib64/tls/libpthread-2.3.4.so) ==28102== by 0x4A30B2E: ibv_cmd_reg_mr (cmd.c:245) ==28102== by 0x4C3DBA3: __mthca_reg_mr (verbs.c:134) ==28102== by 0x4C3DCE9: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== Address 0x7FF0003D8 is on thread 1's stack ==28102== ==28102== Conditional jump or move depends on uninitialised value(s) ==28102== at 0x4C3B7DA: mthca_alloc_db (memfree.c:91) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Conditional jump or move depends on uninitialised value(s) ==28102== at 0x4C3B82D: mthca_alloc_db (memfree.c:97) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) 
==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4906375: posix_memalign (vg_replace_malloc.c:421) ==28102== by 0x4C3ABFE: mthca_alloc_buf (mthca.h:240) ==28102== by 0x4C3B854: mthca_alloc_db (memfree.c:102) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3AC20: mthca_alloc_buf (buf.c:68) ==28102== by 0x4C3B854: mthca_alloc_db (memfree.c:102) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3AC32: mthca_alloc_buf (buf.c:73) ==28102== by 0x4C3B854: mthca_alloc_db (memfree.c:102) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3B85D: mthca_alloc_db (memfree.c:109) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3B871: mthca_alloc_db (memfree.c:110) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3B879: mthca_alloc_db (memfree.c:110) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3B881: 
mthca_alloc_db (memfree.c:110) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3B889: mthca_alloc_db (memfree.c:110) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) ==28102== ==28102== Use of uninitialised value of size 8 ==28102== at 0x4C3B891: mthca_alloc_db (memfree.c:110) ==28102== by 0x4C3DD33: mthca_create_cq (mthca.h:259) ==28102== by 0x4A34174: ibv_create_cq (verbs.c:247) ==28102== by 0x401C6C: main (rc_pingpong.c:329) thanks Dotan From mst at mellanox.co.il Mon Jan 8 00:26:23 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 10:26:23 +0200 Subject: [openib-general] [PATCHv3] IPoIB CM Experimental support Message-ID: <20070108082623.GA5883@mellanox.co.il> The following patch adds experimental support for IPoIB connected mode. The idea is to increase performance by increasing the MTU from the maximum of 2K (theoretically 4K) supported by IPoIB on top of UD. With this code, I'm able to get 800MByte/sec or more with netperf without options on a Mellanox 4x back-to-back DDR system. Signed-off-by: Michael S. Tsirkin --- Please review, and put in -mm. This applies on top of the following patch: IB/verbs: return qp pointer as part of ib_wc (do I need to repost it? Here's a link: http://staging.openfabrics.org/git/?p=~vlad/ofed_1_2/.git;a=blob_plain;f=kernel_patches/fixes/ipoib_cm.patch;hb=HEAD) qp pointer is used on receive side to detect stale passive side connections Changes from the previous revision: - Using path MTU discovery, multicast and UDP traffic to UD mode now work, only a small number of packets is dropped. 
- Use timer to clean up stale RX connections - Make CM use the same CQ IPoIB uses for UD (good for mixed UD/CM traffic and for NAPI if we ever enable it) - Tone down warning messages - only some packets are now dropped in CM/UD setup I still kept the sysfs flag to enable/disable CM - is this still a good idea, or is it better to go back to only looking at the device MTU now that multicast works? CM support is also still labeled as experimental and is disabled by default, although it's been very stable for me, and the code is complete as far as I'm concerned. Would it be easier to merge it this way in the future? Note that the connected mode support adds very little overhead when not activated at run time, and zero data-path overhead when not activated at compile time. Here's a short description of what the patch does: a. The code's here: git://staging.openfabrics.org/~mst/linux-2.6/.git ipoib_cm_branch This is based on 2.6.19, so ~>git diff v2.6.19..ipoib_cm_branch will show what I have done so far. b. How to activate: Server: #modprobe ib_ipoib #echo connected > /sys/class/net/ib0/mode #/sbin/ifconfig ib0 mtu 65520 #./netperf-2.4.2/src/netserver Client: #modprobe ib_ipoib #echo connected > /sys/class/net/ib0/mode #/sbin/ifconfig ib0 mtu 65520 #./netperf-2.4.2/src/netperf -H 11.4.3.68 -f M TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.4.3.68 (11.4.3.68) port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. MBytes/sec 87380 16384 16384 10.01 891.21 c. TODO list (Optional) Send side S/G support d. Limitations With MTU > 2044, UDP multicast and UDP connections to IPoIB UD mode currently will drop some packets since we sometimes get packets that are too large to send over a UD QP. Typically a single packet will be dropped every several minutes until path MTU discovery kicks in and lowers the path MTU to this destination. e. Some notes on code 1.
SRQ is used for scalability to large cluster sizes 2. Only RC connections are used (UC does not support SRQ now) 3. Retry count is set to 0 since spec draft warns against retries 4. Each connection is used for data transfers in only 1 direction, so each connection is either active(TX) or passive (RX). 2 sides that want to communicate create 2 connections. 5. Each active (TX) connection has a separate CQ for send completions - this keeps the code simple without CQ resize and other tricks diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index c75322d..0ffca11 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -8,6 +8,20 @@ config INFINIBAND_IPOIB See Documentation/infiniband/ipoib.txt for more information +config INFINIBAND_IPOIB_CM + bool "IP-over-InfiniBand Connected Mode support" + depends on INFINIBAND_IPOIB && EXPERIMENTAL + default n + ---help--- + This option enables experimental support for IPoIB connected mode. + After enabling this option, you need to switch to connected mode through + /sys/class/net/ibXXX/mode to actually create connections, and then increase + the interface MTU with e.g. ifconfig ib0 mtu 65520. + + WARNING: Enabling connected mode will trigger some + packet drops for multicast and UD mode traffic from this interface, + unless you limit mtu for these destinations to 2044. 
+ config INFINIBAND_IPOIB_DEBUG bool "IP-over-InfiniBand debugging" if EMBEDDED depends on INFINIBAND_IPOIB diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile index 8935e74..98ee38e 100644 --- a/drivers/infiniband/ulp/ipoib/Makefile +++ b/drivers/infiniband/ulp/ipoib/Makefile @@ -5,5 +5,6 @@ ib_ipoib-y := ipoib_main.o \ ipoib_multicast.o \ ipoib_verbs.o \ ipoib_vlan.o +ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 07deee8..a7ace70 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -62,6 +62,10 @@ enum { IPOIB_ENCAP_LEN = 4, + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ + IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, + IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, + IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, IPOIB_RX_RING_SIZE = 128, IPOIB_TX_RING_SIZE = 64, IPOIB_MAX_QUEUE_SIZE = 8192, @@ -81,6 +85,8 @@ enum { IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, IPOIB_MCAST_STARTED = 8, + IPOIB_FLAG_NETIF_STOPPED = 9, + IPOIB_FLAG_ADMIN_CM = 10, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -90,6 +96,14 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, }; + +#define IPOIB_OP_RECV (1ul << 31) +#ifdef CONFIG_INFINIBAND_IPOIB_CM +#define IPOIB_CM_OP_SRQ (1ul << 30) +#else +#define IPOIB_CM_OP_SRQ (0) +#endif + /* structs */ struct ipoib_header { @@ -113,6 +127,61 @@ struct ipoib_tx_buf { u64 mapping; }; +#ifdef CONFIG_INFINIBAND_IPOIB_CM +struct ib_cm_id; + +struct ipoib_cm_data { + __be32 qpn; /* High byte MUST be ignored on receive */ + __be32 mtu; +}; + +struct ipoib_cm_rx { + struct ib_cm_id *id; + struct ib_qp *qp; + struct list_head list; + struct net_device *dev; + unsigned long jiffies; +}; + +struct ipoib_cm_tx { + struct ib_cm_id *id; + struct ib_cq *cq; + struct ib_qp *qp; + struct 
list_head list; + struct net_device *dev; + struct ipoib_neigh *neigh; + struct ipoib_path *path; + struct ipoib_tx_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + unsigned long flags; + u32 mtu; + struct ib_wc ibwc[IPOIB_NUM_WC]; +}; + +struct ipoib_cm_rx_buf { + struct sk_buff *skb; + u64 mapping[IPOIB_CM_RX_SG]; +}; + +struct ipoib_cm_dev_priv { + struct ib_srq *srq; + struct ipoib_cm_rx_buf *srq_ring; + struct ib_cm_id *id; + struct list_head passive_ids; + struct work_struct start_task; + struct work_struct reap_task; + struct work_struct skb_task; + struct delayed_work stale_task; + struct sk_buff_head skb_queue; + struct list_head start_list; + struct list_head reap_list; + struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_sge rx_sge[IPOIB_CM_RX_SG]; + struct ib_recv_wr rx_wr; +}; + +#endif /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). @@ -179,6 +248,10 @@ struct ipoib_dev_priv { struct list_head child_intfs; struct list_head list; +#ifdef CONFIG_INFINIBAND_IPOIB_CM + struct ipoib_cm_dev_priv cm; +#endif + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct list_head fs_list; struct dentry *mcg_dentry; @@ -212,6 +285,9 @@ struct ipoib_path { struct ipoib_neigh { struct ipoib_ah *ah; +#ifdef CONFIG_INFINIBAND_IPOIB_CM + struct ipoib_cm_tx *cm; +#endif union ib_gid dgid; struct sk_buff_head queue; @@ -315,6 +391,145 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); +#ifdef CONFIG_INFINIBAND_IPOIB_CM + +#define IPOIB_FLAGS_RC 0x80 +#define IPOIB_FLAGS_UC 0x40 + +#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC | IPOIB_FLAGS_UC)) + +static inline int ipoib_cm_admin_enabled(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return IPOIB_CM_SUPPORTED(dev->dev_addr) && + test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); +} + 
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return IPOIB_CM_SUPPORTED(n->ha) && + test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); +} + +static inline int ipoib_cm_up(struct ipoib_neigh *neigh) + +{ + return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags); +} + +static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) +{ + return neigh->cm; +} + +static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) +{ + neigh->cm = tx; +} + +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); +int ipoib_cm_dev_open(struct net_device *dev); +void ipoib_cm_dev_stop(struct net_device *dev); +int ipoib_cm_dev_init(struct net_device *dev); +int ipoib_cm_add_mode_attr(struct net_device *dev); +void ipoib_cm_dev_cleanup(struct net_device *dev); +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh); +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx); +void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu); +void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); +#else + +struct ipoib_cm_tx; + +static inline int ipoib_cm_admin_enabled(struct net_device *dev) +{ + return 0; +} +static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) + +{ + return 0; +} + +static inline int ipoib_cm_up(struct ipoib_neigh *neigh) + +{ + return 0; +} + +static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) +{ + return NULL; +} + +static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) +{ +} + +static inline +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) +{ + return; +} + +static inline +int ipoib_cm_dev_open(struct net_device *dev) +{ + return 0; +} + +static inline +void ipoib_cm_dev_stop(struct net_device *dev) +{ + 
return; +} + +static inline +int ipoib_cm_dev_init(struct net_device *dev) +{ + return -ENOSYS; +} + +static inline +void ipoib_cm_dev_cleanup(struct net_device *dev) +{ + return; +} + +static inline +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh) +{ + return NULL; +} + +static inline +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) +{ + return; +} + +static inline +int ipoib_cm_add_mode_attr(struct net_device *dev) +{ + return 0; +} + +static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu) +{ + dev_kfree_skb_any(skb); +} + +static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +{ +} + +#endif + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG void ipoib_create_debug_files(struct net_device *dev); void ipoib_delete_debug_files(struct net_device *dev); @@ -392,4 +607,6 @@ extern int ipoib_debug_level; #define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw) +#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c new file mode 100644 index 0000000..2c8bd87 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -0,0 +1,1223 @@ +/* + * Copyright (c) 2006 Mellanox Technologies. All rights reserved + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include +#include +#include + +#ifdef CONFIG_IPV6 +#include +#endif + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA +static int data_debug_level; + +module_param_named(cm_data_debug_level, data_debug_level, int, 0644); +MODULE_PARM_DESC(cm_data_debug_level, + "Enable data path debug tracing for connected mode if > 0"); +#endif + +#include "ipoib.h" + +#define IPOIB_CM_IETF_ID 0x1000000000000000ULL + +#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) +#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ) +#define IPOIB_CM_RX_DELAY (3 * 256 * HZ) +#define IPOIB_CM_RX_UPDATE_MASK (0x3) + +struct ipoib_cm_id { + struct ib_cm_id *id; + int flags; + u32 remote_qpn; + u32 remote_mtu; +}; + +static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event); + +static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_CM_RX_SG]) +{ + int i; + + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); + + for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i) + ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); +} + +static int ipoib_cm_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int i, ret; + + 
priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ; + + for (i = 0; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; + + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[id].skb); + priv->cm.srq_ring[id].skb = NULL; + } + + return ret; +} + +static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, + u64 mapping[IPOIB_CM_RX_SG]) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + int i; + + skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12); + if (unlikely(!skb)) + return -ENOMEM; + + /* + * IPoIB adds a 4 byte header. So we need 12 more bytes to align the + * IP header to a multiple of 16. + */ + skb_reserve(skb, 12); + + mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { + dev_kfree_skb_any(skb); + return -EIO; + } + + for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) { + struct page *page = alloc_page(GFP_ATOMIC); + + if (!page) + goto partial_error; + skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); + + mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page, + 0, PAGE_SIZE, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) + goto partial_error; + } + + priv->cm.srq_ring[id].skb = skb; + return 0; + +partial_error: + + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); + + for (; i >= 0; --i) + ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); + + kfree_skb(skb); + return -ENOMEM; +} + +static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, + struct ipoib_cm_rx *p) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_init_attr attr = { + .send_cq = priv->cq, /* does not matter, we never 
send anything */ + .recv_cq = priv->cq, + .srq = priv->cm.srq, + .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */ + .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ + .sq_sig_type = IB_SIGNAL_ALL_WR, + .qp_type = IB_QPT_RC, + .qp_context = p, + }; + return ib_create_qp(priv->pd, &attr); +} + +static int ipoib_cm_modify_rx_rts(struct net_device *dev, + struct ib_cm_id *cm_id, struct ib_qp *qp) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret); + return ret; + } + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret); + return ret; + } + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); + return ret; + } + qp_attr.rq_psn = 0 /* FIXME */; + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); + return ret; + } + return 0; +} + +static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, + struct ib_qp *qp, struct ib_cm_req_event_param *req) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_data data = {}; + struct ib_cm_rep_param rep = {}; + + data.qpn = cpu_to_be32(priv->qp->qp_num); + data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); + + rep.private_data = &data; + rep.private_data_len = sizeof data; + rep.flow_control = 0; + rep.rnr_retry_count = req->rnr_retry_count; + rep.target_ack_delay = 20; /* FIXME */ + rep.srq = 1; + rep.qp_num = qp->qp_num; + rep.starting_psn = 0 /* FIXME */; + return ib_send_cm_rep(cm_id, &rep); +} + +static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct net_device *dev = 
cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx *p; + unsigned long flags; + int ret; + + ipoib_dbg(priv, "REQ arrived\n"); + p = kzalloc(sizeof *p, GFP_KERNEL); + if (!p) + return -ENOMEM; + p->dev = dev; + p->id = cm_id; + p->qp = ipoib_cm_create_rx_qp(dev, p); + if (IS_ERR(p->qp)) { + ret = PTR_ERR(p->qp); + goto err_qp; + } + + ret = ipoib_cm_modify_rx_rts(dev, cm_id, p->qp); + if (ret) + goto err_modify; + + ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd); + if (ret) { + ipoib_warn(priv, "failed to send REP: %d\n", ret); + goto err_rep; + } + + cm_id->context = p; + p->jiffies = jiffies; + spin_lock_irqsave(&priv->lock, flags); + list_add(&p->list, &priv->cm.passive_ids); + spin_unlock_irqrestore(&priv->lock, flags); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + return 0; + +err_rep: +err_modify: + ib_destroy_qp(p->qp); +err_qp: + kfree(p); + return ret; +} + +static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ipoib_cm_rx *p; + struct ipoib_dev_priv *priv; + unsigned long flags; + int ret; + + switch (event->event) { + case IB_CM_REQ_RECEIVED: + return ipoib_cm_req_handler(cm_id, event); + case IB_CM_DREQ_RECEIVED: + p = cm_id->context; + ib_send_cm_drep(cm_id, NULL, 0); + /* Fall through */ + case IB_CM_REJ_RECEIVED: + p = cm_id->context; + priv = netdev_priv(p->dev); + spin_lock_irqsave(&priv->lock, flags); + if (list_empty(&p->list)) + ret = 0; /* Connection is going away already. 
*/ + else { + list_del_init(&p->list); + ret = -ECONNRESET; + } + spin_unlock_irqrestore(&priv->lock, flags); + if (ret) { + ib_destroy_qp(p->qp); + kfree(p); + return ret; + } + return 0; + default: + return 0; + } +} +/* Adjust length of skb with fragments to match received data */ +static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, + unsigned int length) +{ + int i, num_frags; + unsigned int size; + + /* put header into skb */ + size = min(length, hdr_space); + skb->tail += size; + skb->len += size; + length -= size; + + num_frags = skb_shinfo(skb)->nr_frags; + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (length == 0) { + /* don't need this page */ + __free_page(frag->page); + --skb_shinfo(skb)->nr_frags; + } else { + size = min(length, (unsigned) PAGE_SIZE); + + frag->size = size; + skb->data_len += size; + skb->truesize += size; + skb->len += size; + length -= size; + } + } +} + +void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ; + struct sk_buff *skb; + struct ipoib_cm_rx *p; + unsigned long flags; + u64 mapping[IPOIB_CM_RX_SG]; + + ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (unlikely(wr_id >= ipoib_recvq_size)) { + ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", + wr_id, ipoib_recvq_size); + return; + } + + skb = priv->cm.srq_ring[wr_id].skb; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + ipoib_dbg(priv, "cm recv error " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + ++priv->stats.rx_dropped; + goto repost; + } + + if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) { + p = wc->qp->qp_context; + if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { + spin_lock_irqsave(&priv->lock, flags); + p->jiffies = jiffies; + /* Move this entry to list 
head, but do + * not re-add it if it has been removed. */ + if (!list_empty(&p->list)) + list_move(&p->list, &priv->cm.passive_ids); + spin_unlock_irqrestore(&priv->lock, flags); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + } + } + + if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) { + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); + ++priv->stats.rx_dropped; + goto repost; + } + + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping); + memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping); + + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", + wc->byte_len, wc->slid); + + skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len); + + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + skb->mac.raw = skb->data; + skb_pull(skb, IPOIB_ENCAP_LEN); + + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; + + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_rx_ni(skb); + +repost: + if (unlikely(ipoib_cm_post_receive(dev, wr_id))) + ipoib_warn(priv, "ipoib_cm_post_receive failed " + "for buf %d\n", wr_id); +} + +static inline int post_send(struct ipoib_dev_priv *priv, + struct ipoib_cm_tx *tx, + unsigned int wr_id, + u64 addr, int len) +{ + struct ib_send_wr *bad_wr; + + priv->tx_sge.addr = addr; + priv->tx_sge.length = len; + + priv->tx_wr.wr_id = wr_id; + + return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); +} + +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_tx_buf *tx_req; + u64 addr; + + if (unlikely(skb->len > tx->mtu)) { + ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", + skb->len, tx->mtu); + ++priv->stats.tx_dropped; + ++priv->stats.tx_errors; + 
ipoib_cm_skb_too_long(dev, skb, tx->mtu - INFINIBAND_ALEN); + return; + } + + ipoib_dbg_data(priv, "sending packet %p, head %d length=%d connection=%p\n", + skb, tx->tx_head, skb->len, tx); + + /* + * We put the skb into the tx_ring _before_ we call post_send() + * because it's entirely possible that the completion handler will + * run before we execute anything after the post_send(). That + * means we have to make sure everything is properly recorded and + * our state is consistent before we call post_send(). + */ + tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; + tx_req->skb = skb; + addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { + ++priv->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + + tx_req->mapping = addr; + + if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), + addr, skb->len))) { + ipoib_warn(priv, "post_send failed\n"); + ++priv->stats.tx_errors; + ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + } else { + dev->trans_start = jiffies; + ++tx->tx_head; + + if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) { + ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); + netif_stop_queue(dev); + set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); + } + } +} + +static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx, + struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id; + struct ipoib_tx_buf *tx_req; + unsigned long flags; + + ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (unlikely(wr_id >= ipoib_sendq_size)) { + ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", + wr_id, ipoib_sendq_size); + return; + } + + tx_req = &tx->tx_ring[wr_id]; + + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); + + /* FIXME: is 
this right? Shouldn't we only increment on success? */ + ++priv->stats.tx_packets; + priv->stats.tx_bytes += tx_req->skb->len; + + dev_kfree_skb_any(tx_req->skb); + + spin_lock_irqsave(&priv->tx_lock, flags); + ++tx->tx_tail; + if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags) && + tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) { + netif_wake_queue(dev); + } + + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) { + struct ipoib_neigh *neigh; + + ipoib_dbg(priv, "failed cm send event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + + spin_lock(&priv->lock); + neigh = tx->neigh; + + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + + tx->neigh = NULL; + } + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + } + + clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); + + spin_unlock(&priv->lock); + } + + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr) +{ + struct ipoib_cm_tx *tx = tx_ptr; + int n, i; + + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + do { + n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc); + for (i = 0; i < n; ++i) + ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i); + } while (n == IPOIB_NUM_WC); +} + +int ipoib_cm_dev_open(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int ret; + + if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) + return 0; + + priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); + if (IS_ERR(priv->cm.id)) { + printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); + return IS_ERR(priv->cm.id); + } + + ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), + 0, NULL); + if (ret) { + printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", 
priv->ca->name, + IPOIB_CM_IETF_ID | priv->qp->qp_num); + ib_destroy_cm_id(priv->cm.id); + return ret; + } + return 0; +} + +void ipoib_cm_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx *p; + unsigned long flags; + + if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) + return; + + ib_destroy_cm_id(priv->cm.id); + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.passive_ids)) { + p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); + list_del_init(&p->list); + spin_unlock_irqrestore(&priv->lock, flags); + ib_destroy_cm_id(p->id); + ib_destroy_qp(p->qp); + kfree(p); + spin_lock_irqsave(&priv->lock, flags); + } + spin_unlock_irqrestore(&priv->lock, flags); + + cancel_delayed_work(&priv->cm.stale_task); +} + +static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct ipoib_cm_tx *p = cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_cm_data *data = event->private_data; + struct sk_buff_head skqueue; + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + struct sk_buff *skb; + unsigned long flags; + + p->mtu = be32_to_cpu(data->mtu); + + if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) { + ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n", + p->mtu, priv->dev->mtu); + return -EINVAL; + } + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); + return ret; + } + + qp_attr.rq_psn = 0 /* FIXME */; + ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); + return ret; + } + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); + return ret; + } + ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); + if (ret) { + 
ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); + return ret; + } + + skb_queue_head_init(&skqueue); + + spin_lock_irqsave(&priv->lock, flags); + set_bit(IPOIB_FLAG_OPER_UP, &p->flags); + if (p->neigh) + while ((skb = __skb_dequeue(&p->neigh->queue))) + __skb_queue_tail(&skqueue, skb); + spin_unlock_irqrestore(&priv->lock, flags); + + while ((skb = __skb_dequeue(&skqueue))) { + skb->dev = p->dev; + if (dev_queue_xmit(skb)) + ipoib_warn(priv, "dev_queue_xmit failed " + "to requeue packet\n"); + } + + ret = ib_send_cm_rtu(cm_id, NULL, 0); + if (ret) { + ipoib_warn(priv, "failed to send RTU: %d\n", ret); + return ret; + } + return 0; +} + +static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_init_attr attr = {}; + attr.recv_cq = priv->cq; + attr.srq = priv->cm.srq; + attr.cap.max_send_wr = ipoib_sendq_size; + attr.cap.max_send_sge = 1; + attr.sq_sig_type = IB_SIGNAL_ALL_WR; + attr.qp_type = IB_QPT_RC; + attr.send_cq = cq; + return ib_create_qp(priv->pd, &attr); +} + +static int ipoib_cm_send_req(struct net_device *dev, + struct ib_cm_id *id, struct ib_qp *qp, + u32 qpn, + struct ib_sa_path_rec *pathrec) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_data data = {}; + struct ib_cm_req_param req = {}; + + data.qpn = cpu_to_be32(priv->qp->qp_num); + data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); + + req.primary_path = pathrec; + req.alternate_path = NULL; + req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); + req.qp_num = qp->qp_num; + req.qp_type = qp->qp_type; + req.private_data = &data; + req.private_data_len = sizeof data; + req.flow_control = 0; + + req.starting_psn = 0; /* FIXME */ + + /* + * Pick some arbitrary defaults here; we could make these + * module parameters if anyone cared about setting them. 
+ */ + req.responder_resources = 4; + req.remote_cm_response_timeout = 20; + req.local_cm_response_timeout = 20; + req.retry_count = 0; /* RFC draft warns against retries */ + req.rnr_retry_count = 0; /* RFC draft warns against retries */ + req.max_cm_retries = 15; + req.srq = 15; + return ib_send_cm_req(id, &req); +} + +static int ipoib_cm_modify_tx_init(struct net_device *dev, + struct ib_cm_id *cm_id, struct ib_qp *qp) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index); + if (ret) { + ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret); + return ret; + } + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr.port_num = priv->port; + qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; + + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret); + return ret; + } + return 0; +} + +static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, + struct ib_sa_path_rec *pathrec) +{ + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + int ret; + + ipoib_dbg(priv, "Request connection %p for gid " IPOIB_GID_FMT " qpn 0x%x\n", + p, IPOIB_GID_ARG(pathrec->dgid), qpn); + + p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, + GFP_KERNEL); + if (!p->tx_ring) { + ipoib_warn(priv, "failed to allocate tx ring\n"); + ret = -ENOMEM; + goto err_tx; + } + + p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p, + ipoib_sendq_size + 1); + if (IS_ERR(p->cq)) { + ret = PTR_ERR(p->cq); + ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret); + goto err_cq; + } + + ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP); + if (ret) { + ipoib_warn(priv, "failed to request completion notification: %d\n", ret); + goto err_req_notify; + } + + p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq); + if 
(IS_ERR(p->qp)) { + ret = PTR_ERR(p->qp); + ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); + goto err_qp; + } + + p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p); + if (IS_ERR(p->id)) { + ret = PTR_ERR(p->id); + ipoib_warn(priv, "failed to create tx cm id: %d\n", ret); + goto err_id; + } + + ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp); + if (ret) { + ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret); + goto err_modify; + } + + ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec); + if (ret) { + ipoib_warn(priv, "failed to send cm req: %d\n", ret); + goto err_send_cm; + } + return 0; + +err_send_cm: +err_modify: + ib_destroy_cm_id(p->id); +err_id: + p->id = NULL; + ib_destroy_qp(p->qp); +err_req_notify: +err_qp: + p->qp = NULL; + ib_destroy_cq(p->cq); +err_cq: + p->cq = NULL; +err_tx: + return ret; +} + +static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) +{ + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_tx_buf *tx_req; + + ipoib_dbg(priv, "Destroy active connection %p. 
head 0x%x tail 0x%x\n", + p, p->tx_head, p->tx_tail); + + if (p->id) + ib_destroy_cm_id(p->id); + + if (p->qp) + ib_destroy_qp(p->qp); + + if (p->cq) + ib_destroy_cq(p->cq); + + if (p->tx_ring) { + while ((int) p->tx_tail - (int) p->tx_head < 0) { + tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, + DMA_TO_DEVICE); + dev_kfree_skb_any(tx_req->skb); + ++p->tx_tail; + } + + kfree(p->tx_ring); + } + + kfree(p); +} + +static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ipoib_cm_tx *tx = cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + struct net_device *dev = priv->dev; + struct ipoib_neigh *neigh; + unsigned long flags; + int ret; + + switch (event->event) { + case IB_CM_DREQ_RECEIVED: + ipoib_dbg(priv, "DREQ received.\n"); + ib_send_cm_drep(cm_id, NULL, 0); + break; + case IB_CM_REP_RECEIVED: + ipoib_dbg(priv, "REP received.\n"); + ret = ipoib_cm_rep_handler(cm_id, event); + if (ret) + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + break; + case IB_CM_REQ_ERROR: + case IB_CM_REJ_RECEIVED: + case IB_CM_TIMEWAIT_EXIT: + ipoib_dbg(priv, "CM error %d.\n", event->event); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + neigh = tx->neigh; + + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + + tx->neigh = NULL; + } + + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + } + + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + break; + default: + break; + } + + return 0; +} + +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_tx 
*tx; + + tx = kzalloc(sizeof *tx, GFP_ATOMIC); + if (!tx) + return NULL; + + neigh->cm = tx; + tx->neigh = neigh; + tx->path = path; + tx->dev = dev; + list_add(&tx->list, &priv->cm.start_list); + set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); + queue_work(ipoib_workqueue, &priv->cm.start_task); + return tx; +} + +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) +{ + struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(tx->neigh->dgid)); + tx->neigh = NULL; + } +} + +static void ipoib_cm_tx_start(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.start_task); + struct net_device *dev = priv->dev; + struct ipoib_neigh *neigh; + struct ipoib_cm_tx *p; + unsigned long flags; + int ret; + + struct ib_sa_path_rec pathrec; + u32 qpn; + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while (!list_empty(&priv->cm.start_list)) { + p = list_entry(priv->cm.start_list.next, typeof(*p), list); + list_del_init(&p->list); + neigh = p->neigh; + qpn = IPOIB_QPN(neigh->neighbour->ha); + memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + ret = ipoib_cm_tx_init(p, qpn, &pathrec); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + if (ret) { + neigh = p->neigh; + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + } + list_del(&p->list); + kfree(p); + } + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_tx_reap(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.reap_task); + 
struct ipoib_cm_tx *p; + unsigned long flags; + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while (!list_empty(&priv->cm.reap_list)) { + p = list_entry(priv->cm.reap_list.next, typeof(*p), list); + list_del(&p->list); + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + ipoib_cm_tx_destroy(p); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_skb_reap(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.skb_task); + struct net_device *dev = priv->dev; + struct sk_buff *skb; + unsigned long flags; + + __be32 mtu = cpu_to_be32(priv->mcast_mtu); + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while ((skb = skb_dequeue(&priv->cm.skb_queue))) { + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + if (skb->protocol == htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); +#ifdef CONFIG_IPV6 + else if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); +#endif + dev_kfree_skb_any(skb); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int e = skb_queue_empty(&priv->cm.skb_queue); + + if (skb->dst) + skb->dst->ops->update_pmtu(skb->dst, mtu); + + skb_queue_tail(&priv->cm.skb_queue, skb); + if (e) + queue_work(ipoib_workqueue, &priv->cm.skb_task); +} + +static void ipoib_cm_stale_task(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.stale_task.work); + struct ipoib_cm_rx *p; + unsigned long flags; + + 
spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.passive_ids)) { + /* List if sorted by LRU, start from tail, + * stop when we see a recently used entry */ + p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); + if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) + break; + list_del_init(&p->list); + spin_unlock_irqrestore(&priv->lock, flags); + ib_destroy_cm_id(p->id); + ib_destroy_qp(p->qp); + kfree(p); + spin_lock_irqsave(&priv->lock, flags); + } + spin_unlock_irqrestore(&priv->lock, flags); +} + + +static ssize_t show_mode(struct class_device *cdev, char *buf) +{ + struct net_device *dev = container_of(cdev, struct net_device, class_dev); + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) + return sprintf(buf, "connected\n"); + else + return sprintf(buf, "datagram\n"); +} + +static ssize_t set_mode(struct class_device *cdev, + const char *buf, size_t count) +{ + struct net_device *dev = container_of(cdev, struct net_device, class_dev); + struct ipoib_dev_priv *priv = netdev_priv(dev); + + /* flush paths if we switch modes so that connections are restarted */ + if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { + set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + ipoib_warn(priv, "enabling connected mode " + "will cause multicast packet drops\n"); + ipoib_flush_paths(dev); + return count; + } + + if (!strcmp(buf, "datagram\n")) { + clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + ipoib_flush_paths(dev); + return count; + } + + return -EINVAL; +} + +static CLASS_DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode); + +int ipoib_cm_add_mode_attr(struct net_device *dev) +{ + return class_device_create_file(&dev->class_dev, &class_device_attr_mode); +} + +int ipoib_cm_dev_init(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_srq_init_attr srq_init_attr = { + .attr = { + .max_wr = ipoib_recvq_size, + .max_sge = 
IPOIB_CM_RX_SG + } + }; + int ret, i; + + INIT_LIST_HEAD(&priv->cm.passive_ids); + INIT_LIST_HEAD(&priv->cm.reap_list); + INIT_LIST_HEAD(&priv->cm.start_list); + INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); + INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); + INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); + INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); + + skb_queue_head_init(&priv->cm.skb_queue); + + priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); + if (IS_ERR(priv->cm.srq)) { + ret = PTR_ERR(priv->cm.srq); + priv->cm.srq = NULL; + return ret; + } + + priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, + GFP_KERNEL); + if (!priv->cm.srq_ring) { + printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n", + priv->ca->name, ipoib_recvq_size); + ipoib_cm_dev_cleanup(dev); + return -ENOMEM; + } + + for (i = 0; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].lkey = priv->mr->lkey; + + priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].length = PAGE_SIZE; + priv->cm.rx_wr.next = NULL; + priv->cm.rx_wr.sg_list = priv->cm.rx_sge; + priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; + + for (i = 0; i < ipoib_recvq_size; ++i) { + if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -ENOMEM; + } + if (ipoib_cm_post_receive(dev, i)) { + ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -EIO; + } + } + + priv->dev->dev_addr[0] = IPOIB_FLAGS_RC; + return 0; +} + +void ipoib_cm_dev_cleanup(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i, ret; + + if (!priv->cm.srq) + return; + + ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); + + ret = ib_destroy_srq(priv->cm.srq); + if (ret) + ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); + + priv->cm.srq = NULL; + if 
(!priv->cm.srq_ring) + return; + for (i = 0; i < ipoib_recvq_size; ++i) + if (priv->cm.srq_ring[i].skb) { + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[i].skb); + priv->cm.srq_ring[i].skb = NULL; + } + kfree(priv->cm.srq_ring); + priv->cm.srq_ring = NULL; +} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 59d9594..0888e17 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level, "Enable data path debug tracing if > 0"); #endif -#define IPOIB_OP_RECV (1ul << 31) - static DEFINE_MUTEX(pkey_mutex); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, @@ -268,10 +266,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) spin_lock_irqsave(&priv->tx_lock, flags); ++priv->tx_tail; - if (netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && - priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) + if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags) && + priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) { netif_wake_queue(dev); + } spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && @@ -283,7 +281,9 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) { - if (wc->wr_id & IPOIB_OP_RECV) + if (wc->wr_id & IPOIB_CM_OP_SRQ) + ipoib_cm_handle_rx_wc(dev, wc); + else if (wc->wr_id & IPOIB_OP_RECV) ipoib_ib_handle_rx_wc(dev, wc); else ipoib_ib_handle_tx_wc(dev, wc); @@ -327,12 +327,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; u64 addr; - if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { + if (unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", - skb->len, 
dev->mtu + INFINIBAND_ALEN); + skb->len, priv->mcast_mtu + INFINIBAND_ALEN); ++priv->stats.tx_dropped; ++priv->stats.tx_errors; - dev_kfree_skb_any(skb); + ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu); return; } @@ -372,6 +372,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); netif_stop_queue(dev); + set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); } } } @@ -424,6 +425,13 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } + ret = ipoib_cm_dev_open(dev); + if (ret) { + ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); + ipoib_ib_dev_stop(dev); + return -1; + } + clear_bit(IPOIB_STOP_REAPER, &priv->flags); queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); @@ -509,6 +517,8 @@ int ipoib_ib_dev_stop(struct net_device *dev) clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_cm_dev_stop(dev); + /* * Move our QP to the error state and then reinitialize in * when all work requests have completed or have been flushed. diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 705eb1d..19e82db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,8 +49,6 @@ #include -#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) - MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -145,6 +143,8 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); + clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); + /* * Now flush workqueue to make sure a scheduled task doesn't * bring our internal state back up. 
@@ -178,8 +178,18 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) { struct ipoib_dev_priv *priv = netdev_priv(dev); - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + /* dev->mtu > 2K ==> connected mode */ + if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) { + if (new_mtu > priv->mcast_mtu) + ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", + priv->mcast_mtu); + dev->mtu = new_mtu; + return 0; + } + + if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) { return -EINVAL; + } priv->admin_mtu = new_mtu; @@ -414,6 +424,20 @@ static void path_rec_completion(int status, memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, sizeof(union ib_gid)); + if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (!ipoib_cm_get(neigh)) + ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, + path, + neigh)); + if (!ipoib_cm_get(neigh)) { + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + continue; + } + } + while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } @@ -520,7 +544,25 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, sizeof(union ib_gid)); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); + if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (!ipoib_cm_get(neigh)) + ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); + if (!ipoib_cm_get(neigh)) { + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + goto err_drop; + } + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) + __skb_queue_tail(&neigh->queue, skb); + else { + ipoib_warn(priv, "queue length limit %d. 
Packet drop.\n", + skb_queue_len(&neigh->queue)); + goto err_drop; + } + } else + ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); } else { neigh->ah = NULL; @@ -538,6 +580,7 @@ err_list: err_path: ipoib_neigh_free(dev, neigh); +err_drop: ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -640,7 +683,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) neigh = *to_ipoib_neigh(skb->dst->neighbour); - if (likely(neigh->ah)) { + if (ipoib_cm_get(neigh)) { + if (ipoib_cm_up(neigh)) { + ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); + goto out; + } + } else if (neigh->ah) { if (unlikely(memcmp(&neigh->dgid.raw, skb->dst->neighbour->ha + 4, sizeof(union ib_gid)))) { @@ -805,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) neigh->neighbour = neighbour; *to_ipoib_neigh(neighbour) = neigh; skb_queue_head_init(&neigh->queue); + ipoib_cm_set(neigh, NULL); return neigh; } @@ -818,6 +867,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); } + if (ipoib_cm_get(neigh)) + ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); kfree(neigh); } @@ -1081,6 +1132,8 @@ static struct net_device *ipoib_add_port(const char *format, ipoib_create_debug_files(priv->dev); + if (ipoib_cm_add_mode_attr(priv->dev)) + goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; if (class_device_create_file(&priv->dev->class_dev, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b04b72c..fea737f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -597,7 +597,9 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - IPOIB_ENCAP_LEN; - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + + if (!ipoib_cm_admin_enabled(dev)) + dev->mtu = min(priv->mcast_mtu, 
priv->admin_mtu); ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 7b717c6..3cb551b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .qp_type = IB_QPT_UD }; + int ret, size; + priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; } - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, - ipoib_sendq_size + ipoib_recvq_size + 1); + priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(priv->mr)) { + printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); + goto out_free_pd; + } + + size = ipoib_sendq_size + ipoib_recvq_size + 1; + ret = ipoib_cm_dev_init(dev); + if (!ret) + size += ipoib_recvq_size; + + priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size); if (IS_ERR(priv->cq)) { printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); - goto out_free_pd; + goto out_free_mr; } if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) goto out_free_cq; - priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(priv->mr)) { - printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); - goto out_free_cq; - } - init_attr.send_cq = priv->cq; init_attr.recv_cq = priv->cq, priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_mr; + goto out_free_cq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) return 0; -out_free_mr: - ib_dereg_mr(priv->mr); - out_free_cq: ib_destroy_cq(priv->cq); +out_free_mr: + ib_dereg_mr(priv->mr); + out_free_pd: 
ib_dealloc_pd(priv->pd); return -ENODEV; @@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_dereg_mr(priv->mr)) - ipoib_warn(priv, "ib_dereg_mr failed\n"); - if (ib_destroy_cq(priv->cq)) ipoib_warn(priv, "ib_cq_destroy failed\n"); + ipoib_cm_dev_cleanup(dev); + + if (ib_dereg_mr(priv->mr)) + ipoib_warn(priv, "ib_dereg_mr failed\n"); + if (ib_dealloc_pd(priv->pd)) ipoib_warn(priv, "ib_dealloc_pd failed\n"); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index f887780..d9fd82d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -115,6 +115,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ipoib_create_debug_files(priv->dev); + if (ipoib_cm_add_mode_attr(priv->dev)) + goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; -- MST From mirko.benz at xiranet.com Mon Jan 8 03:13:45 2007 From: mirko.benz at xiranet.com (Mirko Benz) Date: Mon, 08 Jan 2007 12:13:45 +0100 Subject: [openib-general] [PATCH] rdma_cm iWARP connection setup timeouts reported as rejects. In-Reply-To: References: Message-ID: <45A22769.4040405@xiranet.com> Hi, What could be the reasons for these timeouts to occur? How should an application handle this? Thanks, Mirko From nimrodg at mellanox.com Mon Jan 8 03:18:35 2007 From: nimrodg at mellanox.com (Nimrod Gindi) Date: Mon, 8 Jan 2007 03:18:35 -0800 Subject: [openib-general] OFED release testing Task force meeting minutes Message-ID: <1E3DCD1C63492545881FACB6063A57C1B23227@mtiexch01.mti.com> Meeting took place on Thursday - Jan. 4th, 2007 Agenda: 1. Introduction to targets as presented at the last OFA meeting 2. Determine priorities. 3. Determine schedule 4. 
Open discussion Attending companies: Mellanox, NetEffect, ORNL, Qlogic, Voltaire. Discussion Items and Action Items: 1) Note was made that the OFA interoperability event and the IBTA Plugfest are dated after the scheduled OFED 1.2 release. 2) Agreed initial targets (in priority order): a. Unified reporting of test results b. Unified/Increased reporting of bugs c. ULPs/driver parts testing ownership 3) Agreed Action Items: a. AI 1: Amit K (Mellanox) to take that up with OFA to revisit the date and decide whether it would be better to have the testing prior to the release. b. AI 2: Amit K (Mellanox) to send out test-report format for group review. c. AI 3: Moni L (Voltaire) to send out test-report format for group review. It was agreed that reviews and additional ideas will be handled by e-mail, with the group in the To field involved. Follow-up meeting will be scheduled for either 17th or 18th of January 2007 8:30am PDT=11am EDT=6pm Israel (Please respond with which fits you better). Nimrod Gindi Mellanox Technologies Ltd. mail : nimrodg at mellanox.com Cell : +1-408-750-4801 Office: +1-347-342-0011 Fax : +1-212-987-0275 -------------- next part -------------- An HTML attachment was scrubbed... URL: From michael.arndt at informatik.tu-chemnitz.de Mon Jan 8 03:28:19 2007 From: michael.arndt at informatik.tu-chemnitz.de (Michael Arndt) Date: Mon, 8 Jan 2007 12:28:19 +0100 Subject: [openib-general] ioctl and send_agents References: <000901c732bc$17eeb9b0$21606d86@one7> <1168221630.4577.39665.camel@hal.voltaire.com> Message-ID: <001d01c73318$19033220$21606d86@one7> Hi, Thanks for the fast answer. > OpenSM registers agents in opensm/osm_sm_mad_ctrl.c:osm_sm_mad_ctrl_bind > and opensm/osm_sa_mad_ctrl.c:osm_sa_mad_ctrl_bind. osm_sm_mad_ctrl_bind > is called from osm_sm.c:osm_sm_bind and osm_sa_mad_ctrl_bind is called > from osm_sa.c:osm_sa_bind. Both osm_sm_bind and osm_sa_bind are called > from opensm/osm_opensm.c:osm_opensm_bind which is in turn called from > main.c during OpenSM startup.
That is the vendor independent part. > > The vendor dependant part is done in the vendor layer. For OpenIB, it is > done in osm_vendor_ibumad.c:osm_vendor_bind. I looked at osm_vendor_bind and saw the umad_register call. But when I checked the umad_register function (libibumad/src/umad.c) I just see an ioctl call again. And if it is right that the user_mad module is used in kernel space, shouldn't there be a call like unlocked_ioctl or compat_ioctl, as defined in this module? These agents are all receiver agents, and you say nothing about send agents for the SM? Thanks Michael From halr at voltaire.com Mon Jan 8 03:54:24 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 06:54:24 -0500 Subject: [openib-general] nightly osm_sim report 2007-01-07:normal completion In-Reply-To: <200701070515.l075FRD6001811@sw053.yok.mtl.com> References: <200701070515.l075FRD6001811@sw053.yok.mtl.com> Message-ID: <1168257262.4577.72049.camel@hal.voltaire.com> On Sun, 2007-01-07 at 00:15, Eitan Zahavi wrote: > OSM Simulation Regression Summary > OpenSM rev = Sat_Jan_6_06:44:34_2007 6c8647 > ibutils rev = Wed_Jan_3_11:42:12_2007 913448 > Total=369 Pass=366 Fail=3 > > Pass: > 27 Stability IS1-16.topo > 27 Pkey IS1-16.topo > 27 OsmTest IS1-16.topo > 27 OsmStress IS1-16.topo > 27 Multicast IS1-16.topo > 27 LidMgr IS1-16.topo > 9 Stability IS3-loop.topo > 9 Stability IS3-128.topo > 9 Pkey IS3-128.topo > 9 OsmTest IS3-loop.topo > 9 OsmTest IS3-128.topo > 9 OsmStress IS3-128.topo > 9 Multicast IS3-loop.topo > 9 Multicast IS3-128.topo > 9 FatTree part-4-ary-3-tree.topo > 9 FatTree merge-roots-reorder-4-ary-2-tree.topo > 9 FatTree merge-roots-4-ary-2-tree.topo > 9 FatTree merge-root-4-ary-3-tree.topo > 9 FatTree merge-root-12-ary-2-tree.topo > 9 FatTree half-4-ary-3-tree.topo > 9 FatTree blend-4-ary-2-tree.topo > 9 FatTree 4-ary-4-tree.topo > 9 FatTree 4-ary-3-tree.topo > 9 FatTree 32nodes-3lvl-is1.topo > 9 FatTree 2-ary-4-tree.topo > 9 FatTree 12-ary-2-tree.topo > 8 LidMgr
IS3-128.topo > 8 FatTree merge-2-ary-4-tree.topo > 8 FatTree 12-node-spaced.topo > > Failures: > 1 LidMgr IS3-128.topo Is this LidMgr failure a DNS issue like the others ? Also, there was also pkey failure from late last week. -- Hal > 1 FatTree merge-2-ary-4-tree.topo > 1 FatTree 12-node-spaced.topo From halr at voltaire.com Mon Jan 8 03:54:45 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 06:54:45 -0500 Subject: [openib-general] nightly osm_sim report 2007-01-08:normal completion In-Reply-To: <200701080526.l085QgkG011229@sw053.yok.mtl.com> References: <200701080526.l085QgkG011229@sw053.yok.mtl.com> Message-ID: <1168257284.4577.72051.camel@hal.voltaire.com> On Mon, 2007-01-08 at 00:26, Eitan Zahavi wrote: > OSM Simulation Regression Summary > OpenSM rev = Sat_Jan_6_06:44:34_2007 6c8647 > ibutils rev = Wed_Jan_3_11:42:12_2007 913448 > Total=410 Pass=409 Fail=1 > > Pass: > 30 Stability IS1-16.topo > 30 Pkey IS1-16.topo > 30 OsmTest IS1-16.topo > 30 OsmStress IS1-16.topo > 30 Multicast IS1-16.topo > 30 LidMgr IS1-16.topo > 10 Stability IS3-loop.topo > 10 Stability IS3-128.topo > 10 Pkey IS3-128.topo > 10 OsmTest IS3-loop.topo > 10 OsmTest IS3-128.topo > 10 OsmStress IS3-128.topo > 10 Multicast IS3-loop.topo > 10 LidMgr IS3-128.topo > 10 FatTree part-4-ary-3-tree.topo > 10 FatTree merge-roots-reorder-4-ary-2-tree.topo > 10 FatTree merge-roots-4-ary-2-tree.topo > 10 FatTree merge-root-4-ary-3-tree.topo > 10 FatTree merge-root-12-ary-2-tree.topo > 10 FatTree merge-2-ary-4-tree.topo > 10 FatTree half-4-ary-3-tree.topo > 10 FatTree blend-4-ary-2-tree.topo > 10 FatTree 4-ary-4-tree.topo > 10 FatTree 4-ary-3-tree.topo > 10 FatTree 32nodes-3lvl-is1.topo > 10 FatTree 2-ary-4-tree.topo > 10 FatTree 12-node-spaced.topo > 10 FatTree 12-ary-2-tree.topo > 9 Multicast IS3-128.topo > > Failures: > 1 Multicast IS3-128.topo What about this failure too ? Is it also DNS related or something else ? 
-- Hal From halr at voltaire.com Mon Jan 8 04:20:25 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 07:20:25 -0500 Subject: [openib-general] [PATCH 2/2]: OpenSM/osm_console.c: Handle telnet disconnects better Message-ID: <1168258813.4577.73484.camel@hal.voltaire.com> OpenSM/osm_console.c: Handle telnet disconnects better Signed-off-by: Sasha Khapyorsky Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c index 420acc2..8d770aa 100644 --- a/osm/opensm/osm_console.c +++ b/osm/opensm/osm_console.c @@ -336,7 +336,7 @@ void osm_console(osm_opensm_t *p_osm) pollfd[1].events = POLLIN|POLLOUT; pollfd[1].revents = 0; - if (poll(pollfd, 2, 10000) <= 0) + if (poll(pollfd, pollfd[1].fd >= 0 ? 2 : 1, 10000) <= 0) return; #ifdef ENABLE_OSM_CONSOLE_SOCKET @@ -382,11 +382,10 @@ void osm_console(osm_opensm_t *p_osm) if (n > 0) { /* Parse and act on input */ parse_cmd_line(p_line, p_osm); + osm_console_prompt(p_osm->console.out); + } else + osm_console_close_socket(p_osm); + if (p_line) free(p_line); - } else { - fprintf(p_osm->console.out, "Input error\n"); - fflush(p_osm->console.out); - } - osm_console_prompt(p_osm->console.out); } } From halr at voltaire.com Mon Jan 8 04:20:17 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 07:20:17 -0500 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console Message-ID: <1168258813.4577.73483.camel@hal.voltaire.com> OpenSM: Add socket support to OpenSM console Signed-off-by: Ira Weiny Signed-off-by: Hal Rosenstock diff --git a/osm/include/opensm/osm_console.h b/osm/include/opensm/osm_console.h index 705f918..2d212f2 100644 --- a/osm/include/opensm/osm_console.h +++ b/osm/include/opensm/osm_console.h @@ -38,6 +38,11 @@ #include #include +#define OSM_COMMAND_LINE_LEN 120 +#define OSM_COMMAND_PROMPT "$ " +#define OSM_DEFAULT_CONSOLE_PORT 10000 +#define OSM_DAEMON_NAME "opensm" + #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { # define 
END_C_DECLS } @@ -48,8 +53,10 @@ BEGIN_C_DECLS +void osm_console_init(osm_subn_opt_t *opt, osm_opensm_t *p_osm); void osm_console(osm_opensm_t *p_osm); -void osm_console_prompt(void); +void osm_console_prompt(FILE *out); +void osm_console_close_socket(osm_opensm_t *p_osm); END_C_DECLS diff --git a/osm/include/opensm/osm_opensm.h b/osm/include/opensm/osm_opensm.h index 16fef37..482de28 100644 --- a/osm/include/opensm/osm_opensm.h +++ b/osm/include/opensm/osm_opensm.h @@ -48,6 +48,7 @@ #ifndef _OSM_OPENSM_H_ #define _OSM_OPENSM_H_ +#include #include #include #include @@ -130,6 +131,15 @@ struct osm_routing_engine { * internals cleanup. */ +typedef struct _osm_console_t +{ + int socket; + int in_fd; + int out_fd; + FILE *in; + FILE *out; +} osm_console_t; + /****s* OpenSM: OpenSM/osm_opensm_t * NAME * osm_opensm_t @@ -156,6 +166,7 @@ typedef struct _osm_opensm_t cl_plock_t lock; struct osm_routing_engine routing_engine; osm_stats_t stats; + osm_console_t console; } osm_opensm_t; /* * FIELDS diff --git a/osm/include/opensm/osm_subnet.h b/osm/include/opensm/osm_subnet.h index 79796e5..c9b04eb 100644 --- a/osm/include/opensm/osm_subnet.h +++ b/osm/include/opensm/osm_subnet.h @@ -266,6 +266,7 @@ typedef struct _osm_subn_opt boolean_t no_qos; boolean_t accum_log_file; boolean_t console; + uint16_t console_port; cl_map_t port_prof_ignore_guids; boolean_t port_profile_switch_nodes; osm_pfn_ui_extension_t pfn_ui_pre_lid_assign; diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in index 1ccf5c6..2d52675 100644 --- a/osm/opensm/configure.in +++ b/osm/opensm/configure.in @@ -62,6 +62,22 @@ AC_ARG_ENABLE(debug, esac],[debug=false]) AM_CONDITIONAL(DEBUG, test x$debug = xtrue) +dnl Console over a socket connection +AC_ARG_ENABLE(console-socket, +[ --enable-console-socket Enable a console socket, requires tcp_wrappers (default yes)], +[case $enableval in + yes) console_socket=yes ;; + no) console_socket=no ;; + esac], + console_socket=yes) +if test $console_socket = yes; 
then + AC_CHECK_LIB(wrap, request_init, [], + AC_MSG_ERROR([request_init() not found. console-socket requires libwrap.])) + AC_DEFINE(ENABLE_OSM_CONSOLE_SOCKET, + 1, + [Define as 1 if you want to enable a console on a socket connection]) +fi + dnl Provide user option to select vendor OPENIB_APP_OSMV_SEL diff --git a/osm/opensm/main.c b/osm/opensm/main.c index 374d323..90432be 100644 --- a/osm/opensm/main.c +++ b/osm/opensm/main.c @@ -217,6 +217,11 @@ show_usage(void) " 4 outstanding SMPs.\n\n" ); printf( "-console\n" " This option brings up the OpenSM console.\n\n" ); +#ifdef ENABLE_OSM_CONSOLE_SOCKET + printf( "--console_port \n" + " Specify an alternate telnet port for the console (default %d).\n\n", + OSM_DEFAULT_CONSOLE_PORT); +#endif printf( "-i \n" "-ignore-guids \n" " This option provides the means to define a set of ports\n" @@ -578,6 +583,9 @@ main( { "cache-options", 0, NULL, 'c'}, { "stay_on_fatal", 0, NULL, 'y'}, { "honor_guid2lid", 0, NULL, 'x'}, +#ifdef ENABLE_OSM_CONSOLE_SOCKET + { "console_port", 1, NULL, 'C'}, +#endif { NULL, 0, NULL, 0 } /* Required at the end of the array */ }; @@ -679,6 +687,12 @@ main( printf(" Enabling OpenSM interactive console\n"); break; +#ifdef ENABLE_OSM_CONSOLE_SOCKET + case 'C': + opt.console_port = strtol(optarg, NULL, 0); + break; +#endif + case 'd': dbg_lvl = strtol(optarg, NULL, 0); printf(" d level = 0x%x\n", dbg_lvl); @@ -931,15 +945,11 @@ main( } else { + osm_console_init(&opt, &osm); + /* Sit here forever - In the future, some sort of console interactivity could - be implemented in this loop. 
*/ - if (opt.console) { - printf("\nOpenSM Console\n\n"); - osm_console_prompt(); - } while( !osm_exit_flag ) { if (opt.console) osm_console(&osm); @@ -953,6 +963,7 @@ main( osm_opensm_sweep( &osm ); } } + osm_console_close_socket(&osm); } #if 0 diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c index a1a5eec..420acc2 100644 --- a/osm/opensm/osm_console.c +++ b/osm/opensm/osm_console.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -39,78 +39,91 @@ #include #include #include +#include +#include +#include +#ifdef ENABLE_OSM_CONSOLE_SOCKET +#include +#endif +#include +#include +#include +#include #include -#define OSM_COMMAND_LINE_LEN 120 -#define OSM_COMMAND_PROMPT "$ " - struct command { char *name; - void (*help_function)(void); - void (*parse_function)(char **p_last, osm_opensm_t *p_osm); + void (*help_function)(FILE *out, int detail); + void (*parse_function)(char **p_last, osm_opensm_t *p_osm, FILE *out); }; static const struct command console_cmds[]; static inline char *next_token(char **p_last) { - return strtok_r(NULL, " \t\n", p_last); + return strtok_r(NULL, " \t\n\r", p_last); } -static void help_command() +static void help_command(FILE *out, int detail) { int i; - printf("Supported commands and syntax:\n"); - printf("help []\n"); + fprintf(out, "Supported commands and syntax:\n"); + fprintf(out, "help []\n"); /* skip help command */ for (i = 1; console_cmds[i].name; i++) - console_cmds[i].help_function(); + console_cmds[i].help_function(out, 0); +} + +static void help_quit(FILE *out, int detail) +{ + fprintf(out, "quit\n"); } -static void help_loglevel() + +static void help_loglevel(FILE *out, int detail) { - printf("loglevel []\n"); + fprintf(out, "loglevel []\n"); } -static void help_priority() +static void 
help_priority(FILE *out, int detail) { - printf("priority []\n"); + fprintf(out, "priority []\n"); } /* more help routines go here */ -static void help_parse(char **p_last, osm_opensm_t *p_osm) +static void help_parse(char **p_last, osm_opensm_t *p_osm, FILE *out) { char *p_cmd; int i, found = 0; p_cmd = next_token(p_last); if (!p_cmd) - help_command(); + help_command(out, 0); else { for (i = 1; console_cmds[i].name; i++) { if (!strcmp(p_cmd, console_cmds[i].name)) { found = 1; - console_cmds[i].help_function(); + console_cmds[i].help_function(out, 1); break; } } if (!found) { - printf("Command %s not found\n\n", p_cmd); - help_command(); + fprintf(out, "%s : Command not found\n\n", p_cmd); + help_command(out, 0); } } } -static void loglevel_parse(char **p_last, osm_opensm_t *p_osm) +static void loglevel_parse(char **p_last, osm_opensm_t *p_osm, FILE *out) { char *p_cmd; int level; p_cmd = next_token(p_last); if (!p_cmd) - printf("Current log level is 0x%x\n", osm_log_get_level(&p_osm->log)); + fprintf(out, "Current log level is 0x%x\n", osm_log_get_level(&p_osm->log)); else { /* Handle x, 0x, and decimal specification of log level */ if (!strncmp(p_cmd, "x", 1)) { @@ -124,38 +137,57 @@ static void loglevel_parse(char **p_last level = strtol(p_cmd, NULL, 10); } if ((level >= 0) && (level < 256)) { - printf("Setting log level to 0x%x\n", level); + fprintf(out, "Setting log level to 0x%x\n", level); osm_log_set_level(&p_osm->log, level); } else - printf("Invalid log level 0x%x\n", level); + fprintf(out, "Invalid log level 0x%x\n", level); } } -static void priority_parse(char **p_last, osm_opensm_t *p_osm) +static void priority_parse(char **p_last, osm_opensm_t *p_osm, FILE *out) { char *p_cmd; int priority; p_cmd = next_token(p_last); if (!p_cmd) - printf("Current sm-priority is %d\n", p_osm->subn.opt.sm_priority); + fprintf(out, "Current sm-priority is %d\n", p_osm->subn.opt.sm_priority); else { priority = strtol(p_cmd, NULL, 0); if (0 > priority || 15 < priority) - 
printf("Invalid sm-priority %d; must be between 0 and 15\n", priority); + fprintf(out, "Invalid sm-priority %d; must be between 0 and 15\n", priority); else { - printf("Setting sm-priority to %d\n", priority); + fprintf(out, "Setting sm-priority to %d\n", priority); p_osm->subn.opt.sm_priority = (uint8_t)priority; /* Does the SM state machine need a kick now ? */ } } } +/* This is public to be able to close it on exit */ +void osm_console_close_socket(osm_opensm_t *p_osm) +{ + if (p_osm->console.socket > 0) { + close(p_osm->console.in_fd); + p_osm->console.in_fd = -1; + p_osm->console.out_fd = -1; + p_osm->console.in = NULL; + p_osm->console.out = NULL; + } +} + +static void quit_parse(char **p_last, osm_opensm_t *p_osm, FILE *out) +{ + osm_console_close_socket(p_osm); +} + + /* more parse routines go here */ static const struct command console_cmds[] = { { "help", &help_command, &help_parse}, + { "quit", &help_quit, &quit_parse}, { "loglevel", &help_loglevel, &loglevel_parse}, { "priority", &help_priority, &priority_parse}, { NULL, NULL, NULL} /* end of array */ @@ -165,60 +197,196 @@ static void parse_cmd_line(char *line, o { char *p_cmd, *p_last; int i, found = 0; + FILE *out = p_osm->console.out; + /* find first token which is the command */ - p_cmd = strtok_r(line, " \t\n", &p_last); + p_cmd = strtok_r(line, " \t\n\r", &p_last); if (p_cmd) { for (i = 0; console_cmds[i].name; i++) { if (!strcmp(p_cmd, console_cmds[i].name)) { found = 1; - console_cmds[i].parse_function(&p_last, p_osm); + console_cmds[i].parse_function(&p_last, p_osm, out); break; } } if (!found) { - printf("Command %s not found\n\n", p_cmd); - help_command(); + fprintf(out, "%s : Command not found\n\n", p_cmd); + help_command(out, 0); } } else { - printf("Error parsing command line: %s\n", line); + fprintf(out, "Error parsing command line: %s\n", line); return; } } -void osm_console_prompt(void) +void osm_console_prompt(FILE *out) { - printf("%s", OSM_COMMAND_PROMPT); - fflush(stdout); + if 
(out) { + fprintf(out, "OpenSM %s", OSM_COMMAND_PROMPT); + fflush(out); + } } +void osm_console_init(osm_subn_opt_t *opt, osm_opensm_t *p_osm) +{ + p_osm->console.socket = -1; + /* set up the file descriptors for the console */ + if (opt->console) { + p_osm->console.in = stdin; + p_osm->console.out = stdout; + p_osm->console.in_fd = fileno(stdin); + p_osm->console.out_fd = fileno(stdout); + + osm_console_prompt(p_osm->console.out); +#ifdef ENABLE_OSM_CONSOLE_SOCKET + } else { + struct sockaddr_in sin; + int optval = 1; + + if ((p_osm->console.socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) + { + osm_log(&(p_osm->log), OSM_LOG_ERROR, + "osm_console_init: Failed to open console socket : %s\n", + strerror(errno)); + return; + } + setsockopt(p_osm->console.socket, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); + sin.sin_family = AF_INET; + sin.sin_port = htons(opt->console_port); + sin.sin_addr.s_addr = htonl(INADDR_ANY); + if (bind(p_osm->console.socket, &sin, sizeof(sin)) < 0) + { + osm_log(&(p_osm->log), OSM_LOG_ERROR, + "osm_console_init: Failed to bind console socket : %s\n", + strerror(errno)); + return; + } + if (listen(p_osm->console.socket, 1) < 0) + { + osm_log(&(p_osm->log), OSM_LOG_ERROR, + "osm_console_init: Failed to listen on socket : %s\n", + strerror(errno)); + return; + } + + p_osm->console.in = NULL; + p_osm->console.out = NULL; + p_osm->console.in_fd = -1; + p_osm->console.out_fd = -1; + opt->console = 1; + osm_log(&(p_osm->log), OSM_LOG_INFO, + "osm_console_init: Console listening on port %d\n", opt->console_port); +#endif + } +} + +#ifdef ENABLE_OSM_CONSOLE_SOCKET +static void handle_osm_connection(osm_opensm_t *p_osm, int new_fd, + char *client_ip, char *client_hn) +{ + char *p_line; + size_t len; + ssize_t n; + + if (p_osm->console.in_fd >= 0) + { + FILE *file = fdopen(new_fd, "w+"); + fprintf(file, "OpenSM Console connection already in use\n" + " kill other session (y/n)? 
"); + fflush(file); + p_line = NULL; + n = getline(&p_line, &len, file); + if (n > 0 && (p_line[0] == 'y' || p_line[0] == 'Y')) + { + osm_console_close_socket(p_osm); + } else { + close(new_fd); + return; + } + } + p_osm->console.in_fd = new_fd; + p_osm->console.out_fd = p_osm->console.in_fd; + p_osm->console.in = fdopen(p_osm->console.in_fd, "w+"); + p_osm->console.out = p_osm->console.in; + osm_console_prompt(p_osm->console.out); + osm_log(&(p_osm->log), OSM_LOG_INFO, + "osm_console_init: Console connection accepted : %s (%s)\n", + client_hn, client_ip); +} + +static int connection_ok(char *client_ip, char *client_hn) +{ + return (hosts_ctl(OSM_DAEMON_NAME, client_hn, client_ip, "STRING_UNKNOWN")); +} +#endif + void osm_console(osm_opensm_t *p_osm) { - struct pollfd pollfd; - char *p_line; - size_t len; - ssize_t n; + struct pollfd pollfd[2]; + char *p_line; + size_t len; + ssize_t n; - pollfd.fd = 0; - pollfd.events = POLLIN; - pollfd.revents = 0; + pollfd[0].fd = p_osm->console.socket; + pollfd[0].events = POLLIN|POLLOUT; + pollfd[0].revents = 0; - if (poll(&pollfd, 1, 10000) <= 0) + pollfd[1].fd = p_osm->console.in_fd; + pollfd[1].events = POLLIN|POLLOUT; + pollfd[1].revents = 0; + + if (poll(pollfd, 2, 10000) <= 0) return; - if (pollfd.revents|POLLIN) { +#ifdef ENABLE_OSM_CONSOLE_SOCKET + if (pollfd[0].revents & POLLIN) { + int new_fd = 0; + struct sockaddr_in sin; + socklen_t len = sizeof(sin); + char client_ip[64]; + char client_hn[128]; + struct hostent *hent; + if ((new_fd = accept(p_osm->console.socket, &sin, &len)) < 0) { + osm_log(&(p_osm->log), OSM_LOG_ERROR, + "osm_console: Failed to accept console socket : %s\n", + strerror(errno)); + p_osm->console.in_fd = -1; + return; + } + if (inet_ntop(AF_INET, &sin.sin_addr, client_ip, sizeof(client_ip)) == NULL) { + snprintf(client_ip, 64, "STRING_UNKNOWN"); + } + if ((hent = gethostbyaddr((const char *) &sin.sin_addr, + sizeof(struct in_addr), AF_INET)) == NULL) { + snprintf(client_hn, 128, 
"STRING_UNKNOWN"); + } else { + snprintf(client_hn, 128, "%s", hent->h_name); + } + if (connection_ok(client_ip, client_hn)) { + handle_osm_connection(p_osm, new_fd, client_ip, client_hn); + } else { + osm_log(&(p_osm->log), OSM_LOG_ERROR, + "osm_console: Console connection denied : %s (%s)\n", + client_hn, client_ip); + close(new_fd); + } + return; + } +#endif + + if (pollfd[1].revents & POLLIN) { p_line = NULL; /* Get input line */ - n = getline(&p_line, &len, stdin); + n = getline(&p_line, &len, p_osm->console.in); if (n > 0) { /* Parse and act on input */ parse_cmd_line(p_line, p_osm); free(p_line); } else { - printf("Input error\n"); - fflush(stdin); + fprintf(p_osm->console.out, "Input error\n"); + fflush(p_osm->console.out); } - osm_console_prompt(); + osm_console_prompt(p_osm->console.out); } } - diff --git a/osm/opensm/osm_subnet.c b/osm/opensm/osm_subnet.c index aec4ff2..aff2130 100644 --- a/osm/opensm/osm_subnet.c +++ b/osm/opensm/osm_subnet.c @@ -63,6 +63,7 @@ #include #include #include +#include /********************************************************************** **********************************************************************/ @@ -444,6 +445,7 @@ osm_subn_set_default_opt( p_opt->sweep_interval = OSM_DEFAULT_SWEEP_INTERVAL_SECS; p_opt->max_wire_smps = OSM_DEFAULT_SMP_MAX_ON_WIRE; p_opt->console = FALSE; + p_opt->console_port = OSM_DEFAULT_CONSOLE_PORT; p_opt->transaction_timeout = OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; /* by default we will consider waiting for 50x transaction timeout normal */ p_opt->max_msg_fifo_timeout = 50*OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; From Steve.Greenwood at hp.com Mon Jan 8 04:54:26 2007 From: Steve.Greenwood at hp.com (Greenwood, Steve) Date: Mon, 8 Jan 2007 07:54:26 -0500 Subject: [openib-general] best way to get ibv_get_cq_event to return In-Reply-To: Message-ID: <892B927D8643E94287F16185036A571CF40923@tayexc19.americas.cpqcorp.net> Guys, Thanks for the information - I'll give it a try. 
SRG -----Original Message----- From: Roland Dreier [mailto:rdreier at cisco.com] Sent: Sunday, January 07, 2007 1:30 PM To: Dotan Barak Cc: Or Gerlitz; Greenwood, Steve; openib-general at openib.org Subject: Re: [openib-general] best way to get ibv_get_cq_event to return > This is true (and i guess that it will work), but if in the future the > implementation of the ibv_comp_channel will be changed, > this code will not work .... The use of a file descriptor is pretty fundamental, and it was done exactly to permit this sort of stuff (poll(), epoll, SIGIO, etc). So I think it is extremely unlikely to change in a way that would break an app using the file descriptor. - R. From halr at voltaire.com Mon Jan 8 05:47:48 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 08:47:48 -0500 Subject: [openib-general] [PATCH] OpenSM/osm_console.c: Add resweep and status commands Message-ID: <1168264067.4577.78353.camel@hal.voltaire.com> OpenSM/osm_console.c: Add resweep and status commands Signed-off-by: Ira Weiny Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c index 8d770aa..8157f90 100644 --- a/osm/opensm/osm_console.c +++ b/osm/opensm/osm_console.c @@ -84,6 +84,20 @@ static void help_quit(FILE *out, int det static void help_loglevel(FILE *out, int detail) { fprintf(out, "loglevel []\n"); + if (detail) { + fprintf(out, " log-level is OR'ed from the following\n"); + fprintf(out, " OSM_LOG_NONE 0x%02X\n", OSM_LOG_NONE); + fprintf(out, " OSM_LOG_ERROR 0x%02X\n", OSM_LOG_ERROR); + fprintf(out, " OSM_LOG_INFO 0x%02X\n", OSM_LOG_INFO); + fprintf(out, " OSM_LOG_VERBOSE 0x%02X\n", OSM_LOG_VERBOSE); + fprintf(out, " OSM_LOG_DEBUG 0x%02X\n", OSM_LOG_DEBUG); + fprintf(out, " OSM_LOG_FUNCS 0x%02X\n", OSM_LOG_FUNCS); + fprintf(out, " OSM_LOG_FRAMES 0x%02X\n", OSM_LOG_FRAMES); + fprintf(out, " OSM_LOG_ROUTING 0x%02X\n", OSM_LOG_ROUTING); + fprintf(out, " OSM_LOG_SYS 0x%02X\n", OSM_LOG_SYS); + fprintf(out, "\n"); + fprintf(out, " 
OSM_LOG_DEFAULT_LEVEL 0x%02X\n", OSM_LOG_DEFAULT_LEVEL); + } } static void help_priority(FILE *out, int detail) @@ -91,6 +105,16 @@ static void help_priority(FILE *out, int fprintf(out, "priority []\n"); } +static void help_resweep(FILE *out, int detail) +{ + fprintf(out, "resweep [heavy|light]\n"); +} + +static void help_status(FILE *out, int detail) +{ + fprintf(out, "status\n"); +} + /* more help routines go here */ static void help_parse(char **p_last, osm_opensm_t *p_osm, FILE *out) @@ -164,6 +188,99 @@ static void priority_parse(char **p_last } } +static char *sm_state_str(int state) +{ + switch (state) + { + case IB_SMINFO_STATE_INIT: + return ("Init"); + case IB_SMINFO_STATE_DISCOVERING: + return ("Discovering"); + case IB_SMINFO_STATE_STANDBY: + return ("Standby"); + case IB_SMINFO_STATE_NOTACTIVE: + return ("Not Active"); + case IB_SMINFO_STATE_MASTER: + return ("Master"); + } + return ("UNKNOWN"); +} + +static char *sa_state_str(osm_sa_state_t state) +{ + switch (state) + { + case OSM_SA_STATE_INIT: + return ("Init"); + case OSM_SA_STATE_READY: + return ("Ready"); + } + return ("UNKNOWN"); +} + +static void status_parse(char **p_last, osm_opensm_t *p_osm, FILE *out) +{ + fprintf(out, " SM State : %s\n", + sm_state_str(p_osm->subn.sm_state)); + fprintf(out, " SA State : %s\n", + sa_state_str(p_osm->sa.state)); + fprintf(out, " MAD stats\n" + " ---------\n" + " QP0 MADS outstanding : %d\n" + " QP0 MADS outstanding (on wire) : %d\n" + " QP0 MADS rcvd : %d\n" + " QP0 MADS sent : %d\n" + " QP0 unicasts sent : %d\n" + " QP1 MADS outstanding : %d\n" + " QP1 MADS rcvd : %d\n" + " QP1 MADS sent : %d\n" + , + p_osm->stats.qp0_mads_outstanding, + p_osm->stats.qp0_mads_outstanding_on_wire, + p_osm->stats.qp0_mads_rcvd, + p_osm->stats.qp0_mads_sent, + p_osm->stats.qp0_unicasts_sent, + p_osm->stats.qp1_mads_outstanding, + p_osm->stats.qp1_mads_rcvd, + p_osm->stats.qp1_mads_sent + ); + fprintf(out, " Subnet flags\n" + " ------------\n" + " Ignore existing lfts : %d\n" 
+ " Subnet Init errors : %d\n" + " In sweep hop 0 : %d\n" + " Moved to master state : %d\n" + " First time master sweep : %d\n" + " Coming out of standby : %d\n" + , + p_osm->subn.ignore_existing_lfts, + p_osm->subn.subnet_initialization_error, + p_osm->subn.in_sweep_hop_0, + p_osm->subn.moved_to_master_state, + p_osm->subn.first_time_master_sweep, + p_osm->subn.coming_out_of_standby + ); + fprintf(out, "\n"); +} + +static void resweep_parse(char **p_last, osm_opensm_t *p_osm, FILE *out) +{ + char *p_cmd; + + p_cmd = next_token(p_last); + if (!p_cmd || + (strcmp(p_cmd, "heavy") != 0 && + strcmp(p_cmd, "light") != 0)) { + fprintf(out, "Invalid resweep command\n"); + help_resweep(out, 1); + } else { + if (strcmp(p_cmd, "heavy") == 0) { + p_osm->subn.force_immediate_heavy_sweep = TRUE; + } + osm_opensm_sweep( p_osm ); + } +} + /* This is public to be able to close it on exit */ void osm_console_close_socket(osm_opensm_t *p_osm) { @@ -190,6 +307,8 @@ static const struct command console_cmds { "quit", &help_quit, &quit_parse}, { "loglevel", &help_loglevel, &loglevel_parse}, { "priority", &help_priority, &priority_parse}, + { "resweep", &help_resweep, &resweep_parse}, + { "status", &help_status, &status_parse}, { NULL, NULL, NULL} /* end of array */ }; From tziporet at dev.mellanox.co.il Mon Jan 8 05:53:14 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 08 Jan 2007 15:53:14 +0200 Subject: [openib-general] [openfabrics-ewg] OFED 1.2 Questions In-Reply-To: <20070107203358.GB23738@mellanox.co.il> References: <45802515.20605@mellanox.co.il> <1168195864.18185.8.camel@linux-q667.site> <20070107203358.GB23738@mellanox.co.il> Message-ID: <45A24CCA.4080106@dev.mellanox.co.il> Michael S. 
Tsirkin wrote: >> Tziporet, >> >> I'm in the process of adding the Chelsio T3 drivers to the OFED >> repository and I have a question: >> >> The HowTo kernel section you posted on the wiki sez to add the new files >> to the repos directly via a git commit, but create patches for >> modifications to existing files and put the patches in the >> kernel_patches/fixes directory. However, I don't see patches in that >> directory to modify the core Makefile/Kconfig for SDP or other new >> modules added for ofed. So should I just modify infiniband/Makefile and >> Kconfig via the git commit that adds the new Chelsio files, or create a >> patch file and put it in kernel_patches/fixes? >> > > Yes you can modify the Makefile/Kconfig directly. > Reason being, its always trivial to resolve conflicts there > when merging from upstream. > > After you check it's working, if you changed the general Makefiles/Kconfig, please send the patches to Vlad. >> Also, are there machines available with the various ofed supported >> distros installed that I can do compile testing for the Chelsio user >> lib? >> >> You can compile on the OFA server - but this has only Ubuntu OS. For testing in other OSes you should set up systems in your company. Tziporet From changquing.tang at hp.com Mon Jan 8 06:07:46 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Mon, 8 Jan 2007 08:07:46 -0600 Subject: [openib-general] using IB on a port without IPoIB running NIC In-Reply-To: <45A1F02E.2020604@voltaire.com> Message-ID: Or: Thank you for the information, I may change my mind to require IPoIB to run newer version of HP-MPI on OFED 1.2, if I don't find other way to easily establish IB connection dynamically between two process groups with dynamic size.
--CQ > -----Original Message----- > From: Or Gerlitz [mailto:ogerlitz at voltaire.com] > Sent: Monday, January 08, 2007 1:18 AM > To: Tang, Changqing > Cc: openib-general at openib.org > Subject: using IB on a port without IPoIB running NIC > > Tang, Changqing wrote: > > We understand that, but we hope to have a connect/accept style IB > > connection setup, without IPoIB involved, > > > like HP-UX IT-API(similar to uDAPL without underlying IP > support), it > > works with multiple cards. > > > Configure 4-5 IP addresses on a single node is kind of silly. > > CQ, > > Few more thoughts on your "being able to MPI on an IB PORT > without an IPoIB working NIC" requirement... > > Basically, people use IB for both IPC and I/O, where except > for SRP, all the IB I/O ULPs (both block based: iSER and file > based: Lustre, GPFS, > rNFS) use IP addressing and hence are either coded to the > RDMA CM or work on top of TCP/IP (iSCSI-TCP, NFS, pFS, etc). > > So if the user will not configure IPoIB on this IB port, it > will not be utilized for I/O. > > Now, you mention a use case of 4 cards on a node, I believe > that typically this would happen on big SMP machines where > you **must** use all the active IB links for I/O: eg when > most of your MPI work is within the SMP (128 to 512 ranks) > and most of the IB work is for I/O . > > I understand (please check and let me know eg about HP 1U > offering) that all/most nowadays 1U PCI-EX nodes can have at > most **one** PCI-EX card. > > Combing the above limitation with the fact that these nodes > would run at most 16 ranks (eg 8 dual-core CPUs) and that 8 > ranks/IB link is a ratio that makes sense, we are remained > with **2** and not 4-5 NICs to configure. > > Oh, and one more thing, 4 IB links per node would make an N > node cluster to 4N IB end-ports cluster for which you need > f(4N) switching IB ports, and the specific f(.) might turn > the IB deployment over this cluster into very expensive one... > > Or. 
> > From mst at mellanox.co.il Mon Jan 8 06:17:43 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 16:17:43 +0200 Subject: [openib-general] using IB on a port without IPoIB running NIC In-Reply-To: References: <45A1F02E.2020604@voltaire.com> Message-ID: <20070108141743.GD20047@mellanox.co.il> > Thank you for the information, I may change my mind to require > IPoIB to run newer version of HP-MPI on OFED 1.2, if I don't find other > way to easily establish IB connection dynamically between two process > groups with dynamic size. I'm not really sure what your needs are, but it's not like this is completely impossible. Some people use ad-hoc socket-based tricks to establish IB connections, and this will work for some topologies. You can look at libibverbs/examples for an example of such an implementation. -- MST From swise at opengridcomputing.com Mon Jan 8 06:55:20 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 08:55:20 -0600 Subject: [openib-general] [PATCH] rdma_cm iWARP connection setup timeouts reported as rejects. In-Reply-To: <45A22769.4040405@xiranet.com> References: <45A22769.4040405@xiranet.com> Message-ID: <1168268120.19412.8.camel@stevo-desktop> On Mon, 2007-01-08 at 12:13 +0100, Mirko Benz wrote: > Hi, > > What could be the reasons for these timeouts to occur? One way: If the host is not reachable but the next hop neighbour is, then the connection attempt will time out. Another way is if, for some reason, the MPA negotiation doesn't complete in a timely manner. For instance, if the passive side never rdma_accept()s the connection, then the active side should eventually time out the attempt and return a timeout error to the consumer. > How should an application handle this? > Applications should handle connection timeouts however they want. Usually they just report it to the user. Steve.
From swise at opengridcomputing.com Mon Jan 8 06:59:49 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 08:59:49 -0600 Subject: [openib-general] [PATCH] ofed_1_2 configure script typo Message-ID: <1168268389.19412.12.camel@stevo-desktop> Typo in OFED 1.2 configure script. From: Steve Wise Signed-off-by: Steve Wise --- ofed_scripts/configure | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/ofed_scripts/configure b/ofed_scripts/configure index 5a1694d..a0557e2 100755 --- a/ofed_scripts/configure +++ b/ofed_scripts/configure @@ -598,7 +598,7 @@ main() --with-vnic_debug-mod) CONFIG_INFINIBAND_VNIC_DEBUG="y" ;; - --without-vnic-mod) + --without-vnic_debug-mod) CONFIG_INFINIBAND_VNIC_DEBUG= ;; --with-vnic_stats-mod) From changquing.tang at hp.com Mon Jan 8 07:10:51 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Mon, 8 Jan 2007 09:10:51 -0600 Subject: [openib-general] using IB on a port without IPoIB running NIC In-Reply-To: <20070108141743.GD20047@mellanox.co.il> Message-ID: What I need is that, without IPoIB, how do I wire IB connection ? Currently with Verbs API, it is an alltoall QP number exchange. I want to remove the alltoall QP number exchange in MPI dynamic process. --CQ > -----Original Message----- > From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] > Sent: Monday, January 08, 2007 8:18 AM > To: Tang, Changqing > Cc: Or Gerlitz; openib-general at openib.org > Subject: Re: using IB on a port without IPoIB running NIC > > > Thank you for the information, I may change my mind to > require IPoIB > > to run newer version of HP-MPI on OFED 1.2, if I don't find > other way > > to easily establish IB connection dynamically between two process > > groups with dynamic size. > > I'm not really sure what your needs are, but it's not like > this is completely impossible. > > Some people use ad-hoc socket-based tricks establish IB > connections, and this will work for some topologies. 
You can > look at libibverbs/examples for an example of such implementation. > > -- > MST > From halr at voltaire.com Mon Jan 8 07:34:12 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 10:34:12 -0500 Subject: [openib-general] [PATCH TRIVIAL] opensm: eliminate some local variable In-Reply-To: <20070107203819.GE18379@sashak.voltaire.com> References: <20070107203819.GE18379@sashak.voltaire.com> Message-ID: <1168270442.4577.84382.camel@hal.voltaire.com> On Sun, 2007-01-07 at 15:38, Sasha Khapyorsky wrote: > This trivially eliminates some local variable. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From vlad at mellanox.co.il Mon Jan 8 07:28:01 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Mon, 08 Jan 2007 17:28:01 +0200 Subject: [openib-general] [PATCH] ofed_1_2 configure script typo In-Reply-To: <1168268389.19412.12.camel@stevo-desktop> References: <1168268389.19412.12.camel@stevo-desktop> Message-ID: <1168270081.11676.22.camel@swlab50.yok.mtl.com> Applied, Regards, Vladimir On Mon, 2007-01-08 at 08:59 -0600, Steve Wise wrote: > Typo in OFED 1.2 configure script. > > From: Steve Wise > > Signed-off-by: Steve Wise > --- > > ofed_scripts/configure | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > diff --git a/ofed_scripts/configure b/ofed_scripts/configure > index 5a1694d..a0557e2 100755 > --- a/ofed_scripts/configure > +++ b/ofed_scripts/configure > @@ -598,7 +598,7 @@ main() > --with-vnic_debug-mod) > CONFIG_INFINIBAND_VNIC_DEBUG="y" > ;; > - --without-vnic-mod) > + --without-vnic_debug-mod) > CONFIG_INFINIBAND_VNIC_DEBUG= > ;; > --with-vnic_stats-mod) > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From mst at mellanox.co.il Mon Jan 8 08:25:45 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 8 Jan 2007 18:25:45 +0200 Subject: [openib-general] using IB on a port without IPoIB running NIC In-Reply-To: References: <20070108141743.GD20047@mellanox.co.il> Message-ID: <20070108162545.GH20047@mellanox.co.il> > What I need is that, without IPoIB, how do I wire IB connection ? > Currently with Verbs API, it is an alltoall QP number exchange. I want > to remove the alltoall > QP number exchange in MPI dynamic process. Well, does your MPI implementation currently use librdmacm? If not, you don't currently have a dependency on IPoIB and probably have no reason to introduce one. -- MST From changquing.tang at hp.com Mon Jan 8 08:43:01 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Mon, 8 Jan 2007 10:43:01 -0600 Subject: [openib-general] using IB on a port without IPoIB running NIC In-Reply-To: <20070108162545.GH20047@mellanox.co.il> Message-ID: > > What I need is that, without IPoIB, how do I wire IB connection ? > > Currently with Verbs API, it is an alltoall QP number > exchange. I want > > to remove the alltoall QP number exchange in MPI dynamic process. > > Well, does your MPI implementation currently use librdmacm? No, we use neither librdmacm nor libibcm. > If not, you don't currently have a dependency on IPoIB and > probably have no reason to introduce one. As I said, the problem is the alltoall QP number exchange. I hope that a process need only provide one piece of information (such as ip/port in TCP/IP) so that all other processes have the same piece of info and can make a connection to it. --CQ > > -- > MST > From mst at mellanox.co.il Mon Jan 8 08:47:35 2007 From: mst at mellanox.co.il (Michael S.
> > As I said, the problem is the alltoall QP number exchange. I hope that a > process can only provide one piece of information(such as ip/port in > TCP/IP) so that all other processes have the same piece of info and can > make connection to it. Well, start with a socket; each time a process connects, create a QP on both sides and exchange the 2 QP numbers? -- MST From swise at opengridcomputing.com Mon Jan 8 08:54:20 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 10:54:20 -0600 Subject: [openib-general] OFED 1.2, iw_cxgb3, and genalloc() Message-ID: <1168275260.19412.34.camel@stevo-desktop> I've packaged the Chelsio T3 Drivers (modules iw_cxgb3 and cxgb3) into Vlad's ofed_1_2 repos and I'm testing now. I've run into an issue with the Chelsio driver. It requires the kernel genalloc() allocator, which is only built into the kernel if any code requires it at config time of the kernel. Also, it was new to 2.6.17 or 2.6.18 so it won't exist for older OFED distros like SLES. So there are two related issues: 1) the genalloc services don't exist in older kernels. 2) Even if it does exist in the kernel src tree on a distro, it might not have been built in if nothing required that service when the kernel was configured. I need to handle both cases. I'm seeking advice on how to pull this functionality in for ofed 1.2. My initial thought is to add a patch similar to the memtrack patch and add it either as a module or as part of the iw_cxgb3 module. I could even rename the services and always add them so I can avoid having to detect if it's in the running kernel. Any ideas/comments? Thanks, Steve. From mst at mellanox.co.il Mon Jan 8 08:57:14 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 18:57:14 +0200 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support Message-ID: <20070108165714.GM20047@mellanox.co.il> The following patch adds experimental support for IPoIB connected mode.
The idea is to increase performance by increasing the MTU from the maximum of 2K (theoretically 4K) supported by IPoIB on top of UD. With this code, I'm able to get 800MByte/sec or more with netperf without options on a Mellanox 4x back-to-back DDR system. Signed-off-by: Michael S. Tsirkin --- Sorry about the churn, just fixed a bug in this code. Changes from PATCHv3: - Fix TX ring full recovery - Whitespace fix Roland, please review, and put in -mm. This applies on top of the following patch: IB/verbs: return qp pointer as part of ib_wc qp pointer is used on receive side to detect stale passive side connections Changes from PATCHv2: - Using path MTU discovery, multicast and UDP traffic to UD mode now work, only a small number of packets is dropped. - Use timer to clean up stale RX connections - Make CM use same CQ IPoIB uses for UD (good for mixed UD/CM traffic and for NAPI if we ever enable it) - Tone down warning messages - only some packets are now dropped in CM/UD setup I still kept the sysfs flag to enable/disable CM - is this still a good idea, or is it better to go back to only looking at the device MTU now that multicast works? CM support is also still labeled as experimental, and is set to disabled by default, although it's been very stable for me, and the code is complete as far as I'm concerned. Would it be easier to merge it this way in the future? Note that the connected mode support adds very little overhead when not activated at run time, and zero data-path overhead when not activated at compile time. Here's a short description of what the patch does: a. The code's here: git://staging.openfabrics.org/~mst/linux-2.6/.git ipoib_cm_branch This is based on 2.6.19, so ~>git diff v2.6.19..ipoib_cm_branch will show what I have done so far. b. 
How to activate: Server: #modprobe ib_ipoib #echo connected > /sys/class/net/ib0/mode #/sbin/ifconfig ib0 mtu 65520 #./netperf-2.4.2/src/netserver Client: #modprobe ib_ipoib #echo connected > /sys/class/net/ib0/mode #/sbin/ifconfig ib0 mtu 65520 #./netperf-2.4.2/src/netperf -H 11.4.3.68 -f M TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.4.3.68 (11.4.3.68) port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. MBytes/sec 87380 16384 16384 10.01 891.21 c. TODO list (Optional) Send side S/G support d. Limitations With MTU > 2044, UDP multicast and UDP connections to IPoIB UD mode currently will drop some packets since we sometimes get packets that are too large to send over a UD QP. Typically a single packet will be dropped each several minutes until path MTU discovery kicks in and lowers the path MTU to this destination. e. Some notes on code 1. SRQ is used for scalability to large cluster sizes 2. Only RC connections are used (UC does not support SRQ now) 3. Retry count is set to 0 since spec draft warns against retries 4. Each connection is used for data transfers in only 1 direction, so each connection is either active(TX) or passive (RX). 2 sides that want to communicate create 2 connections. 5. Each active (TX) connection has a separate CQ for send completions - this keeps the code simple without CQ resize and other tricks diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index c75322d..0ffca11 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -8,6 +8,20 @@ config INFINIBAND_IPOIB See Documentation/infiniband/ipoib.txt for more information +config INFINIBAND_IPOIB_CM + bool "IP-over-InfiniBand Connected Mode support" + depends on INFINIBAND_IPOIB && EXPERIMENTAL + default n + ---help--- + This option enables experimental support for IPoIB connected mode. 
+ After enabling this option, you need to switch to connected mode through + /sys/class/net/ibXXX/mode to actually create connections, and then increase + the interface MTU with e.g. ifconfig ib0 mtu 65520. + + WARNING: Enabling connected mode will trigger some + packet drops for multicast and UD mode traffic from this interface, + unless you limit mtu for these destinations to 2044. + config INFINIBAND_IPOIB_DEBUG bool "IP-over-InfiniBand debugging" if EMBEDDED depends on INFINIBAND_IPOIB diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile index 8935e74..98ee38e 100644 --- a/drivers/infiniband/ulp/ipoib/Makefile +++ b/drivers/infiniband/ulp/ipoib/Makefile @@ -5,5 +5,6 @@ ib_ipoib-y := ipoib_main.o \ ipoib_multicast.o \ ipoib_verbs.o \ ipoib_vlan.o +ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 07deee8..7bac722 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -62,6 +62,10 @@ enum { IPOIB_ENCAP_LEN = 4, + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ + IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, + IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, + IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, IPOIB_RX_RING_SIZE = 128, IPOIB_TX_RING_SIZE = 64, IPOIB_MAX_QUEUE_SIZE = 8192, @@ -81,6 +85,8 @@ enum { IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, IPOIB_MCAST_STARTED = 8, + IPOIB_FLAG_NETIF_STOPPED = 9, + IPOIB_FLAG_ADMIN_CM = 10, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -90,6 +96,14 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, }; + +#define IPOIB_OP_RECV (1ul << 31) +#ifdef CONFIG_INFINIBAND_IPOIB_CM +#define IPOIB_CM_OP_SRQ (1ul << 30) +#else +#define IPOIB_CM_OP_SRQ (0) +#endif + /* structs */ struct ipoib_header { @@ -113,6 +127,61 @@ struct ipoib_tx_buf { u64 mapping; }; +#ifdef 
CONFIG_INFINIBAND_IPOIB_CM +struct ib_cm_id; + +struct ipoib_cm_data { + __be32 qpn; /* High byte MUST be ignored on receive */ + __be32 mtu; +}; + +struct ipoib_cm_rx { + struct ib_cm_id *id; + struct ib_qp *qp; + struct list_head list; + struct net_device *dev; + unsigned long jiffies; +}; + +struct ipoib_cm_tx { + struct ib_cm_id *id; + struct ib_cq *cq; + struct ib_qp *qp; + struct list_head list; + struct net_device *dev; + struct ipoib_neigh *neigh; + struct ipoib_path *path; + struct ipoib_tx_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + unsigned long flags; + u32 mtu; + struct ib_wc ibwc[IPOIB_NUM_WC]; +}; + +struct ipoib_cm_rx_buf { + struct sk_buff *skb; + u64 mapping[IPOIB_CM_RX_SG]; +}; + +struct ipoib_cm_dev_priv { + struct ib_srq *srq; + struct ipoib_cm_rx_buf *srq_ring; + struct ib_cm_id *id; + struct list_head passive_ids; + struct work_struct start_task; + struct work_struct reap_task; + struct work_struct skb_task; + struct delayed_work stale_task; + struct sk_buff_head skb_queue; + struct list_head start_list; + struct list_head reap_list; + struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_sge rx_sge[IPOIB_CM_RX_SG]; + struct ib_recv_wr rx_wr; +}; + +#endif /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). 
@@ -179,6 +248,10 @@ struct ipoib_dev_priv { struct list_head child_intfs; struct list_head list; +#ifdef CONFIG_INFINIBAND_IPOIB_CM + struct ipoib_cm_dev_priv cm; +#endif + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct list_head fs_list; struct dentry *mcg_dentry; @@ -212,6 +285,9 @@ struct ipoib_path { struct ipoib_neigh { struct ipoib_ah *ah; +#ifdef CONFIG_INFINIBAND_IPOIB_CM + struct ipoib_cm_tx *cm; +#endif union ib_gid dgid; struct sk_buff_head queue; @@ -315,6 +391,145 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); +#ifdef CONFIG_INFINIBAND_IPOIB_CM + +#define IPOIB_FLAGS_RC 0x80 +#define IPOIB_FLAGS_UC 0x40 + +#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC | IPOIB_FLAGS_UC)) + +static inline int ipoib_cm_admin_enabled(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return IPOIB_CM_SUPPORTED(dev->dev_addr) && + test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); +} + +static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return IPOIB_CM_SUPPORTED(n->ha) && + test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); +} + +static inline int ipoib_cm_up(struct ipoib_neigh *neigh) + +{ + return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags); +} + +static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) +{ + return neigh->cm; +} + +static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) +{ + neigh->cm = tx; +} + +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); +int ipoib_cm_dev_open(struct net_device *dev); +void ipoib_cm_dev_stop(struct net_device *dev); +int ipoib_cm_dev_init(struct net_device *dev); +int ipoib_cm_add_mode_attr(struct net_device *dev); +void ipoib_cm_dev_cleanup(struct net_device *dev); +struct ipoib_cm_tx *ipoib_cm_create_tx(struct 
net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh); +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx); +void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu); +void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); +#else + +struct ipoib_cm_tx; + +static inline int ipoib_cm_admin_enabled(struct net_device *dev) +{ + return 0; +} +static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) + +{ + return 0; +} + +static inline int ipoib_cm_up(struct ipoib_neigh *neigh) + +{ + return 0; +} + +static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) +{ + return NULL; +} + +static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) +{ +} + +static inline +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) +{ + return; +} + +static inline +int ipoib_cm_dev_open(struct net_device *dev) +{ + return 0; +} + +static inline +void ipoib_cm_dev_stop(struct net_device *dev) +{ + return; +} + +static inline +int ipoib_cm_dev_init(struct net_device *dev) +{ + return -ENOSYS; +} + +static inline +void ipoib_cm_dev_cleanup(struct net_device *dev) +{ + return; +} + +static inline +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh) +{ + return NULL; +} + +static inline +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) +{ + return; +} + +static inline +int ipoib_cm_add_mode_attr(struct net_device *dev) +{ + return 0; +} + +static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu) +{ + dev_kfree_skb_any(skb); +} + +static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +{ +} + +#endif + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG void ipoib_create_debug_files(struct net_device *dev); void ipoib_delete_debug_files(struct net_device *dev); @@ -392,4 +607,6 @@ extern int ipoib_debug_level; 
#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw) +#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c new file mode 100644 index 0000000..be55413 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -0,0 +1,1224 @@ +/* + * Copyright (c) 2006 Mellanox Technologies. All rights reserved + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include +#include +#include +#include + +#ifdef CONFIG_IPV6 +#include +#endif + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA +static int data_debug_level; + +module_param_named(cm_data_debug_level, data_debug_level, int, 0644); +MODULE_PARM_DESC(cm_data_debug_level, + "Enable data path debug tracing for connected mode if > 0"); +#endif + +#include "ipoib.h" + +#define IPOIB_CM_IETF_ID 0x1000000000000000ULL + +#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) +#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ) +#define IPOIB_CM_RX_DELAY (3 * 256 * HZ) +#define IPOIB_CM_RX_UPDATE_MASK (0x3) + +struct ipoib_cm_id { + struct ib_cm_id *id; + int flags; + u32 remote_qpn; + u32 remote_mtu; +}; + +static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event); + +static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_CM_RX_SG]) +{ + int i; + + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); + + for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i) + ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); +} + +static int ipoib_cm_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int i, ret; + + priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ; + + for (i = 0; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; + + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[id].skb); + priv->cm.srq_ring[id].skb = NULL; + } + + return ret; +} + +static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, + u64 mapping[IPOIB_CM_RX_SG]) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + int i; + + skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12); + if 
(unlikely(!skb)) + return -ENOMEM; + + /* + * IPoIB adds a 4 byte header. So we need 12 more bytes to align the + * IP header to a multiple of 16. + */ + skb_reserve(skb, 12); + + mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { + dev_kfree_skb_any(skb); + return -EIO; + } + + for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) { + struct page *page = alloc_page(GFP_ATOMIC); + + if (!page) + goto partial_error; + skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); + + mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page, + 0, PAGE_SIZE, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) + goto partial_error; + } + + priv->cm.srq_ring[id].skb = skb; + return 0; + +partial_error: + + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); + + for (; i >= 0; --i) + ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); + + kfree_skb(skb); + return -ENOMEM; +} + +static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, + struct ipoib_cm_rx *p) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_init_attr attr = { + .send_cq = priv->cq, /* does not matter, we never send anything */ + .recv_cq = priv->cq, + .srq = priv->cm.srq, + .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */ + .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ + .sq_sig_type = IB_SIGNAL_ALL_WR, + .qp_type = IB_QPT_RC, + .qp_context = p, + }; + return ib_create_qp(priv->pd, &attr); +} + +static int ipoib_cm_modify_rx_rts(struct net_device *dev, + struct ib_cm_id *cm_id, struct ib_qp *qp) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret); + return ret; + } + 
ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret); + return ret; + } + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); + return ret; + } + qp_attr.rq_psn = 0 /* FIXME */; + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); + return ret; + } + return 0; +} + +static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, + struct ib_qp *qp, struct ib_cm_req_event_param *req) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_data data = {}; + struct ib_cm_rep_param rep = {}; + + data.qpn = cpu_to_be32(priv->qp->qp_num); + data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); + + rep.private_data = &data; + rep.private_data_len = sizeof data; + rep.flow_control = 0; + rep.rnr_retry_count = req->rnr_retry_count; + rep.target_ack_delay = 20; /* FIXME */ + rep.srq = 1; + rep.qp_num = qp->qp_num; + rep.starting_psn = 0 /* FIXME */; + return ib_send_cm_rep(cm_id, &rep); +} + +static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct net_device *dev = cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx *p; + unsigned long flags; + int ret; + + ipoib_dbg(priv, "REQ arrived\n"); + p = kzalloc(sizeof *p, GFP_KERNEL); + if (!p) + return -ENOMEM; + p->dev = dev; + p->id = cm_id; + p->qp = ipoib_cm_create_rx_qp(dev, p); + if (IS_ERR(p->qp)) { + ret = PTR_ERR(p->qp); + goto err_qp; + } + + ret = ipoib_cm_modify_rx_rts(dev, cm_id, p->qp); + if (ret) + goto err_modify; + + ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd); + if (ret) { + ipoib_warn(priv, "failed to send REP: %d\n", ret); + goto err_rep; + } + + cm_id->context = p; + p->jiffies = jiffies; + spin_lock_irqsave(&priv->lock, flags); + 
list_add(&p->list, &priv->cm.passive_ids); + spin_unlock_irqrestore(&priv->lock, flags); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + return 0; + +err_rep: +err_modify: + ib_destroy_qp(p->qp); +err_qp: + kfree(p); + return ret; +} + +static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ipoib_cm_rx *p; + struct ipoib_dev_priv *priv; + unsigned long flags; + int ret; + + switch (event->event) { + case IB_CM_REQ_RECEIVED: + return ipoib_cm_req_handler(cm_id, event); + case IB_CM_DREQ_RECEIVED: + p = cm_id->context; + ib_send_cm_drep(cm_id, NULL, 0); + /* Fall through */ + case IB_CM_REJ_RECEIVED: + p = cm_id->context; + priv = netdev_priv(p->dev); + spin_lock_irqsave(&priv->lock, flags); + if (list_empty(&p->list)) + ret = 0; /* Connection is going away already. */ + else { + list_del_init(&p->list); + ret = -ECONNRESET; + } + spin_unlock_irqrestore(&priv->lock, flags); + if (ret) { + ib_destroy_qp(p->qp); + kfree(p); + return ret; + } + return 0; + default: + return 0; + } +} +/* Adjust length of skb with fragments to match received data */ +static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, + unsigned int length) +{ + int i, num_frags; + unsigned int size; + + /* put header into skb */ + size = min(length, hdr_space); + skb->tail += size; + skb->len += size; + length -= size; + + num_frags = skb_shinfo(skb)->nr_frags; + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (length == 0) { + /* don't need this page */ + __free_page(frag->page); + --skb_shinfo(skb)->nr_frags; + } else { + size = min(length, (unsigned) PAGE_SIZE); + + frag->size = size; + skb->data_len += size; + skb->truesize += size; + skb->len += size; + length -= size; + } + } +} + +void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ; + struct 
sk_buff *skb; + struct ipoib_cm_rx *p; + unsigned long flags; + u64 mapping[IPOIB_CM_RX_SG]; + + ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (unlikely(wr_id >= ipoib_recvq_size)) { + ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", + wr_id, ipoib_recvq_size); + return; + } + + skb = priv->cm.srq_ring[wr_id].skb; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + ipoib_dbg(priv, "cm recv error " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + ++priv->stats.rx_dropped; + goto repost; + } + + if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) { + p = wc->qp->qp_context; + if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { + spin_lock_irqsave(&priv->lock, flags); + p->jiffies = jiffies; + /* Move this entry to list head, but do + * not re-add it if it has been removed. */ + if (!list_empty(&p->list)) + list_move(&p->list, &priv->cm.passive_ids); + spin_unlock_irqrestore(&priv->lock, flags); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + } + } + + if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) { + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. 
+ */ + ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); + ++priv->stats.rx_dropped; + goto repost; + } + + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping); + memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping); + + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", + wc->byte_len, wc->slid); + + skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len); + + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + skb->mac.raw = skb->data; + skb_pull(skb, IPOIB_ENCAP_LEN); + + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; + + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_rx_ni(skb); + +repost: + if (unlikely(ipoib_cm_post_receive(dev, wr_id))) + ipoib_warn(priv, "ipoib_cm_post_receive failed " + "for buf %d\n", wr_id); +} + +static inline int post_send(struct ipoib_dev_priv *priv, + struct ipoib_cm_tx *tx, + unsigned int wr_id, + u64 addr, int len) +{ + struct ib_send_wr *bad_wr; + + priv->tx_sge.addr = addr; + priv->tx_sge.length = len; + + priv->tx_wr.wr_id = wr_id; + + return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); +} + +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_tx_buf *tx_req; + u64 addr; + + if (unlikely(skb->len > tx->mtu)) { + ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", + skb->len, tx->mtu); + ++priv->stats.tx_dropped; + ++priv->stats.tx_errors; + ipoib_cm_skb_too_long(dev, skb, tx->mtu - INFINIBAND_ALEN); + return; + } + + ipoib_dbg_data(priv, "sending packet %p, head %d length=%d connection=%p\n", + skb, tx->tx_head, skb->len, tx); + + /* + * We put the skb into the tx_ring _before_ we call post_send() + * because it's entirely possible that the completion handler will + * run before we execute anything after the post_send(). 
That + * means we have to make sure everything is properly recorded and + * our state is consistent before we call post_send(). + */ + tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; + tx_req->skb = skb; + addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { + ++priv->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + + tx_req->mapping = addr; + + if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), + addr, skb->len))) { + ipoib_warn(priv, "post_send failed\n"); + ++priv->stats.tx_errors; + ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + } else { + dev->trans_start = jiffies; + ++tx->tx_head; + + if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) { + ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); + netif_stop_queue(dev); + set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); + } + } +} + +static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx, + struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id; + struct ipoib_tx_buf *tx_req; + unsigned long flags; + + ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (unlikely(wr_id >= ipoib_sendq_size)) { + ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", + wr_id, ipoib_sendq_size); + return; + } + + tx_req = &tx->tx_ring[wr_id]; + + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); + + /* FIXME: is this right? Shouldn't we only increment on success? 
*/ + ++priv->stats.tx_packets; + priv->stats.tx_bytes += tx_req->skb->len; + + dev_kfree_skb_any(tx_req->skb); + + spin_lock_irqsave(&priv->tx_lock, flags); + ++tx->tx_tail; + if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) && + tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) { + clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); + netif_wake_queue(dev); + } + + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) { + struct ipoib_neigh *neigh; + + ipoib_dbg(priv, "failed cm send event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + + spin_lock(&priv->lock); + neigh = tx->neigh; + + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + + tx->neigh = NULL; + } + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + } + + clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); + + spin_unlock(&priv->lock); + } + + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr) +{ + struct ipoib_cm_tx *tx = tx_ptr; + int n, i; + + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + do { + n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc); + for (i = 0; i < n; ++i) + ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i); + } while (n == IPOIB_NUM_WC); +} + +int ipoib_cm_dev_open(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int ret; + + if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) + return 0; + + priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); + if (IS_ERR(priv->cm.id)) { + printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); + return IS_ERR(priv->cm.id); + } + + ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), + 0, NULL); + if (ret) { + printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", 
priv->ca->name, + IPOIB_CM_IETF_ID | priv->qp->qp_num); + ib_destroy_cm_id(priv->cm.id); + return ret; + } + return 0; +} + +void ipoib_cm_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx *p; + unsigned long flags; + + if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) + return; + + ib_destroy_cm_id(priv->cm.id); + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.passive_ids)) { + p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); + list_del_init(&p->list); + spin_unlock_irqrestore(&priv->lock, flags); + ib_destroy_cm_id(p->id); + ib_destroy_qp(p->qp); + kfree(p); + spin_lock_irqsave(&priv->lock, flags); + } + spin_unlock_irqrestore(&priv->lock, flags); + + cancel_delayed_work(&priv->cm.stale_task); +} + +static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct ipoib_cm_tx *p = cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_cm_data *data = event->private_data; + struct sk_buff_head skqueue; + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + struct sk_buff *skb; + unsigned long flags; + + p->mtu = be32_to_cpu(data->mtu); + + if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) { + ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n", + p->mtu, priv->dev->mtu); + return -EINVAL; + } + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); + return ret; + } + + qp_attr.rq_psn = 0 /* FIXME */; + ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); + return ret; + } + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); + return ret; + } + ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); + if (ret) { + 
ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); + return ret; + } + + skb_queue_head_init(&skqueue); + + spin_lock_irqsave(&priv->lock, flags); + set_bit(IPOIB_FLAG_OPER_UP, &p->flags); + if (p->neigh) + while ((skb = __skb_dequeue(&p->neigh->queue))) + __skb_queue_tail(&skqueue, skb); + spin_unlock_irqrestore(&priv->lock, flags); + + while ((skb = __skb_dequeue(&skqueue))) { + skb->dev = p->dev; + if (dev_queue_xmit(skb)) + ipoib_warn(priv, "dev_queue_xmit failed " + "to requeue packet\n"); + } + + ret = ib_send_cm_rtu(cm_id, NULL, 0); + if (ret) { + ipoib_warn(priv, "failed to send RTU: %d\n", ret); + return ret; + } + return 0; +} + +static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_init_attr attr = {}; + attr.recv_cq = priv->cq; + attr.srq = priv->cm.srq; + attr.cap.max_send_wr = ipoib_sendq_size; + attr.cap.max_send_sge = 1; + attr.sq_sig_type = IB_SIGNAL_ALL_WR; + attr.qp_type = IB_QPT_RC; + attr.send_cq = cq; + return ib_create_qp(priv->pd, &attr); +} + +static int ipoib_cm_send_req(struct net_device *dev, + struct ib_cm_id *id, struct ib_qp *qp, + u32 qpn, + struct ib_sa_path_rec *pathrec) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_data data = {}; + struct ib_cm_req_param req = {}; + + data.qpn = cpu_to_be32(priv->qp->qp_num); + data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); + + req.primary_path = pathrec; + req.alternate_path = NULL; + req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); + req.qp_num = qp->qp_num; + req.qp_type = qp->qp_type; + req.private_data = &data; + req.private_data_len = sizeof data; + req.flow_control = 0; + + req.starting_psn = 0; /* FIXME */ + + /* + * Pick some arbitrary defaults here; we could make these + * module parameters if anyone cared about setting them. 
+ */ + req.responder_resources = 4; + req.remote_cm_response_timeout = 20; + req.local_cm_response_timeout = 20; + req.retry_count = 0; /* RFC draft warns against retries */ + req.rnr_retry_count = 0; /* RFC draft warns against retries */ + req.max_cm_retries = 15; + req.srq = 15; + return ib_send_cm_req(id, &req); +} + +static int ipoib_cm_modify_tx_init(struct net_device *dev, + struct ib_cm_id *cm_id, struct ib_qp *qp) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index); + if (ret) { + ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret); + return ret; + } + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr.port_num = priv->port; + qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; + + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret); + return ret; + } + return 0; +} + +static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, + struct ib_sa_path_rec *pathrec) +{ + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + int ret; + + ipoib_dbg(priv, "Request connection %p for gid " IPOIB_GID_FMT " qpn 0x%x\n", + p, IPOIB_GID_ARG(pathrec->dgid), qpn); + + p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, + GFP_KERNEL); + if (!p->tx_ring) { + ipoib_warn(priv, "failed to allocate tx ring\n"); + ret = -ENOMEM; + goto err_tx; + } + + p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p, + ipoib_sendq_size + 1); + if (IS_ERR(p->cq)) { + ret = PTR_ERR(p->cq); + ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret); + goto err_cq; + } + + ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP); + if (ret) { + ipoib_warn(priv, "failed to request completion notification: %d\n", ret); + goto err_req_notify; + } + + p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq); + if 
(IS_ERR(p->qp)) { + ret = PTR_ERR(p->qp); + ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); + goto err_qp; + } + + p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p); + if (IS_ERR(p->id)) { + ret = PTR_ERR(p->id); + ipoib_warn(priv, "failed to create tx cm id: %d\n", ret); + goto err_id; + } + + ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp); + if (ret) { + ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret); + goto err_modify; + } + + ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec); + if (ret) { + ipoib_warn(priv, "failed to send cm req: %d\n", ret); + goto err_send_cm; + } + return 0; + +err_send_cm: +err_modify: + ib_destroy_cm_id(p->id); +err_id: + p->id = NULL; + ib_destroy_qp(p->qp); +err_req_notify: +err_qp: + p->qp = NULL; + ib_destroy_cq(p->cq); +err_cq: + p->cq = NULL; +err_tx: + return ret; +} + +static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) +{ + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_tx_buf *tx_req; + + ipoib_dbg(priv, "Destroy active connection %p. 
head 0x%x tail 0x%x\n", + p, p->tx_head, p->tx_tail); + + if (p->id) + ib_destroy_cm_id(p->id); + + if (p->qp) + ib_destroy_qp(p->qp); + + if (p->cq) + ib_destroy_cq(p->cq); + + if (p->tx_ring) { + while ((int) p->tx_tail - (int) p->tx_head < 0) { + tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, + DMA_TO_DEVICE); + dev_kfree_skb_any(tx_req->skb); + ++p->tx_tail; + } + + kfree(p->tx_ring); + } + + kfree(p); +} + +static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ipoib_cm_tx *tx = cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + struct net_device *dev = priv->dev; + struct ipoib_neigh *neigh; + unsigned long flags; + int ret; + + switch (event->event) { + case IB_CM_DREQ_RECEIVED: + ipoib_dbg(priv, "DREQ received.\n"); + ib_send_cm_drep(cm_id, NULL, 0); + break; + case IB_CM_REP_RECEIVED: + ipoib_dbg(priv, "REP received.\n"); + ret = ipoib_cm_rep_handler(cm_id, event); + if (ret) + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + break; + case IB_CM_REQ_ERROR: + case IB_CM_REJ_RECEIVED: + case IB_CM_TIMEWAIT_EXIT: + ipoib_dbg(priv, "CM error %d.\n", event->event); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + neigh = tx->neigh; + + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + + tx->neigh = NULL; + } + + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + } + + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + break; + default: + break; + } + + return 0; +} + +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_tx 
*tx; + + tx = kzalloc(sizeof *tx, GFP_ATOMIC); + if (!tx) + return NULL; + + neigh->cm = tx; + tx->neigh = neigh; + tx->path = path; + tx->dev = dev; + list_add(&tx->list, &priv->cm.start_list); + set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); + queue_work(ipoib_workqueue, &priv->cm.start_task); + return tx; +} + +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) +{ + struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(tx->neigh->dgid)); + tx->neigh = NULL; + } +} + +static void ipoib_cm_tx_start(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.start_task); + struct net_device *dev = priv->dev; + struct ipoib_neigh *neigh; + struct ipoib_cm_tx *p; + unsigned long flags; + int ret; + + struct ib_sa_path_rec pathrec; + u32 qpn; + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while (!list_empty(&priv->cm.start_list)) { + p = list_entry(priv->cm.start_list.next, typeof(*p), list); + list_del_init(&p->list); + neigh = p->neigh; + qpn = IPOIB_QPN(neigh->neighbour->ha); + memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + ret = ipoib_cm_tx_init(p, qpn, &pathrec); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + if (ret) { + neigh = p->neigh; + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + } + list_del(&p->list); + kfree(p); + } + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_tx_reap(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.reap_task); + 
struct ipoib_cm_tx *p; + unsigned long flags; + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while (!list_empty(&priv->cm.reap_list)) { + p = list_entry(priv->cm.reap_list.next, typeof(*p), list); + list_del(&p->list); + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + ipoib_cm_tx_destroy(p); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_skb_reap(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.skb_task); + struct net_device *dev = priv->dev; + struct sk_buff *skb; + unsigned long flags; + + __be32 mtu = cpu_to_be32(priv->mcast_mtu); + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while ((skb = skb_dequeue(&priv->cm.skb_queue))) { + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + if (skb->protocol == htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); +#ifdef CONFIG_IPV6 + else if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); +#endif + dev_kfree_skb_any(skb); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int e = skb_queue_empty(&priv->cm.skb_queue); + + if (skb->dst) + skb->dst->ops->update_pmtu(skb->dst, mtu); + + skb_queue_tail(&priv->cm.skb_queue, skb); + if (e) + queue_work(ipoib_workqueue, &priv->cm.skb_task); +} + +static void ipoib_cm_stale_task(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.stale_task.work); + struct ipoib_cm_rx *p; + unsigned long flags; + + 
spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.passive_ids)) { + /* List if sorted by LRU, start from tail, + * stop when we see a recently used entry */ + p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); + if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) + break; + list_del_init(&p->list); + spin_unlock_irqrestore(&priv->lock, flags); + ib_destroy_cm_id(p->id); + ib_destroy_qp(p->qp); + kfree(p); + spin_lock_irqsave(&priv->lock, flags); + } + spin_unlock_irqrestore(&priv->lock, flags); +} + + +static ssize_t show_mode(struct class_device *cdev, char *buf) +{ + struct net_device *dev = container_of(cdev, struct net_device, class_dev); + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) + return sprintf(buf, "connected\n"); + else + return sprintf(buf, "datagram\n"); +} + +static ssize_t set_mode(struct class_device *cdev, + const char *buf, size_t count) +{ + struct net_device *dev = container_of(cdev, struct net_device, class_dev); + struct ipoib_dev_priv *priv = netdev_priv(dev); + + /* flush paths if we switch modes so that connections are restarted */ + if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { + set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + ipoib_warn(priv, "enabling connected mode " + "will cause multicast packet drops\n"); + ipoib_flush_paths(dev); + return count; + } + + if (!strcmp(buf, "datagram\n")) { + clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + ipoib_flush_paths(dev); + return count; + } + + return -EINVAL; +} + +static CLASS_DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode); + +int ipoib_cm_add_mode_attr(struct net_device *dev) +{ + return class_device_create_file(&dev->class_dev, &class_device_attr_mode); +} + +int ipoib_cm_dev_init(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_srq_init_attr srq_init_attr = { + .attr = { + .max_wr = ipoib_recvq_size, + .max_sge = 
IPOIB_CM_RX_SG + } + }; + int ret, i; + + INIT_LIST_HEAD(&priv->cm.passive_ids); + INIT_LIST_HEAD(&priv->cm.reap_list); + INIT_LIST_HEAD(&priv->cm.start_list); + INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); + INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); + INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); + INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); + + skb_queue_head_init(&priv->cm.skb_queue); + + priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); + if (IS_ERR(priv->cm.srq)) { + ret = PTR_ERR(priv->cm.srq); + priv->cm.srq = NULL; + return ret; + } + + priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, + GFP_KERNEL); + if (!priv->cm.srq_ring) { + printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n", + priv->ca->name, ipoib_recvq_size); + ipoib_cm_dev_cleanup(dev); + return -ENOMEM; + } + + for (i = 0; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].lkey = priv->mr->lkey; + + priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].length = PAGE_SIZE; + priv->cm.rx_wr.next = NULL; + priv->cm.rx_wr.sg_list = priv->cm.rx_sge; + priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; + + for (i = 0; i < ipoib_recvq_size; ++i) { + if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -ENOMEM; + } + if (ipoib_cm_post_receive(dev, i)) { + ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -EIO; + } + } + + priv->dev->dev_addr[0] = IPOIB_FLAGS_RC; + return 0; +} + +void ipoib_cm_dev_cleanup(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i, ret; + + if (!priv->cm.srq) + return; + + ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); + + ret = ib_destroy_srq(priv->cm.srq); + if (ret) + ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); + + priv->cm.srq = NULL; + if 
(!priv->cm.srq_ring) + return; + for (i = 0; i < ipoib_recvq_size; ++i) + if (priv->cm.srq_ring[i].skb) { + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[i].skb); + priv->cm.srq_ring[i].skb = NULL; + } + kfree(priv->cm.srq_ring); + priv->cm.srq_ring = NULL; +} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 59d9594..f2aa923 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level, "Enable data path debug tracing if > 0"); #endif -#define IPOIB_OP_RECV (1ul << 31) - static DEFINE_MUTEX(pkey_mutex); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, @@ -268,10 +266,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) spin_lock_irqsave(&priv->tx_lock, flags); ++priv->tx_tail; - if (netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && - priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) + if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) && + priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) { + clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); netif_wake_queue(dev); + } spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && @@ -283,7 +282,9 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) { - if (wc->wr_id & IPOIB_OP_RECV) + if (wc->wr_id & IPOIB_CM_OP_SRQ) + ipoib_cm_handle_rx_wc(dev, wc); + else if (wc->wr_id & IPOIB_OP_RECV) ipoib_ib_handle_rx_wc(dev, wc); else ipoib_ib_handle_tx_wc(dev, wc); @@ -327,12 +328,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; u64 addr; - if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { + if (unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) { ipoib_warn(priv, "packet len 
%d (> %d) too long to send, dropping\n", - skb->len, dev->mtu + INFINIBAND_ALEN); + skb->len, priv->mcast_mtu + INFINIBAND_ALEN); ++priv->stats.tx_dropped; ++priv->stats.tx_errors; - dev_kfree_skb_any(skb); + ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu); return; } @@ -372,6 +373,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); netif_stop_queue(dev); + set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); } } } @@ -424,6 +426,13 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } + ret = ipoib_cm_dev_open(dev); + if (ret) { + ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); + ipoib_ib_dev_stop(dev); + return -1; + } + clear_bit(IPOIB_STOP_REAPER, &priv->flags); queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); @@ -509,6 +518,8 @@ int ipoib_ib_dev_stop(struct net_device *dev) clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_cm_dev_stop(dev); + /* * Move our QP to the error state and then reinitialize in * when all work requests have completed or have been flushed. diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 705eb1d..19e82db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,8 +49,6 @@ #include -#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) - MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -145,6 +143,8 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); + clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); + /* * Now flush workqueue to make sure a scheduled task doesn't * bring our internal state back up. 
@@ -178,8 +178,18 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) { struct ipoib_dev_priv *priv = netdev_priv(dev); - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + /* dev->mtu > 2K ==> connected mode */ + if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) { + if (new_mtu > priv->mcast_mtu) + ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", + priv->mcast_mtu); + dev->mtu = new_mtu; + return 0; + } + + if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) { return -EINVAL; + } priv->admin_mtu = new_mtu; @@ -414,6 +424,20 @@ static void path_rec_completion(int status, memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, sizeof(union ib_gid)); + if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (!ipoib_cm_get(neigh)) + ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, + path, + neigh)); + if (!ipoib_cm_get(neigh)) { + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + continue; + } + } + while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } @@ -520,7 +544,25 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, sizeof(union ib_gid)); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); + if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (!ipoib_cm_get(neigh)) + ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); + if (!ipoib_cm_get(neigh)) { + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + goto err_drop; + } + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) + __skb_queue_tail(&neigh->queue, skb); + else { + ipoib_warn(priv, "queue length limit %d. 
Packet drop.\n", + skb_queue_len(&neigh->queue)); + goto err_drop; + } + } else + ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); } else { neigh->ah = NULL; @@ -538,6 +580,7 @@ err_list: err_path: ipoib_neigh_free(dev, neigh); +err_drop: ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -640,7 +683,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) neigh = *to_ipoib_neigh(skb->dst->neighbour); - if (likely(neigh->ah)) { + if (ipoib_cm_get(neigh)) { + if (ipoib_cm_up(neigh)) { + ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); + goto out; + } + } else if (neigh->ah) { if (unlikely(memcmp(&neigh->dgid.raw, skb->dst->neighbour->ha + 4, sizeof(union ib_gid)))) { @@ -805,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) neigh->neighbour = neighbour; *to_ipoib_neigh(neighbour) = neigh; skb_queue_head_init(&neigh->queue); + ipoib_cm_set(neigh, NULL); return neigh; } @@ -818,6 +867,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); } + if (ipoib_cm_get(neigh)) + ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); kfree(neigh); } @@ -1081,6 +1132,8 @@ static struct net_device *ipoib_add_port(const char *format, ipoib_create_debug_files(priv->dev); + if (ipoib_cm_add_mode_attr(priv->dev)) + goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; if (class_device_create_file(&priv->dev->class_dev, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b04b72c..fea737f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -597,7 +597,9 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - IPOIB_ENCAP_LEN; - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + + if (!ipoib_cm_admin_enabled(dev)) + dev->mtu = min(priv->mcast_mtu, 
priv->admin_mtu); ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 7b717c6..3cb551b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .qp_type = IB_QPT_UD }; + int ret, size; + priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; } - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, - ipoib_sendq_size + ipoib_recvq_size + 1); + priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(priv->mr)) { + printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); + goto out_free_pd; + } + + size = ipoib_sendq_size + ipoib_recvq_size + 1; + ret = ipoib_cm_dev_init(dev); + if (!ret) + size += ipoib_recvq_size; + + priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size); if (IS_ERR(priv->cq)) { printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); - goto out_free_pd; + goto out_free_mr; } if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) goto out_free_cq; - priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(priv->mr)) { - printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); - goto out_free_cq; - } - init_attr.send_cq = priv->cq; init_attr.recv_cq = priv->cq, priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_mr; + goto out_free_cq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) return 0; -out_free_mr: - ib_dereg_mr(priv->mr); - out_free_cq: ib_destroy_cq(priv->cq); +out_free_mr: + ib_dereg_mr(priv->mr); + out_free_pd: 
ib_dealloc_pd(priv->pd); return -ENODEV; @@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_dereg_mr(priv->mr)) - ipoib_warn(priv, "ib_dereg_mr failed\n"); - if (ib_destroy_cq(priv->cq)) ipoib_warn(priv, "ib_cq_destroy failed\n"); + ipoib_cm_dev_cleanup(dev); + + if (ib_dereg_mr(priv->mr)) + ipoib_warn(priv, "ib_dereg_mr failed\n"); + if (ib_dealloc_pd(priv->pd)) ipoib_warn(priv, "ib_dealloc_pd failed\n"); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index f887780..d9fd82d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -115,6 +115,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ipoib_create_debug_files(priv->dev); + if (ipoib_cm_add_mode_attr(priv->dev)) + goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; -- MST From halr at voltaire.com Mon Jan 8 08:59:03 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 11:59:03 -0500 Subject: [openib-general] [PATCH TRIVIAL] opensm: remove osm_physp_get_port_info_ptr() checks In-Reply-To: <20070107214744.GF18379@sashak.voltaire.com> References: <20070107214744.GF18379@sashak.voltaire.com> Message-ID: <1168275542.4577.89016.camel@hal.voltaire.com> On Sun, 2007-01-07 at 16:47, Sasha Khapyorsky wrote: > This function returns address of osm_physp's port_info field, the result > cannot be NULL (unless physp itself is valid pointer). Not need to check. > > Signed-off-by: Sasha Khapyorsky > --- Good catch. Thanks. -- Hal From changquing.tang at hp.com Mon Jan 8 09:22:16 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Mon, 8 Jan 2007 11:22:16 -0600 Subject: [openib-general] using IB on a port without IPoIB running NIC In-Reply-To: <20070108164734.GK20047@mellanox.co.il> Message-ID: > > As I said, the problem is the alltoall QP number exchange. 
> I hope that > > a process can only provide one piece of information(such as > ip/port in > > TCP/IP) so that all other processes have the same piece of info and > > can make connection to it. > > Well, start with a socket, each time a process connects > create a QP on both sides and exchange the 2 QP numbers? Then the speed would be a big concern. --CQ > > -- > MST > From halr at voltaire.com Mon Jan 8 09:37:46 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 08 Jan 2007 12:37:46 -0500 Subject: [openib-general] [PATCH] opensm: eliminate port/switch_info access methods In-Reply-To: <20070107230147.GG18379@sashak.voltaire.com> References: <20070107214744.GF18379@sashak.voltaire.com> <20070107230147.GG18379@sashak.voltaire.com> Message-ID: <1168277863.4577.91139.camel@hal.voltaire.com> On Sun, 2007-01-07 at 18:01, Sasha Khapyorsky wrote: > Following previous patch ("remove osm_physp_get_port_info_ptr() checks") > this removes confused functions osm_physp_get_port_info_ptr() and > osm_switch_get_si_ptr(). > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. 
-- Hal From yosefe at voltaire.com Mon Jan 8 09:41:19 2007 From: yosefe at voltaire.com (Yosef Etigin) Date: Mon, 08 Jan 2007 19:41:19 +0200 Subject: [openib-general] [PATCH] [MINOR] perftest: send_bw: fix dangling else Message-ID: <45A2823F.5000302@voltaire.com> Symptom: ib_send_bw reports 'inf' bandwidth Cause: dangling else Signed-off-by: Yosef Etigin --- diff -rup a/src/userspace/perftest/send_bw.c b/src/userspace/perftest/send_bw.c --- a/src/userspace/perftest/send_bw.c 2007-01-08 18:20:08.000000000 +0200 +++ b/src/userspace/perftest/send_bw.c 2007-01-08 18:21:06.000000000 +0200 @@ -1156,12 +1156,14 @@ int main(int argc, char *argv[]) rem_dest = pp_server_exch_dest(sockfd, &my_dest); } } else { - if (user_param.duplex) + if (user_param.duplex) { if (run_iter_bi(ctx, &user_param, rem_dest, size)) return 18; - else + } + else { if(run_iter_uni(ctx, &user_param, rem_dest, size)) return 18; + } if (user_param.servername) print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted); -- Yosef Etigin From swise at opengridcomputing.com Mon Jan 8 09:43:51 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 11:43:51 -0600 Subject: [openib-general] Bad URL on OFED Development Wiki Site Message-ID: <1168278231.19412.41.camel@stevo-desktop> This URL is bad on the OFED Development Wiki page: https://wiki.openfabrics.org/tiki/tiki-download_file.php?fileId=23 It is supposed to be a OFED Release Process presentation. Thanks, Steve. From mshefty at ichips.intel.com Mon Jan 8 10:13:18 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 08 Jan 2007 10:13:18 -0800 Subject: [openib-general] 2.6.20: outstanding patches and issues In-Reply-To: <20070108064059.GB1697@mellanox.co.il> References: <20070102200523.GA10451@mellanox.co.il> <20070108064059.GB1697@mellanox.co.il> Message-ID: <45A289BE.6080305@ichips.intel.com> > sean_cm_limit_mra_timeout.patch I don't believe that I ever sent Roland a patch for merging upstream. 
The last patch I remember sending was untested and waiting for some feedback. I can resubmit this patch if it is working for you. (Was this in OFED 1.1?) > There are 3 Sean's patches I think we need > rdma_ucm: fix reporting events with invalid user context > rdma_ucm: fix struct ucma_event > rdma_cm: avoid port reuse after close The first two were pulled upstream. I have not published the port reuse patch in any git branch yet, but can add it to my multicast-sa_cache branch if needed. > Dotan reported oops with ucma at openib restart. > Sean - any luck in reproducing this? I have not, but maybe there's a difference in our configuration. - Sean From mst at mellanox.co.il Mon Jan 8 10:20:29 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 20:20:29 +0200 Subject: [openib-general] 2.6.20: outstanding patches and issues In-Reply-To: <45A289BE.6080305@ichips.intel.com> References: <45A289BE.6080305@ichips.intel.com> Message-ID: <20070108182029.GD17239@mellanox.co.il> > > sean_cm_limit_mra_timeout.patch > > I don't believe that I ever sent Roland a patch for merging upstream. The last > patch I remember sending was untested and waiting for some feedback. I can > resubmit this patch if it is working for you. (Was this in OFED 1.1?) Yes, it was in OFED, and it solves real problem with misbehaved remote. I did say this works for us, did I not? Let's have this in 2.6.20 - is there need to resend? Acked-by: Michael S. Tsirkin > > There are 3 Sean's patches I think we need > > rdma_ucm: fix reporting events with invalid user context > > rdma_ucm: fix struct ucma_event > > rdma_cm: avoid port reuse after close > > The first two were pulled upstream. I have not published the port reuse patch > in any git branch yet, but can add it to my multicast-sa_cache branch if needed. OK. The patch is small enough though - I hope it just lands upstream and we don't have to maintain it in side branch. Acked-by: Michael S. 
Tsirkin > > Dotan reported oops with ucma at openib restart. > > Sean - any luck in reproducing this? > > I have not, but maybe there's a difference in our configuration. Hmm. One of these then. So where do we go from here? -- MST From mst at mellanox.co.il Mon Jan 8 10:37:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 20:37:48 +0200 Subject: [openib-general] Fwd: [ANNOUNCE] GIT 1.4.4.4 Message-ID: <20070108183748.GF17239@mellanox.co.il> FYI. The infinite loop fix looks potentially relevant, so I guess we should update staging. Sasha? ----- Forwarded message from Junio C Hamano ----- Subject: [ANNOUNCE] GIT 1.4.4.4 Date: Mon, 8 Jan 2007 05:30:50 +0200 From: Junio C Hamano The latest maintenance release GIT 1.4.4.4 is available at the usual places: http://www.kernel.org/pub/software/scm/git/ git-1.4.4.4.tar.{gz,bz2} (tarball) git-htmldocs-1.4.4.4.tar.{gz,bz2} (preformatted docs) git-manpages-1.4.4.4.tar.{gz,bz2} (preformatted docs) RPMS/$arch/git-*-1.4.4.4-1.$arch.rpm (RPM) This is to push out a handful bugfixes since 1.4.4.3. On the 'master' development front, the stabilization for v1.5.0 will start soonish. ---------------------------------------------------------------- Changes since v1.4.4.3 are as follows: Johannes Schindelin (1): diff --check: fix off by one error Junio C Hamano (3): spurious .sp in manpages Fix infinite loop when deleting multiple packed refs. 
pack-check.c::verify_packfile(): don't run SHA-1 update on huge data - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ----- End forwarded message ----- -- MST From shubbell at dbresearch.net Mon Jan 8 10:26:00 2007 From: shubbell at dbresearch.net (Sean Hubbell) Date: Mon, 08 Jan 2007 12:26:00 -0600 Subject: [openib-general] Infiniband Network Library Message-ID: <45A28CB8.3070902@dbresearch.net> Hello, I have a question that is slightly off topic but I would think that this would be the place to ask the question. So, here goes ... I have been using InfiniBand here for about 2 years now. I have had to make significant workarounds for our current, third-party network API that we purchased and continue to watch it fall down and still not take advantage of the bandwidth that I need. With that said, does anyone on this list have a recommendation for an InfiniBand-capable network library? Thanks in advance, Sean From sean.hefty at intel.com Mon Jan 8 10:49:42 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 8 Jan 2007 10:49:42 -0800 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts Message-ID: <002301c73355$c220e180$8698070a@amr.corp.intel.com> Limit the timeout that the ib_cm will wait to receive a response to a message, to avoid excessively large (on the order of hours) timeout values. This prevents consuming resources tracking requests for extended periods of time. This helps correct for a bug in the SRP Engenio target sending a large value (> 1 hour) as a service timeout. 
Signed-off-by: Sean Hefty --- drivers/infiniband/core/cm.c | 30 +++++++++++++++++++----------- 1 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index d446998..147b41e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -54,6 +54,12 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); +/* + * Limit CM msg timeouts to something reasonable. + * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min. + */ +#define IB_CM_MAX_TIMEOUT 21 + static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device); @@ -888,12 +894,12 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, - param->remote_cm_response_timeout); + min((u8) IB_CM_MAX_TIMEOUT, param->remote_cm_response_timeout)); cm_req_set_qp_type(req_msg, param->qp_type); cm_req_set_flow_ctrl(req_msg, param->flow_control); cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, - param->local_cm_response_timeout); + min((u8) IB_CM_MAX_TIMEOUT, param->local_cm_response_timeout)); cm_req_set_retry_count(req_msg, param->retry_count); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); @@ -999,10 +1005,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, } cm_id->service_id = param->service_id; cm_id->service_mask = __constant_cpu_to_be64(~0ULL); - cm_id_priv->timeout_ms = cm_convert_to_ms( - param->primary_path->packet_life_time) * 2 + - cm_convert_to_ms( - param->remote_cm_response_timeout); + cm_id_priv->timeout_ms = + min(IB_CM_MAX_TIMEOUT, + cm_convert_to_ms(param->primary_path->packet_life_time) * 2 + + cm_convert_to_ms(param->remote_cm_response_timeout)); 
cm_id_priv->max_cm_retries = param->max_cm_retries; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; @@ -1400,8 +1406,9 @@ static int cm_req_handler(struct cm_work *work) } } cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->timeout_ms = cm_convert_to_ms( - cm_req_get_local_resp_timeout(req_msg)); + cm_id_priv->timeout_ms = + min(IB_CM_MAX_TIMEOUT, + cm_convert_to_ms(cm_req_get_local_resp_timeout(req_msg))); cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); @@ -2303,8 +2310,9 @@ static int cm_mra_handler(struct cm_work *work) work->cm_event.private_data = &mra_msg->private_data; work->cm_event.param.mra_rcvd.service_timeout = cm_mra_get_service_timeout(mra_msg); - timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + - cm_convert_to_ms(cm_id_priv->av.packet_life_time); + timeout = min(IB_CM_MAX_TIMEOUT, + cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + + cm_convert_to_ms(cm_id_priv->av.packet_life_time)); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { @@ -2707,7 +2715,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_id->service_id = param->service_id; cm_id->service_mask = __constant_cpu_to_be64(~0ULL); - cm_id_priv->timeout_ms = param->timeout_ms; + cm_id_priv->timeout_ms = min(IB_CM_MAX_TIMEOUT, param->timeout_ms); cm_id_priv->max_cm_retries = param->max_cm_retries; ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) -- 1.4.4.3 From mshefty at ichips.intel.com Mon Jan 8 11:11:11 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 08 Jan 2007 11:11:11 -0800 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <459D1A84.9060609@voltaire.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> <459D1A84.9060609@voltaire.com> Message-ID: 
<45A2974F.7060801@ichips.intel.com> > I just noticed that once i apply the patch, the last + lines (that is > pthread_mutex_lock, while loop doing pthread_cond_wait and then > pthread_mutex_unlock) become part of rdma_leave_multicast which seems to > me strictly buggy as no one is going to wake up this code. The leave must wait until all events have been reported on the multicast group. There can be more than one event on a group if an error occurs. See ucma_complete_mc_event() for where the condition is signaled. - Sean From swise at opengridcomputing.com Mon Jan 8 11:13:46 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 13:13:46 -0600 Subject: [openib-general] [PATCH RFC 0/2] ofed_1_2 - Chelsio T3 RDMA Support Message-ID: <20070108191346.27253.16936.stgit@dell3.ogc.int> This series adds the Chelsio T3 drivers to the ofed_1_2 tree. For this review, I've omitted the patch that actually adds the two drivers themselves, and just included the changes to the ofed_1_2 configuration scripts and the new kernel_patches/ files needed. The driver code itself is on track to go into either 2.6.20 or 2.6.21. I would appreciate any feedback/comments on what I've done. This is just for review. I'm still testing it. Here are the key changes: The package now needs to visit drivers/net to build the T3 Ethernet driver which is required for the T3 RDMA driver. Added a patch to backport the Linux 2.6.20 genalloc() services. I added the allocator as local services to the T3 RDMA module. Core changes are required for the T3 driver. This includes the addition of a udata pointer parameter to the ib_req_notify_cq() provider method. This is still being discussed on the openib-general list and I'll update it accordingly once we finalize the solution. Steve. 
From swise at opengridcomputing.com Mon Jan 8 11:13:48 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 13:13:48 -0600 Subject: [openib-general] [PATCH 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070108191346.27253.16936.stgit@dell3.ogc.int> References: <20070108191346.27253.16936.stgit@dell3.ogc.int> Message-ID: <20070108191348.27253.9576.stgit@dell3.ogc.int> - rdma core changes needed for T3 Support. - genalloc backport. - modified the qp_num -> qp ptr patch to include cxgb3. Signed-off-by: Steve Wise --- kernel_patches/fixes/genalloc.patch | 392 ++++++++++++++++++++++++++++ kernel_patches/fixes/ib_wc_qpn_to_qp.patch | 13 + kernel_patches/fixes/t3_core_changes.patch | 202 ++++++++++++++ 3 files changed, 607 insertions(+), 0 deletions(-) diff --git a/kernel_patches/fixes/genalloc.patch b/kernel_patches/fixes/genalloc.patch new file mode 100644 index 0000000..c44a98f --- /dev/null +++ b/kernel_patches/fixes/genalloc.patch @@ -0,0 +1,392 @@ +Backport of the Linux 2.6.20 generic allocator. + +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/Kconfig | 1 + drivers/infiniband/hw/cxgb3/Makefile | 3 + drivers/infiniband/hw/cxgb3/core/cxio_hal.h | 4 + drivers/infiniband/hw/cxgb3/core/cxio_resource.c | 20 +- + drivers/infiniband/hw/cxgb3/core/cxio_resource.h | 2 + drivers/infiniband/hw/cxgb3/core/genalloc.c | 196 ++++++++++++++++++++++ + drivers/infiniband/hw/cxgb3/core/genalloc.h | 36 ++++ + 7 files changed, 247 insertions(+), 15 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig +index d3db264..0361a72 100644 +--- a/drivers/infiniband/hw/cxgb3/Kconfig ++++ b/drivers/infiniband/hw/cxgb3/Kconfig +@@ -1,7 +1,6 @@ + config INFINIBAND_CXGB3 + tristate "Chelsio RDMA Driver" + depends on CHELSIO_T3 && INFINIBAND +- select GENERIC_ALLOCATOR + ---help--- + This is an iWARP/RDMA driver for the Chelsio T3 1GbE and + 10GbE adapters. 
+diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile +index 7a89f6d..12e7a94 100644 +--- a/drivers/infiniband/hw/cxgb3/Makefile ++++ b/drivers/infiniband/hw/cxgb3/Makefile +@@ -4,7 +4,8 @@ EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/ + obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o + + iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ +- iwch_provider.o iwch.o core/cxio_hal.o core/cxio_resource.o ++ iwch_provider.o iwch.o core/cxio_hal.o core/cxio_resource.o \ ++ core/genalloc.o + + ifdef CONFIG_INFINIBAND_CXGB3_DEBUG + EXTRA_CFLAGS += -DDEBUG -g +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_hal.h b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h +index e5e702d..a9e8452 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_hal.h ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h +@@ -104,8 +104,8 @@ struct cxio_rdev { + u32 qpnr; + u32 qpmask; + struct cxio_ucontext uctx; +- struct gen_pool *pbl_pool; +- struct gen_pool *rqt_pool; ++ struct iwch_gen_pool *pbl_pool; ++ struct iwch_gen_pool *rqt_pool; + }; + + static inline int cxio_num_stags(struct cxio_rdev *rdev_p) +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +index d1d8722..cecb27b 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +@@ -265,7 +265,7 @@ #define PBL_CHUNK 2*1024*1024 + + u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) + { +- unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); ++ unsigned long addr = iwch_gen_pool_alloc(rdev_p->pbl_pool, size); + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size); + return (u32)addr; + } +@@ -273,24 +273,24 @@ u32 cxio_hal_pblpool_alloc(struct cxio_r + void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) + { + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size); +- gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); 
++ iwch_gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); + } + + int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) + { + unsigned long i; +- rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); ++ rdev_p->pbl_pool = iwch_gen_pool_create(MIN_PBL_SHIFT, -1); + if (rdev_p->pbl_pool) + for (i = rdev_p->rnic_info.pbl_base; + i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; + i += PBL_CHUNK) +- gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); ++ iwch_gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); + return rdev_p->pbl_pool ? 0 : -ENOMEM; + } + + void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) + { +- gen_pool_destroy(rdev_p->pbl_pool); ++ iwch_gen_pool_destroy(rdev_p->pbl_pool); + } + + /* +@@ -302,7 +302,7 @@ #define RQT_CHUNK 2*1024*1024 + + u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) + { +- unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); ++ unsigned long addr = iwch_gen_pool_alloc(rdev_p->rqt_pool, size << 6); + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6); + return (u32)addr; + } +@@ -310,22 +310,22 @@ u32 cxio_hal_rqtpool_alloc(struct cxio_r + void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) + { + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6); +- gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); ++ iwch_gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); + } + + int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) + { + unsigned long i; +- rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); ++ rdev_p->rqt_pool = iwch_gen_pool_create(MIN_RQT_SHIFT, -1); + if (rdev_p->rqt_pool) + for (i = rdev_p->rnic_info.rqt_base; + i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; + i += RQT_CHUNK) +- gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); ++ iwch_gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); + return rdev_p->rqt_pool ? 
0 : -ENOMEM; + } + + void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) + { +- gen_pool_destroy(rdev_p->rqt_pool); ++ iwch_gen_pool_destroy(rdev_p->rqt_pool); + } +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.h b/drivers/infiniband/hw/cxgb3/core/cxio_resource.h +index a6bbe83..06a8076 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.h ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.h +@@ -39,7 +39,7 @@ #include + #include + #include + #include +-#include ++#include "genalloc.h" + #include "cxio_hal.h" + + extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); +diff --git a/drivers/infiniband/hw/cxgb3/core/genalloc.c b/drivers/infiniband/hw/cxgb3/core/genalloc.c +new file mode 100644 +index 0000000..27ba8ec +--- /dev/null ++++ b/drivers/infiniband/hw/cxgb3/core/genalloc.c +@@ -0,0 +1,196 @@ ++/* ++ * Basic general purpose allocator for managing special purpose memory ++ * not managed by the regular kmalloc/kfree interface. ++ * Uses for this includes on-device special memory, uncached memory ++ * etc. ++ * ++ * Copyright 2005 (C) Jes Sorensen ++ * ++ * This source code is licensed under the GNU General Public License, ++ * Version 2. See the file COPYING for more details. ++ */ ++ ++#include ++#include "genalloc.h" ++ ++ ++/** ++ * iwch_gen_pool_create - create a new special memory pool ++ * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents ++ * @nid: node id of the node the pool structure should be allocated on, or -1 ++ * ++ * Create a new special memory pool that can be used to manage special purpose ++ * memory not managed by the regular kmalloc/kfree interface. 
++ */ ++struct iwch_gen_pool *iwch_gen_pool_create(int min_alloc_order, int nid) ++{ ++ struct iwch_gen_pool *pool; ++ ++ pool = kmalloc_node(sizeof(struct iwch_gen_pool), GFP_KERNEL, nid); ++ if (pool != NULL) { ++ rwlock_init(&pool->lock); ++ INIT_LIST_HEAD(&pool->chunks); ++ pool->min_alloc_order = min_alloc_order; ++ } ++ return pool; ++} ++ ++/** ++ * iwch_gen_pool_add - add a new chunk of special memory to the pool ++ * @pool: pool to add new memory chunk to ++ * @addr: starting address of memory chunk to add to pool ++ * @size: size in bytes of the memory chunk to add to pool ++ * @nid: node id of the node the chunk structure and bitmap should be ++ * allocated on, or -1 ++ * ++ * Add a new chunk of special memory to the specified pool. ++ */ ++int iwch_gen_pool_add(struct iwch_gen_pool *pool, unsigned long addr, size_t size, int nid) ++{ ++ struct iwch_gen_pool_chunk *chunk; ++ int nbits = size >> pool->min_alloc_order; ++ int nbytes = sizeof(struct iwch_gen_pool_chunk) + ++ (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; ++ ++ chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); ++ if (unlikely(chunk == NULL)) ++ return -1; ++ ++ memset(chunk, 0, nbytes); ++ spin_lock_init(&chunk->lock); ++ chunk->start_addr = addr; ++ chunk->end_addr = addr + size; ++ ++ write_lock(&pool->lock); ++ list_add(&chunk->next_chunk, &pool->chunks); ++ write_unlock(&pool->lock); ++ ++ return 0; ++} ++ ++/** ++ * iwch_gen_pool_destroy - destroy a special memory pool ++ * @pool: pool to destroy ++ * ++ * Destroy the specified special memory pool. Verifies that there are no ++ * outstanding allocations. 
++ */ ++void iwch_gen_pool_destroy(struct iwch_gen_pool *pool) ++{ ++ struct list_head *_chunk, *_next_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ int order = pool->min_alloc_order; ++ int bit, end_bit; ++ ++ ++ write_lock(&pool->lock); ++ list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ list_del(&chunk->next_chunk); ++ ++ end_bit = (chunk->end_addr - chunk->start_addr) >> order; ++ bit = find_next_bit(chunk->bits, end_bit, 0); ++ BUG_ON(bit < end_bit); ++ ++ kfree(chunk); ++ } ++ kfree(pool); ++ return; ++} ++ ++/** ++ * iwch_gen_pool_alloc - allocate special memory from the pool ++ * @pool: pool to allocate from ++ * @size: number of bytes to allocate from the pool ++ * ++ * Allocate the requested number of bytes from the specified pool. ++ * Uses a first-fit algorithm. ++ */ ++unsigned long iwch_gen_pool_alloc(struct iwch_gen_pool *pool, size_t size) ++{ ++ struct list_head *_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ unsigned long addr, flags; ++ int order = pool->min_alloc_order; ++ int nbits, bit, start_bit, end_bit; ++ ++ if (size == 0) ++ return 0; ++ ++ nbits = (size + (1UL << order) - 1) >> order; ++ ++ read_lock(&pool->lock); ++ list_for_each(_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ ++ end_bit = (chunk->end_addr - chunk->start_addr) >> order; ++ end_bit -= nbits + 1; ++ ++ spin_lock_irqsave(&chunk->lock, flags); ++ bit = -1; ++ while (bit + 1 < end_bit) { ++ bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); ++ if (bit >= end_bit) ++ break; ++ ++ start_bit = bit; ++ if (nbits > 1) { ++ bit = find_next_bit(chunk->bits, bit + nbits, ++ bit + 1); ++ if (bit - start_bit < nbits) ++ continue; ++ } ++ ++ addr = chunk->start_addr + ++ ((unsigned long)start_bit << order); ++ while (nbits--) ++ __set_bit(start_bit++, &chunk->bits); ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ 
read_unlock(&pool->lock); ++ return addr; ++ } ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ } ++ read_unlock(&pool->lock); ++ return 0; ++} ++ ++/** ++ * iwch_gen_pool_free - free allocated special memory back to the pool ++ * @pool: pool to free to ++ * @addr: starting address of memory to free back to pool ++ * @size: size in bytes of memory to free ++ * ++ * Free previously allocated special memory back to the specified pool. ++ */ ++void iwch_gen_pool_free(struct iwch_gen_pool *pool, unsigned long addr, ++ size_t size) ++{ ++ struct list_head *_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ unsigned long flags; ++ int order = pool->min_alloc_order; ++ int bit, nbits; ++ ++ nbits = (size + (1UL << order) - 1) >> order; ++ ++ read_lock(&pool->lock); ++ list_for_each(_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ ++ if (addr >= chunk->start_addr && addr < chunk->end_addr) { ++ BUG_ON(addr + size > chunk->end_addr); ++ spin_lock_irqsave(&chunk->lock, flags); ++ bit = (addr - chunk->start_addr) >> order; ++ while (nbits--) ++ __clear_bit(bit++, &chunk->bits); ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ break; ++ } ++ } ++ BUG_ON(nbits > 0); ++ read_unlock(&pool->lock); ++} +diff --git a/drivers/infiniband/hw/cxgb3/core/genalloc.h b/drivers/infiniband/hw/cxgb3/core/genalloc.h +new file mode 100644 +index 0000000..1dc336c +--- /dev/null ++++ b/drivers/infiniband/hw/cxgb3/core/genalloc.h +@@ -0,0 +1,36 @@ ++/* ++ * Basic general purpose allocator for managing special purpose memory ++ * not managed by the regular kmalloc/kfree interface. ++ * Uses for this includes on-device special memory, uncached memory ++ * etc. ++ * ++ * This source code is licensed under the GNU General Public License, ++ * Version 2. See the file COPYING for more details. ++ */ ++ ++ ++/* ++ * General purpose special memory pool descriptor. 
++ */ ++struct iwch_gen_pool { ++ rwlock_t lock; ++ struct list_head chunks; /* list of chunks in this pool */ ++ int min_alloc_order; /* minimum allocation order */ ++}; ++ ++/* ++ * General purpose special memory pool chunk descriptor. ++ */ ++struct iwch_gen_pool_chunk { ++ spinlock_t lock; ++ struct list_head next_chunk; /* next chunk in pool */ ++ unsigned long start_addr; /* starting address of memory chunk */ ++ unsigned long end_addr; /* ending address of memory chunk */ ++ unsigned long bits[0]; /* bitmap for allocating memory chunk */ ++}; ++ ++extern struct iwch_gen_pool *iwch_gen_pool_create(int, int); ++extern int iwch_gen_pool_add(struct iwch_gen_pool *, unsigned long, size_t, int); ++extern void iwch_gen_pool_destroy(struct iwch_gen_pool *); ++extern unsigned long iwch_gen_pool_alloc(struct iwch_gen_pool *, size_t); ++extern void iwch_gen_pool_free(struct iwch_gen_pool *, unsigned long, size_t); diff --git a/kernel_patches/fixes/ib_wc_qpn_to_qp.patch b/kernel_patches/fixes/ib_wc_qpn_to_qp.patch index 67f9da5..571d579 100644 --- a/kernel_patches/fixes/ib_wc_qpn_to_qp.patch +++ b/kernel_patches/fixes/ib_wc_qpn_to_qp.patch @@ -309,3 +309,16 @@ index 0bfa332..54cde37 100644 u32 src_qp; int wc_flags; u16 pkey_index; +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c +index ff09509..122f7b4 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c +@@ -80,7 +80,7 @@ int iwch_poll_cq_one(struct iwch_dev *rh + ret = 1; + + wc->wr_id = cookie; +- wc->qp_num = qhp->wq.qpid; ++ wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " diff --git a/kernel_patches/fixes/t3_core_changes.patch b/kernel_patches/fixes/t3_core_changes.patch new file mode 100644 index 0000000..c4631e7 --- /dev/null +++ b/kernel_patches/fixes/t3_core_changes.patch @@ -0,0 +1,202 @@ +Linux RDMA Core Changes + +From: Steve Wise + +Support 
provider-specific data in ib_uverbs_cmd_req_notify_cq(). +The Chelsio iwarp provider library needs to pass information to the +kernel verb for re-arming the CQ. + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/core/uverbs_cmd.c | 9 +++++++-- + drivers/infiniband/hw/amso1100/c2.h | 2 +- + drivers/infiniband/hw/amso1100/c2_cq.c | 3 ++- + drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 ++- + drivers/infiniband/hw/ehca/ehca_reqs.c | 3 ++- + drivers/infiniband/hw/ipath/ipath_cq.c | 4 +++- + drivers/infiniband/hw/ipath/ipath_verbs.h | 3 ++- + drivers/infiniband/hw/mthca/mthca_cq.c | 6 ++++-- + drivers/infiniband/hw/mthca/mthca_dev.h | 4 ++-- + include/rdma/ib_verbs.h | 5 +++-- + 10 files changed, 28 insertions(+), 14 deletions(-) + +diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c +index 743247e..5dd1de9 100644 +--- a/drivers/infiniband/core/uverbs_cmd.c ++++ b/drivers/infiniband/core/uverbs_cmd.c +@@ -959,6 +959,7 @@ ssize_t ib_uverbs_req_notify_cq(struct i + int out_len) + { + struct ib_uverbs_req_notify_cq cmd; ++ struct ib_udata udata; + struct ib_cq *cq; + + if (copy_from_user(&cmd, buf, sizeof cmd)) +@@ -968,8 +969,12 @@ ssize_t ib_uverbs_req_notify_cq(struct i + if (!cq) + return -EINVAL; + +- ib_req_notify_cq(cq, cmd.solicited_only ? +- IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); ++ INIT_UDATA(&udata, buf + sizeof cmd, 0, ++ in_len - sizeof cmd, 0); ++ ++ cq->device->req_notify_cq(cq, cmd.solicited_only ? 
++ IB_CQ_SOLICITED : IB_CQ_NEXT_COMP, ++ &udata); + + put_cq_read(cq); + +diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h +index 04a9db5..9a76869 100644 +--- a/drivers/infiniband/hw/amso1100/c2.h ++++ b/drivers/infiniband/hw/amso1100/c2.h +@@ -519,7 +519,7 @@ extern void c2_free_cq(struct c2_dev *c2 + extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index); + extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index); + extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); +-extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); ++extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify, struct ib_udata *udata); + + /* CM */ + extern int c2_llp_connect(struct iw_cm_id *cm_id, +diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c +index 05c9154..7ce8bca 100644 +--- a/drivers/infiniband/hw/amso1100/c2_cq.c ++++ b/drivers/infiniband/hw/amso1100/c2_cq.c +@@ -217,7 +217,8 @@ int c2_poll_cq(struct ib_cq *ibcq, int n + return npolled; + } + +-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) ++int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify, ++ struct ib_udata *udata) + { + struct c2_mq_shared __iomem *shared; + struct c2_cq *cq; +diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h +index 3720e30..566b30c 100644 +--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h ++++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h +@@ -135,7 +135,8 @@ int ehca_poll_cq(struct ib_cq *cq, int n + + int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify); ++int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify, ++ struct ib_udata *udata); + + struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, +diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c 
b/drivers/infiniband/hw/ehca/ehca_reqs.c +index b46bda1..3ed6992 100644 +--- a/drivers/infiniband/hw/ehca/ehca_reqs.c ++++ b/drivers/infiniband/hw/ehca/ehca_reqs.c +@@ -634,7 +634,8 @@ poll_cq_exit0: + return ret; + } + +-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) ++int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify, ++ struct ib_udata *udata) + { + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + +diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c +index 87462e0..27ba4db 100644 +--- a/drivers/infiniband/hw/ipath/ipath_cq.c ++++ b/drivers/infiniband/hw/ipath/ipath_cq.c +@@ -307,13 +307,15 @@ int ipath_destroy_cq(struct ib_cq *ibcq) + * ipath_req_notify_cq - change the notification type for a completion queue + * @ibcq: the completion queue + * @notify: the type of notification to request ++ * @udata: user data + * + * Returns 0 for success. + * + * This may be called from interrupt context. Also called by + * ib_req_notify_cq() in the generic verbs code. 
+ */ +-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) ++int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify, ++ struct ib_udata *udata) + { + struct ipath_cq *cq = to_icq(ibcq); + unsigned long flags; +diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h +index c0c8d5b..7db01ae 100644 +--- a/drivers/infiniband/hw/ipath/ipath_verbs.h ++++ b/drivers/infiniband/hw/ipath/ipath_verbs.h +@@ -716,7 +716,8 @@ struct ib_cq *ipath_create_cq(struct ib_ + + int ipath_destroy_cq(struct ib_cq *ibcq); + +-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); ++int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify, ++ struct ib_udata *udata); + + int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); + +diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c +index 283d50b..15cbd49 100644 +--- a/drivers/infiniband/hw/mthca/mthca_cq.c ++++ b/drivers/infiniband/hw/mthca/mthca_cq.c +@@ -722,7 +722,8 @@ repoll: + return err == 0 || err == -EAGAIN ? 
npolled : err; + } + +-int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) ++int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify, ++ struct ib_udata *udata) + { + __be32 doorbell[2]; + +@@ -739,7 +740,8 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, + return 0; + } + +-int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) ++int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify, ++ struct ib_udata *udata) + { + struct mthca_cq *cq = to_mcq(ibcq); + __be32 doorbell[2]; +diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h +index fe5cecf..6b9ccf6 100644 +--- a/drivers/infiniband/hw/mthca/mthca_dev.h ++++ b/drivers/infiniband/hw/mthca/mthca_dev.h +@@ -493,8 +493,8 @@ void mthca_unmap_eq_icm(struct mthca_dev + + int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *entry); +-int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); +-int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); ++int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify, struct ib_udata *udata); ++int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify, struct ib_udata *udata); + int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, + struct mthca_cq *cq); +diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h +index 0bfa332..4dc771f 100644 +--- a/include/rdma/ib_verbs.h ++++ b/include/rdma/ib_verbs.h +@@ -986,7 +986,8 @@ struct ib_device { + struct ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, +- enum ib_cq_notify cq_notify); ++ enum ib_cq_notify cq_notify, ++ struct ib_udata *udata); + int (*req_ncomp_notif)(struct ib_cq *cq, + int wc_cnt); + struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, +@@ -1420,7 +1421,7 @@ int ib_peek_cq(struct ib_cq *cq, int wc_ + static inline int ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify cq_notify) + { 
+- return cq->device->req_notify_cq(cq, cq_notify); ++ return cq->device->req_notify_cq(cq, cq_notify, NULL); + } + + /** From swise at opengridcomputing.com Mon Jan 8 11:13:50 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 13:13:50 -0600 Subject: [openib-general] [PATCH 2/2] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <20070108191346.27253.16936.stgit@dell3.ogc.int> References: <20070108191346.27253.16936.stgit@dell3.ogc.int> Message-ID: <20070108191350.27253.78926.stgit@dell3.ogc.int> Signed-off-by: Steve Wise --- ofed_scripts/Makefile | 9 +++++++-- ofed_scripts/configure | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile index d63b1d2..049e533 100644 --- a/ofed_scripts/Makefile +++ b/ofed_scripts/Makefile @@ -46,8 +46,10 @@ kernel: @echo "Kernel sources: $(KSRC)" env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ -I$(CWD)/drivers/infiniband/ulp/ipoib \ - -I$(CWD)/drivers/infiniband/debug" \ - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ + -I$(CWD)/drivers/infiniband/debug \ + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ + -I$(CWD)/drivers/net/cxgb3 " \ + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net" KERNELRELEASE=$(KVERSION) \ EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ @@ -74,6 +76,9 @@ kernel: CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ LINUXINCLUDE=' \ 
$(BACKPORT_INCLUDES) \ -I$(CWD)/include \ diff --git a/ofed_scripts/configure b/ofed_scripts/configure index a0557e2..08f15f5 100755 --- a/ofed_scripts/configure +++ b/ofed_scripts/configure @@ -126,6 +126,12 @@ Usage: `basename $0` [options] --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] --without-vnic_stats-mod [yes] + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] + --without-cxgb3-mod [yes] + + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] + --without-cxgb3_debug-mod [yes] + --help - print out options @@ -607,6 +613,20 @@ main() --without-vnic_stats-mod) CONFIG_INFINIBAND_VNIC_STATS= ;; + --with-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3="m" + CONFIG_CHELSIO_T3="m" + ;; + --without-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3= + CONFIG_CHELSIO_T3= + ;; + --with-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG="y" + ;; + --without-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG= + ;; --with-modprobe|--without-modprobe) ;; -h | --help) @@ -679,6 +699,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} @@ -689,6 +711,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} # Check for minimal supported kernel version if ! 
check_kerver ${KVERSION} ${MIN_KVERSION}; then @@ -742,6 +765,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} @@ -752,6 +777,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} EOFCONFIG echo "Created ${CONFIG}:" @@ -887,6 +913,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" else DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" fi +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" +else + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" +fi +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" +else + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" +fi +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" +else + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" +fi cat >> ${AUTOCONF_H} << EOFAUTOCONF #undef CONFIG_INFINIBAND #undef CONFIG_INFINIBAND_IPOIB @@ -908,6 +949,9 @@ #undef CONFIG_INFINIBAND_MADEYE #undef CONFIG_INFINIBAND_VNIC #undef CONFIG_INFINIBAND_VNIC_DEBUG #undef CONFIG_INFINIBAND_VNIC_STATS +#undef CONFIG_INFINIBAND_CXGB3 +#undef CONFIG_INFINIBAND_CXGB3_DEBUG +#undef CONFIG_CHELSIO_T3 #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY @@ -927,6 +971,8 @@ #undef 
CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_ADDR_TRANS} ${DEFINE_INFINIBAND_MTHCA} ${DEFINE_INFINIBAND_VNIC} +${DEFINE_INFINIBAND_CXGB3} +${DEFINE_CHELSIO_T3} ${DEFINE_INFINIBAND_IPOIB_DEBUG} ${DEFINE_INFINIBAND_ISER} @@ -937,6 +983,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_RDS_DEBUG} ${DEFINE_INFINIBAND_VNIC_DEBUG} ${DEFINE_INFINIBAND_VNIC_STATS} +${DEFINE_INFINIBAND_CXGB3_DEBUG} ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} From mst at mellanox.co.il Mon Jan 8 11:24:07 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 21:24:07 +0200 Subject: [openib-general] [PATCH untested] IB/mthca: avoid wasting MTT enties on memfree Message-ID: <20070108192407.GJ17239@mellanox.co.il> I looked at what be the clean fix for the MTT SEG handling in mthca, and I came up with the following (applies on top of the series I posted earlier). I think this gives us an important optimization. Roland, could you please give me a hint whether something like this is too big a change to get into 2.6.20? Arbel does not actually have a concept of MTT segment. So we should set MTT segment size to 64 bit (1 entry) for memfree, otherwise we might be wasting as much as 87% of MTT entries. Signed-off-by: Michael S. 
Tsirkin --- diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 7131446..968d151 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1051,11 +1051,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); dev_lim->max_eqs = 1 << (field & 0x7); MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); - if (mthca_is_memfree(dev)) - dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), - MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE; - else - dev_lim->reserved_mtts = 1 << (field >> 4); + dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); dev_lim->max_mrw_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index b7e42ef..0973359 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -78,16 +78,17 @@ enum { }; enum { - MTHCA_EQ_CONTEXT_SIZE = 0x40, - MTHCA_CQ_CONTEXT_SIZE = 0x40, - MTHCA_QP_CONTEXT_SIZE = 0x200, - MTHCA_RDB_ENTRY_SIZE = 0x20, - MTHCA_AV_SIZE = 0x20, - MTHCA_MGM_ENTRY_SIZE = 0x40, + MTHCA_EQ_CONTEXT_SIZE = 0x40, + MTHCA_CQ_CONTEXT_SIZE = 0x40, + MTHCA_QP_CONTEXT_SIZE = 0x200, + MTHCA_RDB_ENTRY_SIZE = 0x20, + MTHCA_AV_SIZE = 0x20, + MTHCA_MGM_ENTRY_SIZE = 0x40, + + MTHCA_TAVOR_MTT_SEG_SIZE = 0x40, /* Arbel FW gives us these, but we need them for Tavor */ MTHCA_MPT_ENTRY_SIZE = 0x40, - MTHCA_MTT_SEG_SIZE = 0x40, MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) }; @@ -595,4 +596,8 @@ static inline int mthca_is_memfree(struct mthca_dev *dev) return dev->mthca_flags & MTHCA_FLAG_MEMFREE; } +static inline unsigned mthca_mtt_seg_size(struct mthca_dev *dev) +{ + return mthca_is_memfree(dev) ? 
sizeof(u64) : MTHCA_TAVOR_MTT_SEG_SIZE; +} #endif /* MTHCA_DEV_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index bbe9143..d9d5b89 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -465,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev, } /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ - mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, - dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * sizeof(u64), + dma_get_cache_alignment()) / sizeof(u64); mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, - MTHCA_MTT_SEG_SIZE, + sizeof(u64), mdev->limits.num_mtt_segs, mdev->limits.reserved_mtts, 1, 0); diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 88f9dc2..0357dbe 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -212,7 +212,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, mtt->buddy = buddy; mtt->order = 0; - for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + for (i = mthca_mtt_seg_size(dev) / sizeof(u64); i < size; i <<= 1) ++mtt->order; mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); @@ -259,7 +259,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, while (list_len > 0) { mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtt->first_seg * mthca_mtt_seg_size(dev) + start_index * 8); mtt_entry[1] = 0; for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) @@ -302,7 +302,7 @@ void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt, u32 mtt_seg; int i; - mtt_seg = mtt->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mtt->first_seg * MTHCA_TAVOR_MTT_SEG_SIZE; 
mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg + start_index * sizeof (u64); for (i = 0; i < list_len; ++i) { __be64 mtt_entry = cpu_to_be64(buffer_list[i] | @@ -321,11 +321,9 @@ void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt, /* For Arbel, all MTTs must fit in the same page. */ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); - /* Require full segments */ - BUG_ON(s % MTHCA_MTT_SEG_SIZE); mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + - s / MTHCA_MTT_SEG_SIZE, &dma_handle); + s / sizeof(u64), &dma_handle); BUG_ON(!mtts); @@ -470,7 +468,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, if (mr->mtt) mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + mr->mtt->first_seg * mthca_mtt_seg_size(dev)); if (0) { mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); @@ -615,7 +613,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, if (IS_ERR(mr->mtt)) goto err_out_table; - mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * mthca_mtt_seg_size(dev); if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, @@ -907,7 +905,7 @@ int mthca_init_mr_table(struct mthca_dev *dev) dev->mr_table.mtt_base); dev->mr_table.tavor_fmr.mtt_base = - ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE); + ioremap(addr, mtts * MTHCA_TAVOR_MTT_SEG_SIZE); if (!dev->mr_table.tavor_fmr.mtt_base) { mthca_warn(dev, "MTT ioremap for FMR failed.\n"); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 26bf86d..7367150 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -96,7 +96,7 @@ u64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; profile[MTHCA_RES_MPT].size = 
dev_lim->mpt_entry_sz; - profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE; + profile[MTHCA_RES_MTT].size = mthca_mtt_seg_size(dev); profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; profile[MTHCA_RES_UARC].size = request->uarc_size; @@ -234,7 +234,8 @@ u64 mthca_make_profile(struct mthca_dev *dev, dev->limits.num_mtt_segs = profile[i].num; dev->mr_table.mtt_base = profile[i].start; init_hca->mtt_base = profile[i].start; - init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7; + if (!mthca_is_memfree(dev)) + init_hca->mtt_seg_sz = ffs(MTHCA_TAVOR_MTT_SEG_SIZE) - 7; break; case MTHCA_RES_UAR: dev->limits.num_uars = profile[i].num; -- MST From mst at mellanox.co.il Mon Jan 8 11:29:36 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 21:29:36 +0200 Subject: [openib-general] [PATCH 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070108191348.27253.9576.stgit@dell3.ogc.int> References: <20070108191348.27253.9576.stgit@dell3.ogc.int> Message-ID: <20070108192936.GK17239@mellanox.co.il> > - modified the qp_num -> qp ptr patch to include cxgb3. If you don't mind, this might be better as a separate patch - it's just easier for me to continue pushing this upstream if I can just copy it from OFED sources. -- MST From mst at mellanox.co.il Mon Jan 8 11:32:21 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 21:32:21 +0200 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: <002301c73355$c220e180$8698070a@amr.corp.intel.com> References: <002301c73355$c220e180$8698070a@amr.corp.intel.com> Message-ID: <20070108193221.GL17239@mellanox.co.il> > Limit the timeout that the ib_cm will wait to receive a response to > a message, to avoid excessively large (on the order of hours) timeout > values. This prevents consuming resources tracking requests for > extended periods of time. 
> > This helps correct for a bug in the SRP Engenio target sending a large > value (> 1 hour) as a service timeout. > > Signed-off-by: Sean Hefty Very similar code is in OFED 1.1 (we chickened out and had a module parameter to disable this just in case, but I don't think it's really needed upstream). Acked-by: Michael S. Tsirkin -- MST From swise at opengridcomputing.com Mon Jan 8 11:40:10 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 13:40:10 -0600 Subject: [openib-general] [PATCH 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070108192936.GK17239@mellanox.co.il> References: <20070108191348.27253.9576.stgit@dell3.ogc.int> <20070108192936.GK17239@mellanox.co.il> Message-ID: <1168285210.30883.15.camel@stevo-desktop> On Mon, 2007-01-08 at 21:29 +0200, Michael S. Tsirkin wrote: > > - modified the qp_num -> qp ptr patch to include cxgb3. > > If you don't mind, this might be better as a separate patch - it's just easier > for me to continue pushing this upstream if I can just copy it from OFED > sources. > Ok...that makes sense. From mst at mellanox.co.il Mon Jan 8 11:57:45 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 21:57:45 +0200 Subject: [openib-general] [PATCH RFC 0/2] ofed_1_2 - Chelsio T3 RDMA Support In-Reply-To: <20070108191346.27253.16936.stgit@dell3.ogc.int> References: <20070108191346.27253.16936.stgit@dell3.ogc.int> Message-ID: <20070108195745.GB25444@mellanox.co.il> > Core changes are required for the T3 driver. This includes the addition > of a udata pointer parameter to the ib_req_notify_cq() provider method. > This is still being discussed on the openib-general list and I'll update > it accordingly once we finalize the solution. So what I plan to do is, review that the patches are in proper format, but delay applying until this API issue is closed. OK?
-- MST From or.gerlitz at gmail.com Mon Jan 8 12:02:32 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 8 Jan 2007 22:02:32 +0200 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <45A2974F.7060801@ichips.intel.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> <459D1A84.9060609@voltaire.com> <45A2974F.7060801@ichips.intel.com> Message-ID: <15ddcffd0701081202v4ef23d82n10237ed230d9fdfa@mail.gmail.com> On 1/8/07, Sean Hefty wrote: > > I just noticed that once i apply the patch, the last + lines (that is > > pthread_mutex_lock, while loop doing pthread_cond_wait and then > > pthread_mutex_unlock) become part of rdma_leave_multicast which seems to > > me strictly buggy as no one is going to wake up this code. > The leave must wait until all events have been reported on the multicast group. > There can be more than one event on a group if an error occurs. See > ucma_complete_mc_event() for where the condition is signaled. OK, got you, however printing resp->events_reported after the write call returns shows complete junk most of the times where as you explain here it should be 1 unless some error occurs. Looking on the ucma kernel code under http://www2.openfabrics.org/git/?p=~shefty/rdma-dev.git;a=blob;f=drivers/infiniband/core/ucma.c I think to see the bug: there is no copy_to_user() before ucma_leave_multicast() returns and hence the response structure at rdma_leave_multicast of librdmacm is not set to anything, what do you say? Or. From rdreier at cisco.com Mon Jan 8 12:03:43 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 08 Jan 2007 12:03:43 -0800 Subject: [openib-general] Infiniband Network Library In-Reply-To: <45A28CB8.3070902@dbresearch.net> (Sean Hubbell's message of "Mon, 08 Jan 2007 12:26:00 -0600") References: <45A28CB8.3070902@dbresearch.net> Message-ID: what is a network library? 
From or.gerlitz at gmail.com Mon Jan 8 12:04:22 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 8 Jan 2007 22:04:22 +0200 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <15ddcffd0701081202v4ef23d82n10237ed230d9fdfa@mail.gmail.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> <459D1A84.9060609@voltaire.com> <45A2974F.7060801@ichips.intel.com> <15ddcffd0701081202v4ef23d82n10237ed230d9fdfa@mail.gmail.com> Message-ID: <15ddcffd0701081204g2780dc84g8c43df42cf82e18c@mail.gmail.com> On 1/8/07, Or Gerlitz wrote: > explain here it should be 1 unless some error occurs. Looking on the > ucma kernel code under http://www2.openfabrics.org/git/?p=~shefty/rdma-dev.git;a=blob;f=drivers/infiniband/core/ucma.c I have looked in the multicast-sa_cache branch. Or. From rdreier at cisco.com Mon Jan 8 12:10:31 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 08 Jan 2007 12:10:31 -0800 Subject: [openib-general] [PATCH untested] IB/mthca: avoid wasting MTT enties on memfree In-Reply-To: <20070108192407.GJ17239@mellanox.co.il> (Michael S. Tsirkin's message of "Mon, 8 Jan 2007 21:24:07 +0200") References: <20070108192407.GJ17239@mellanox.co.il> Message-ID: Have you tested this? I think it increases the amount of memory needed for the buddy allocator bitmaps by a factor of 8, and right now those bitmaps are kmalloc()ed. So I'd be afraid that it would make it impossible to load the module. Anyway this is definitely 2.6.21 material given that we're already at 2.6.20-rc4, and this change has a decent chance of introducing regressions. - R.
From or.gerlitz at gmail.com Mon Jan 8 12:13:25 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 8 Jan 2007 22:13:25 +0200 Subject: [openib-general] [PATCH] 2.6.20 rdma_ucm: fix reporting events with invalid user context In-Reply-To: <000f01c73109$0183ccb0$8698070a@amr.corp.intel.com> References: <1167951597.3071.74.camel@stevo-desktop> <000f01c73109$0183ccb0$8698070a@amr.corp.intel.com> Message-ID: <15ddcffd0701081213n74fb91eak57a74edf9c5e7d7c@mail.gmail.com> On 1/5/07, Sean Hefty wrote: > There's a problem with how rdma cm events are reported to userspace that can > lead to application crashes. > > When a new connection request arrives, a context for the connection is allocated > in the kernel. The connection event is then reported to userspace. The > userspace library retrieves the event and allocates its own context for the > connection. The userspace context is associated with the kernel's context when > accepting. This allows the kernel to give userspace context with other events. > A problem occurs if a second event for the same connection occurs before the > user has had a chance to call accept. The userspace context has not yet been > set, which causes the librdmacm to crash. (This has been seen when the app > takes too long to call accept, resulting in the remote side timing out and > rejecting the connection.) Assuming that events are reported in order (correct?) then the user space consumer was calling rdma_get_cm_event, got a connection request and before calling rdma_accept they have called rdma_get_cm_event again and got connection reject ? Or the thing is that there are two threads in user space, one calling rdma_get_cm_event and on some events acting by itself where on other events causing another thread to act, so it got the conn request and moved it to the other thread and then got the conn reject and tried to act on it before the other thread called rdma_accept ? Or. 
From mshefty at ichips.intel.com Mon Jan 8 12:16:25 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 08 Jan 2007 12:16:25 -0800 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <15ddcffd0701081202v4ef23d82n10237ed230d9fdfa@mail.gmail.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> <459D1A84.9060609@voltaire.com> <45A2974F.7060801@ichips.intel.com> <15ddcffd0701081202v4ef23d82n10237ed230d9fdfa@mail.gmail.com> Message-ID: <45A2A699.6080603@ichips.intel.com> > I think to see the bug: there is no copy_to_user() before > ucma_leave_multicast() returns and hence the response structure at > rdma_leave_multicast of librdmacm is not set to anything, what do you say? This looks like problem. I wonder how this is working for me at all... maybe the response structure is being initialized to 0, but this doesn't match up with your debug output... I will look into this more, but the copy_to_user definitely seems to be missing. - Sean From swise at opengridcomputing.com Mon Jan 8 12:22:09 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 14:22:09 -0600 Subject: [openib-general] [PATCH RFC 0/2] ofed_1_2 - Chelsio T3 RDMA Support In-Reply-To: <20070108195745.GB25444@mellanox.co.il> References: <20070108191346.27253.16936.stgit@dell3.ogc.int> <20070108195745.GB25444@mellanox.co.il> Message-ID: <1168287729.22705.1.camel@stevo-desktop> On Mon, 2007-01-08 at 21:57 +0200, Michael S. Tsirkin wrote: > > Core changes are required for the T3 driver. This includes the addition > > of a udata pointer parameter to the ib_req_notify_cq() provider method. > > This is still being discussed on the openib-general list and I'll update > > it accordingly once we finalize the solution. > > So what I plan to do is, review the patches are in proper format, > but delay applying until this API issue is closed. OK? > Right. Don't apply these at all. 
I just wanted folks to look at what I did and make sure it looks ok. I'll repost a final patch set after we resolve this issue. Thanks, Steve. From mshefty at ichips.intel.com Mon Jan 8 12:30:34 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 08 Jan 2007 12:30:34 -0800 Subject: [openib-general] [PATCH] 2.6.20 rdma_ucm: fix reporting events with invalid user context In-Reply-To: <15ddcffd0701081213n74fb91eak57a74edf9c5e7d7c@mail.gmail.com> References: <1167951597.3071.74.camel@stevo-desktop> <000f01c73109$0183ccb0$8698070a@amr.corp.intel.com> <15ddcffd0701081213n74fb91eak57a74edf9c5e7d7c@mail.gmail.com> Message-ID: <45A2A9EA.7020209@ichips.intel.com> > Assuming that events are reported in order (correct?) then the user > space consumer was calling rdma_get_cm_event, got a connection request > and before calling rdma_accept they have called rdma_get_cm_event > again and got connection reject ? The events are reported in order in the kernel, but the same guarantee cannot be made for userspace if an application is processing events using multiple threads. However, in the case where the bug occurred, a single thread was polling for events. > Or the thing is that there are two threads in user space, one calling > rdma_get_cm_event and on some events acting by itself where on other > events causing another thread to act, so it got the conn request and > moved it to the other thread and then got the conn reject and tried to > act on it before the other thread called rdma_accept ? This was what was happening. 
- Sean From greg.lindahl at qlogic.com Mon Jan 8 12:35:10 2007 From: greg.lindahl at qlogic.com (Greg Lindahl) Date: Mon, 8 Jan 2007 12:35:10 -0800 Subject: [openib-general] Infiniband Network Library In-Reply-To: <45A28CB8.3070902@dbresearch.net> References: <45A28CB8.3070902@dbresearch.net> Message-ID: <20070108203510.GA5531@dhcp-2-231.internal.keyresearch.com> On Mon, Jan 08, 2007 at 12:26:00PM -0600, Sean Hubbell wrote: > I have had to make significant > work arounds for our current, third party network API that we purchased > and continue to watch if fall down and still not take advantage on the > bandwidth that I need. With that said, does anyone on this list have a > recommendation for an InfiniBand capable network library? To amplify Roland's question: What does this library do that the existing ways of using Infiniband doesn't? Sockets, verbs, MPI... -- greg From mst at mellanox.co.il Mon Jan 8 12:37:25 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 22:37:25 +0200 Subject: [openib-general] [PATCH untested] IB/mthca: avoid wasting MTT enties on memfree In-Reply-To: References: Message-ID: <20070108203725.GC25444@mellanox.co.il> Subject: Re: [PATCH untested] IB/mthca: avoid wasting MTT enties on memfree > Have you tested this? No, didn't I make this clear? Sorry. I'm not in the lab at the moment, and my laptop does not have infiniband. That's why it says untested in the subject :). > I think it increases the amount of memory > needed for the buddy allocator bitmaps by a factor of 8, and right now > those bitmaps are kmalloc()ed. So I'd be aftraid that it would make > it impossible to load the module. Hmph. We'll need to make these 2-level then? > Anyway this is definitely 2.6.21 material given that we're already at > 2.6.20-rc4, and this change is has a decent chance of introducing regressions. OK. 
-- MST From shubbell at dbresearch.net Mon Jan 8 12:29:30 2007 From: shubbell at dbresearch.net (Sean Hubbell) Date: Mon, 08 Jan 2007 14:29:30 -0600 Subject: [openib-general] Infiniband Network Library Message-ID: <45A2A9AA.9040308@dbresearch.net> -------------- next part -------------- An embedded message was scrubbed... From: "Sean Hubbell" Subject: Re: [openib-general] Infiniband Network Library Date: Mon, 08 Jan 2007 14:28:27 -0600 Size: 818 URL: From shubbell at dbresearch.net Mon Jan 8 12:39:37 2007 From: shubbell at dbresearch.net (Sean Hubbell) Date: Mon, 08 Jan 2007 14:39:37 -0600 Subject: [openib-general] Infiniband Network Library In-Reply-To: <20070108203510.GA5531@dhcp-2-231.internal.keyresearch.com> References: <45A28CB8.3070902@dbresearch.net> <20070108203510.GA5531@dhcp-2-231.internal.keyresearch.com> Message-ID: <45A2AC09.9090006@dbresearch.net> This would just be a higher level of abstraction... For example code to send 1 msg would look like Connect, Send and Disconnect... Sean Greg Lindahl wrote: > On Mon, Jan 08, 2007 at 12:26:00PM -0600, Sean Hubbell wrote: > > >> I have had to make significant >> work arounds for our current, third party network API that we purchased >> and continue to watch if fall down and still not take advantage on the >> bandwidth that I need. With that said, does anyone on this list have a >> recommendation for an InfiniBand capable network library? >> > > To amplify Roland's question: What does this library do that the > existing ways of using Infiniband doesn't? Sockets, verbs, MPI... 
> > -- greg > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > From wombat2 at us.ibm.com Mon Jan 8 12:49:14 2007 From: wombat2 at us.ibm.com (Bernard King-Smith) Date: Mon, 8 Jan 2007 15:49:14 -0500 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: Message-ID: ----- Message from "Michael S. Tsirkin" on Mon, > 8 Jan 2007 18:57:14 +0200 ----- > > To: > > openib-general at openib.org, "Roland Dreier" > > Subject: > > [openib-general] [PATCHv4] IPoIB CM Experimental support > > The following patch adds experimental support for IPoIB connected mode. > The idea is to increase performance by increasing the MTU > from the maximum of 2K (theoretically 4K) supported by IPoIB on top of UD. > With this code, I'm able to get 800MByte/sec or more with netperf > without options on a Mellanox 4x back-to-back DDR system. > > Signed-off-by: Michael S. Tsirkin > > --- > > Sorry about the churn, just fixed a bug in this code. [SNIP] > e. Some notes on code > 1. SRQ is used for scalability to large cluster sizes I still want to support non-SRQ adapters with this code. Not all systems have 100's or 1000's of endpoints and those smaller systems will benefit from IPoIB-CM. The larger systems tend to have larger memory per node so can support the additional memory requirements. At the November meeting one of the main themes from application developers and customers is we must have a well performing TCP/IP story across as much of the IB space as possible. If only one or two of the IB adapters perform well, then we haven't addressed the customer needs. Those adapters that can't support RC is one issue, but for those who do without SRQ, smaller configurations should be able to use IPoIB-CM. > 2. Only RC connections are used (UC does not support SRQ now) > 3. 
Retry count is set to 0 since spec draft warns against retries > 4. Each connection is used for data transfers in only 1 direction, > so each connection is either active(TX) or passive (RX). > 2 sides that want to communicate create 2 connections. > 5. Each active (TX) connection has a separate CQ for send completions - > this keeps the code simple without CQ resize and other tricks > Bernie King-Smith IBM Corporation Server Group Cluster System Performance wombat2 at us.ibm.com (845)433-8483 Tie. 293-8483 or wombat2 on NOTES "We are not responsible for the world we are born into, only for the world we leave when we die. So we have to accept what has gone before us and work to change the only thing we can, -- The Future." William Shatner -------------- next part -------------- An HTML attachment was scrubbed... URL: From or.gerlitz at gmail.com Mon Jan 8 13:00:36 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 8 Jan 2007 23:00:36 +0200 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <45A2A699.6080603@ichips.intel.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> <459D1A84.9060609@voltaire.com> <45A2974F.7060801@ichips.intel.com> <15ddcffd0701081202v4ef23d82n10237ed230d9fdfa@mail.gmail.com> <45A2A699.6080603@ichips.intel.com> Message-ID: <15ddcffd0701081300q49a59a8cna3b03863d823d12d@mail.gmail.com> On 1/8/07, Sean Hefty wrote: > > I think to see the bug: there is no copy_to_user() before > > ucma_leave_multicast() returns and hence the response structure at > > rdma_leave_multicast of librdmacm is not set to anything, what do you say? > > This looks like problem. I wonder how this is working for me at all... I don't think mckey calls rdma_leave_multicast so maybe this is why you did not notice the problem? Or. From mst at mellanox.co.il Mon Jan 8 13:00:03 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 8 Jan 2007 23:00:03 +0200 Subject: [openib-general] Infiniband Network Library In-Reply-To: <45A2A9AA.9040308@dbresearch.net> References: <45A2A9AA.9040308@dbresearch.net> Message-ID: <20070108210003.GE25444@mellanox.co.il> > >what is a network library? > > > > > > openpgm, openib are some but but I am looking for one that is a few > levels higher or abstracted. I am looking for around 3 or 4 calls to > send a message, something like connection, disconnect send and receive. PGM is transport level, isn't it? So a few levels higher would be the Application layer in the OSI model ... Are you looking for something that works with e.g. SQL queries? -- MST From or.gerlitz at gmail.com Mon Jan 8 12:59:48 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 8 Jan 2007 22:59:48 +0200 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <45A2974F.7060801@ichips.intel.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> <459D1A84.9060609@voltaire.com> <45A2974F.7060801@ichips.intel.com> Message-ID: <15ddcffd0701081259q4f7a0688s85fc05fa92b2ee35@mail.gmail.com> On 1/8/07, Sean Hefty wrote: > > I just noticed that once i apply the patch, the last + lines (that is > > pthread_mutex_lock, while loop doing pthread_cond_wait and then > > pthread_mutex_unlock) become part of rdma_leave_multicast which seems to > > me strictly buggy as no one is going to wake up this code. > > The leave must wait until all events have been reported on the multicast group. > There can be more than one event on a group if an error occurs. See > ucma_complete_mc_event() for where the condition is signaled. let me see i follow your design: mc->events_completed is incremented in the library when the consumer calls rdma_ack_cm_event() and resp->events_reported is incremeted in the kernel called when the user calls rdma_get_cm_event() ? 
If this is indeed the case, the design seems fine to me, else it might be problematic e.g. if it does not support the case where there was a multicast error but the user did not consume the associated event and now wants to call rdma_leave_multicast(). Or. From caitlinb at broadcom.com Mon Jan 8 13:04:20 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Mon, 8 Jan 2007 13:04:20 -0800 Subject: [openib-general] [PATCH] rdma_cm iWARP connection setup timeouts reported as rejects. In-Reply-To: <1168268120.19412.8.camel@stevo-desktop> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1EE6A73@NT-SJCA-0751.brcm.ad.broadcom.com> > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Steve Wise > Sent: Monday, January 08, 2007 6:55 AM > To: Mirko Benz > Cc: openib-general at openib.org > Subject: Re: [openib-general] [PATCH] rdma_cm iWARP > connection setup timeouts reported as rejects. > > On Mon, 2007-01-08 at 12:13 +0100, Mirko Benz wrote: > > Hi, > > > > What could be the reasons for these timeouts to occur? > > One way: If the host is not reachable but the next hop > neighbour is, then the connection attempt will timeout. > > Another way is if, for some reason, the MPA negotiation > doesn't complete in a timely manner. For instance, if the > passive side never rdma_accept()s the connection, then the > active side should eventually timeout the attempt and return > a timeout error to the consumer. > > One very important additional example of "MPA negotiation failure" is the case where only one end of the TCP connection was anticipating the usage of MPA. For example, if an ssh client mistakenly tried to connect to an iWARP port, both sides would just sit there waiting for the other one to say something. An eventual timeout is the only way out of this. > > How should an application handle this? > > > > Applications should handle connection timeouts however they want.
> Usually they just report it to the user. > > One way to look at it is that "host unreachable" is an *optimized* error report that deals with certain conditions where the unreachability can be quickly determined. In the more general case, the fact that a given host/service is currently unavailable is only known by its failure to answer. In most cases corrective action (either get the remote service restarted, make the path to it work, or select another service) is up to the user. From or.gerlitz at gmail.com Mon Jan 8 13:05:30 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 8 Jan 2007 23:05:30 +0200 Subject: [openib-general] Infiniband Network Library In-Reply-To: <45A2AC09.9090006@dbresearch.net> References: <45A28CB8.3070902@dbresearch.net> <20070108203510.GA5531@dhcp-2-231.internal.keyresearch.com> <45A2AC09.9090006@dbresearch.net> Message-ID: <15ddcffd0701081305o19fce7bes8cfd2d8597b89f72@mail.gmail.com> On 1/8/07, Sean Hubbell wrote: > This would just be a higher level of abstraction... For example code to > send 1 msg would look like Connect, Send and Disconnect... From your email I understand that using BSD sockets "over" IB ULPs such as IPoIB UD, IPoIB CM or SDP is not enough for the performance enhancement you want to get with IB. Can you share what you are hunting for, i.e. which of the following measures: BW / LAT / PPS / CPU % and for which msg size huge/big/med/small Or.
From mshefty at ichips.intel.com Mon Jan 8 13:22:04 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 08 Jan 2007 13:22:04 -0800 Subject: [openib-general] [PATCH] librdmacm: updated librdmacm to work with proposed 2.6.20 kernel CMA In-Reply-To: <15ddcffd0701081300q49a59a8cna3b03863d823d12d@mail.gmail.com> References: <000001c6f877$23298c80$52fc070a@amr.corp.intel.com> <459D1A84.9060609@voltaire.com> <45A2974F.7060801@ichips.intel.com> <15ddcffd0701081202v4ef23d82n10237ed230d9fdfa@mail.gmail.com> <45A2A699.6080603@ichips.intel.com> <15ddcffd0701081300q49a59a8cna3b03863d823d12d@mail.gmail.com> Message-ID: <45A2B5FC.5000201@ichips.intel.com> > I don't think mckey calls rdma_leave_multicast so maybe this is why > you did not notice the problem? Yep - this was the case. I've updated mckey and created a patch for the kernel, which I'll push out through my rdma-dev tree shortly. Thanks for the report. - Sean From mst at mellanox.co.il Mon Jan 8 13:21:41 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 23:21:41 +0200 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: References: Message-ID: <20070108212141.GF25444@mellanox.co.il> > > e. Some notes on code > > 1. SRQ is used for scalability to large cluster sizes > > I still want to support non-SRQ adapters with this code. IPoIB with and without patch performs the same on non-SRQ CAs. So let's see this code land upstream first, then we can look at uglifying it to wring a bit more performance from ehca, too, on some topologies. -- MST From rdreier at cisco.com Mon Jan 8 13:40:52 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 08 Jan 2007 13:40:52 -0800 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20061231190942.GB32485@mellanox.co.il> (Michael S. Tsirkin's message of "Sun, 31 Dec 2006 21:09:42 +0200") References: <20061231190942.GB32485@mellanox.co.il> Message-ID: This change makes sense to me. 
Does anyone object to queueing this for 2.6.21? - R. From swise at opengridcomputing.com Mon Jan 8 13:43:42 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 08 Jan 2007 15:43:42 -0600 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: References: <20061231190942.GB32485@mellanox.co.il> Message-ID: <1168292622.22705.14.camel@stevo-desktop> Ok with me. On Mon, 2007-01-08 at 13:40 -0800, Roland Dreier wrote: > This change makes sense to me. Does anyone object to queueing this > for 2.6.21? > > - R. > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mst at mellanox.co.il Mon Jan 8 13:49:14 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 8 Jan 2007 23:49:14 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: References: Message-ID: <20070108214914.GG25444@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH RFC] return qp pointer as part of ib_wc > > This change makes sense to me. Does anyone object to queueing this > for 2.6.21? And for-mm, pls: last version of IPoIB CM patch needs this. -- MST From sean.hefty at intel.com Mon Jan 8 16:14:54 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 8 Jan 2007 16:14:54 -0800 Subject: [openib-general] [RFC] userspace IB SA support Message-ID: <000001c73383$30ae7360$8698070a@amr.corp.intel.com> Today, userspace support for SA related operations is limited to the libibmad interface, which supports sending and receiving MADs only. I've been assigned with the task of exposing multicast and informinfo support to userspace. Specifically, the following functionality is needed: 1. Join a multicast group - needs to use the ib_sa multicast capability. 2. Receive notification of multicast errors. 3. 
Leave a multicast group. 4. Register to receive SA events - needs to use the ib_sa notice capability. 5. Receive notification of events. 6. Deregister from SA events. Are there any preferences for how this is added? - Sean From rdreier at cisco.com Mon Jan 8 20:58:16 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 08 Jan 2007 20:58:16 -0800 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: <002301c73355$c220e180$8698070a@amr.corp.intel.com> (Sean Hefty's message of "Mon, 8 Jan 2007 10:49:42 -0800") References: <002301c73355$c220e180$8698070a@amr.corp.intel.com> Message-ID: This all looks rather fishy: > +/* > + * Limit CM msg timeouts to something reasonable. > + * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min. > + */ > +#define IB_CM_MAX_TIMEOUT 21 OK... (although 8 seconds seems a little short -- it seems a somewhat longer timeout could be legitimate on a very busy fabric across a WAN or something like that) but then... > + timeout = min(IB_CM_MAX_TIMEOUT, > + cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + > + cm_convert_to_ms(cm_id_priv->av.packet_life_time)); should the IB_CM_MAX_TIMEOUT be inside a cm_convert_to_ms() too? and similarly... > - cm_id_priv->timeout_ms = param->timeout_ms; > + cm_id_priv->timeout_ms = min(IB_CM_MAX_TIMEOUT, param->timeout_ms); is timeout_ms misnamed, or did we just limit all timeouts to 21 msecs? ...and other places in the patch seem to have similar problems. Also, I would like to see warning messages like ib_cm: Possibly bogus timeout of xx (yyyyyy msecs) in REP from GID zzzz printed in the kernel log so people realize they have broken SRP targets or whatever. - R. From rdreier at cisco.com Mon Jan 8 21:01:02 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 08 Jan 2007 21:01:02 -0800 Subject: [openib-general] 2.6.20: outstanding patches and issues In-Reply-To: <20070108064059.GB1697@mellanox.co.il> (Michael S. 
Tsirkin's message of "Mon, 8 Jan 2007 08:40:59 +0200") References: <20070102200523.GA10451@mellanox.co.il> <20070108064059.GB1697@mellanox.co.il> Message-ID: fix_query_qp_in_reset.patch will merge ib_verbs_h_missing_kref.patch does this actually fix any compilation problems? if not I think it's better for 2.6.21. mthca_0_fmr_page_fix.patch already merged in my tree pending a pull, right? Patch 5 of 5 is at v3, hope it's all good now. you only listed 4... mthca_1_merge_mr_fmr_on_64bit.patch mthca_2_fast_registration.patch mthca_3_alloc_consistent.patch mthca_4_dma_align_reserved_mtts.patch still need review but I don't think they're appropriate for 2.6.20 given how much they change some pretty key memory registration stuff. mthca_wrid_swap.patch - very small benefit, but very small patch either Will merge for 2.6.21 From krkumar2 at in.ibm.com Mon Jan 8 21:16:12 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Tue, 9 Jan 2007 10:46:12 +0530 Subject: [openib-general] [PATCH/RFC] libibverbs: Improve driver loading In-Reply-To: Message-ID: Hi Roland/others, Sorry to be a bit off-topic, but ... Is this a good time to submit the Transport Neutral Verbs code ? Roland had earlier suggested to do this after all major changes were finished and before libibverbs1.1 is released. The way the code is designed is to have the existing ibv_*() routines which are exported, but these are changed to simply call similarly named rdmav_*() routines (also exported) which implement the original code. The intention is to remove or deprecate the use of ibv_*() routines by the next release (1.2?). Thanks, - KK openib-general-bounces at openib.org wrote on 01/05/2007 07:49:39 PM: > > BTW, the question still stands. If I start trying to play with > > static linking issues, I'd like to do this based on this patch, > > not what's in master currently. > > Yes, I had hoped to push it out sooner but I wanted to fix all the > driver libraries first. 
I didn't get a chance to finish that up > before my vacatation, but I will do that soon and post patches for > driver libraries when I change libibverbs. > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From jgunthorpe at obsidianresearch.com Mon Jan 8 21:22:19 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Mon, 8 Jan 2007 22:22:19 -0700 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1168015486.4564.156860.camel@hal.voltaire.com> References: <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> <1167912258.4564.65120.camel@hal.voltaire.com> <20070104184248.GC3267@obsidianresearch.com> <1167948831.4564.97385.camel@hal.voltaire.com> <20070104235854.GA12386@obsidianresearch.com> <1168015486.4564.156860.camel@hal.voltaire.com> Message-ID: <20070109052219.GA16736@obsidianresearch.com> On Fri, Jan 05, 2007 at 11:44:49AM -0500, Hal Rosenstock wrote: > > However, it might be smart to have opensm consider the routers to be a > > send-only member for every MLID.. > > Do you mean non-member rather than send-only member ? Routers need to > receive as well as send, right ? Or are you worried about some other > issue here ? I would like it if routers did not have to worry about joins in order to send a multicast packet. There really isn't a good way to know how long to keep a join active for.. Having them be send-only members of every relevent group skips that problem and decreases the latency for first-packet multicast forwarding. > Also, I'm still not sure about a couple of aspects of "every MLID": > 1. 
Wouldn't the router only want to be full member of link local scoped > MGIDs (that it was interested in locally) ? Are you saying any local > scoped MGIDs not of interest would just get dropped anyhow ? If that is > the point here, that would work but isn't there a performance impact of > doing so ? > 2. Similarly for any other (non local scope) MGRPs which do not match > across any router ports, isn't there a performance impact of receiving > and then having to drop/filter these packets ? I think there is a balance to be had here, on one side if you have alot of multicast groups (ie Ipv6 SNMs) then requiring alot of extra work to keep the SM informed about what is going on is more harmful than having the router get more multicast traffic than it optimally could. A router must already keep track of what multicast groups are forwarded to what ports, so it is virtually free for it to also do filtering. [Aside: The more I think about scaling a router up the more it seems to me that the router and SM need alot of intercommunication. The most efficient thing would be if the router could maintain a replica of the entire SM database for paths and multicast. The router would then always be ready to handle any incoming packet, just like an IB switch.] > Right, the router is some sort of member on the MGRPs "of interest". I > think you are trying to make that list of MGRPs "of interest" simpler > and utilize filtering where not needed (as I mentioned above), but I may Yes. Simpler, I hope :> > > A onlink line routing table just terminates the routing > > lookup. 'unreachable' is another termination. A via line changes the > > next hop GID and creates more lookups until an onlink is reached. > > So is the specification of all multicast as onlink a short term thing > then ? > > Also, with using onlink for all multicast, is there some forwarding > determination made somewhere in the router stack ? I think it is useful to keep the router stack and the SM stack seperate. 
Especailly when it comes to multicast. The router will have a multicast routing table that works somewhat differently than the unicast table. This table would indicate which ports in the router are part of each group. The SM should only need a MGID to MLID path translation. This is similar to the distinction between a host routing table and a router routing table in IP land - where hosts generally do not have multicast routing information. > Yes, I have no idea how IPv6 will work with large inter subnet clusters > either. We had a thread on this a while ago and I think it died out at > that point. To state the obvious, I think some changes need to be made > for IPv6 to work well with current IB hardware or perhaps some > configuration restrictions ? Yes, I agree. I wonder if the IPoIB RFC authors considerd the negative impact of IPv6 SNM when they designed the specification? It would be much better if 1 IP subnet = 1 IB subnet. > > Yes, but in this case I don't think multicast routing can be pushed to > > the host. It is either the router or some combination of the router > > and the SM. > > I'm not quite following you on this yet. Why/how is host multicast > routing any different (than unicast) ? Well, I can't see how to make this situation sane if the host is in control and the routers/sm are fairly passive: - Two subnets, each with nodes joined to multicast group M - Two routers connecting the two subnets (multipath) - Each host has an inter-subnet multicast spanning tree and knows which router to use for M - Host sends a packet for M, what LRH does it use? Jason From mst at mellanox.co.il Mon Jan 8 21:28:43 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 9 Jan 2007 07:28:43 +0200 Subject: [openib-general] 2.6.20: outstanding patches and issues In-Reply-To: References: Message-ID: <20070109052843.GB19743@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: 2.6.20: outstanding patches and issues > > fix_query_qp_in_reset.patch > > will merge > > ib_verbs_h_missing_kref.patch > > does this actually fix any compilation problems? if not I think it's > better for 2.6.21. 2.6.21 then. > mthca_0_fmr_page_fix.patch > > already merged in my tree pending a pull, right? Yes. > Patch 5 of 5 is at v3, hope it's all good now. > > you only listed 4... > > mthca_1_merge_mr_fmr_on_64bit.patch > mthca_2_fast_registration.patch > mthca_3_alloc_consistent.patch > mthca_4_dma_align_reserved_mtts.patch Because I counted mthca_0_fmr_page_fix.patch > still need review but I don't think they're appropriate for 2.6.20 > given how much they change some pretty key memory registration stuff. Hmph. I was afraid you'd say this. The only reason I'm surprised is that these do fix FMR on non-cache-coherent architectures - it's a bug fix, not just a feature series. And you did say (patches 1-2 are what was posted then): http://article.gmane.org/gmane.linux.drivers.openib/34184/match=patchv2+mthca+speed+memory+registration+filling+mtts+directly "I think this still can go into 2.6.20 after -rc1 if we can get this fixed up."
-- MST From halr at voltaire.com Mon Jan 8 21:31:00 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 09 Jan 2007 00:31:00 -0500 Subject: [openib-general] [PATCH][MINOR] OpenSM/osm_ucast_updn.c: Handle failed memory allocation Message-ID: <1168320659.14647.20109.camel@hal.voltaire.com> OpenSM/osm_ucast_updn.c: Handle failed memory allocation Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_ucast_updn.c b/osm/opensm/osm_ucast_updn.c index 7fa119e..3d96478 100644 --- a/osm/opensm/osm_ucast_updn.c +++ b/osm/opensm/osm_ucast_updn.c @@ -628,6 +628,11 @@ updn_init( if (strlen(line) > 1) { p_tmp = malloc(sizeof(uint64_t)); + if (!p_tmp) + { + status = IB_ERROR; + goto Exit; + } *p_tmp = strtoull(line, NULL, 16); cl_list_insert_tail(p_updn->p_root_nodes, p_tmp); } From eitan at sw053.yok.mtl.com Mon Jan 8 21:31:24 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Tue, 9 Jan 2007 07:31:24 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-09:normal completion Message-ID: <200701090531.l095VOZG021318@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_8_12:41:44_2007 064f5e ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 
2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From halr at voltaire.com Mon Jan 8 22:17:47 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 09 Jan 2007 01:17:47 -0500 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <20070109052219.GA16736@obsidianresearch.com> References: <20070102230027.GB13404@obsidianresearch.com> <1167781157.4596.106104.camel@hal.voltaire.com> <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> <1167912258.4564.65120.camel@hal.voltaire.com> <20070104184248.GC3267@obsidianresearch.com> <1167948831.4564.97385.camel@hal.voltaire.com> <20070104235854.GA12386@obsidianresearch.com> <1168015486.4564.156860.camel@hal.voltaire.com> <20070109052219.GA16736@obsidianresearch.com> Message-ID: <1168323466.14647.22961.camel@hal.voltaire.com> On Tue, 2007-01-09 at 00:22, Jason Gunthorpe wrote: > On Fri, Jan 05, 2007 at 11:44:49AM -0500, Hal Rosenstock wrote: > > > > However, it might be smart to have opensm consider the routers to be a > > > send-only member for every MLID.. > > > > Do you mean non-member rather than send-only member ? Routers need to > > receive as well as send, right ? Or are you worried about some other > > issue here ? > > I would like it if routers did not have to worry about joins in order > to send a multicast packet. Send-only members are not supposed to receive. How do they receive then ? Send-only members do not receive. Don't routers need to receive multicast (as well as send) ? > There really isn't a good way to know how long to keep a join active for.. What about MGID creation/deletion events ? > Having them be send-only members of every relevent group How does the router know "every relevant" group ? > skips that problem and decreases the latency for > first-packet multicast forwarding. 
It could be a benefit on first packet MC forwarding on a new group but it depends on when the first packet is received relative to the group detected and joined. > > Also, I'm still not sure about a couple of aspects of "every MLID": > > 1. Wouldn't the router only want to be full member of link local scoped > > MGIDs (that it was interested in locally) ? Are you saying any local > > scoped MGIDs not of interest would just get dropped anyhow ? If that is > > the point here, that would work but isn't there a performance impact of > > doing so ? > > 2. Similarly for any other (non local scope) MGRPs which do not match > > across any router ports, isn't there a performance impact of receiving > > and then having to drop/filter these packets ? > > I think there is a balance to be had here, on one side if you have > alot of multicast groups (ie Ipv6 SNMs) then requiring alot of > extra work to keep the SM informed about what is going on is more > harmful than having the router get more multicast traffic than it > optimally could. > > A router must already keep track of what multicast groups are > forwarded to what ports, so it is virtually free for it to also do > filtering. > > [Aside: The more I think about scaling a router up the more it seems > to me that the router and SM need alot of intercommunication. The > most efficient thing would be if the router could maintain a replica > of the entire SM database for paths and multicast. > The router would then always be ready to handle any incoming > packet, just like an IB switch.] There is no IBA standard for replicating the SM or SA database. This is a similar issue which multiple SMs in the same subnet might have depending on the "approach" taken for this. > > Right, the router is some sort of member on the MGRPs "of interest". I > > think you are trying to make that list of MGRPs "of interest" simpler > > and utilize filtering where not needed (as I mentioned above), but I may > > Yes. 
> > Simpler, I hope :> > > > > A onlink line routing table just terminates the routing > > > lookup. 'unreachable' is another termination. A via line changes the > > > next hop GID and creates more lookups until an onlink is reached. > > > > So is the specification of all multicast as onlink a short term thing > > then ? > > > > Also, with using onlink for all multicast, is there some forwarding > > determination made somewhere in the router stack ? > > I think it is useful to keep the router stack and the SM stack > seperate. Yes, but on the other hand, you just said you wanted a partial copy of the SM database for the router... > Especailly when it comes to multicast. The router will have > a multicast routing table that works somewhat differently than the > unicast table. This table would indicate which ports in the router are > part of each group. The SM should only need a MGID to MLID path > translation. Doesn't it already have this ? > This is similar to the distinction between a host routing > table and a router routing table in IP land - where hosts generally do > not have multicast routing information. I don't think the SM needs the multicast routing (intersubnet) information either. > > Yes, I have no idea how IPv6 will work with large inter subnet clusters > > either. We had a thread on this a while ago and I think it died out at > > that point. To state the obvious, I think some changes need to be made > > for IPv6 to work well with current IB hardware or perhaps some > > configuration restrictions ? > > Yes, I agree. I wonder if the IPoIB RFC authors considerd the negative > impact of IPv6 SNM when they designed the specification? Not sure. > It would be much better if 1 IP subnet = 1 IB subnet. Yes, it would be better in terms of this but it was an architectural goal to allow flexibility in the IP <-> IB subnet mappings. There was a lot of discussion about this and there were earlier schemes which restricted to that mapping. 
> > > Yes, but in this case I don't think multicast routing can be pushed to > > > the host. It is either the router or some combination of the router > > > and the SM. > > > > I'm not quite following you on this yet. Why/how is host multicast > > routing any different (than unicast) ? > > Well, I can't see how to make this situation sane if the host is in > control and the routers/sm are fairly passive: > - Two subnets, each with nodes joined to multicast group M > - Two routers connecting the two subnets (multipath) > - Each host has an inter-subnet multicast spanning tree and knows > which router to use for M > - Host sends a packet for M, what LRH does it use? I would think that all the host does is use the MLID for the group obtained from it's join (or query). The IB routers need to work out which one is the active forwarder for each group (via their multicast routing protocol (DVMRP, MOSPF, PIM, ...). I have no clue how multipath could work for multicast. -- Hal > Jason From ogerlitz at voltaire.com Mon Jan 8 23:17:54 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 09 Jan 2007 09:17:54 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: References: <20061231190942.GB32485@mellanox.co.il> Message-ID: <45A341A2.6000603@voltaire.com> Roland Dreier wrote: > This change makes sense to me. Does anyone object to queueing this > for 2.6.21? Indeed, it makes much sense, do you any idea what would it take to expose this capability also by libibverbs? Or. 
From dotanb at dev.mellanox.co.il Mon Jan 8 23:26:09 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 09 Jan 2007 09:26:09 +0200 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <000001c73383$30ae7360$8698070a@amr.corp.intel.com> References: <000001c73383$30ae7360$8698070a@amr.corp.intel.com> Message-ID: <45A34391.8040706@dev.mellanox.co.il> Sean Hefty wrote: > Today, userspace support for SA related operations is limited to the libibmad > interface, which supports sending and receiving MADs only. I've been assigned > with the task of exposing multicast and informinfo support to userspace. > Specifically, the following functionality is needed: > > 1. Join a multicast group - needs to use the ib_sa multicast capability. > 2. Receive notification of multicast errors. > 3. Leave a multicast group. > 4. Register to receive SA events - needs to use the ib_sa notice capability. > 5. Receive notification of events. > 6. Deregister from SA events. > > Are there any preferences for how this is added? > What about path query or any SA query from the user level ? Thanks Dotan From dotanb at dev.mellanox.co.il Mon Jan 8 23:40:50 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 09 Jan 2007 09:40:50 +0200 Subject: [openib-general] second version of the libibverbs man pages In-Reply-To: <1376.85.65.223.184.1167726918.squirrel@dev.mellanox.co.il> References: <1376.85.65.223.184.1167726918.squirrel@dev.mellanox.co.il> Message-ID: <45A34702.9090604@dev.mellanox.co.il> dotanb at dev.mellanox.co.il wrote: > Hi all and Happy new year. > > * I rewrote the man pages and removed all of the extra characters of the > POD module (according to Roland request). > * I tried to stick with the 80 characters limit (according to James > request), without 100% success (when i > described the attributes of the structures, i needed more than 80 > characters in a line..) > * Several spelling mistakes were fixed > > > Roland, what do you think? 
can you use this version and check in those > files? > Roland, do you plan to check in these man pages? thanks Dotan From jgunthorpe at obsidianresearch.com Mon Jan 8 23:47:12 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 9 Jan 2007 00:47:12 -0700 Subject: [openib-general] ib_gid_is_link_local In-Reply-To: <1168323466.14647.22961.camel@hal.voltaire.com> References: <20070103012440.GC13404@obsidianresearch.com> <1167864035.4564.23831.camel@hal.voltaire.com> <20070103231451.GB3267@obsidianresearch.com> <1167912258.4564.65120.camel@hal.voltaire.com> <20070104184248.GC3267@obsidianresearch.com> <1167948831.4564.97385.camel@hal.voltaire.com> <20070104235854.GA12386@obsidianresearch.com> <1168015486.4564.156860.camel@hal.voltaire.com> <20070109052219.GA16736@obsidianresearch.com> <1168323466.14647.22961.camel@hal.voltaire.com> Message-ID: <20070109074712.GB16736@obsidianresearch.com> On Tue, Jan 09, 2007 at 01:17:47AM -0500, Hal Rosenstock wrote: > > I would like it if routers did not have to worry about joins in order > > to send a multicast packet. > > Send-only members are not supposed to receive. How do they receive then > ? Send-only members do not receive. Don't routers need to receive > multicast (as well as send) ? How about more exactly: The SM could implicity consider the router as a send-only member of all forwardable multicast groups until a reason arises for it to be a full member. Anyhow, I think this discussion has lost context and we are not thinking about the same things. Let me describe to you how I think that a router today can implement multicast without special SM support using IBA defined protocols: Let me try to do that: - The router maintains a table of all forwardable multicast groups on each IB subnet that it is connected to. - It also tracks for each router port which groups have receivers on the local subnet. 
If so the group on that port is flagged 'rxer' otherwise 'txonly' - This table is kept in sync with the SM by using SM traps and SM queries. - Each router port then computes a set of joins to perform on the local subnet based on this table: Join Type Local_MGID Remote_MGID none none txonly none none rxer none txonly none none txonly txonly [No receiver] full txonly rxer [Only remote receiver] none rxer none send-only rxer txonly [Only local receiver] full rxer rxer [Both receiver] Remote_MGID would be rxer if any other participating port has a rxer flag for this MGID. [participating port being derived from a multicast routing protocol]. (How exactly to determine the rxer/txonly flag and if this optimization is even really necessary is not something I have spent alot of time on just yet - but this conceptually describes the optimal, minimum spanning methodology.) - The router connects to other routers on the local subnet and performs a multicast routing protocol to produce a inter-subnet multicast spanning tree for each MGID. The results from this control which ports participate in each MGID. - Finally, the router programs its internal forwarding path. As an example using IPv6 SNM: 1) A new nodes comes up on subnet alpha and registers SNM MGID A as full membership. 2) The subnet alpha local router port learns of #1 from the local SM. 3) The router forwards the new mgid to other routers it is connected to via the multicast routing protocol. 4) On the beta subnet, another node registers as send-only for SNM MGID A. 5) The beta local router port learns of #4 from the local SM. 6) The router inspects its MGID table and finds one of its ports has a path to the rxer in #1. It joins MGID A on subnet beta as a full member, and the other port joins MGID A as a send-only member. 7) The above repeats through the chain of subnets until subnet alpha is reached. 
8) A port on the router connected to subnet alpha sees the MGID A creation on one of its other ports and registers as send-only for MGID A on subnet alpha. (similar to step #6) 9) The host sends the SNM, unsubscribres from MGID A and the process reverses itself. I think this is within what IBA already defines and is pretty much what has to be done today to have a chance of working with existing subnet managers. I don't think it needs changes to the SM. I don't think it scales very well since it requires alot of exchanges between the SM and the routers. This is more or less what I had in mind during the concall we had last year... Also, I expect the first SNM message will be lost since the SM will ack the host before the router has received the trap, found the new MGID and joined it. (I don't think that is very good :<) ====== The above describes the router as being autonomous of the SM. The routers learn of data the SM has through queries (a pull model). Another approach is to have the SM program the routers explicitly (a push model). In this view the router is more like an IB switch from a SM programming perspective. It has a more complex LinearFDB that uses GIDs rather than LIDs and a more complex multicast table that works on MGIDs rather than MLIDS. Like a switch the SM would program the router as needed. I view this as being more in line with the IB treatment of the network as a completely managed resource. It should be more efficient since the SM only sends what changes to the routers rather than the routers responding to traps/etc. I'd ultimately like to find other interested people to work on this idea since I think it has merit.. It is with this second case where my prior thoughts about optimization strategies make more sense. 
(Ie pre-arranging send-only status for the router is an optimization that lets the SM do less work on group creation, the SM can 'replicate' the SM database into the router in the same way it 'replicates' it into the switches, etc) Regards, Jason From humanw.mindc at noreplay.com Tue Jan 9 01:44:40 2007 From: humanw.mindc at noreplay.com ( ) Date: Tue, 9 Jan 2007 11:44:40 +0200 Subject: ? Message-ID: <20070109094355.E03E83B0009@sentry-two.sandia.gov> סולם הטונים הרגשיים איך תזהה אנשים מסוכנים? כיצד תאתר את הבן-זוג האידיאלי? איך מעלים יכולת? נשמע מעניין? חברים יקרים, אנחנו פותחים את דלתנו לקהל הרחב! מבצע לכבוד השנה החדשה שיעור ניסיון חינם!!! בימים אלו מתקיים שיעור ואתם מוזמנים להגיע עם חברים! ביום רביעי ,10/1/07 משעה 20:00 עד 22:00 ביפו העתיקה ביום חמישי, 11/1/07 משעה 20:00 עד 22:00 בבאר-שבע כל שנדרש לעשות הוא לתאם מראש ואתם מוזמנים להגיע לפרטים והרשמה 03-6820595 המרכז ליעילות אישית טלפון 03-6820595 סלולארי 054-3103381 פקס 03-6820535 להסרה מרשימה לחץ כאן Remove from list -------------- next part -------------- An HTML attachment was scrubbed... URL: From dotanb at dev.mellanox.co.il Tue Jan 9 01:53:07 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 09 Jan 2007 11:53:07 +0200 Subject: [openib-general] [PATCH] [mthca] Allow the following QP state transition : reset --> reset Message-ID: <1168336387.25538.1.camel@mtls05.yok.mtl.com> Allow the following QP state transition : reset --> reset. 
Signed-off-by: Dotan Barak --- Index: gen2_devel_kernel/drivers/infiniband/hw/mthca/mthca_qp.c =================================================================== --- gen2_devel_kernel.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2007-01-08 16:14:25.000000000 +0200 +++ gen2_devel_kernel/drivers/infiniband/hw/mthca/mthca_qp.c 2007-01-09 11:34:42.000000000 +0200 @@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, goto out; } + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + if ((attr_mask & IB_QP_PKEY_INDEX) && attr->pkey_index >= dev->limits.pkey_table_len) { mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", From monis at voltaire.com Tue Jan 9 02:00:48 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 09 Jan 2007 12:00:48 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling Message-ID: <45A367D0.3060206@voltaire.com> This patch implements selective tx signaling for IPoIB. It lets the user set the ratio between the number of sent packets and the number of TX completion signals. This optimization has the following advantages: + increase the packet per second (PPS) rate + reduce the number of interrupts related to ipoib tx completions Since the IB HCA HW executes work requests posted QP in-order, we can i assume that a completion of a work request means that all the work requests posted before it are also completed and hence their associated resources (skbs in this context) can be recycled. The current driver implementation asks for a completion signaling for every sent packet (a ratio of 1). This patch enables the user to set a higher ratio. Asking for a completion signal for every n (>1) packets saves the following: 1. less interrupts to the host 2. the amortized cost for tx completion handing is lowered 3. 
the tx_lock is taken less often The cost of selective signaling is in the average amount of memory that the IPoIB driver consumes since skbs are freed in the TX completion handler (which is now executed less often). So, if the current driver holds only a few skbs at any given time (and normally not more than one) the new driver holds skbs up to n (the ratio between sent packets and the number of tx completions). A reasonable value of n can lead to overconsumption of a few tens of Kbytes but the real issue is elsewhere. Applications that set the socket buffer to a small size (with setsockopt()) may suffer from ENOBUFS failures when calling sendto() or sendmsg(). A good example for this is ping and a signaling ratio of 16 packets to 1 completion request. In this case a few successful pings are followed by an endless sequence of errors (until ping restarts). The solution is to set n with attention to the specific user applications and to use setsockopt() with care (ping for instance, can be run with -S). Another issue is related to the ipoib_ib_dev_stop() operation. This function checks that the tail and head of the tx_ring are equal and if they are not it assumes that there are uncompleted work requests. With this patch it is normal that the tail and head of the tx_ring would be different since we are not always asking for a completion notification. Since I don't see a way to tell if the tail/head gap is normal or due to a failure I only reduce the message severity from warn to dbg if the condition for expected gap is true. However, I still see there a tiny chance that a completion notification would arrive after the timeout in ipoib_ib_dev_stop() expires and then it tries to free the skbs in the tx_ring(). Solutions to that can be 1. protect the code with a lock - but I started with trying to avoid locks 2. 
reduce the hazard by adding more to the timeout and calling test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); in the TX completion handler to check if ipoib_ib_dev_stop() had started. I would be happy to get comments for the last issue and for the rest of the patch of course. thanks > MoniS ipoib.h | 2 ++ ipoib_ib.c | 39 ++++++++++++++++++++++++++++----------- ipoib_main.c | 10 +++++++++- ipoib_verbs.c | 4 ++-- 4 files changed, 41 insertions(+), 14 deletions(-) Signed-off-by: Moni Shoua --- Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-01-07 15:39:49.421190295 +0200 +++ infiniband/drivers/infiniband/ulp/ipoib/ipoib.h 2007-01-07 15:42:33.768824668 +0200 @@ -164,6 +164,7 @@ struct ipoib_dev_priv { struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; + unsigned tx_completion_mark; struct ib_sge tx_sge; struct ib_send_wr tx_wr; @@ -335,6 +336,7 @@ static inline void ipoib_unregister_debu extern int ipoib_sendq_size; extern int ipoib_recvq_size; +extern int num_unsignal_tx; extern struct ib_sa_client ipoib_sa_client; Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-01-07 15:39:49.443186365 +0200 +++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-01-07 19:29:21.885896644 +0200 @@ -256,29 +256,32 @@ static void ipoib_ib_handle_tx_wc(struct return; } - tx_req = &priv->tx_ring[wr_id]; + do { + tx_req = &priv->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, - tx_req->skb->len, DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, tx_req->mapping, + tx_req->skb->len, DMA_TO_DEVICE); - ++priv->stats.tx_packets; - priv->stats.tx_bytes += tx_req->skb->len; + ++priv->stats.tx_packets; + priv->stats.tx_bytes += tx_req->skb->len; - dev_kfree_skb_any(tx_req->skb); + 
dev_kfree_skb_any(tx_req->skb); + } while (wr_id-- > priv->tx_completion_mark); spin_lock_irqsave(&priv->tx_lock, flags); - ++priv->tx_tail; + priv->tx_tail += wc->wr_id - priv->tx_completion_mark + 1; if (netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) netif_wake_queue(dev); spin_unlock_irqrestore(&priv->tx_lock, flags); + priv->tx_completion_mark = (wc->wr_id + 1) & (ipoib_sendq_size - 1); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed send event " "(status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); + wc->status, (unsigned int)wc->wr_id, wc->vendor_err); } static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) @@ -309,7 +312,11 @@ static inline int post_send(struct ipoib u64 addr, int len) { struct ib_send_wr *bad_wr; + int send_signaled=0; + int ret; + if ((wr_id & num_unsignal_tx) == num_unsignal_tx) + send_signaled=1; priv->tx_sge.addr = addr; priv->tx_sge.length = len; @@ -317,7 +324,11 @@ static inline int post_send(struct ipoib priv->tx_wr.wr.ud.remote_qpn = qpn; priv->tx_wr.wr.ud.ah = address; - return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); + if (send_signaled) + priv->tx_wr.send_flags |= IB_SEND_SIGNALED; + ret = ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); + priv->tx_wr.send_flags &= ~IB_SEND_SIGNALED; + return ret; } void ipoib_send(struct net_device *dev, struct sk_buff *skb, @@ -522,8 +533,14 @@ int ipoib_ib_dev_stop(struct net_device while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) { if (time_after(jiffies, begin + 5 * HZ)) { - ipoib_warn(priv, "timing out; %d sends %d receives not completed\n", - priv->tx_head - priv->tx_tail, recvs_pending(dev)); + if (recvs_pending(dev) || + (priv->tx_head - priv->tx_tail) > num_unsignal_tx) + ipoib_warn(priv, "timing out; %d sends %d receives not completed\n", + priv->tx_head - priv->tx_tail, recvs_pending(dev)); + else + 
ipoib_dbg(priv, "%d sends left in q." + "probably sent without completion notification.\n", + priv->tx_head - priv->tx_tail); /* * assume the HW is wedged and just free up Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-01-07 15:39:49.454184399 +0200 +++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-01-07 15:42:33.806817879 +0200 @@ -57,11 +57,15 @@ MODULE_LICENSE("Dual BSD/GPL"); int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; +int tx_signal_rate __read_mostly = 1; +int num_unsignal_tx; module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); +module_param_named(tx_signal_rate, tx_signal_rate, int, 0444); +MODULE_PARM_DESC(tx_signal_rate, "Number of tx wr per wc. 
Must be power of 2"); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level; @@ -849,7 +853,7 @@ int ipoib_dev_init(struct net_device *de goto out_rx_ring_cleanup; } - /* priv->tx_head & tx_tail are already 0 */ + /* tx_completion_mark, priv->tx_head & tx_tail are already 0 */ if (ipoib_ib_dev_init(dev, ca, port)) goto out_tx_ring_cleanup; @@ -1179,6 +1183,10 @@ static int __init ipoib_init_module(void ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); + tx_signal_rate = roundup_pow_of_two(tx_signal_rate); + tx_signal_rate = min(tx_signal_rate, ipoib_sendq_size); + tx_signal_rate = max(tx_signal_rate, 1); + num_unsignal_tx = tx_signal_rate - 1; ret = ipoib_register_debugfs(); if (ret) return ret; Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-01-07 15:39:49.481179576 +0200 +++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-01-07 15:42:33.832813235 +0200 @@ -164,7 +164,7 @@ int ipoib_transport_dev_init(struct net_ .max_send_sge = 1, .max_recv_sge = 1 }, - .sq_sig_type = IB_SIGNAL_ALL_WR, + .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_UD }; @@ -208,7 +208,7 @@ int ipoib_transport_dev_init(struct net_ priv->tx_wr.opcode = IB_WR_SEND; priv->tx_wr.sg_list = &priv->tx_sge; priv->tx_wr.num_sge = 1; - priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->tx_wr.send_flags = 0; return 0; From monis at voltaire.com Tue Jan 9 02:05:55 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 09 Jan 2007 12:05:55 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling - performance measurements In-Reply-To: <45A367D0.3060206@voltaire.com> References: <45A367D0.3060206@voltaire.com> Message-ID: <45A36903.4050807@voltaire.com> Tests with iperf and netperf for unicast and multicast destinations show an improvement 
in the ability of user applications to xmit packets. Examples: Number of successful writes as reported by 30 seconds UDP_STREAM of 100 byte packets. Tested with netperf on Dual CPU (64bit Intel Xeon 3GHz) running linux-2.6.20-rc1 (sender) and OFED-1.1 (receiver) Note that the results below show improvement only for TX so we see an end to end packet loss. Improving the receiver (NAPI) will reduce the packet loss. ---------------------------------------------------------------------- Without the patch PPS=230507 linux:~ # netperf -H 192.168.11.234 -t UDP_STREAM -l 30 -- -m 100 UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.11.234 (192.168.11.234) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 262144 100 30.00 6915225 0 184.40 135168 30.00 6366068 169.75 ---------------------------------------------------------------------- tx_signal_rate=1 PPS=244116 linux:~ # netperf -H 192.168.11.234 -t UDP_STREAM -l 30 -- -m 100 UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.11.234 (192.168.11.234) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 262144 100 30.00 7323482 0 195.27 135168 30.00 6905764 184.13 ---------------------------------------------------------------------- tx_signal_rate=4 PPS=254748 linux:~ # netperf -H 192.168.11.234 -t UDP_STREAM -l 30 -- -m 100 UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.11.234 (192.168.11.234) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 262144 100 30.00 7642461 0 203.77 135168 30.00 6741080 179.74 ---------------------------------------------------------------------- tx_signal_rate=8 PPS=278458 linux:~ # netperf -H 192.168.11.234 -t UDP_STREAM -l 30 -- -m 100 UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 
192.168.11.234 (192.168.11.234) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 262144 100 30.01 8353760 0 222.73 135168 30.01 6884056 183.54 ---------------------------------------------------------------------- tx_signal_rate=16 PPS=316418 linux:~ # netperf -H 192.168.11.234 -t UDP_STREAM -l 30 -- -m 100 UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.11.234 (192.168.11.234) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 262144 100 30.00 9492551 0 253.11 135168 30.00 6501771 173.37 ---------------------------------------------------------------------- tx_signal_rate=32 PPS=328316 linux:~ # netperf -H 192.168.11.234 -t UDP_STREAM -l 30 -- -m 100 UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.11.234 (192.168.11.234) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 262144 100 30.00 9849480 0 262.62 135168 30.00 6006394 160.15 ---------------------------------------------------------------------- From mst at mellanox.co.il Tue Jan 9 03:20:30 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 9 Jan 2007 13:20:30 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <45A367D0.3060206@voltaire.com> References: <45A367D0.3060206@voltaire.com> Message-ID: <20070109112030.GH24084@mellanox.co.il> > This patch implements selective tx signaling for IPoIB. Let's assume that the last tx packet you have sent is marked unsignalled. Since you never free the skb, won't the TX watchdog get triggered? -- MST From mst at mellanox.co.il Tue Jan 9 03:26:21 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 9 Jan 2007 13:26:21 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling - performance measurements In-Reply-To: <45A36903.4050807@voltaire.com> References: <45A367D0.3060206@voltaire.com> <45A36903.4050807@voltaire.com> Message-ID: <20070109112621.GI24084@mellanox.co.il> > Tests with iperf and netperf for unicast and multicast destinations show > an improvement in the ability of user applications to xmit packets. > > Examples: Number of successful writes as reported by 30 seconds UDP_STREAM of 100 byte packets. > Tested with netperf on Dual CPU (64bit Intel Xeon 3GHz) running linux-2.6.20-rc1 (sender) and > OFED-1.1 (receiver) IMO netperf reporting is actually not too informative without stats settings. Try running with e.g. -i 10,2 -I 99,5 - you might discover that your numbers are only accurate within 30% > Note that the results below show improvement only for TX so we see an end to end packet loss. Hmm, as long as packet drops increase, BW improvements in UDP don't sound too convincing, do they? You can get infinite BW at 100% drop ... > Improving the receiver (NAPI) will reduce the packet loss. Needs testing with NAPI patch then? -- MST From mst at mellanox.co.il Tue Jan 9 05:15:58 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 9 Jan 2007 15:15:58 +0200 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console In-Reply-To: <1168258813.4577.73483.camel@hal.voltaire.com> References: <1168258813.4577.73483.camel@hal.voltaire.com> Message-ID: <20070109131558.GB16107@mellanox.co.il> > diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in > index 1ccf5c6..2d52675 100644 > --- a/osm/opensm/configure.in > +++ b/osm/opensm/configure.in > @@ -62,6 +62,22 @@ AC_ARG_ENABLE(debug, > esac],[debug=false]) > AM_CONDITIONAL(DEBUG, test x$debug = xtrue) > > +dnl Console over a socket connection > +AC_ARG_ENABLE(console-socket, > +[ --enable-console-socket Enable a console socket, requires tcp_wrappers (default yes)], > +[case $enableval in > + yes) console_socket=yes ;; > + no) console_socket=no ;; > + esac], > + console_socket=yes) > +if test $console_socket = yes; then > + AC_CHECK_LIB(wrap, request_init, [], > + AC_MSG_ERROR([request_init() not found. console-socket requires libwrap.])) > + AC_DEFINE(ENABLE_OSM_CONSOLE_SOCKET, > + 1, > + [Define as 1 if you want to enable a console on a socket connection]) > +fi > + > dnl Provide user option to select vendor > OPENIB_APP_OSMV_SEL > Might it be better to set the default to off? libwrap devel package might not be present on some systems - so build now fails where previously it passed - and user didn't even ask for any new features. Further, could you please clarify: when compiled in, is opensm listening on a socket by default or does it need to be enabled with a run-time option? I hope it's the later. -- MST From ogerlitz at voltaire.com Tue Jan 9 05:20:44 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 09 Jan 2007 15:20:44 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <20070104225319.GI4155@mellanox.co.il> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> Message-ID: <45A396AC.3050306@voltaire.com> Michael S. 
Tsirkin wrote: >>> I currently have these in separate branches. I know that Woody has them >>> combined in his tree, but I can create an 'ofed' branch with these together. >> I published a 'multicast-sa_cache' branch (which was a little clearer than >> 'ofed'). I can change the name if more patches need to be pulled in. > > Cool, thanks, I'll try to put these in ofed next week. Hi Sean, My thought re this was that since the rest of the original patch sequence specifically the rdma_cm UDP and ucma code are merged in 2.6.20-rcX which is the code OFED 1.2 is based on, the easy path for you would be to stage the multicast code for upstream push to 2.6.21 and then push the code as to OFED 1.2, what do you think? Other than that, as we discussed in SC06 there are some changes that need to be integrated in the code to allow for interoperability between a multicast rdma cm based app to IPoIB, specifically removing the RDMA CM signature from the mgid which is generated from the ip addr and pkey, but not only. The second change is related to the qkey, looking in the current code of cma_join_ib_multicast() (at the multicast-sa_cache branch of the rdma-dev git) i see that the qkey is the mc ip address, which is not consistent with what librdmacm is assuming (0x1234567 etc). Anyway, what we need here is to plug into the scheme of ipoib which uses the qkey associated with the ipv4 broadcast multicast group. It turns out that there is some twilight zone here which i am working to understand better. You can see that for the ipv4 brd group ipoib lets the SM allocate the group and qkey (ie the create param of ipoib_mcast_join is zero), i will give it some thought and let you know how i think the rdma cm can plug into this scheme, will be happy to get other ideas as well. Or. 
From monis at voltaire.com Tue Jan 9 05:33:12 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 09 Jan 2007 15:33:12 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <20070109112030.GH24084@mellanox.co.il> References: <45A367D0.3060206@voltaire.com> <20070109112030.GH24084@mellanox.co.il> Message-ID: <45A39998.2010304@voltaire.com> Michael S. Tsirkin wrote: >>This patch implements selective tx signaling for IPoIB. > > > Let's assume that the last tx packet you have sent is marked unsignalled. > Since you never free the skb, won't the TX watchdog get triggered? > AFAIK, tx_timeout is called when (jiffies - dev->trans_start) > dev->watchdog_timeo. I don't think that holding the skb too long is a trigger for somethink. Anyway, I never saw ipoib_timeout being called during my tests. From swise at opengridcomputing.com Tue Jan 9 05:38:28 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 09 Jan 2007 07:38:28 -0600 Subject: [openib-general] [PATCH 1/10] cxgb3 - main header files In-Reply-To: <45A36E59.30500@garzik.org> References: <20061220124125.6286.17148.stgit@localhost.localdomain> <45918CA4.3020601@garzik.org> <45A36C22.6010009@chelsio.com> <45A36E59.30500@garzik.org> Message-ID: <1168349908.4628.3.camel@stevo-desktop> Roland, The T3 Ethernet driver is queued for inclusion into Jeff's tree. How do you want to merge in the RDMA driver? I can give you a single monolithic patch if you want. We also need to decide on the ib_req_notify_cq() issue. Steve. On Tue, 2007-01-09 at 05:28 -0500, Jeff Garzik wrote: > Divy Le Ray wrote: > > Jeff Garzik wrote: > >> Divy Le Ray wrote: > >>> From: Divy Le Ray > >>> > >>> This patch implements the main header files of > >>> the Chelsio T3 network driver. > >>> > >>> Signed-off-by: Divy Le Ray > >> > >> Once you think it's ready, email me a URL to a single patch that adds > >> the driver to the latest linux-2.6.git kernel. 
Include in the email a > >> description of the driver and signed-off-by line, which will get > >> directly included in the git changelog. > >> > >> Adding new drivers is a bit special, because we want to merge it as a > >> single changeset, but that would create a patch too large to review on > >> the common kernel mailing lists. > > Jeff, > > > > You can grab the monolithic patch at this URL: > > http://service.chelsio.com/kernel.org/cxgb3.patch.bz2 > > this is in my queue, thanks. Sorry I didn't indicate that earlier. > > Jeff > > > From halr at voltaire.com Tue Jan 9 05:39:51 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 09 Jan 2007 08:39:51 -0500 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console In-Reply-To: <20070109131558.GB16107@mellanox.co.il> References: <1168258813.4577.73483.camel@hal.voltaire.com> <20070109131558.GB16107@mellanox.co.il> Message-ID: <1168349987.14647.49958.camel@hal.voltaire.com> On Tue, 2007-01-09 at 08:15, Michael S. Tsirkin wrote: > > diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in > > index 1ccf5c6..2d52675 100644 > > --- a/osm/opensm/configure.in > > +++ b/osm/opensm/configure.in > > @@ -62,6 +62,22 @@ AC_ARG_ENABLE(debug, > > esac],[debug=false]) > > AM_CONDITIONAL(DEBUG, test x$debug = xtrue) > > > > +dnl Console over a socket connection > > +AC_ARG_ENABLE(console-socket, > > +[ --enable-console-socket Enable a console socket, requires tcp_wrappers (default yes)], > > +[case $enableval in > > + yes) console_socket=yes ;; > > + no) console_socket=no ;; > > + esac], > > + console_socket=yes) > > +if test $console_socket = yes; then > > + AC_CHECK_LIB(wrap, request_init, [], > > + AC_MSG_ERROR([request_init() not found. 
console-socket requires libwrap.])) > > + AC_DEFINE(ENABLE_OSM_CONSOLE_SOCKET, > > + 1, > > + [Define as 1 if you want to enable a console on a socket connection]) > > +fi > > + > > dnl Provide user option to select vendor > > OPENIB_APP_OSMV_SEL > > > > Might it be better to set the default to off? > libwrap devel package might not be present on some systems - so build now fails > where previously it passed - and user didn't even ask for any new features. The default can easily be changed. Are there any other opinions on this ? > Further, could you please clarify: when compiled in, is opensm listening on a socket > by default or does it need to be enabled with a run-time option? > I hope it's the later. Not currently but I think that is also easily changed as well. -- Hal From mst at mellanox.co.il Tue Jan 9 05:50:39 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 9 Jan 2007 15:50:39 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <45A39998.2010304@voltaire.com> References: <45A39998.2010304@voltaire.com> Message-ID: <20070109135039.GD16107@mellanox.co.il> > I don't think that holding the skb too long is a trigger for somethink. Are you sure? We are not talking about too long here - unsignalled TX packet will never get a completion. As far as I can see, __kfree_skb will 1. call dst_release - so this patch might keep a reference on dst indefinitely? 2. call skb->destructor if not NULL - this is responsible for socket buffer accounting 3. Releases reference to lots of other objects related to netfiltering Are you sure keeping all these references indefinitely is safe? -- MST From mst at mellanox.co.il Tue Jan 9 05:52:10 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 9 Jan 2007 15:52:10 +0200 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console In-Reply-To: <1168349987.14647.49958.camel@hal.voltaire.com> References: <1168349987.14647.49958.camel@hal.voltaire.com> Message-ID: <20070109135210.GE16107@mellanox.co.il> > > Further, could you please clarify: when compiled in, is opensm listening on a socket > > by default or does it need to be enabled with a run-time option? > > I hope it's the later. > > Not currently but I think that is also easily changed as well. Probably a good idea - opening a port involves a security risk that user should be aware of. -- MST From mst at mellanox.co.il Tue Jan 9 05:57:25 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 9 Jan 2007 15:57:25 +0200 Subject: [openib-general] [PATCH 1/10] cxgb3 - main header files In-Reply-To: <1168349908.4628.3.camel@stevo-desktop> References: <20061220124125.6286.17148.stgit@localhost.localdomain> <45918CA4.3020601@garzik.org> <45A36C22.6010009@chelsio.com> <45A36E59.30500@garzik.org> <1168349908.4628.3.camel@stevo-desktop> Message-ID: <20070109135725.GF16107@mellanox.co.il> > We also need to decide on the ib_req_notify_cq() issue. Let's clarify - do you oppose doing copy_from_user from a fixed address passed in during setup? If OK with you, this seems the best way as it is the least controversial and least disruptive one. -- MST From halr at voltaire.com Tue Jan 9 05:59:50 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 09 Jan 2007 08:59:50 -0500 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console In-Reply-To: <20070109135210.GE16107@mellanox.co.il> References: <1168349987.14647.49958.camel@hal.voltaire.com> <20070109135210.GE16107@mellanox.co.il> Message-ID: <1168351188.14647.51138.camel@hal.voltaire.com> On Tue, 2007-01-09 at 08:52, Michael S. 
Tsirkin wrote: > > > Further, could you please clarify: when compiled in, is opensm listening on a socket > > > by default or does it need to be enabled with a run-time option? > > > I hope it's the later. > > > > Not currently but I think that is also easily changed as well. > > Probably a good idea - opening a port involves a security risk Agreed. > that user should be aware of. tcpwrappers can at least allow/deny per IP address so there is some weak control. -- Hal From mst at mellanox.co.il Tue Jan 9 06:09:25 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 9 Jan 2007 16:09:25 +0200 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console In-Reply-To: <1168351188.14647.51138.camel@hal.voltaire.com> References: <1168349987.14647.49958.camel@hal.voltaire.com> <20070109135210.GE16107@mellanox.co.il> <1168351188.14647.51138.camel@hal.voltaire.com> Message-ID: <20070109140925.GG16107@mellanox.co.il> > > > > Further, could you please clarify: when compiled in, is opensm listening on a socket > > > > by default or does it need to be enabled with a run-time option? > > > > I hope it's the later. > > > > > > Not currently but I think that is also easily changed as well. > > > > Probably a good idea - opening a port involves a security risk > > Agreed. > > > that user should be aware of. > > tcpwrappers can at least allow/deny per IP address so there is some weak > control. Right, beats developing opensm-specific syntax for that :). 
-- MST From swise at opengridcomputing.com Tue Jan 9 06:46:53 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 09 Jan 2007 08:46:53 -0600 Subject: [openib-general] [PATCH 1/10] cxgb3 - main header files In-Reply-To: <20070109135725.GF16107@mellanox.co.il> References: <20061220124125.6286.17148.stgit@localhost.localdomain> <45918CA4.3020601@garzik.org> <45A36C22.6010009@chelsio.com> <45A36E59.30500@garzik.org> <1168349908.4628.3.camel@stevo-desktop> <20070109135725.GF16107@mellanox.co.il> Message-ID: <1168354013.4628.14.camel@stevo-desktop> On Tue, 2007-01-09 at 15:57 +0200, Michael S. Tsirkin wrote: > > We also need to decide on the ib_req_notify_cq() issue. > > Let's clarify - do you oppose doing copy_from_user from a fixed > address passed in during setup? > So far its been you and I arguing over this issue. Before I go implement it and retest everything, I'd like some indication that anyone else thinks its the right thing to do vs adding the extra parameter to ib_req_notify_cq(). > If OK with you, this seems the best way as it is the least controversial > and least disruptive one. > In the interest of expediting this I'll go implement it... Steve. From jlentini at netapp.com Tue Jan 9 07:47:42 2007 From: jlentini at netapp.com (James Lentini) Date: Tue, 9 Jan 2007 10:47:42 -0500 (EST) Subject: [openib-general] second version of the libibverbs man pages In-Reply-To: <1376.85.65.223.184.1167726918.squirrel@dev.mellanox.co.il> References: <1376.85.65.223.184.1167726918.squirrel@dev.mellanox.co.il> Message-ID: The manual page for IBV_GET_CQ_EVENT(3) should be clarified. In the NOTES section, the text states that all completion events returned by ibv_get_cq_event() must be acknowledged using ibv_ack_cq_events(). This statement is confusing because ibv_get_cq_event() doesn't actually return any events. Furthermore, in the EXAMPLES section, the call to ibv_get_cq_event() is not paired with a call to ibv_ack_cq_events(). 
On Tue, 2 Jan 2007, dotanb at dev.mellanox.co.il wrote: > Hi all and Happy new year. > > * I rewrote the man pages and removed all of the extra characters of the > POD module (according to Roland request). > * I tried to stick with the 80 characters limit (according to James > request), without 100% success (when i > described the attributes of the structures, i needed more than 80 > characters in a line..) > * Several spelling mistakes were fixed > > > Roland, what do you think? can you use this version and check in those > files? > > > thanks > Dotan > > > From monis at voltaire.com Tue Jan 9 07:49:58 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 09 Jan 2007 17:49:58 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <20070109135039.GD16107@mellanox.co.il> References: <45A39998.2010304@voltaire.com> <20070109135039.GD16107@mellanox.co.il> Message-ID: <45A3B9A6.8080908@voltaire.com> Michael S. Tsirkin wrote: >>I don't think that holding the skb too long is a trigger for somethink. > > > > Are you sure? We are not talking about too long here - unsignalled TX packet > will never get a completion. As far as I can see, __kfree_skb will > 1. call dst_release - so this patch might keep a reference on dst indefinitely? I don't think that holding dst too long is unsafe. Imagine a constant stream of packets to the same destination. In this case will always be a reference to a dst struct. > 2. call skb->destructor if not NULL - this is responsible for socket buffer > accounting I addressed the issue of the socket buffer accounting in the openning message. I don't see it as a problem but more than an note to the user. Don't you think? > 3. Releases reference to lots of other objects related to netfiltering > > Are you sure keeping all these references indefinitely is safe? I can't say I'm 100% sure but please see my comment below. 
> A comment regarding the word "indefinitely" - I understand that theoretically there is a chance that no packet will be sent through the ib interface causing unnecessary resource allocation as you described. I think however that the chance for that is very small and that the price is worth for gaining performance increase. This is true of course if the penalty is just resource allocation and not system safety. In this context I can say that my tests didn't cause any bad system behavior and my senses tell me there shouldn't be any. However, I would be glad to learn more from those who know more. From monis at voltaire.com Tue Jan 9 07:58:20 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 09 Jan 2007 17:58:20 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling - performance measurements In-Reply-To: <20070109112621.GI24084@mellanox.co.il> References: <45A367D0.3060206@voltaire.com> <45A36903.4050807@voltaire.com> <20070109112621.GI24084@mellanox.co.il> Message-ID: <45A3BB9C.3010807@voltaire.com> Michael S. Tsirkin wrote: >>Tests with iperf and netperf for unicast and multicast destinations show >>an improvement in the ability of user applications to xmit packets. >> >>Examples: Number of successful writes as reported by 30 seconds UDP_STREAM of 100 byte packets. >>Tested with netperf on Dual CPU (64bit Intel Xeon 3GHz) running linux-2.6.20-rc1 (sender) and >>OFED-1.1 (receiver) > > > IMO netperf reporting is actually not too informative without stats settings. > Try running with e.g. -i 10,2 -I 99,5 - you might discover that your numbers are > only accurate within 30% I tried that and I am getting a warning about confidence level not being achieved. I am still trying to learn about that and trying to understand why (any ideas?) but for the meantime can you explain why do I need statistics when I am only trying to count the number of successful writes? 
> > >>Note that the results below show improvement only for TX so we see an end to end packet loss. > > > Hmm, as long as packet drops increase, BW improvements in UDP don't sound > too convincing, do they? You can get infinite BW at 100% drop ... > > >>Improving the receiver (NAPI) will reduce the packet loss. > > > Needs testing with NAPI patch then? > I tried NAPI and I get better results for the receiver but my opinion is that the receiver side is less important here since all I'm trying to improve is the ability to send packets. Am I right? From mst at mellanox.co.il Tue Jan 9 08:21:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 9 Jan 2007 18:21:48 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling - performance measurements In-Reply-To: <45A3BB9C.3010807@voltaire.com> References: <45A3BB9C.3010807@voltaire.com> Message-ID: <20070109162148.GB22476@mellanox.co.il> > >>Tests with iperf and netperf for unicast and multicast destinations show > >>an improvement in the ability of user applications to xmit packets. > >> > >>Examples: Number of successful writes as reported by 30 seconds UDP_STREAM of 100 byte packets. > >>Tested with netperf on Dual CPU (64bit Intel Xeon 3GHz) running linux-2.6.20-rc1 (sender) and > >>OFED-1.1 (receiver) > > > > > > IMO netperf reporting is actually not too informative without stats settings. > > Try running with e.g. -i 10,2 -I 99,5 - you might discover that your numbers are > > only accurate within 30% > > I tried that and I am getting a warning about confidence level not being > achieved. I am still trying to learn about that and trying to understand why > (any ideas?) but for the meantime can you explain why do I need statistics when > I am only trying to count the number of successful writes? Otherwise your results could be just noise. > >>Note that the results below show improvement only for TX so we see an end to end packet loss. 
> > > > > > Hmm, as long as packet drops increase, BW improvements in UDP don't sound > > too convincing, do they? You can get infinite BW at 100% drop ... > > > > > >>Improving the receiver (NAPI) will reduce the packet loss. > > > > > > Needs testing with NAPI patch then? > > I tried NAPI and I get better results for the receiver but my opinion is that > the receiver side is less important here since all I'm trying to improve is > the ability to send packets. Am I right? Only if you are sure something else is not dropping the packets (e.g. buffer overruns triggered). -- MST From mst at mellanox.co.il Tue Jan 9 08:58:47 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 9 Jan 2007 18:58:47 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <45A3B9A6.8080908@voltaire.com> References: <45A39998.2010304@voltaire.com> <20070109135039.GD16107@mellanox.co.il> <45A3B9A6.8080908@voltaire.com> Message-ID: <20070109165847.GE22476@mellanox.co.il> > > 2. call skb->destructor if not NULL - this is responsible for socket buffer > > accounting > I addressed the issue of the socket buffer accounting in the openning message. > I don't see it as a problem but more than an note to the user. Don't you think? No, I actually think this is a severe problem. -- MST From hnguyen at linux.vnet.ibm.com Tue Jan 9 09:04:14 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Tue, 9 Jan 2007 18:04:14 +0100 Subject: [openib-general] [PATCH 2.6.20] ehca: use proper flag for get_zeroed_page() to prevent BUG:scheduling while atomic... Message-ID: <200701091804.14297.hnguyen@linux.vnet.ibm.com> Hello Roland! Here is a patch for ehca to use proper flag, ie. GFP_ATOMIC resp. GFP_KERNEL, when calling get_zeroed_page() to prevent "Bug: scheduling while atomic...". This error does not cause a kernel panic but makes ipoib un-usable afterwards. It is reproducible on 2.6.20-rc4 if one does ifconfig down during a flood ping test. 
I have not observed this error in earlier releases incl. 2.6.20-rc1. Due to vacation time I just recognized it last couple of days. This error occurs when a qp event/irq is received and ehca event handler allocates a control block/page to obtain HCA error data block. Use of GFP_ATOMIC prevents this issue. Since this has a good chance of crashing the kernel every time HCA error data is fetched, it would be great if you pushed this patch upstream. Regards Nam Signed-off-by Hoang-Nam Nguyen --- ehca_hca.c | 8 ++++---- ehca_irq.c | 2 +- ehca_iverbs.h | 4 ++-- ehca_main.c | 10 +++++----- ehca_mrmw.c | 4 ++-- ehca_qp.c | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index e1b618c..b7be950 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -50,7 +50,7 @@ int ehca_query_device(struct ib_device * ib_device); struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -110,7 +110,7 @@ int ehca_query_port(struct ib_device *ib ib_device); struct hipz_query_port *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -179,7 +179,7 @@ int ehca_query_pkey(struct ib_device *ib return -EINVAL; } - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -212,7 +212,7 @@ int ehca_query_gid(struct ib_device *ibd return -EINVAL; } - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; diff --git 
a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index c3ea746..e7209af 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -138,7 +138,7 @@ int ehca_error_data(struct ehca_shca *sh u64 *rblock; unsigned long block_count; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); if (!rblock) { ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); ret = -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 3720e30..cd7789f 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -180,10 +180,10 @@ int ehca_mmap_register(u64 physical,void int ehca_munmap(unsigned long addr, size_t len); #ifdef CONFIG_PPC_64K_PAGES -void *ehca_alloc_fw_ctrlblock(void); +void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock() ((void *) get_zeroed_page(GFP_KERNEL)) +#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index cc47e4c..6574fbb 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -106,9 +106,9 @@ static struct timer_list poll_eqs_timer; #ifdef CONFIG_PPC_64K_PAGES static struct kmem_cache *ctblk_cache = NULL; -void *ehca_alloc_fw_ctrlblock(void) +void *ehca_alloc_fw_ctrlblock(gfp_t flags) { - void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL); + void *ret = kmem_cache_zalloc(ctblk_cache, flags); if (!ret) ehca_gen_err("Out of memory for ctblk"); return ret; @@ -206,7 +206,7 @@ int ehca_sense_attributes(struct ehca_sh u64 h_ret; struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { 
ehca_gen_err("Cannot allocate rblock memory."); return -ENOMEM; @@ -258,7 +258,7 @@ static int init_node_guid(struct ehca_sh int ret = 0; struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -469,7 +469,7 @@ static ssize_t ehca_show_##name(struct \ shca = dev->driver_data; \ \ - rblock = ehca_alloc_fw_ctrlblock(); \ + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ if (!rblock) { \ dev_err(dev, "Can't allocate rblock memory."); \ return 0; \ diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 0a5e221..cfb362a 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -1013,7 +1013,7 @@ int ehca_reg_mr_rpages(struct ehca_shca u32 i; u64 *kpage; - kpage = ehca_alloc_fw_ctrlblock(); + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; @@ -1124,7 +1124,7 @@ inline int ehca_rereg_mr_rereg1(struct e ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); - kpage = ehca_alloc_fw_ctrlblock(); + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index c6c9cef..34b8555 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -807,7 +807,7 @@ static int internal_modify_qp(struct ib_ unsigned long spl_flags = 0; /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(); + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!mqpcb) { ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); @@ -1273,7 +1273,7 @@ int ehca_query_qp(struct ib_qp *qp, return -EINVAL; } - qpcb = 
ehca_alloc_fw_ctrlblock(); + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { ehca_err(qp->device,"Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); From mshefty at ichips.intel.com Tue Jan 9 09:28:30 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 09 Jan 2007 09:28:30 -0800 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: References: <002301c73355$c220e180$8698070a@amr.corp.intel.com> Message-ID: <45A3D0BE.4070001@ichips.intel.com> > > +/* > > + * Limit CM msg timeouts to something reasonable. > > + * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min. > > + */ > > +#define IB_CM_MAX_TIMEOUT 21 > > OK... (although 8 seconds seems a little short -- it seems a somewhat > longer timeout could be legitimate on a very busy fabric across a WAN > or something like that) I included retries in the overall time, we could bump this to 23 = 32 seconds per message, and a total potential timeout of 8 minutes. > but then... > > > + timeout = min(IB_CM_MAX_TIMEOUT, > > + cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + > > + cm_convert_to_ms(cm_id_priv->av.packet_life_time)); > > should the IB_CM_MAX_TIMEOUT be inside a cm_convert_to_ms() too? > and similarly... Several of these are off. - Sean From chrise at sgi.com Tue Jan 9 09:30:22 2007 From: chrise at sgi.com (Chris Elmquist) Date: Tue, 9 Jan 2007 11:30:22 -0600 Subject: [openib-general] building and running IBMgtsim? In-Reply-To: <458AA161.5090708@mellanox.co.il> References: <20061220165624.GL31149@sgi.com> <458AA161.5090708@mellanox.co.il> Message-ID: <20070109173022.GG11323@sgi.com> Hey Guys... We had pretty good luck with this a week ago or so until we ran into a problem with the simulator trying to make a MAD library call which apparently did not exist. It looked to us like it was trying to make a call into a 1.2 library even though we are trying to do everything in an OFED 1.1 context. 
We were therefore suspicious that we had grabbed a GIT tree version of the simulator that had been advanced past something that was compatible with a generic OFED 1.1 tree. Can you tell me where I should get (with GIT) an appropriate source tree for the ibutils (including simulator) that should work with an underlying generic OFED 1.1 installation? Thank you. Chris On Thursday (12/21/2006 at 04:59PM +0200), Eitan Zahavi wrote: > Hi Chris, > > Sorry for my late response on this: > > The simulator is a standalone "server" where clients connect to it > through a TCP/IP socket. > > OpenSM which is not built with "sim" vendor (using --with-osmv=sim > --with-sim=) > will not try to connect to the simulator but will go to the real IB > network instead. > > So you need a second "simulator" install of OpenSM. > You can simply clone the GIT tree and > ./autogen.sh > ./configure --with-osmv=sim --with-sim= install> --prefix= > make > make install > > RunSimTest -f OsmTest.sim.tcl -c OsmTest.check.tcl -t IS1-16.topo -o > /bin/opensm > > Actually OsmTest is a test that currently fails (due to last changes in > InformInfo), > but any other *.check.tcl/*.sim.tcl pair should work. > > Eitan > > > Chris Elmquist wrote: > >Folks, > > > >I am trying to build and run IBMgtsim so that I can explore some different > >topologies and system sizes. But I am having a lot of trouble getting > >OpenSM to work with the simulator. > > > >I pulled down Eitan's ibutils git tree (to get the simulator) and > >am otherwise using the OFED 1.1 tarball for the rest of the stuff. > >I suspect I have a problem with OpenSM not being built correctly to use > >the simulator. > > > >Does anyone have a recipe on how to build and install all of these pieces > >(ie, openib, openSM and ibmgtsim) so that they will work together? 
> > > >I have been just trying to run one of the tests provided with the > >simulator like this: > > > >% cd ~/ibutils/ibmgtsim/tests > >% RunSimTest -c OsmTest.check.tcl -f OsmTest.sim.tcl -t IS1-16.topo -o > >/usr/local/bin/opensm > > > >but we get this sort of output: > > > >-I- Using random seed:43204 > >-I- Simulation directory is: /tmp/ibmgtsim.29716 > >-I- Calling IBMgtSim -s 43204 -V 0xA3 -t > >/root/ibutils/ibmgtsim/tests/IS1-16.top > >o -f /root/ibutils/ibmgtsim/tests/OsmTest.sim.tcl -l > >/tmp/ibmgtsim.29716/sim.log > >-I- Simulator Ready > >-I- Connecting to the simulator control server:pcplod.americas.sgi.com > >port:3726 > >5 > >-I- Connected to the simulator control server > >-I- Defined 51 guids > >-I- Node H-1 data: 0x0002c90000000008 {0x0002c90000000009 1} > >{0x0002c9000000000a > > 2} > >-I- Starting: /usr/local/bin/opensm -g 0x0002c90000000009 ... > >-I- Waiting for OpenSM subnet up ... > >-I- OpenSM Event:ERR Dec 20 10:53:09 470415 [5100E100] -> > >osm_vendor_open_port: ERR 5422: Unable to find requested CA guid > >0x2c90000000009 > >-I- New 1 events of /tmp/ibmgtsim.29716/osm.log > >-I- OpenSM Event:ERR Dec 20 10:53:09 470419 [5100E100] -> osm_vendor_bind: > >ERR 5 > >424: Unable to Open Port 0x2c90000000009 > >-I- New 1 events of /tmp/ibmgtsim.29716/osm.log > >-I- OpenSM Event:ERR Dec 20 10:53:09 470422 [5100E100] -> > >osm_sm_mad_ctrl_bind: ERR 3118: Vendor specific bind failed > >-I- New 1 events of /tmp/ibmgtsim.29716/osm.log > >-I- OpenSM Event:ERR Dec 20 10:53:09 470427 [5100E100] -> osm_sm_bind: ERR > >2E10: > > SM MAD Controller bind failed (IB_ERROR) > >-I- New 1 events of /tmp/ibmgtsim.29716/osm.log > >-I- OpenSM Event:ERR Dec 20 10:53:09 470434 [5100E100] -> > >osm_sa_mad_ctrl_unbind > >: ERR 1A11: No previous bind > >-I- New 1 events of /tmp/ibmgtsim.29716/osm.log > > > >Thank you. > > > >Chris > >SGI Network Engineering > > -- Chris Elmquist mailto:chrise at sgi.com (651)683-3093 Silicon Graphics, Inc. 
Eagan, MN From mshefty at ichips.intel.com Tue Jan 9 09:54:30 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 09 Jan 2007 09:54:30 -0800 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <45A34391.8040706@dev.mellanox.co.il> References: <000001c73383$30ae7360$8698070a@amr.corp.intel.com> <45A34391.8040706@dev.mellanox.co.il> Message-ID: <45A3D6D6.5090503@ichips.intel.com> > What about path query or any SA query from the user level ? Yes, I'd like that too, but libibumad can provide this functionality - even if the interface isn't ideal. Duplicating the ib_sa kernel module functionality in userspace could allow for a simpler interface however. Trying to use the existing interfaces for multicast or notice registration, however, is more problematic, which is why I was trying to focus on those areas specifically. - Sean From mshefty at ichips.intel.com Tue Jan 9 10:06:51 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 09 Jan 2007 10:06:51 -0800 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A396AC.3050306@voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> Message-ID: <45A3D9BB.2080408@ichips.intel.com> > My thought re this was that since the rest of the original patch > sequence specifically the rdma_cm UDP and ucma code are merged in > 2.6.20-rcX which is the code OFED 1.2 is based on, the easy path for you > would be to stage the multicast code for upstream push to 2.6.21 and > then push the code as to OFED 1.2, what do you think? I think shooting for 2.6.21 is fine, but... > Other then that, as we discussed in SC06 there are some changes that > need to be integrated in the code to allow for interoperability between > a multicast rdma cm based app to IPoIB, specifically removing the RDMA > CM signature from the mgid which generated from the ip addr and pkey, > but not only. 
...I have not completed these changes yet. Specifically, I have not added a send only join parameter or changed the qkey. I have also not full examined an issue where the SM log fills up with bad multicast join requests. > The second change is related to the qkey, looking in the current code > of cma_join_ib_multicast() (at the multicast-sa_cache branch of the > rdma-dev git) i see that the qkey is the mc ip address, which is not > consistent with what librdmacm is assuming (0x1234567 etc). This is a bug in the kernel code. It should be using the standard qkey of 0x12345678 - for now anyway. > Anyway, what we need here is to plug into the scheme of ipoib which uses > the qkey associated with the ipv4 broadcast multicast group. It turns > out that there is some twilight zone here which i am working to > understand better. You can see that for the ipv4 brd group ipoib lets > the SM to allocate the group and qkey (ie the create param of > ipoib_mcast_join is zero), i will give it some thought and let you know > how i think the rdma cm can plug into this scheme, will be happy to get > other ideas as well. The rdma_cm knows the qkey that ipoib uses before it joins a multicast group. See cma_join_ib_multicast() - call to ib_sa_get_mcmember_rec(). - Sean From mst at mellanox.co.il Tue Jan 9 09:57:56 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 9 Jan 2007 19:57:56 +0200 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: <45A3D0BE.4070001@ichips.intel.com> References: <002301c73355$c220e180$8698070a@amr.corp.intel.com> <45A3D0BE.4070001@ichips.intel.com> Message-ID: <20070109175755.GG22476@mellanox.co.il> > > > +/* > > > + * Limit CM msg timeouts to something reasonable. > > > + * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min. > > > + */ > > > +#define IB_CM_MAX_TIMEOUT 21 > > > > OK... 
(although 8 seconds seems a little short -- it seems a somewhat > > longer timeout could be legitimate on a very busy fabric across a WAN > > or something like that) > > I included retries in the overall time, we could bump this to 23 = 32 seconds > per message, and a total potential timeout of 8 minutes. I think that's excessive. > > but then... > > > > > + timeout = min(IB_CM_MAX_TIMEOUT, > > > + cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + > > > + cm_convert_to_ms(cm_id_priv->av.packet_life_time)); > > > > should the IB_CM_MAX_TIMEOUT be inside a cm_convert_to_ms() too? > > and similarly... > > Several of these are off. So - an updated patch? -- MST From changquing.tang at hp.com Tue Jan 9 10:21:26 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 9 Jan 2007 12:21:26 -0600 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <45A3D6D6.5090503@ichips.intel.com> Message-ID: Where do you put these new user functions ? Do you create a new library ? I hope not to create a new library, we already have so many libraries now, it is hard to manage For users using dlopen(). Also when is it available ? Thanks. --CQ > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Sean Hefty > Sent: Tuesday, January 09, 2007 11:55 AM > To: Dotan Barak > Cc: Roland Dreier; openib > Subject: Re: [openib-general] [RFC] userspace IB SA support > > > What about path query or any SA query from the user level ? > > Yes, I'd like that too, but libibumad can provide this > functionality - even if the interface isn't ideal. > Duplicating the ib_sa kernel module functionality in > userspace could allow for a simpler interface however. > > Trying to use the existing interfaces for multicast or notice > registration, however, is more problematic, which is why I > was trying to focus on those areas specifically. 
> > - Sean > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From caitlinb at broadcom.com Tue Jan 9 10:29:12 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Tue, 9 Jan 2007 10:29:12 -0800 Subject: [openib-general] [PATCH 1/10] cxgb3 - main header files In-Reply-To: <20070109135725.GF16107@mellanox.co.il> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1EE6B67@NT-SJCA-0751.brcm.ad.broadcom.com> > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of > Michael S. Tsirkin > Sent: Tuesday, January 09, 2007 5:57 AM > To: Steve Wise > Cc: netdev at vger.kernel.org; Roland Dreier; Divy Le Ray; > linux-kernel at vger.kernel.org; openib-general > Subject: Re: [openib-general] [PATCH 1/10] cxgb3 - main header files > > > We also need to decide on the ib_req_notify_cq() issue. > > Let's clarify - do you oppose doing copy_from_user from a > fixed address passed in during setup? > > If OK with you, this seems the best way as it is the least > controversial and least disruptive one. > To clarify my understanding of this issue: A device MAY implement ib_req_notify_cq by updating a location directly from user mode. Any of the techniques that apply to other user allocated objects, such as the Send Queue, can be applied here. Even though the proposed changes would be about as low impact and benign as possible, the fact that there are valid solutions without an API change leans heavily towards using those solutions. 
From dotanb at dev.mellanox.co.il Tue Jan 9 10:49:38 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Tue, 9 Jan 2007 20:49:38 +0200 (IST) Subject: [openib-general] second version of the libibverbs man pages In-Reply-To: References: <1376.85.65.223.184.1167726918.squirrel@dev.mellanox.co.il> Message-ID: <1854.85.65.224.88.1168368578.squirrel@dev.mellanox.co.il> Hi James. > > The manual page for IBV_GET_CQ_EVENT(3) should be clarified. > > In the NOTES section, the text states that all completion events > returned by ibv_get_cq_event() must be acknowledged using > ibv_ack_cq_events(). > > This statement is confusing because ibv_get_cq_event() doesn't > actually return any events. You are right, i will change the description to a more clear version ... > > Furthermore, in the EXAMPLES section, the call to ibv_get_cq_event() > is not paired with a call to ibv_ack_cq_events(). I missed this one ... Thanks a lot, Feedback is most welcome. Until Roland will check in those pages, i will "release" from time to time an updated tar file with the latest files with the changes that were made from last "release". thanks again Dotan From swise at opengridcomputing.com Tue Jan 9 11:39:22 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 09 Jan 2007 13:39:22 -0600 Subject: [openib-general] [PATCH 1/10] cxgb3 - main header files In-Reply-To: <1168354013.4628.14.camel@stevo-desktop> References: <20061220124125.6286.17148.stgit@localhost.localdomain> <45918CA4.3020601@garzik.org> <45A36C22.6010009@chelsio.com> <45A36E59.30500@garzik.org> <1168349908.4628.3.camel@stevo-desktop> <20070109135725.GF16107@mellanox.co.il> <1168354013.4628.14.camel@stevo-desktop> Message-ID: <1168371562.17406.3.camel@stevo-desktop> > > In the interest of expediting this I'll go implement it... > > Steve. > Here it is. I think this is the correct way to solve the issue (now that I've implemented it :). 
This is a delta from the driver patch series just for reviewing purposes. commit e6053f2aee764b21e28cbb19f52995cb413cf733 Author: Steve Wise Date: Tue Jan 9 13:06:13 2007 -0600 Chelsio-specific solution for copying in the user cq_index. - at cq_create time, user lib passes in the address of its cq rptr u32. - kernel saves this address in the iwch_cq struct. - kernel copies in the rptr value in iwch_req_notify_cq(). Signed-off-by: Steve Wise diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ab99202..28be418 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -143,6 +143,7 @@ static struct ib_cq *iwch_create_cq(stru struct iwch_dev *rhp; struct iwch_cq *chp; struct iwch_create_cq_resp uresp; + struct iwch_create_cq_req ureq; PDBG("%s ib_dev %p entries %d\n", __FUNCTION__, ibdev, entries); rhp = to_iwch_dev(ibdev); @@ -150,6 +151,14 @@ static struct ib_cq *iwch_create_cq(stru if (!chp) return ERR_PTR(-ENOMEM); + if (context) { + if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { + kfree(chp); + return ERR_PTR(-EFAULT); + } + chp->user_rptr_addr = (u32 *)(unsigned long)ureq.user_rptr_addr; + } + if (t3a_device(rhp)) { /* @@ -269,15 +278,14 @@ static int iwch_resize_cq(struct ib_cq * return ret; } -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify, - struct ib_udata *udata) +static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) { struct iwch_dev *rhp; struct iwch_cq *chp; enum t3_cq_opcode cq_op; int err; unsigned long flag; - struct iwch_req_notify_cq ucmd; + u32 rptr; chp = to_iwch_cq(ibcq); rhp = chp->rhp; @@ -285,11 +293,11 @@ static int iwch_arm_cq(struct ib_cq *ibc cq_op = CQ_ARM_SE; else cq_op = CQ_ARM_AN; - if (udata && t3b_device(rhp)) { - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + if (chp->user_rptr_addr) { + if (get_user(rptr, chp->user_rptr_addr)) return -EFAULT; spin_lock_irqsave(&chp->lock, 
flag); - chp->cq.rptr = ucmd.rptr; + chp->cq.rptr = rptr; } else spin_lock_irqsave(&chp->lock, flag); PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index f339427..d9d94e3 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -105,6 +105,7 @@ struct iwch_cq { spinlock_t lock; atomic_t refcnt; wait_queue_head_t wait; + u32 *user_rptr_addr; }; static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq) diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h index 4e4b9c9..e8ff061 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/drivers/infiniband/hw/cxgb3/iwch_user.h @@ -42,6 +42,9 @@ #define IWCH_UVERBS_ABI_VERSION 1 * In particular do not use pointer types -- pass pointers in __u64 * instead. */ +struct iwch_create_cq_req { + __u64 user_rptr_addr; +}; struct iwch_create_cq_resp { __u64 physaddr; @@ -61,8 +64,4 @@ struct iwch_create_qp_resp { struct iwch_reg_user_mr_resp { __u32 pbl_addr; }; - -struct iwch_req_notify_cq { - __u32 rptr; -}; #endif From mshefty at ichips.intel.com Tue Jan 9 11:49:13 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 09 Jan 2007 11:49:13 -0800 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: References: Message-ID: <45A3F1B9.2020500@ichips.intel.com> Tang, Changqing wrote: > Where do you put these new user functions ? That is part of what I'd like input on. My thought was to add them to the libibumad library, but how exactly is not clear yet. > Also when is it available ? I would like to have something by early February or sooner. 
- Sean From swise at opengridcomputing.com Tue Jan 9 12:15:42 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 09 Jan 2007 14:15:42 -0600 Subject: [openib-general] wiki problem on openfabrics.org Message-ID: <1168373742.17406.13.camel@stevo-desktop> I'm trying to edit the Chelsio T3 HowTo on the open fabrics wiki. After logging in ok and editing the page ok, I cannot save the edits. When I click on the save button, my browser pops up a window with a blurb about "You have chosen to open tiki-index.php..." I think something is still whacked with the wiki configuration. Is this just me or my browser? Or is something wrong. Steve. From jsquyres at cisco.com Tue Jan 9 12:19:46 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 9 Jan 2007 15:19:46 -0500 Subject: [openib-general] wiki problem on openfabrics.org In-Reply-To: <1168373742.17406.13.camel@stevo-desktop> References: <1168373742.17406.13.camel@stevo-desktop> Message-ID: <4528D45F-E086-4EF4-B5C4-9B719DDAF79E@cisco.com> Michael -- Do you know what's going on? On Jan 9, 2007, at 3:15 PM, Steve Wise wrote: > I'm trying to edit the Chelsio T3 HowTo on the open fabrics wiki. > After > logging in ok and editing the page ok, I cannot save the edits. > When I > click on the save button, my browser pops up a window with a blurb > about > "You have chosen to open tiki-index.php..." > > I think something is still whacked with the wiki configuration. Is > this > just me or my browser? Or is something wrong. > > > > Steve. 
> > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From tziporet at dev.mellanox.co.il Tue Jan 9 12:36:19 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 09 Jan 2007 22:36:19 +0200 Subject: [openib-general] building and running IBMgtsim? In-Reply-To: <20070109173022.GG11323@sgi.com> References: <20061220165624.GL31149@sgi.com> <458AA161.5090708@mellanox.co.il> <20070109173022.GG11323@sgi.com> Message-ID: <45A3FCC3.9040007@dev.mellanox.co.il> Chris Elmquist wrote: > Hey Guys... > > We had pretty good luck with this a week ago or so until we ran into > a problem with the simulator trying to make a MAD library call which > apparently did not exist. It looked to us like it was trying to make > a call into a 1.2 library even though we are trying to do everything in > an OFED 1.1 context. We were therefore suspecious that we had grabbed a > GIT tree version of the simulator that had been advanced past something > that was compatible with a generic OFED 1.1 tree. > > Can you tell me were I should get (with GIT) an appropriate source tree > for the ibutils (including simulator) that should work with an underlying > generic OFED 1.1 installation? > > > OFED 1.1 was done in SVN and not git. Its under: https://staging.openfabrics.org/svn/openib/gen2/trunk/src/userspace/ibutils/ Note that in OFED 1.1 development the code was located in another place (Eitan do you know where?) 
and was placed into OFED as a tarball Tziporet From halr at voltaire.com Tue Jan 9 12:44:52 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 09 Jan 2007 15:44:52 -0500 Subject: [openib-general] [PATCH] OpenSM/opensm/configure.in: Make default not build socket support for console Message-ID: <1168375339.14647.75325.camel@hal.voltaire.com> OpenSM/opensm/configure.in: Make default not build socket support for console Signed-off-by: Ira Weiny Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in index 2d52675..cecf932 100644 --- a/osm/opensm/configure.in +++ b/osm/opensm/configure.in @@ -64,12 +64,12 @@ AM_CONDITIONAL(DEBUG, test x$debug = xtr dnl Console over a socket connection AC_ARG_ENABLE(console-socket, -[ --enable-console-socket Enable a console socket, requires tcp_wrappers (default yes)], +[ --enable-console-socket Enable a console socket, requires tcp_wrappers (default no)], [case $enableval in yes) console_socket=yes ;; no) console_socket=no ;; esac], - console_socket=yes) + console_socket=no) if test $console_socket = yes; then AC_CHECK_LIB(wrap, request_init, [], AC_MSG_ERROR([request_init() not found. console-socket requires libwrap.])) From afriedle at open-mpi.org Tue Jan 9 13:23:06 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Tue, 09 Jan 2007 16:23:06 -0500 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <000001c73383$30ae7360$8698070a@amr.corp.intel.com> References: <000001c73383$30ae7360$8698070a@amr.corp.intel.com> Message-ID: <45A407BA.5080508@open-mpi.org> Sean Hefty wrote: > Today, userspace support for SA related operations is limited to the libibmad > interface, which supports sending and receiving MADs only. I've been assigned > with the task of exposing multicast and informinfo support to userspace. > Specifically, the following functionality is needed: > > 1. Join a multicast group - needs to use the ib_sa multicast capability. > 2. 
Receive notification of multicast errors. > 3. Leave a multicast group. > 4. Register to receive SA events - needs to use the ib_sa notice capability. > 5. Receive notification of events. > 6. Deregister from SA events. > > Are there any preferences for how this is added? I'm a user; I can't comment on how this is implemented, but I'm very interested in what the API will look like. I think I discussed this on-list before, but a big feature for me (Open MPI) is to be able to request and join an unused multicast group. The exact address (or format, i.e. IP or MGID/MLID) of the group is not important as long as I can query it and pass it on out-of-band to peers. This avoids hard-coding of multicast addresses and/or outside (MPI user) input, which could result in multiple MPI jobs unknowingly using the same multicast group. The other big concern is control over which hca/port is joined to a multicast group -- IIRC this was a problem I had with the RDMA CM. Open MPI specifically tends to open every available network interface (multiple IB ports, as well as say GM/IB/TCP together) for bandwidth aggregation and failover purposes, so controlling which interfaces data goes out over is important. I hope this is useful information -- let me know if there is more input I can provide, or testing I can do. Andrew From rdreier at cisco.com Tue Jan 9 14:04:54 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 09 Jan 2007 14:04:54 -0800 Subject: [openib-general] [PATCH 2.6.20] ehca: use proper flag for get_zeroed_page() to prevent BUG:scheduling while atomic... In-Reply-To: <200701091804.14297.hnguyen@linux.vnet.ibm.com> (Hoang-Nam Nguyen's message of "Tue, 9 Jan 2007 18:04:14 +0100") References: <200701091804.14297.hnguyen@linux.vnet.ibm.com> Message-ID: Thanks, applied. 
From rdreier at cisco.com Tue Jan 9 14:07:18 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 09 Jan 2007 14:07:18 -0800 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <45A341A2.6000603@voltaire.com> (Or Gerlitz's message of "Tue, 09 Jan 2007 09:17:54 +0200") References: <20061231190942.GB32485@mellanox.co.il> <45A341A2.6000603@voltaire.com> Message-ID: > Indeed, it makes much sense, do you any idea what would it take to > expose this capability also by libibverbs? I think the biggest problem would be libipathverbs, which is copying work completion structures directly out of the kernel (which looks pretty fragile if struct ibv_wc ever changes...). - R. From rdreier at cisco.com Tue Jan 9 14:12:17 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 09 Jan 2007 14:12:17 -0800 Subject: [openib-general] [PATCH] [mthca] don't execute the QUERY command in QP is in RESET state In-Reply-To: <1167227166.6664.2.camel@mtls05.yok.mtl.com> (Dotan Barak's message of "Wed, 27 Dec 2006 15:46:06 +0200") References: <1167227166.6664.2.camel@mtls05.yok.mtl.com> Message-ID: Thanks, applied. 
From rdreier at cisco.com Tue Jan 9 14:18:45 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 09 Jan 2007 14:18:45 -0800 Subject: [openib-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This includes the small patches from my previous pull request that you seem to have dropped, plus two more fixes: Dotan Barak (1): IB/mthca: Don't execute QUERY_QP firmware command for QP in RESET state Erez Zilber (1): IB/iser: Return error code when PDUs may not be sent Hoang-Nam Nguyen (1): IB/ehca: Use proper GFP_ flags for get_zeroed_page() Jack Morgenstein (1): IB/mthca: Fix PRM compliance problem in atomic-send completions Michael S. Tsirkin (1): IB/mthca: Fix off-by-one in FMR handling on memfree Sean Hefty (2): RDMA/ucma: Fix struct ucma_event leak when backlog is full RDMA/ucma: Don't report events with invalid user context Steve Wise (1): RDMA/iwcm: iWARP connection timeouts shouldn't be reported as rejects drivers/infiniband/core/cma.c | 17 ++++++++++++++--- drivers/infiniband/core/ucma.c | 11 +++++++++++ drivers/infiniband/hw/ehca/ehca_hca.c | 8 ++++---- drivers/infiniband/hw/ehca/ehca_irq.c | 2 +- drivers/infiniband/hw/ehca/ehca_iverbs.h | 4 ++-- drivers/infiniband/hw/ehca/ehca_main.c | 10 +++++----- drivers/infiniband/hw/ehca/ehca_mrmw.c | 4 ++-- drivers/infiniband/hw/ehca/ehca_qp.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_cq.c | 8 ++++++-- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 +- drivers/infiniband/hw/mthca/mthca_qp.c | 26 +++++++++++++++++--------- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ++-- drivers/infiniband/ulp/iser/iser_initiator.c | 26 ++++++++++++-------------- 13 files changed, 79 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 
533193d..9e0ab04 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1088,10 +1088,21 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) *sin = iw_event->local_addr; sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; *sin = iw_event->remote_addr; - if (iw_event->status) - event.event = RDMA_CM_EVENT_REJECTED; - else + switch (iw_event->status) { + case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; + break; + case -ECONNRESET: + case -ECONNREFUSED: + event.event = RDMA_CM_EVENT_REJECTED; + break; + case -ETIMEDOUT: + event.event = RDMA_CM_EVENT_UNREACHABLE; + break; + default: + event.event = RDMA_CM_EVENT_CONNECT_ERROR; + break; + } break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 81a5cdc..e2e8d32 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -209,10 +209,21 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -EDQUOT; + kfree(uevent); goto out; } ctx->backlog--; + } else if (!ctx->uid) { + /* + * We ignore events for new connections until userspace has set + * their context. This can only happen if an error occurs on a + * new connection before the user accepts it. This is okay, + * since the accept will just fail later. 
+ */ + kfree(uevent); + goto out; } + list_add_tail(&uevent->list, &ctx->file->event_list); wake_up_interruptible(&ctx->file->poll_wait); out: diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index e1b618c..b7be950 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -50,7 +50,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) ib_device); struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -110,7 +110,7 @@ int ehca_query_port(struct ib_device *ibdev, ib_device); struct hipz_query_port *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -179,7 +179,7 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return -EINVAL; } - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -212,7 +212,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, return -EINVAL; } - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index c3ea746..e7209af 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -138,7 +138,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, u64 *rblock; unsigned long block_count; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); if (!rblock) { ehca_err(&shca->ib_device, "Cannot allocate rblock 
memory."); ret = -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 3720e30..cd7789f 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -180,10 +180,10 @@ int ehca_mmap_register(u64 physical,void **mapped, int ehca_munmap(unsigned long addr, size_t len); #ifdef CONFIG_PPC_64K_PAGES -void *ehca_alloc_fw_ctrlblock(void); +void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock() ((void *) get_zeroed_page(GFP_KERNEL)) +#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index cc47e4c..6574fbb 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -106,9 +106,9 @@ static struct timer_list poll_eqs_timer; #ifdef CONFIG_PPC_64K_PAGES static struct kmem_cache *ctblk_cache = NULL; -void *ehca_alloc_fw_ctrlblock(void) +void *ehca_alloc_fw_ctrlblock(gfp_t flags) { - void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL); + void *ret = kmem_cache_zalloc(ctblk_cache, flags); if (!ret) ehca_gen_err("Out of memory for ctblk"); return ret; @@ -206,7 +206,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) u64 h_ret; struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_gen_err("Cannot allocate rblock memory."); return -ENOMEM; @@ -258,7 +258,7 @@ static int init_node_guid(struct ehca_shca *shca) int ret = 0; struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -469,7 +469,7 @@ static ssize_t ehca_show_##name(struct device *dev, \ \ 
shca = dev->driver_data; \ \ - rblock = ehca_alloc_fw_ctrlblock(); \ + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ if (!rblock) { \ dev_err(dev, "Can't allocate rblock memory."); \ return 0; \ diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 0a5e221..cfb362a 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -1013,7 +1013,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, u32 i; u64 *kpage; - kpage = ehca_alloc_fw_ctrlblock(); + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; @@ -1124,7 +1124,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); - kpage = ehca_alloc_fw_ctrlblock(); + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index c6c9cef..34b8555 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -807,7 +807,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, unsigned long spl_flags = 0; /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(); + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!mqpcb) { ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); @@ -1273,7 +1273,7 @@ int ehca_query_qp(struct ib_qp *qp, return -EINVAL; } - qpcb = ehca_alloc_fw_ctrlblock(); + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { ehca_err(qp->device,"Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 283d50b..1159c8a 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ 
b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -54,6 +54,10 @@ enum { MTHCA_CQ_ENTRY_SIZE = 0x20 }; +enum { + MTHCA_ATOMIC_BYTE_LEN = 8 +}; + /* * Must be packed because start is 64 bits but only aligned to 32 bits. */ @@ -599,11 +603,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev, break; case MTHCA_OPCODE_ATOMIC_CS: entry->opcode = IB_WC_COMP_SWAP; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; case MTHCA_OPCODE_ATOMIC_FA: entry->opcode = IB_WC_FETCH_ADD; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; case MTHCA_OPCODE_BIND_MW: entry->opcode = IB_WC_BIND_MW; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 15cc2f6..6b19645 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -232,7 +232,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj) list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { - if (chunk->mem[i].length >= offset) { + if (chunk->mem[i].length > offset) { page = chunk->mem[i].page; goto out; } diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index d844a25..5f5214c 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -429,13 +429,18 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); - int err; - struct mthca_mailbox *mailbox; + int err = 0; + struct mthca_mailbox *mailbox = NULL; struct mthca_qp_param *qp_param; struct mthca_qp_context *context; int mthca_state; u8 status; + if (qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); @@ -454,7 +459,6 
@@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m mthca_state = be32_to_cpu(context->flags) >> 28; qp_attr->qp_state = to_ib_qp_state(mthca_state); - qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->path_mtu = context->mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); @@ -464,11 +468,6 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff; qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context->params2)); - qp_attr->cap.max_send_wr = qp->sq.max; - qp_attr->cap.max_recv_wr = qp->rq.max; - qp_attr->cap.max_send_sge = qp->sq.max_gs; - qp_attr->cap.max_recv_sge = qp->rq.max_gs; - qp_attr->cap.max_inline_data = qp->max_inline_data; if (qp->transport == RC || qp->transport == UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); @@ -495,7 +494,16 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5; qp_attr->alt_timeout = context->alt_path.ackto >> 3; - qp_init_attr->cap = qp_attr->cap; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_send_wr = qp->sq.max; + qp_attr->cap.max_recv_wr = qp->rq.max; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + qp_attr->cap.max_inline_data = qp->max_inline_data; + + qp_init_attr->cap = qp_attr->cap; out: mthca_free_mailbox(dev, mailbox); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9b2041e..dd221ed 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -177,7 +177,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, * - if yes, the mtask is recycled at iscsi_complete_pdu * - if no, the mtask is recycled at 
iser_snd_completion */ - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; @@ -241,7 +241,7 @@ iscsi_iser_ctask_xmit(struct iscsi_conn *conn, error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); iscsi_iser_ctask_xmit_exit: - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index e73c87b..0a7d1ab 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -304,18 +304,14 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) static int iser_check_xmit(struct iscsi_conn *conn, void *task) { - int rc = 0; struct iscsi_iser_conn *iser_conn = conn->dd_data; - write_lock_bh(conn->recv_lock); if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == ISER_QP_MAX_REQ_DTOS) { - iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); - rc = -EAGAIN; + iser_dbg("%ld can't xmit task %p\n",jiffies,task); + return -ENOBUFS; } - write_unlock_bh(conn->recv_lock); - return rc; + return 0; } @@ -340,7 +336,7 @@ int iser_send_command(struct iscsi_conn *conn, return -EPERM; } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; edtl = ntohl(hdr->data_length); @@ -426,7 +422,7 @@ int iser_send_data_out(struct iscsi_conn *conn, } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; itt = ntohl(hdr->itt); data_seg_len = ntoh24(hdr->dlength); @@ -498,7 +494,7 @@ int iser_send_control(struct iscsi_conn *conn, } if (iser_check_xmit(conn,mtask)) - return -EAGAIN; + return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; @@ -605,6 +601,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iscsi_iser_conn 
*iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_mgmt_task *mtask; + int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -613,15 +610,16 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == + ISER_QP_MAX_REQ_DTOS) + resume_tx = 1; + atomic_dec(&ib_conn->post_send_buf_count); - write_lock(conn->recv_lock); - if (conn->suspend_tx) { + if (resume_tx) { iser_dbg("%ld resuming tx\n",jiffies); - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); scsi_queue_work(conn->session->host, &conn->xmitwork); } - write_unlock(conn->recv_lock); if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ From rjwalsh at pathscale.com Tue Jan 9 14:28:39 2007 From: rjwalsh at pathscale.com (Robert Walsh) Date: Tue, 09 Jan 2007 14:28:39 -0800 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: References: <20061231190942.GB32485@mellanox.co.il> <45A341A2.6000603@voltaire.com> Message-ID: <45A41717.5090600@pathscale.com> Roland Dreier wrote: > > Indeed, it makes much sense, do you any idea what would it take to > > expose this capability also by libibverbs? > > I think the biggest problem would be libipathverbs, which is copying > work completion structures directly out of the kernel (which looks > pretty fragile if struct ibv_wc ever changes...). We'll take a look at this - thanks for pointing it out. From sashak at voltaire.com Tue Jan 9 15:21:29 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 10 Jan 2007 01:21:29 +0200 Subject: [openib-general] [PATCH] opensm: fix segfault with up/down and root nodes file. 
Message-ID: <20070109232129.GP17240@sashak.voltaire.com> Segfault happens when yet non-initialized lid_matrix tables are accessed - with up/down routing engine when root nodes are provided by user and lid matrices are not pre-created. There is the fix. Signed-off-by: Sasha Khapyorsky --- osm/include/opensm/osm_matrix.h | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) diff --git a/osm/include/opensm/osm_matrix.h b/osm/include/opensm/osm_matrix.h index 9a940be..bf28e83 100644 --- a/osm/include/opensm/osm_matrix.h +++ b/osm/include/opensm/osm_matrix.h @@ -236,7 +236,10 @@ osm_lid_matrix_get( IN const uint8_t port_num ) { CL_ASSERT( port_num < p_lmx->num_ports ); - CL_ASSERT( lid_ho < cl_vector_get_size(&p_lmx->lid_vec) ); + + if ( lid_ho >= cl_vector_get_size(&p_lmx->lid_vec) ) + return OSM_NO_PATH; + return( ((uint8_t *)cl_vector_get_ptr( &p_lmx->lid_vec, lid_ho ))[port_num] ); } @@ -273,7 +276,8 @@ static inline uint16_t osm_lid_matrix_get_max_lid_ho( IN const osm_lid_matrix_t* const p_lmx ) { - return( (uint16_t)(cl_vector_get_size( &p_lmx->lid_vec ) - 1 ) ); + return cl_vector_get_size( &p_lmx->lid_vec ) ? + (uint16_t)(cl_vector_get_size( &p_lmx->lid_vec ) - 1) : 0; } /* * PARAMETERS -- 1.4.4.2.gfc82d From caitlinb at broadcom.com Tue Jan 9 15:37:25 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Tue, 9 Jan 2007 15:37:25 -0800 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <45A341A2.6000603@voltaire.com> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1EE6BE4@NT-SJCA-0751.brcm.ad.broadcom.com> openib-general-bounces at openib.org wrote: > Roland Dreier wrote: >> This change makes sense to me. Does anyone object to queueing this >> for 2.6.21? > > Indeed, it makes much sense, do you any idea what would it > take to expose this capability also by libibverbs? > > Or. > Translating QP ID to a kernel pointer in kernel verbs should be trivial for almost any implementation I've ever imagined.
But translating a QP ID to a useful user mode pointer in user mode verbs is considerably trickier, and definitely not something you would want to do automatically even if the application had no use for this automatic translation. From vishal at endace.com Tue Jan 9 17:24:03 2007 From: vishal at endace.com (vishal) Date: Wed, 10 Jan 2007 14:24:03 +1300 Subject: [openib-general] NFS-RDMA Performance Issue Message-ID: <1168392243.5086.58.camel@julia.et.endace.com> Hi, I am getting an average speed of ~100 MiB/s using NFS-RDMA over Infiniband, writing to an md device. The local write speed was ~800 MiB/s. The kernel being used is 2.6.18.3. The tests were performed using dd (various block sizes have been tried). I have fiddled with the rsize and wsize as well, but that didn't make any difference. Can anyone please help me out! Thanks! Vishal P.S This is using NFS v3 From vishal at endace.com Tue Jan 9 17:29:05 2007 From: vishal at endace.com (vishal) Date: Wed, 10 Jan 2007 14:29:05 +1300 Subject: [openib-general] NFS(v4)-RDMA Message-ID: <1168392546.5086.64.camel@julia.et.endace.com> Hi, I am getting an invalid argument error when I try to mount using nfs(v4)-rdma. The mounts with nfs(v3)-rdma work fine. Following are the details:- 1. Command Issued on the client side: - ./nfsrdmamount -o rdma=10.0.0.2 -t nfs4 10.0.0.2:/ /data 2. /etc/exports on the client side /data 10.0.0.1(rw,fsid=0,insecure,no_subtree_check,async) 3. Error received:- nfsmount: Invalid argument 4. From strace:- mount("10.0.0.2:/", "/data", "nfs4", 0, "\1") = -1 EINVAL (Invalid argument) Thanks for your help! Vishal From vatsa at veritas.com Tue Jan 9 19:06:35 2007 From: vatsa at veritas.com (vatsa at veritas.com) Date: Tue, 09 Jan 2007 19:06:35 -0800 Subject: [openib-general] Stale CM callbacks Message-ID: <45A4583B.8050807@veritas.com> Hi, I am using IB as a cluster interconnect.
If a node which had established several connections with a remote node was reset (not rebooted) and it came back up quickly is it possible for the node to get stale REQ/DREQ callbacks ? If yes, is there an API to purge stale states in the CM or should it be detected by the module getting the callback ? The nodes are running RHEL4 Update 4 on x86_64, the kernel version 2.6.9-42.ELsmp. The OpenIB rpms are what comes with RHEL4 Update4. Thanks, Sreevatsa From sean.hefty at intel.com Tue Jan 9 20:00:33 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 9 Jan 2007 20:00:33 -0800 Subject: [openib-general] [RFC] userspace IB SA support Message-ID: <000001c7346b$e18d5730$81c8180a@amr.corp.intel.com> >1. Join a multicast group - needs to use the ib_sa multicast capability. Adding this functionality to the existing ib_umad module would add an extra dependency of ib_umad on the ib_sa module. Multicast join / leave operations could be done by adding additional IOCTLs, by embedding the request as a send_mad call, or by modifying the ib_umad send interface. As an alternative, a new kernel userspace SA module could be created to explicitly interface with the kernel ib_sa. >2. Receive notification of multicast errors. Add to this, receive notification of the join as well. This could be accomplished by returning the relevant data (SA attribute only) through libibumad recv_mad. This would change the behavior of the user/kernel interface, but I'm not sure how many applications would be affected by the change. >3. Leave a multicast group. >4. Register to receive SA events - needs to use the ib_sa notice capability. >5. Receive notification of events. >6. Deregister from SA events. SA event notification could be handled similarly to multicast... 
- Sean From rdreier at cisco.com Tue Jan 9 20:39:14 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 09 Jan 2007 20:39:14 -0800 Subject: [openib-general] [PATCH cosmetic] IB/verbs: include linux/kref directly In-Reply-To: <20070103125328.GI6019@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 3 Jan 2007 14:53:28 +0200") References: <20070103125328.GI6019@mellanox.co.il> Message-ID: queued for 2.6.21 From rdreier at cisco.com Tue Jan 9 20:51:16 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 09 Jan 2007 20:51:16 -0800 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20061231190942.GB32485@mellanox.co.il> (Michael S. Tsirkin's message of "Sun, 31 Dec 2006 21:09:42 +0200") References: <20061231190942.GB32485@mellanox.co.il> Message-ID: Looking at this in depth, I see one very iffy part: > @@ -652,7 +653,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, > wc->pkey_index = pkey_index; > wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); > wc->src_qp = IB_QP0; > - wc->qp_num = IB_QP0; > + wc->qp = qp; this is now returning a pointer to the MAD layer's internal QP. I guess this is OK -- the only user of the pointer seems to be the mthca MAD_IFC command, which just grabs the QP number anyway. But I just wanted to point out this wart... - R. From mst at mellanox.co.il Tue Jan 9 20:56:20 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 10 Jan 2007 06:56:20 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: References: Message-ID: <20070110045620.GP22476@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH RFC] return qp pointer as part of ib_wc > > Looking at this in depth, I see one very iffy part: > > > @@ -652,7 +653,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, > > wc->pkey_index = pkey_index; > > wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); > > wc->src_qp = IB_QP0; > > - wc->qp_num = IB_QP0; > > + wc->qp = qp; > > this is now returning a pointer to the MAD layer's internal QP. I > guess this is OK -- the only user of the pointer seems to be the mthca > MAD_IFC command, which just grabs the QP number anyway. But I just > wanted to point out this wart... What's the problem with this? -- MST From rdreier at cisco.com Tue Jan 9 21:08:48 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 09 Jan 2007 21:08:48 -0800 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: <20070110045620.GP22476@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 10 Jan 2007 06:56:20 +0200") References: <20070110045620.GP22476@mellanox.co.il> Message-ID: > > this is now returning a pointer to the MAD layer's internal QP. I > > guess this is OK -- the only user of the pointer seems to be the mthca > > MAD_IFC command, which just grabs the QP number anyway. But I just > > wanted to point out this wart... > > What's the problem with this? It's a bit of a layering violation -- a consumer uses the MAD layer through the agent abstraction and so on, and then ends up getting a pointer to the MAD layer's QP struct. I don't think it's really a big deal but it's worth pointing out. From mst at mellanox.co.il Tue Jan 9 21:17:24 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 10 Jan 2007 07:17:24 +0200 Subject: [openib-general] [PATCH RFC] return qp pointer as part of ib_wc In-Reply-To: References: Message-ID: <20070110051724.GB11857@mellanox.co.il> > > > this is now returning a pointer to the MAD layer's internal QP. I > > > guess this is OK -- the only user of the pointer seems to be the mthca > > > MAD_IFC command, which just grabs the QP number anyway. But I just > > > wanted to point out this wart... > > > > What's the problem with this? > > It's a bit of a layering violation -- a consumer uses the MAD layer > through the agent abstraction and so on, and then ends up getting a > pointer to the MAD layer's QP struct. > > I don't think it's really a big deal but it's worth pointing out. Yea. Well, one can argue that passing ib_wc for a work request that a client didn't build is a layering violation too - e.g. the wr_id does not make much sense either, does it? But it still seems easier than invent a ib_mad_wc. -- MST From eitan at sw053.yok.mtl.com Tue Jan 9 21:26:10 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Wed, 10 Jan 2007 07:26:10 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-10:normal completion Message-ID: <200701100526.l0A5QAhS025280@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_8_12:41:44_2007 064f5e ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree 
merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From monis at voltaire.com Tue Jan 9 22:40:53 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 10 Jan 2007 08:40:53 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling - performance measurements In-Reply-To: <20070109162148.GB22476@mellanox.co.il> References: <45A3BB9C.3010807@voltaire.com> <20070109162148.GB22476@mellanox.co.il> Message-ID: <45A48A75.5070300@voltaire.com> Michael S. Tsirkin wrote: >>>>Tests with iperf and netperf for unicast and multicast destinations show >>>>an improvement in the ability of user applications to xmit packets. >>>> >>>>Examples: Number of successful writes as reported by 30 seconds UDP_STREAM of 100 byte packets. >>>>Tested with netperf on Dual CPU (64bit Intel Xeon 3GHz) running linux-2.6.20-rc1 (sender) and >>>>OFED-1.1 (receiver) >>> >>> >>>IMO netperf reporting is actually not too informative without stats settings. >>>Try running with e.g. -i 10,2 -I 99,5 - you might discover that your numbers are >>>only accurate within 30% >> >>I tried that and I am getting a warning about confidence level not being >>achieved. I am still trying to learn about that and trying to understand why >>(any ideas?) but for the meantime can you explain why do I need statistics when >>I am only trying to count the number of successful writes? > > > Otherwise your results could be just noise. > > >>>>Note that the results below show improvement only for TX so we see an end to end packet loss. >>> >>> >>>Hmm, as long as packet drops increase, BW improvements in UDP don't sound >>>too convincing, do they? You can get infinite BW at 100% drop ... >>> >>> >>> >>>>Improving the receiver (NAPI) will reduce the packet loss. 
>>> >>> >>>Needs testing with NAPI patch then? >> >>I tried NAPI and I get better results for the receiver but my opinion is that >>the receiver side is less important here since all I'm trying to improve is >>the ability to send packets. Am I right? > > > Only if you are sure something else is not dropping the packets (e.g. > buffer overruns triggered). > The number of sent packets reported by netperf is equal to the number of sent packets reported by netdev stats (from running ifconfig before and after netperf) and to the number of sent packets reported by the port (perfquery) From ogerlitz at voltaire.com Tue Jan 9 23:10:44 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 10 Jan 2007 09:10:44 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A3D9BB.2080408@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> Message-ID: <45A49174.5040009@voltaire.com> Sean Hefty wrote: >> Other then that, as we discussed in SC06 there are some changes that >> need to be integrated in the code to allow for interoperability >> between a multicast rdma cm based app to IPoIB, specifically removing >> the RDMA CM signature from the mgid which generated from the ip addr >> and pkey, but not only. > ...I have not completed these changes yet. Specifically, I have not > added a send only join parameter or changed the qkey. OK, I understand that adding a send only join param changes the librdmacm/ucma ABI and further that you might be somehow busy to fully implement the sendonly scheme at the multicast code for the 2.6.21 time frame. How about adding sendonly param to the ABI and having the ucma kernel code returning -EINVAL if someone tries to set it to true. Such code can be pushed to 2.6.21 and when you have the time to complete the implementation you can complete this? 
> I have also not full examined an issue where the SM log fills up with > bad multicast join requests. Is it what Dotan has reported? i recall the test does not use librdmacm nor IPoIB, so how does it exercise the kernel ib_sa api at all ??? i guess it uses libibmad or libibumad to send the joins etc. >> The second change is related to the qkey, looking in the current code >> of cma_join_ib_multicast() (at the multicast-sa_cache branch of the >> rdma-dev git) i see that the qkey is the mc ip address, which is not >> consistent with what librdmacm is assuming (0x1234567 etc). > > This is a bug in the kernel code. It should be using the standard qkey > of 0x12345678 - for now anyway. OK >> Anyway, what we need here is to plug into the scheme of ipoib which >> uses the qkey associated with the ipv4 broadcast multicast group. It >> turns out that there is some twilight zone here which i am working to >> understand better. You can see that for the ipv4 brd group ipoib lets >> the SM to allocate the group and qkey (ie the create param of >> ipoib_mcast_join is zero), i will give it some thought and let you >> know how i think the rdma cm can plug into this scheme, will be happy >> to get other ideas as well. > The rdma_cm knows the qkey that ipoib uses before it joins a multicast > group. See cma_join_ib_multicast() - call to ib_sa_get_mcmember_rec(). Looking on the code, i understand that if an multicast consumer attempts to join a group for which another consumer is already joined then it just gets the group params, that is the mgid is your discriminator (with the exception of an all zeros mgid which has a different treatment) which makes much sense to me. Going forward with this idea, a cma consumer that wants to use the ipv4 broadcast group qkey can join the group and learn the qkey. 
However, there are two problems with this approach a) it can't provide the qkey to the rdma cm for another group it want to join and assuming the --local-- ipoib is not joined on the other group, we are back to the original problem. b) assuming the above problem is solved, the cma consumer must stay on-line (ie not leave) with the broadcast group and hence will get all the ipv4 broadcast traffic of the cluster. We can assume that at least some of the multicast traffic of the node is routed to an ipoib subnet, we can further assume that the net stack would cause ipoib to join to the mgroup related to the "all hosts" ipv4 address --> 224.0.0.1 Since for our apps needs we do intend to join the 224.0.0.1 group, resolving a) above is fine for us --> we will join 224.0.0.1 above, provide the qkey to the rdma cm and it will join to the other group (eg 224.5.5.5) with this qkey. what do you think? > int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, > union ib_gid *mgid, struct ib_sa_mcmember_rec *rec) > { > struct mcast_device *dev; > struct mcast_port *port; > struct mcast_group *group; > unsigned long flags; > int ret = 0; > > dev = ib_get_client_data(device, &mcast_client); > if (!dev) > return -ENODEV; > > port = &dev->port[port_num - dev->start_port]; > if (mgid && memcmp(mgid, &mgid0, sizeof mgid0)) { > spin_lock_irqsave(&port->lock, flags); > group = mcast_find(port, mgid); > if (group) > *rec = group->rec; > else > ret = -EADDRNOTAVAIL; > spin_unlock_irqrestore(&port->lock, flags); > } else { > memset(rec, 0, sizeof *rec); > ib_get_cached_gid(device, port_num, 0, &rec->port_gid); > rec->pkey = 0xFFFF; > get_random_bytes(&rec->qkey, sizeof rec->qkey); > rec->join_state = 1; can you remind me what the idea/trick here, aren't you supposed to generate an mgid for this case? > } > > return ret; > } Or. 
From kliteyn at dev.mellanox.co.il Tue Jan 9 23:47:36 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 10 Jan 2007 09:47:36 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070109180712.GA17240@sashak.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> Message-ID: <45A49A18.8050702@dev.mellanox.co.il> Hi Sasha, Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 12:05 Tue 09 Jan , Yevgeny Kliteynik wrote: >> Hi Sasha. >> Thanks for spotting this issue - I missed it when I reviewed the patch. >> Here's how things look in windows: >> * openlog() and closelog() are redefined to nothing in windows config header >> * cl_log_event() implementation on windows does all the work - gets handler >> to event log, logs the message, and closes the handler. >> It does *not* uses syslog(). >> >> To use syslog/sysopen/sysclose on windows, a user have to install some >> package (e.g. "Interix") that provides various daemons (such as syslogd), >> similar to Linux/Unix: >> http://msdn2.microsoft.com/en-us/library/ms811897.aspx#ucmgch10_topic12 >> http://www.interix.com/ > > And this is not done for WinOpenSM now. Is this would be useful option? I > guess this could solve a lot of compatibility problems and simplify the > porting dramatically, right? I'll address this question to the windows guys. But I seriously doubt that they would want to make OSM depend on some third-party product, whether it is commercial or free. >> So replacing cl_log_event() with syslog() would definitely cause a problem. >> >> A (pretty ugly) solution can be writing a wrapper function to cl_log_event() >> on windows and redefining syslog() to use this wrapper. > > Like #define syslog() cl_log_event() ? 
Not so ugly (better than > combination of openlog()/closelog() and "unrelated" cl_log_event()), > and perfectly simple IMO. "#define syslog() cl_log_event()" will not do the job, since these functions have different definitions: syslog(int priority, char *message, ...); cl_log_event(char*, cl_log_type_t, char*, void*, uint32_t); so there should be a wrapper just for windows that will prepare the message for cl_log_event() as a single string. >> But the question is: why do we need this replacement in the first place? > > Look at the original Ira's patch for details - there syslog() is > introduced and closelog() is removed from cl_event_log(). I think his > motivation is to keep syslog connected for log reopening events and > prevent noisy openlog()/closelog() calls in cl_log_event(). Agree. --Yevgeny >>> replacement itself is needed in order to restore complib's cl_log_event() >>> behavior used by various applications (like ibutils ones and others). >> What behavior are we talking about here? > > OTOH closelog() is needed than openlog()/syslog()/closelog() is part > shared library (look "Submitting Syslog Messages" at 'info libc' ). > > Sasha > From mst at mellanox.co.il Tue Jan 9 23:59:13 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 09:59:13 +0200 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: <20070108165714.GM20047@mellanox.co.il> References: <20070108165714.GM20047@mellanox.co.il> Message-ID: <20070110075913.GA14774@mellanox.co.il> > Subject: Re: [PATCHv4] IPoIB CM Experimental support Roland, can we queue this for 2.6.21?
-- MST From monis at voltaire.com Wed Jan 10 03:09:38 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 10 Jan 2007 13:09:38 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling - performance measurements In-Reply-To: <20070109162148.GB22476@mellanox.co.il> References: <45A3BB9C.3010807@voltaire.com> <20070109162148.GB22476@mellanox.co.il> Message-ID: <45A4C972.7050403@voltaire.com> Michael S. Tsirkin wrote: >>>>Tests with iperf and netperf for unicast and multicast destinations show >>>>an improvement in the ability of user applications to xmit packets. >>>> >>>>Examples: Number of successful writes as reported by 30 seconds UDP_STREAM of 100 byte packets. >>>>Tested with netperf on Dual CPU (64bit Intel Xeon 3GHz) running linux-2.6.20-rc1 (sender) and >>>>OFED-1.1 (receiver) >>> >>> >>>IMO netperf reporting is actually not too informative without stats settings. >>>Try running with e.g. -i 10,2 -I 99,5 - you might discover that your numbers are >>>only accurate within 30% >> >>I tried that and I am getting a warning about confidence level not being >>achieved. I am still trying to learn about that and trying to understand why >>(any ideas?) but for the meantime can you explain why do I need statistics when >>I am only trying to count the number of successful writes? > > > Otherwise your results could be just noise. I'm sorry but I don't understand how can it be noise. I am not measuring average nor PPS (or BW) but true a counter (number of total sent packets) so confidence seems irrelevant here. Anyway, port counters and device counters show the same number as netperf so I guess this is the real confidence. > > >>>>Note that the results below show improvement only for TX so we see an end to end packet loss. >>> >>> >>>Hmm, as long as packet drops increase, BW improvements in UDP don't sound >>>too convincing, do they? You can get infinite BW at 100% drop ... 
>>> >>> >>> >>>>Improving the receiver (NAPI) will reduce the packet loss. >>> >>> >>>Needs testing with NAPI patch then? >> >>I tried NAPI and I get better results for the receiver but my opinion is that >>the receiver side is less important here since all I'm trying to improve is >>the ability to send packets. Am I right? > > > Only if you are sure something else is not dropping the packets (e.g. > buffer overruns triggered). > From halr at voltaire.com Wed Jan 10 03:51:02 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 06:51:02 -0500 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45A49A18.8050702@dev.mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> Message-ID: <1168429858.14647.132778.camel@hal.voltaire.com> Hi Yevgeny, On Wed, 2007-01-10 at 02:47, Yevgeny Kliteynik wrote: > Hi Sasha, > > Sasha Khapyorsky wrote: > > Hi Yevgeny, > > > > On 12:05 Tue 09 Jan , Yevgeny Kliteynik wrote: > >> Hi Sasha. > >> Thanks for spotting this issue - I missed it when I reviewed the patch. > >> Here's how things look in windows: > >> * openlog() and closelog() are redefined to nothing in windows config header > >> * cl_log_event() implementation on windows does all the work - gets handler > >> to event log, logs the message, and closes the handler. > >> It does *not* uses syslog(). > >> > >> To use syslog/sysopen/sysclose on windows, a user have to install some > >> package (e.g. "Interix") that provides various daemons (such as syslogd), > >> similar to Linux/Unix: > >> http://msdn2.microsoft.com/en-us/library/ms811897.aspx#ucmgch10_topic12 > >> http://www.interix.com/ > > > > And this is not done for WinOpenSM now. Is this would be useful option? 
I > > guess this could solve a lot of compatibility problems and simplify the > > porting dramatically, right? > > I'll address this question to the windows guys. But I seriously doubt that > they would want to make OSM depend on some third-party product, whether it > is commercial or free. > > >> So replacing cl_log_event() with syslog() would definitely cause a problem. > >> > >> A (pretty ugly) solution can be writing a wrapper function to cl_log_event() > >> on windows and redefining syslog() to use this wrapper. > > > > Like #define syslog() cl_log_event() ? Not so ugly (better than > > combination of openlog()/closelog() and "unrelated" cl_log_event()), > > and perfectly simple IMO. > > "#define syslog() cl_log_event()" will not do the job, since these functions > have different definitios: > syslog(int priority, char *message, ...); > cl_log_event(char*, cl_log_type_t, char*, void*, uint32_t); > so there should be wrapper just for windows that will prepare the message > for cl_log_event() as a single string. So just so I am clear on this: Is the wrapper approach acceptable to you ? -- Hal > >> But the question is: why do we need this replacement in the first place? > > > > Look at the original Ira's patch for details - there syslog() is > > introduced and closelog() is removed from cl_event_log(). I think his > > motivation is to keep syslog connected for log reopening events and > > prevent noisy openlog()/closelog() calls in cl_log_event(). > > Agree. > > --Yevgeny > > >>> replacement itself is needed in order to restore complib's cl_log_event() > >>> behavior used by various applications (like ibutils ones and others). > >> What behavior are we talking about here? > > > > OTOH closelog() is needed than openlog()/syslog()/closelog() is part > > shared library (look "Submitting Syslog Messages" at 'info libc' ). 
> > > > Sasha > > From halr at voltaire.com Wed Jan 10 04:36:03 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 07:36:03 -0500 Subject: [openib-general] ioctl and send_agents In-Reply-To: <001d01c73318$19033220$21606d86@one7> References: <000901c732bc$17eeb9b0$21606d86@one7> <1168221630.4577.39665.camel@hal.voltaire.com> <001d01c73318$19033220$21606d86@one7> Message-ID: <1168432557.14647.135624.camel@hal.voltaire.com> On Mon, 2007-01-08 at 06:28, Michael Arndt wrote: > Hi, > > Thanks for the fast answer. > > > OpenSM registers agents in opensm/osm_sm_mad_ctrl.c:osm_sm_mad_ctrl_bind > > and opensm/osm_sa_mad_ctrl.c:osm_sa_mad_ctrl_bind. osm_sm_mad_ctrl_bind > > is called from osm_sm.c:osm_sm_bind and osm_sa_mad_ctrl_bind is called > > from osm_sa.c:osm_sa_bind. Both osm_sm_bind and osm_sa_bind are called > > from opensm/osm_opensm.c:osm_opensm_bind which is in turn called from > > main.c during OpenSM startup. That is the vendor independent part. > > > > The vendor dependant part is done in the vendor layer. For OpenIB, it is > > done in osm_vendor_ibumad.c:osm_vendor_bind. > > I looked at the osm_vendor_bind and seen the umad_register call. But if I > checked the umad_register function (libibumad/src/umad.c) I just see an > ioctl call again. What do you mean by "again" here ? Where was the "previous" ioctl ? register and unregister are done via ioctl. > And if it right that the user_mad module is uses at kernel > space Yes, it uses ib_umad module (which is built from user_mad.c). > shouldn't there be a call like unlocked_ioctl or compat_ioctl like > defined in this module? 
In user_mad.c, both unlocked/compat_ioctl are defined: static struct file_operations umad_fops = { .owner = THIS_MODULE, .read = ib_umad_read, .write = ib_umad_write, .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, .compat_ioctl = ib_umad_ioctl, .open = ib_umad_open, .release = ib_umad_close }; > These agents are all receiver agents and you say nothing about send agents > for SM? When you say send agents, do you mean send only agents or something else ? Assuming you mean send only agents, I don't think that the SM/SA uses send only agents. I think we had a previous discussion of send only agents in the context of IB agents (SMA, ...). -- Hal > Thanks Michael From kliteyn at dev.mellanox.co.il Wed Jan 10 05:31:56 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 10 Jan 2007 15:31:56 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <1168429858.14647.132778.camel@hal.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <1168429858.14647.132778.camel@hal.voltaire.com> Message-ID: <45A4EACC.3070609@dev.mellanox.co.il> Hi Hal. Hal Rosenstock wrote: > Hi Yevgeny, > > On Wed, 2007-01-10 at 02:47, Yevgeny Kliteynik wrote: >> Hi Sasha, >> >> Sasha Khapyorsky wrote: >>> Hi Yevgeny, >>> >>> On 12:05 Tue 09 Jan , Yevgeny Kliteynik wrote: >>>> Hi Sasha. >>>> Thanks for spotting this issue - I missed it when I reviewed the patch. >>>> Here's how things look in windows: >>>> * openlog() and closelog() are redefined to nothing in windows config header >>>> * cl_log_event() implementation on windows does all the work - gets handler >>>> to event log, logs the message, and closes the handler. >>>> It does *not* uses syslog(). >>>> >>>> To use syslog/sysopen/sysclose on windows, a user have to install some >>>> package (e.g. 
"Interix") that provides various daemons (such as syslogd), >>>> similar to Linux/Unix: >>>> http://msdn2.microsoft.com/en-us/library/ms811897.aspx#ucmgch10_topic12 >>>> http://www.interix.com/ >>> And this is not done for WinOpenSM now. Is this would be useful option? I >>> guess this could solve a lot of compatibility problems and simplify the >>> porting dramatically, right? >> I'll address this question to the windows guys. But I seriously doubt that >> they would want to make OSM depend on some third-party product, whether it >> is commercial or free. >> >>>> So replacing cl_log_event() with syslog() would definitely cause a problem. >>>> >>>> A (pretty ugly) solution can be writing a wrapper function to cl_log_event() >>>> on windows and redefining syslog() to use this wrapper. >>> Like #define syslog() cl_log_event() ? Not so ugly (better than >>> combination of openlog()/closelog() and "unrelated" cl_log_event()), >>> and perfectly simple IMO. >> "#define syslog() cl_log_event()" will not do the job, since these functions >> have different definitios: >> syslog(int priority, char *message, ...); >> cl_log_event(char*, cl_log_type_t, char*, void*, uint32_t); >> so there should be wrapper just for windows that will prepare the message >> for cl_log_event() as a single string. > > So just so I am clear on this: > > Is the wrapper approach acceptable to you ? Yes, I think I can live with that :) -- Yevgeny > -- Hal > >>>> But the question is: why do we need this replacement in the first place? >>> Look at the original Ira's patch for details - there syslog() is >>> introduced and closelog() is removed from cl_event_log(). I think his >>> motivation is to keep syslog connected for log reopening events and >>> prevent noisy openlog()/closelog() calls in cl_log_event(). >> Agree. >> >> --Yevgeny >> >>>>> replacement itself is needed in order to restore complib's cl_log_event() >>>>> behavior used by various applications (like ibutils ones and others). 
>>>> What behavior are we talking about here? >>> OTOH closelog() is needed than openlog()/syslog()/closelog() is part >>> shared library (look "Submitting Syslog Messages" at 'info libc' ). >>> >>> Sasha >>> > From halr at voltaire.com Wed Jan 10 05:39:43 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 08:39:43 -0500 Subject: [openib-general] add SIGUSR1 to reopen osm.log In-Reply-To: <20061116175304.668afcab.weiny2@llnl.gov> References: <20061116175304.668afcab.weiny2@llnl.gov> Message-ID: <1168436367.14647.139518.camel@hal.voltaire.com> Hi Ira, On Thu, 2006-11-16 at 20:53, Ira Weiny wrote: > Our sysadmins have been rotating OpenSM's osm.log file and then restarting > OpenSM. As this is a less than optimal solution if you have jobs running on > the system, I wrote this patch (against OFED 1.1) which adds a handler for > SIGUSR1 that reopens OpenSM's log file without a restart. Thanks! Applied. -- Hal > Ira Weiny > weiny2 at llnl.gov > > From halr at voltaire.com Wed Jan 10 05:42:37 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 08:42:37 -0500 Subject: [openib-general] [PATCH 1/2] opensm: sigusr1: syslog() fixes In-Reply-To: <20070108024211.GC19217@sashak.voltaire.com> References: <20061116175304.668afcab.weiny2@llnl.gov> <1164048220.4381.4161.camel@hal.voltaire.com> <20070108023738.GB19217@sashak.voltaire.com> <20070108024211.GC19217@sashak.voltaire.com> Message-ID: <1168436386.14647.139520.camel@hal.voltaire.com> On Sun, 2007-01-07 at 21:42, Sasha Khapyorsky wrote: > Following Ira's log file reopening patch this fixes potential syslog > other apps compatibility issues. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From halr at voltaire.com Wed Jan 10 05:42:43 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 08:42:43 -0500 Subject: [openib-general] [PATCH 2/2] opensm: sigusr1: synchronous SIGUSR1 processing. 
In-Reply-To: <20070108024401.GD19217@sashak.voltaire.com> References: <20061116175304.668afcab.weiny2@llnl.gov> <1164048220.4381.4161.camel@hal.voltaire.com> <20070108023738.GB19217@sashak.voltaire.com> <20070108024401.GD19217@sashak.voltaire.com> Message-ID: <1168436396.14647.139522.camel@hal.voltaire.com> On Sun, 2007-01-07 at 21:44, Sasha Khapyorsky wrote: > Process SIGUSR1 synchronously similar to other signals. Move signal > handler setup from potentially shared library code. Use osm log locking > with file reopening in order to prevent races with osm_log(). > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From halr at voltaire.com Wed Jan 10 05:45:20 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 08:45:20 -0500 Subject: [openib-general] add SIGUSR1 to reopen osm.log In-Reply-To: <20070108023738.GB19217@sashak.voltaire.com> References: <20061116175304.668afcab.weiny2@llnl.gov> <1164048220.4381.4161.camel@hal.voltaire.com> <20070108023738.GB19217@sashak.voltaire.com> Message-ID: <1168436708.14647.139820.camel@hal.voltaire.com> On Sun, 2007-01-07 at 21:37, Sasha Khapyorsky wrote: > On 13:43 Mon 20 Nov , Hal Rosenstock wrote: > > On Thu, 2006-11-16 at 20:53, Ira Weiny wrote: > > > Our sysadmins have been rotating OpenSM's osm.log file and then restarting > > > OpenSM. As this is a less than optimal solution if you have jobs running on > > > the system, I wrote this patch (against OFED 1.1) which adds a handler for > > > SIGUSR1 that reopens OpenSM's log file without a restart. > > > > This is a good idea to incorporate into OpenSM. > > > > Any objections if this is bundled with SIGHUP handling rather than > > SIGUSR1 which is not backward compatible as Sasha indicated ? > > Actually looking more at this I noticed that SIGUSR* signals are not > used anymore with linuxthread couple of years (since kernel 2.1.xx, > where RT signals support was introduced). Yes, that's what I saw too. 
> So I think it is not real > issue today - some #ifdef should be sufficient. > > I reviewed and tested the original patch (it is appliable very well) and > have some fixes/improvements. Two incremental patches are followng. Thanks! -- Hal > Sasha From halr at voltaire.com Wed Jan 10 06:36:22 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 09:36:22 -0500 Subject: [openib-general] [PATCH] opensm: close umad port in osm_vendor_delete In-Reply-To: <20070108025848.GE19217@sashak.voltaire.com> References: <20061201141901.GC23574@sashak.voltaire.com> <20070108025848.GE19217@sashak.voltaire.com> Message-ID: <1168439774.14647.142664.camel@hal.voltaire.com> On Sun, 2007-01-07 at 21:58, Sasha Khapyorsky wrote: > This adds umad_port_close() call in osm_vendor_delete(). So the same > process can reinitialize and reuse the vendor layer. Successful test > was reported. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From vlad at dev.mellanox.co.il Wed Jan 10 06:40:54 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 10 Jan 2007 16:40:54 +0200 Subject: [openib-general] [PATCH] [mthca] Allow the following QP state transition : reset --> reset In-Reply-To: <1168336387.25538.1.camel@mtls05.yok.mtl.com> References: <1168336387.25538.1.camel@mtls05.yok.mtl.com> Message-ID: <45A4FAF6.80401@dev.mellanox.co.il> The patch was added to kernel_patches/fixes (ofed_1_2). Regards, Vladimir Dotan Barak wrote: > Allow the following QP state transition : reset --> reset. 
> > Signed-off-by: Dotan Barak > --- > > Index: gen2_devel_kernel/drivers/infiniband/hw/mthca/mthca_qp.c > =================================================================== > --- gen2_devel_kernel.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2007-01-08 16:14:25.000000000 +0200 > +++ gen2_devel_kernel/drivers/infiniband/hw/mthca/mthca_qp.c 2007-01-09 11:34:42.000000000 +0200 > @@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, > goto out; > } > > + if (cur_state == new_state && cur_state == IB_QPS_RESET) { > + err = 0; > + goto out; > + } > + > if ((attr_mask & IB_QP_PKEY_INDEX) && > attr->pkey_index >= dev->limits.pkey_table_len) { > mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Wed Jan 10 07:02:42 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 07:02:42 -0800 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: <20070110075913.GA14774@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 10 Jan 2007 09:59:13 +0200") References: <20070108165714.GM20047@mellanox.co.il> <20070110075913.GA14774@mellanox.co.il> Message-ID: > Roland, can we queue this for 2.6.21? Yes, once I have a chance to really read it over. From rdreier at cisco.com Wed Jan 10 07:07:57 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 07:07:57 -0800 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: <20070110075913.GA14774@mellanox.co.il> (Michael S. 
Tsirkin's message of "Wed, 10 Jan 2007 09:59:13 +0200") References: <20070108165714.GM20047@mellanox.co.il> <20070110075913.GA14774@mellanox.co.il> Message-ID: Also, I haven't really looked yet, but how does the connected mode patch interact with the NAPI patches? - R. From rdreier at cisco.com Wed Jan 10 07:11:39 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 07:11:39 -0800 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <45A367D0.3060206@voltaire.com> (Moni Shoua's message of "Tue, 09 Jan 2007 12:00:48 +0200") References: <45A367D0.3060206@voltaire.com> Message-ID: I played with this idea too a long time ago, but I don't think this sort of naive implementation is acceptable. Having packets that take a potentially unbounded amount of time before the send is completed and the skb is freed just causes too many problems -- for example the ping failures that you noted. Unless you can come up with a way that makes sure that all skbs are completed even in low-traffic situations, I don't think this is mergeable -- it's just too much of a usability nightmare to have a flag that is essentially "break some workloads in a mysterious way to make some benchmarks run a little faster." - R. From rdreier at cisco.com Wed Jan 10 07:15:12 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 07:15:12 -0800 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: <20070108165714.GM20047@mellanox.co.il> (Michael S. Tsirkin's message of "Mon, 8 Jan 2007 18:57:14 +0200") References: <20070108165714.GM20047@mellanox.co.il> Message-ID: > - Using path MTU discovery, multicast and UDP traffic to UD mode now work, > only a small number of packets is dropped. How does this work? What happens if I set my MTU to 8K and send a 8000-byte UDP message to a node that doesn't implement CM? What happens if I send an 8000-byte UDP multicast? I didn't think UDP did path MTU discovery by default... 
- R. From halr at voltaire.com Wed Jan 10 07:16:16 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 10:16:16 -0500 Subject: [openib-general] [PATCH] OpenSM/libvendor/libvendor.map: Remove static functions Message-ID: <1168442166.14647.144857.camel@hal.voltaire.com> OpenSM/libvendor/libvendor.map: Remove functions defined in osm_vendor_sa_api.h as static inline Signed-off-by: Hal Rosenstock References: <20061217154241.GD11360@minantech.com> Message-ID: <20070110154448.GK8257@minantech.com> Hi Roland, On Sun, Dec 17, 2006 at 05:42:41PM +0200, glebn at voltaire.com wrote: > On Wed, Dec 13, 2006 at 11:41:41PM -0800, Roland Dreier wrote: > > Are there other possible ordering problems involving user memory (not > > in a CQ or QP)? Something like a CPU on node A writing to memory on > > node B and then posting a work request that makes the HCA DMA from > > that memory on node B, and having the work request doorbell reach the > > HCA before the write to node B actually happens, so the HCA DMAs the > > old contents of node B's memory? > > > > I guess the only feasible solution to the problem you're pointing out > > is to have libmthca use some special mmap()-based allocator for queues > > so that the kernel can give it memory that has the special > > dma_map_consistent treatment. > Do you think this should be part of mthca or some general framework like uio > which allows writing driver in userspace? > Also another solution could be to do something similar to ehca. It > allocates QP and CQ in the kernel and maps them into process address > space. > Can we get back to this problem? I understand that the way ehca does things is not acceptable, so it seems that ehca people will also have to rethink how CQ/QP memory is allocated. It would be a good idea to consolidate solution for mthca and ehca.
mmap() special allocator can be added to fd libibverbs uses for communication with the kernel and it can be used by all low level drivers (I don't know if such a way is acceptable for ehca). The question is if this is a good idea to add something as general as coherent memory allocation for userspace (which is needed by other userspace drivers) to infiniband subsystem. What do you think? -- Gleb. From rdreier at cisco.com Wed Jan 10 08:04:55 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 08:04:55 -0800 Subject: [openib-general] [RFC/BUG] libibverbs: DMA vs. CQ race In-Reply-To: <20070110154448.GK8257@minantech.com> (Gleb Natapov's message of "Wed, 10 Jan 2007 17:44:49 +0200") References: <20061217154241.GD11360@minantech.com> <20070110154448.GK8257@minantech.com> Message-ID: > Can we get back to this problem? I understand that the way ehca does > things is not acceptable, so it seems that ehca people will also have > to rethink how CQ/QP memory is allocated. It would be a good idea to > consolidate solution for mthca and ehca. mmap() special allocator can be > added to fd libibverbs uses for communication with the kernel and it can > be used by all low level drivers (I don't know if such a way is > acceptable for ehca). The question is if this is a good idea to add > something as general as coherent memory allocation for userspace (which > is needed by other userspace drivers) to infiniband subsystem. > > What do you think? I think that's kind of overengineering things. Allocating and mapping memory in an mmap method is just a few lines of code so we might as well let drivers do exactly what they want. I think the bigger problem is fixing the kernel so it's easier for drivers to specify the page attributes they want for memory mapped to userspace (eg uncached, magic altix "OK for DMA", write combining, etc) - R.
From halr at voltaire.com Wed Jan 10 08:08:52 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 11:08:52 -0500 Subject: [openib-general] [RFC] Support for Additional Scopes for IPoIB MC Groups Message-ID: <1168445329.14647.147877.camel@hal.voltaire.com> Hi, Currently, in ipoib_main.c, the IPv4 broadcast group from which all other IPoIB multicast groups are derived is defined as: static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }; The scope of this group is (0x2) link local. In preparation for IB routers, other scopes should be supported. Is an acceptable approach to supporting additional scopes to add a module parameter for this? -- Hal From glebn at voltaire.com Wed Jan 10 08:13:26 2007 From: glebn at voltaire.com (glebn at voltaire.com) Date: Wed, 10 Jan 2007 18:13:26 +0200 Subject: [openib-general] [RFC/BUG] libibverbs: DMA vs. CQ race In-Reply-To: References: <20061217154241.GD11360@minantech.com> <20070110154448.GK8257@minantech.com> Message-ID: <20070110161326.GL8257@minantech.com> On Wed, Jan 10, 2007 at 08:04:55AM -0800, Roland Dreier wrote: > > Can we get back to this problem? I understand that the way ehca does > > things is not acceptable, so it seems that ehca people will also have > > to rethink how CQ/QP memory is allocated. It would be a good idea to > > consolidate solution for mthca and ehca. mmap() special allocator can be > > added to fd libibverbs uses for communication with the kernel and it can > > be used by all low level drivers (I don't know if such a way is > > acceptable for ehca). The question is if this is a good idea to add > > something as general as coherent memory allocation for userspace (which > > is needed by other userspace drivers) to infiniband subsystem. > > > > What do you think? > > I think that's kind of overengineering things.
Allocating and mapping > memory in an mmap method is just a few lines of code so we might as > well let drivers do exactly what they want. OK, but as far as I know (and I may be wrong here) low level driver doesn't have device node, but libibverbs has one, we don't want to create device node for mthca just to provide mmap() to userspace, or do we? > > I think the bigger problem is fixing the kernel so it's easier for > drivers to specify the page attributes they want for memory mapped to > userspace (eg uncached, magic altix "OK for DMA", write combining, etc) > There is no way to make memory coherent after allocation, it has to be allocated coherent, I think the API doesn't exist because it cannot be implemented on all arches, but it is worth asking mm people. -- Gleb. From rdreier at cisco.com Wed Jan 10 08:19:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 08:19:58 -0800 Subject: [openib-general] [RFC/BUG] libibverbs: DMA vs. CQ race In-Reply-To: <20070110161326.GL8257@minantech.com> (Gleb Natapov's message of "Wed, 10 Jan 2007 18:13:26 +0200") References: <20061217154241.GD11360@minantech.com> <20070110154448.GK8257@minantech.com> <20070110161326.GL8257@minantech.com> Message-ID: > OK, but as far as I know (and I may be wrong here) low level driver > doesn't have device node, but libibverbs has one, we don't want to > create device node for mthca just to provide mmap() to userspace, or do > we? Not sure what you mean -- mthca already implements mmap() to provide access to doorbell pages. It would be no problem to extend that to allow allocating QPs and CQs via mmap() if we're forced into that. > There is no way to make memory coherent after allocation, it has to be > allocated coherent, I think the API doesn't exist because it cannot be > implemented on all arches, but it is worth asking mm people. Not sure what you mean here either.
dma_alloc_coherent() already exists on all architectures, but that's not the end of the story -- if we remap the memory into userspace then we have to make sure that the page tables for userspace also make the memory coherent (eg uncached on non-cache-coherent architectures, or whatever the altix magic is to avoid ordering problems). dma_alloc_coherent() only takes care of the kernel mapping. - R. From glebn at voltaire.com Wed Jan 10 08:58:20 2007 From: glebn at voltaire.com (glebn at voltaire.com) Date: Wed, 10 Jan 2007 18:58:20 +0200 Subject: [openib-general] [RFC/BUG] libibverbs: DMA vs. CQ race In-Reply-To: References: <20061217154241.GD11360@minantech.com> <20070110154448.GK8257@minantech.com> <20070110161326.GL8257@minantech.com> Message-ID: <20070110165820.GN8257@minantech.com> On Wed, Jan 10, 2007 at 08:19:58AM -0800, Roland Dreier wrote: > > OK, but as far as I know (and I may be wrong here) low level driver > > doesn't have device node, but libibverbs has one, we don't want to > > create device node for mthca just to provide mmap() to userspace, or do > > we? > > Not sure what you mean -- mthca already implements mmap() to provide > access to doorbell pages. It would be no problem to extend that to > allow allocating QPs and CQs via mmap() if we're forced into that. Right, haven't looked at the code for a long time :). So how can we tell to mmap that we want to allocate coherent memory and not map doorbell? Magic offset value? > > > There is no way to make memory coherent after allocation, it have to be > > allocated coherent, I think the API doesn't exists because it cannot be > > implemented on all arches, but it is worth asking mm people. > > Not sure what you mean here either. 
dma_alloc_coherent() already > exists on all architectures, but that's not the end of the story -- if > we remap the memory into userspace then we have to make sure that the > page tables for userspace also make the memory coherent (eg uncached > on non-cache-coherent architectures, or whatever the altix magic is to > avoid ordering problems). dma_alloc_coherent() only takes care of the > kernel mapping. > If I understand correctly (and if not SGI people please correct me) it doesn't matter how memory is mapped into userspace, what is important is what address is passed to HCA to use for DMAing data into host memory. It just so happens that dma_alloc_coherent() returns this special address that causes flush of all outstanding writes in dma_handle. So what we should do is allocate memory with dma_alloc_coherent() in mthca mmap() function and save dma_handle in database indexed by vma->vm_start. Remap pointer returned by the function into userspace. When create_{qp,cq} is called later on, we should look up the corresponding dma_handle using address provided by userspace (if not found - return an error) and pass this address to HCA. Does this make sense? -- Gleb. From jlentini at netapp.com Wed Jan 10 09:07:33 2007 From: jlentini at netapp.com (James Lentini) Date: Wed, 10 Jan 2007 12:07:33 -0500 (EST) Subject: [openib-general] NFS(v4)-RDMA In-Reply-To: <1168392546.5086.64.camel@julia.et.endace.com> References: <1168392546.5086.64.camel@julia.et.endace.com> Message-ID: On Wed, 10 Jan 2007, vishal wrote: > Hi, > > I am getting an invalid argument error when I try to mount using > nfs(v4)-rdma. The mounts with nfs(v3)-rdma work fine. Following are the > details:- > > 1. Command Issued on the client side: - > ./nfsrdmamount -o rdma=10.0.0.2 -t nfs4 10.0.0.2:/ /data > > 2. /etc/exports on the client side > /data 10.0.0.1(rw,fsid=0,insecure,no_subtree_check,async) > > 3. Error received:- > nfsmount: Invalid argument > > 4.
From strace:- > > mount("10.0.0.2:/", "/data", "nfs4", 0, "\1") = -1 EINVAL (Invalid > argument) Do NFSv4 mounts work over TCP? If not, make sure you have v4 enabled on the client and server. From jlentini at netapp.com Wed Jan 10 09:21:02 2007 From: jlentini at netapp.com (James Lentini) Date: Wed, 10 Jan 2007 12:21:02 -0500 (EST) Subject: [openib-general] NFS-RDMA Performance Issue In-Reply-To: <1168392243.5086.58.camel@julia.et.endace.com> References: <1168392243.5086.58.camel@julia.et.endace.com> Message-ID: On Wed, 10 Jan 2007, vishal wrote: > Hi, > > I am getting an average speed of ~100 MiB/s using NFS-RDMA over > Infiniband, writing to an md device. The local write speed was ~800 > MiB/s. The kernel being used is 2.6.18.3. The tests were performed using > dd (various block sizes have been tried). I have fiddled with the rsize > and wsize as well, but that didn't make any difference. Can anyone > please help me out! > > Thanks! > > Vishal > > P.S This is using NFS v3 You should use direct I/O. You will have to use a different tool, iozone, etc. From mshefty at ichips.intel.com Wed Jan 10 09:40:34 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 10 Jan 2007 09:40:34 -0800 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A49174.5040009@voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> Message-ID: <45A52512.1040003@ichips.intel.com> > OK, I understand that adding a send only join param changes the > librdmacm/ucma ABI and further that you might be somehow busy to fully > implement the sendonly scheme at the multicast code for the 2.6.21 time > frame. > > How about adding sendonly param to the ABI and having the ucma kernel > code returning -EINVAL if someone tries to set it to true. 
Such code can > be pushed to 2.6.21 and when you have the time to complete the > implementation you can complete this? I don't think adding this is a huge deal; I just haven't gotten to it yet. However, I'd like to make sure there's enough time once the change is made to verify that we have the right result before pushing it upstream. > Is it what Dotan has reported? i recall the test does not use librdmacm > nor IPoIB, so how does it exercise the kernel ib_sa api at all ??? i > guess it uses libibmad or libibumad to send the joins etc. Woody has also seen this issue. And of course, I can't reproduce it on my systems, but I'm actively looking into the problem. It looks like some sort of issue with ipoib trying to join a non-existent multicast group. > Looking on the code, i understand that if an multicast consumer attempts > to join a group for which another consumer is already joined then it > just gets the group params, that is the mgid is your discriminator (with > the exception of an all zeros mgid which has a different treatment) > which makes much sense to me. Not exactly. The rdma_cm consumer gets the group parameters for the ipoib broadcast group. It uses this information as a template for joining new groups. > Going forward with this idea, a cma consumer that wants to use the ipv4 > broadcast group qkey can join the group and learn the qkey. One issue is that an rdma_cm consumer can first allocate a UD QP to use with UD traffic. When it later joins a multicast group, the qkey must be the same. How does ipoib handle this? > Since for our apps needs we do intend to join the 224.0.0.1 group, > resolving a) above is fine for us --> we will join 224.0.0.1 above, > provide the qkey to the rdma cm and it will join to the other group (eg > 224.5.5.5) with this qkey. > > what do you think? I'm not completely following you on this yet. 
>> } else { >> memset(rec, 0, sizeof *rec); >> ib_get_cached_gid(device, port_num, 0, &rec->port_gid); >> rec->pkey = 0xFFFF; >> get_random_bytes(&rec->qkey, sizeof rec->qkey); >> rec->join_state = 1; > > > can you remind me what the idea/trick here, aren't you supposed to > generate an mgid for this case? This either returns an existing MCMemberRecord that this node has joined, or it fills out an MCMemberRecord that can be used to join a new group. If the mgid is zero, the SA will assign one. - Sean From mshefty at ichips.intel.com Wed Jan 10 09:55:59 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 10 Jan 2007 09:55:59 -0800 Subject: [openib-general] Stale CM callbacks In-Reply-To: <45A4583B.8050807@veritas.com> References: <45A4583B.8050807@veritas.com> Message-ID: <45A528AF.2050105@ichips.intel.com> > I am using IB as a cluster interconnect. If a node which had established > several connections > with a remote node was reset (not rebooted) and it came back up quickly > is it possible for > the node to get stale REQ/DREQ callbacks ? If yes, is there an API to > purge stale states > in the CM or should it be detected by the module getting the callback ? It's possible for stale REQ/DREQ messages to appear at the reset node, but I don't see any problem with that occurring. The DREQs should be dropped, since there's no connections to match them with. The REQs should be rejected without a matching listen. If the listen occurs before the REQ appears, then a new connection would result. I don't see a problem in either case. As for purging stale states, I'm not sure what you mean. The reset node will have purged the local CM state. - Sean From mst at mellanox.co.il Wed Jan 10 10:07:35 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 10 Jan 2007 20:07:35 +0200 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: References: Message-ID: <20070110180735.GC18953@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCHv4] IPoIB CM Experimental support > > > - Using path MTU discovery, multicast and UDP traffic to UD mode now work, > > only a small number of packets is dropped. > > How does this work? What happens if I set my MTU to 8K and send a > 8000-byte UDP message to a node that doesn't implement CM? What > happens if I send an 8000-byte UDP multicast? > > I didn't think UDP did path MTU discovery by default... Before packet drop, we reduce the dst mtu, so linux will start splitting following packets to this destination. -- MST From mst at mellanox.co.il Wed Jan 10 10:11:58 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 20:11:58 +0200 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: References: Message-ID: <20070110181158.GD18953@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCHv4] IPoIB CM Experimental support > > Also, I haven't really looked yet, but how does the connected mode > patch interact with the NAPI patches? The latest version uses prov->cq for all RX packets, so it's trivial to merge it with NAPI if we want: the relevant part of the patch is: static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) { - if (wc->wr_id & IPOIB_OP_RECV) + if (wc->wr_id & IPOIB_CM_OP_SRQ) + ipoib_cm_handle_rx_wc(dev, wc); + else if (wc->wr_id & IPOIB_OP_RECV) ipoib_ib_handle_rx_wc(dev, wc); else ipoib_ib_handle_tx_wc(dev, wc); I haven't tested this combination yet. -- MST From mst at mellanox.co.il Wed Jan 10 10:15:10 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 10 Jan 2007 20:15:10 +0200 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: References: <20070108165714.GM20047@mellanox.co.il> <20070110075913.GA14774@mellanox.co.il> Message-ID: <20070110181510.GE18953@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCHv4] IPoIB CM Experimental support > > > Roland, can we queue this for 2.6.21? > > Yes, once I have a chance to really read it over. Maybe for-mm for now? -- MST From rdreier at cisco.com Wed Jan 10 10:18:45 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 10:18:45 -0800 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: <20070110181510.GE18953@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 10 Jan 2007 20:15:10 +0200") References: <20070108165714.GM20047@mellanox.co.il> <20070110075913.GA14774@mellanox.co.il> <20070110181510.GE18953@mellanox.co.il> Message-ID: > Maybe for-mm for now? Definitely, I just need to update all my branches. From mst at mellanox.co.il Wed Jan 10 10:20:45 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 20:20:45 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A49174.5040009@voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> Message-ID: <20070110182045.GF18953@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: multicast code/merge status > > Sean Hefty wrote: > >> Other then that, as we discussed in SC06 there are some changes that > >> need to be integrated in the code to allow for interoperability > >> between a multicast rdma cm based app to IPoIB, specifically removing > >> the RDMA CM signature from the mgid which generated from the ip addr > >> and pkey, but not only. > > > ...I have not completed these changes yet. 
Specifically, I have not > > added a send only join parameter or changed the qkey. > > OK, I understand that adding a send only join param changes the > librdmacm/ucma ABI and further that you might be somehow busy to fully > implement the sendonly scheme at the multicast code for the 2.6.21 time > frame. > > How about adding sendonly param to the ABI and having the ucma kernel > code returning -EINVAL if someone tries to set it to true. Such code can > be pushed to 2.6.21 and when you have the time to complete the > implementation you can complete this? Since Voltaire requested this code to be in OFED 1.2, could you please clarify whether the code in Sean's branch is in the shape you want it to be in for OFED 1.2? If rdma_cm multicast can't interoperate with IPoIB, is it still useful for you? -- MST From swise at opengridcomputing.com Wed Jan 10 10:20:49 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 10 Jan 2007 12:20:49 -0600 Subject: [openib-general] [PATCH] ~vlad/ofascripts.git - Add libcxgb3 to user kit Message-ID: <1168453249.27060.23.camel@stevo-desktop> Vlad, Here are the changes to ofa userspace scripts for adding Chelsio's RDMA library, libcxgb3. Steve. ----- Support for libcxgb3. 
From: Steve Wise - added libcxgb3 to configure - added libcxgb3 to Makefile Signed-off-by: Steve Wise --- Makefile | 17 ++++++++++++++++- configure | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletions(-) diff --git a/Makefile b/Makefile index b164ee0..4d57d82 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,8 @@ all: .PHONY: libibverbs libmthca libibcm perftest mstflint libsdp libibcommon libibumad libibmad osm diags mpi .PHONY: librdmacm install_librdmacm dapl dat tvflash install_tvflash .PHONY: install_libibverbs install_libmthca install_libibcm install_libsdp libibcommon -.PHONY: libipathverbs install_libipathverbs libehca install_libehca +.PHONY: libipathverbs install_libipathverbs libehca install_libehca +.PHONY: libcxgb3 install_libcxgb3 .PHONY: install_libibumad install_libibmad install_osm install_diags install_mstflint install_perftest .PHONY: srptools install_srptools ipoibtools install_ipoibtools @@ -49,6 +50,11 @@ user: libehca install_user: install_libehca endif +ifeq ($(WITH_CXGB3),yes) +user: libcxgb3 +install_user: install_libcxgb3 +endif + ifeq ($(WITH_CM),yes) user: libibcm install_user: install_libibcm @@ -139,6 +145,11 @@ libehca: libibverbs AM_LDFLAGS="-L../libibverbs/src -libverbs" @echo "Make libehca done" +libcxgb3: libibverbs + $(MAKE) -C $(SRC)userspace/libcxgb3 \ + AM_LDFLAGS="-L../libibverbs/src -libverbs" + @echo "Make libcxgb3 done" + libibcm: libibverbs $(MAKE) -C $(SRC)userspace/libibcm \ CPPFLAGS="-I../libibverbs/include" \ @@ -281,6 +292,9 @@ install_libipathverbs: install_libibverb install_libehca: install_libibverbs $(MAKE) -C $(SRC)userspace/libehca install +install_libcxgb3: install_libibverbs + $(MAKE) -C $(SRC)userspace/libcxgb3 install + install_libibcm: install_libmthca $(MAKE) -C $(SRC)userspace/libibcm install @@ -385,6 +399,7 @@ clean_user: $(MAKE) -C $(SRC)userspace/libmthca clean $(MAKE) -C $(SRC)userspace/libipathverbs clean $(MAKE) -C $(SRC)userspace/libehca clean + $(MAKE) -C $(SRC)userspace/libcxgb3 
clean $(MAKE) -C $(SRC)userspace/libibcm clean # $(MAKE) -C $(SRC)userspace/useraccess-lib clean $(MAKE) -C $(SRC)userspace/perftest clean diff --git a/configure b/configure index 5f8253e..616ef5c 100755 --- a/configure +++ b/configure @@ -58,6 +58,9 @@ Usage: `basename $0` [options] --with-libehca include libehca package [yes] --without-libehca include libehca package [no] + --with-libcxgb3 include libcxgb3 package [yes] + --without-libcxgb3 include libcxgb3 package [no] + --with-libibcm include CM package [no] --without-libibcm don't include CM package [yes] @@ -277,6 +280,13 @@ main() --without-libehca) WITH_EHCA="no" ;; + --with-libcxgb3) + WITH_CXGB3="yes" + WITH_IBVERBS="yes" + ;; + --without-libcxgb3) + WITH_CXGB3="no" + ;; --with-libibcm) WITH_CM="yes" WITH_MTHCA="yes" @@ -410,6 +420,7 @@ main() # WITH_DAPL="yes" WITH_DIAGS="yes" # WITH_EHCA="yes" + WITH_CXGB3="yes" WITH_IBCOMMON="yes" WITH_IBMAD="yes" WITH_IBUMAD="yes" @@ -460,6 +471,7 @@ WITH_IBVERBS=${WITH_IBVERBS:-"no"} WITH_MTHCA=${WITH_MTHCA:-"no"} WITH_IPATHVERBS=${WITH_IPATHVERBS:-"no"} WITH_EHCA=${WITH_EHCA:-"no"} +WITH_CXGB3=${WITH_CXGB3:-"no"} WITH_CM=${WITH_CM:-"no"} WITH_SDP=${WITH_SDP:-"no"} WITH_DAPL=${WITH_DAPL:-"no"} @@ -501,6 +513,7 @@ WITH_IBVERBS=${WITH_IBVERBS} WITH_MTHCA=${WITH_MTHCA} WITH_IPATHVERBS=${WITH_IPATHVERBS} WITH_EHCA=${WITH_EHCA} +WITH_CXGB3=${WITH_CXGB3} WITH_CM=${WITH_CM} WITH_SDP=${WITH_SDP} WITH_DAPL=${WITH_DAPL} @@ -539,6 +552,9 @@ EOFCONFIG if [ "${WITH_EHCA}" == "yes" ]; then ullibs="${ullibs} libehca" fi + if [ "${WITH_CXGB3}" == "yes" ]; then + ullibs="${ullibs} libcxgb3" + fi if [ "${WITH_CM}" == "yes" ]; then ullibs="${ullibs} libibcm" fi From swise at opengridcomputing.com Wed Jan 10 10:22:50 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 10 Jan 2007 12:22:50 -0600 Subject: [openib-general] [PATCH] ~vlad/ofabuild - Chelsio Support Message-ID: <1168453370.27060.26.camel@stevo-desktop> Here are the changes to the user build script to pull in 
libcxgb3. Steve. Chelsio T3 Library Support. From: Steve Wise Signed-off-by: Steve Wise --- build_ofa_user.sh | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/build_ofa_user.sh b/build_ofa_user.sh index b6eee43..acb2676 100755 --- a/build_ofa_user.sh +++ b/build_ofa_user.sh @@ -17,7 +17,7 @@ LOG=${tmpdir}/${proj}-${revision}.build. git_extra_flags=${git_extra_flags:-"-q"} -ullibs="libibverbs libmthca libehca libipathverbs tvflash libibcm libsdp mstflint perftest srptools ipoibtools librdmacm dapl imgen management" +ullibs="libibverbs libmthca libehca libipathverbs tvflash libibcm libsdp mstflint perftest srptools ipoibtools librdmacm dapl imgen management libcxgb3" # User level git packages scripts_git="git://git.openfabrics.org/~vlad/ofascripts.git" @@ -54,6 +54,8 @@ ibutils_git="git://git.openfabrics.org/~ ibutils_branch="origin" management_git="git://git.openfabrics.org/~halr/management.git" management_branch="origin" +libcxgb3_git="git://git.openfabrics.org/~swise/libcxgb3.git" +libcxgb3_branch="origin" # Execute command w/ echo and exit if it fail ex() @@ -317,7 +319,8 @@ check_build() --with-perftest \ --with-mstflint \ --with-srptools \ - --with-ipoibtools" + --with-ipoibtools \ + --with-libcxgb3" cd ${tmpdir} ex tar xzf ${dest}/${proj}/${projdir}.tgz From wombat2 at us.ibm.com Wed Jan 10 10:28:24 2007 From: wombat2 at us.ibm.com (Bernard King-Smith) Date: Wed, 10 Jan 2007 13:28:24 -0500 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: Message-ID: ----- Message from "Roland Dreier" on Wed, 10 > Jan 2007 07:15:12 -0800 ----- > > To: > > "Michael S. Tsirkin" > > cc: > > openib-general at openib.org > > Subject: > > Re: [openib-general] [PATCHv4] IPoIB CM Experimental support > > > - Using path MTU discovery, multicast and UDP traffic to UD mode now work, > > only a small number of packets is dropped. > > How does this work? 
What happens if I set my MTU to 8K and send a > 8000-byte UDP message to a node that doesn't implement CM? What > happens if I send an 8000-byte UDP multicast? If we support a large interface MTU using RC, then I would expect UDP and TCP to both use RC mode. It doesn't make sense for UDP to use UD mode because of the higher CPU overhead and lower bandwidth, when TCP uses RC. Can we not support UDP multicast over RC? > > I didn't think UDP did path MTU discovery by default... > > - R. > > > Bernie King-Smith IBM Corporation Server Group Cluster System Performance wombat2 at us.ibm.com (845)433-8483 Tie. 293-8483 or wombat2 on NOTES "We are not responsible for the world we are born into, only for the world we leave when we die. So we have to accept what has gone before us and work to change the only thing we can, -- The Future." William Shatner -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Wed Jan 10 10:39:16 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 20:39:16 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: References: <45A367D0.3060206@voltaire.com> Message-ID: <20070110183916.GG18953@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH/RFC] IB/ipoib: add selective tx signaling > > I played with this idea too a long time ago, but I don't think this > sort of naive implementation is acceptable. Having packets that take > a potentially unbounded amount of time before the send is completed > and the skb is freed just causes too many problems -- for example the > ping failures that you noted. > > Unless you can come up with a way that makes sure that all skbs are > completed even in low-traffic situations, I don't think this is > mergeable -- it's just too much of a usability nightmare to have a > flag that is essentially "break some workloads in a mysterious way to > make some benchmarks run a little faster."
Yea, I really think the selective signalling is only good for cases where you post several WRs in one go, anything else is a misuse. Thinking about this more - why does this patch help some benchmarks? The amount of work it takes for the hardware to generate a completion is likely negligible, and we still are scanning the same amount of TX WRs in a loop to unmap/free them. If you think about it this way, it becomes clear that your workload, for some reason, hits a path where you get an event very fast after the first completion and there is only a small number of completions to handle. So your patch helps just by delaying the event handler until there's more work to do. And I expect it wouldn't help TCP much if at all as there are RX WRs per each couple of TX WRs. So, can't we attack the problem directly? I am thinking about detecting that we are getting a completion each couple of microseconds, and just polling a bit more (even if CQ is empty) simply to avoid paying tens of microseconds for the next interrupt. This is just a fuzzy idea, but maybe this can work? -- MST From mst at mellanox.co.il Wed Jan 10 10:42:40 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 20:42:40 +0200 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console In-Reply-To: <20070109131558.GB16107@mellanox.co.il> References: <1168258813.4577.73483.camel@hal.voltaire.com> <20070109131558.GB16107@mellanox.co.il> Message-ID: <20070110184240.GH18953@mellanox.co.il> > Quoting Michael S.
Tsirkin : > Subject: Re: [PATCH 1/2] OpenSM: Add socket support to OpenSM console > > > diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in > > index 1ccf5c6..2d52675 100644 > > --- a/osm/opensm/configure.in > > +++ b/osm/opensm/configure.in > > @@ -62,6 +62,22 @@ AC_ARG_ENABLE(debug, > > esac],[debug=false]) > > AM_CONDITIONAL(DEBUG, test x$debug = xtrue) > > > > +dnl Console over a socket connection > > +AC_ARG_ENABLE(console-socket, > > +[ --enable-console-socket Enable a console socket, requires tcp_wrappers (default yes)], > > +[case $enableval in > > + yes) console_socket=yes ;; > > + no) console_socket=no ;; > > + esac], > > + console_socket=yes) > > +if test $console_socket = yes; then > > + AC_CHECK_LIB(wrap, request_init, [], > > + AC_MSG_ERROR([request_init() not found. console-socket requires libwrap.])) > > + AC_DEFINE(ENABLE_OSM_CONSOLE_SOCKET, > > + 1, > > + [Define as 1 if you want to enable a console on a socket connection]) > > +fi > > + > > dnl Provide user option to select vendor > > OPENIB_APP_OSMV_SEL > > > > Might it be better to set the default to off? > libwrap devel package might not be present on some systems - so build now fails > where previously it passed - and user didn't even ask for any new features. > > Further, could you please clarify: when compiled in, is opensm listening on a socket > by default or does it need to be enabled with a run-time option? > I hope it's the later. Just to clarify - you guys are going to address these 2 issues, correct? 
-- MST From or.gerlitz at gmail.com Wed Jan 10 10:52:25 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 10 Jan 2007 20:52:25 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <20070110182045.GF18953@mellanox.co.il> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <20070110182045.GF18953@mellanox.co.il> Message-ID: <15ddcffd0701101052h120f6b45l45e4bc5016625d6e@mail.gmail.com> On 1/10/07, Michael S. Tsirkin wrote: > > Quoting Or Gerlitz : > > Subject: Re: multicast code/merge status > Since Voltaire requested this code to be in OFED 1.2, could you please clarify > whether the code in Sean's branch is in the shape you want it to be in for OFED 1.2? > If rdma_cm multicast can't interoperate with IPoIB, is it still useful for you? Yes, we want this code in OFED 1.2 and we want it to be inter operable with IPoIB, there was no concrete objection for the interop requirement and the details seem quite simple, we have narrowed it now to the removal of the rdmacm signature bit and usage of the ipv4 broadcast group qkey by the rdmacm. The sendonly issue is not related to the interop and we can do well without it being implemneted for OFED 1.2 Or. From mst at mellanox.co.il Wed Jan 10 10:57:57 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 20:57:57 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <15ddcffd0701101052h120f6b45l45e4bc5016625d6e@mail.gmail.com> References: <15ddcffd0701101052h120f6b45l45e4bc5016625d6e@mail.gmail.com> Message-ID: <20070110185757.GJ18953@mellanox.co.il> > On 1/10/07, Michael S. 
Tsirkin wrote: > > > Quoting Or Gerlitz : > > > Subject: Re: multicast code/merge status > > > Since Voltaire requested this code to be in OFED 1.2, could you please clarify > > whether the code in Sean's branch is in the shape you want it to be in for OFED 1.2? > > If rdma_cm multicast can't interoperate with IPoIB, is it still useful for you? > > Yes, we want this code in OFED 1.2 and we want it to be inter operable > with IPoIB, there was no concrete objection for the interop > requirement and the details seem quite simple, we have narrowed it > now to the removal of the rdmacm signature bit and usage of the ipv4 > broadcast group qkey by the rdmacm. I am just trying to figure out when's the best time to import the multicast branch into OFED. So, there will still be changes there? > The sendonly issue is not related to the interop and we can do well > without it being implemneted for OFED 1.2 OK. -- MST From halr at voltaire.com Wed Jan 10 10:46:03 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 13:46:03 -0500 Subject: [openib-general] [PATCH 1/2] OpenSM: Add socket support to OpenSM console In-Reply-To: <20070110184240.GH18953@mellanox.co.il> References: <1168258813.4577.73483.camel@hal.voltaire.com> <20070109131558.GB16107@mellanox.co.il> <20070110184240.GH18953@mellanox.co.il> Message-ID: <1168454757.14647.156715.camel@hal.voltaire.com> On Wed, 2007-01-10 at 13:42, Michael S. Tsirkin wrote: > > Quoting Michael S. 
Tsirkin : > > Subject: Re: [PATCH 1/2] OpenSM: Add socket support to OpenSM console > > > > > diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in > > > index 1ccf5c6..2d52675 100644 > > > --- a/osm/opensm/configure.in > > > +++ b/osm/opensm/configure.in > > > @@ -62,6 +62,22 @@ AC_ARG_ENABLE(debug, > > > esac],[debug=false]) > > > AM_CONDITIONAL(DEBUG, test x$debug = xtrue) > > > > > > +dnl Console over a socket connection > > > +AC_ARG_ENABLE(console-socket, > > > +[ --enable-console-socket Enable a console socket, requires tcp_wrappers (default yes)], > > > +[case $enableval in > > > + yes) console_socket=yes ;; > > > + no) console_socket=no ;; > > > + esac], > > > + console_socket=yes) > > > +if test $console_socket = yes; then > > > + AC_CHECK_LIB(wrap, request_init, [], > > > + AC_MSG_ERROR([request_init() not found. console-socket requires libwrap.])) > > > + AC_DEFINE(ENABLE_OSM_CONSOLE_SOCKET, > > > + 1, > > > + [Define as 1 if you want to enable a console on a socket connection]) > > > +fi > > > + > > > dnl Provide user option to select vendor > > > OPENIB_APP_OSMV_SEL > > > > > > > Might it be better to set the default to off? > > libwrap devel package might not be present on some systems - so build now fails > > where previously it passed - and user didn't even ask for any new features. > > > > Further, could you please clarify: when compiled in, is opensm listening on a socket > > by default or does it need to be enabled with a run-time option? > > I hope it's the later. > > Just to clarify - you guys are going to address these 2 issues, correct? A patch on the first one (build default) went out yesterday and this is now in the management git tree. The second will be addressed shortly. 
-- Hal From or.gerlitz at gmail.com Wed Jan 10 10:47:50 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 10 Jan 2007 20:47:50 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A52512.1040003@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> Message-ID: <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> On 1/10/07, Sean Hefty wrote: >> How about adding sendonly param to the ABI and having the ucma kernel >> code returning -EINVAL if someone tries to set it to true. Such code can >> be pushed to 2.6.21 and when you have the time to complete the >> implementation you can complete this? > I don't think adding this is a huge deal; I just haven't gotten to it yet. > However, I'd like to make sure there's enough time once the change is made to > verify that we have the right result before pushing it upstream. Compared to the amount of functionality (ib_sa ref mcast counting and rdma cm u/k mcast API) provided by this code, i really think it can be done in two phases, first push a code that does not support sendonly and later once its implemented push a patch that adds this. Anyway, per the best of my knowledge you would not be able to fully verify the "right" result , i understand the opensm does not really support a sendonly join (*) in the sense of that it does not support configuring "one-way" MFTs for a subset of the branches/leaves of the multicast group spanning tree, but this is orthogonal to your code. (*) there are some more issues here which need to be addressed, see for example the "Some SMs don't support send-only yet" weird comment at ipoib_mcast_sendonly_join() > Woody has also seen this issue. And of course, I can't reproduce it on my > systems, but I'm actively looking into the problem. 
It looks like some sort of > issue with ipoib trying to join a non-existent multicast group. mmm, weird, are there active rdmacm multicast consumers involved in this settings? >> Looking on the code, i understand that if an multicast consumer attempts >> to join a group for which another consumer is already joined then it >> just gets the group params, that is the mgid is your discriminator (with >> the exception of an all zeros mgid which has a different treatment) >> which makes much sense to me. > Not exactly. The rdma_cm consumer gets the group parameters for the ipoib > broadcast group. It uses this information as a template for joining new groups. OK, got you at last (sorry but i have somehow ignored the call to ib_addr_get_mgid() at the rdmacm code). So to achieve interop with IPoIB all we need to do is remove the rdmacm signature bit and not to over-write the rdmacm qkey on the the qkey of the ipoib ipv4 broadcast group, are you ok with that? > One issue is that an rdma_cm consumer can first allocate a UD QP to use with UD > traffic. When it later joins a multicast group, the qkey must be the same. How > does ipoib handle this? Well, ipoib first sets a zero qkey into its qp in ipoib_init_qp() and later in ipoib_mcast_attach() does another qp modify providing priv->qkey which is the ipv4 broadcast group one, the rdma_cm can mimic this behaviour for qps created with rdma_create_qp and also for those created by the user. >> Since for our apps needs we do intend to join the 224.0.0.1 group, >> resolving a) above is fine for us --> we will join 224.0.0.1 above, >> provide the qkey to the rdma cm and it will join to the other group (eg >> 224.5.5.5) with this qkey. > I'm not completely following you on this yet. forget this, there is no need in such tricks since the rdmacm has the ipv4 broadcast group qkey. >> can you remind me what the idea/trick here, aren't you supposed to >> generate an mgid for this case? 
> This either returns an existing MCMemberRecord that this node has joined, or it > fills out an MCMemberRecord that can be used to join a new group. If the mgid > is zero, the SA will assign one. thanks Or. From mshefty at ichips.intel.com Wed Jan 10 10:58:04 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 10 Jan 2007 10:58:04 -0800 Subject: [openib-general] multicast code/merge status In-Reply-To: <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> Message-ID: <45A5373C.2060306@ichips.intel.com> > mmm, weird, are there active rdmacm multicast consumers involved in > this settings? I believe the issue is restricted to ipoib only. > OK, got you at last (sorry but i have somehow ignored the call to > ib_addr_get_mgid() at the rdmacm code). So to achieve interop with > IPoIB all we need to do is remove the rdmacm signature bit and not to > over-write the rdmacm qkey on the the qkey of the ipoib ipv4 broadcast > group, are you ok with that? I believe this would achieve interop with ipoib. However, overwriting the qkey may break any existing UD communication that the user may have. I just need to think about this more, and see what we can come up with. - Sean From swise at opengridcomputing.com Wed Jan 10 11:00:21 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 10 Jan 2007 13:00:21 -0600 Subject: [openib-general] ofed_1_2 / Chelsio Question In-Reply-To: <20070108191346.27253.16936.stgit@dell3.ogc.int> References: <20070108191346.27253.16936.stgit@dell3.ogc.int> Message-ID: <1168455622.7471.19.camel@stevo-desktop> OFED team, Will the ofed_1_2 git tree ever pull in 2.6.21? Or will a new ofed_1_3 tree be created for 2.6.21 support? 
I want to just commit the Chelsio drivers into ofed_1_2, but that might cause collision problems later. The Chelsio drivers are going to be merged into kernel.org 2.6.21. So if the ofed_1_2 repository will pull in 2.6.21 at some point then we might have collision problems. But if ofed_1_2 is really just for 2.6.20, then I can commit directly and keep things simpler... Thoughts? Thanks, Steve. From swise at opengridcomputing.com Wed Jan 10 11:04:00 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 10 Jan 2007 13:04:00 -0600 Subject: [openib-general] [PATCH] ~vlad/ofascripts.git - Add libcxgb3 to user kit In-Reply-To: <1168453249.27060.23.camel@stevo-desktop> References: <1168453249.27060.23.camel@stevo-desktop> Message-ID: <1168455840.7471.23.camel@stevo-desktop> I successfully built, installed, and sniff tested the userspace ofed 1.2 package on SUSE 10.1. I also configured and compiled libcxgb3 on SLES9SP3, SLES10, and RHEL4u4. So I think this is ready to be pulled in unless folks have issues with the changes in this patch. Steve. On Wed, 2007-01-10 at 12:20 -0600, Steve Wise wrote: > Vlad, > > Here are the changes to ofa userspace scripts for adding Chelsio's RDMA library, libcxgb3. > > Steve. > > > ----- > > Support for libcxgb3.
> > From: Steve Wise > > - added libcxgb3 to configure > - added libcxgb3 to Makefile > > Signed-off-by: Steve Wise > --- > > Makefile | 17 ++++++++++++++++- > configure | 16 ++++++++++++++++ > 2 files changed, 32 insertions(+), 1 deletions(-) > > diff --git a/Makefile b/Makefile > index b164ee0..4d57d82 100644 > --- a/Makefile > +++ b/Makefile > @@ -5,7 +5,8 @@ all: > .PHONY: libibverbs libmthca libibcm perftest mstflint libsdp libibcommon libibumad libibmad osm diags mpi > .PHONY: librdmacm install_librdmacm dapl dat tvflash install_tvflash > .PHONY: install_libibverbs install_libmthca install_libibcm install_libsdp libibcommon > -.PHONY: libipathverbs install_libipathverbs libehca install_libehca > +.PHONY: libipathverbs install_libipathverbs libehca install_libehca > +.PHONY: libcxgb3 install_libcxgb3 > .PHONY: install_libibumad install_libibmad install_osm install_diags install_mstflint install_perftest > .PHONY: srptools install_srptools ipoibtools install_ipoibtools > > @@ -49,6 +50,11 @@ user: libehca > install_user: install_libehca > endif > > +ifeq ($(WITH_CXGB3),yes) > +user: libcxgb3 > +install_user: install_libcxgb3 > +endif > + > ifeq ($(WITH_CM),yes) > user: libibcm > install_user: install_libibcm > @@ -139,6 +145,11 @@ libehca: libibverbs > AM_LDFLAGS="-L../libibverbs/src -libverbs" > @echo "Make libehca done" > > +libcxgb3: libibverbs > + $(MAKE) -C $(SRC)userspace/libcxgb3 \ > + AM_LDFLAGS="-L../libibverbs/src -libverbs" > + @echo "Make libcxgb3 done" > + > libibcm: libibverbs > $(MAKE) -C $(SRC)userspace/libibcm \ > CPPFLAGS="-I../libibverbs/include" \ > @@ -281,6 +292,9 @@ install_libipathverbs: install_libibverb > install_libehca: install_libibverbs > $(MAKE) -C $(SRC)userspace/libehca install > > +install_libcxgb3: install_libibverbs > + $(MAKE) -C $(SRC)userspace/libcxgb3 install > + > install_libibcm: install_libmthca > $(MAKE) -C $(SRC)userspace/libibcm install > > @@ -385,6 +399,7 @@ clean_user: > $(MAKE) -C $(SRC)userspace/libmthca clean 
> $(MAKE) -C $(SRC)userspace/libipathverbs clean > $(MAKE) -C $(SRC)userspace/libehca clean > + $(MAKE) -C $(SRC)userspace/libcxgb3 clean > $(MAKE) -C $(SRC)userspace/libibcm clean > # $(MAKE) -C $(SRC)userspace/useraccess-lib clean > $(MAKE) -C $(SRC)userspace/perftest clean > diff --git a/configure b/configure > index 5f8253e..616ef5c 100755 > --- a/configure > +++ b/configure > @@ -58,6 +58,9 @@ Usage: `basename $0` [options] > --with-libehca include libehca package [yes] > --without-libehca include libehca package [no] > > + --with-libcxgb3 include libcxgb3 package [yes] > + --without-libcxgb3 include libcxgb3 package [no] > + > --with-libibcm include CM package [no] > --without-libibcm don't include CM package [yes] > > @@ -277,6 +280,13 @@ main() > --without-libehca) > WITH_EHCA="no" > ;; > + --with-libcxgb3) > + WITH_CXGB3="yes" > + WITH_IBVERBS="yes" > + ;; > + --without-libcxgb3) > + WITH_CXGB3="no" > + ;; > --with-libibcm) > WITH_CM="yes" > WITH_MTHCA="yes" > @@ -410,6 +420,7 @@ main() > # WITH_DAPL="yes" > WITH_DIAGS="yes" > # WITH_EHCA="yes" > + WITH_CXGB3="yes" > WITH_IBCOMMON="yes" > WITH_IBMAD="yes" > WITH_IBUMAD="yes" > @@ -460,6 +471,7 @@ WITH_IBVERBS=${WITH_IBVERBS:-"no"} > WITH_MTHCA=${WITH_MTHCA:-"no"} > WITH_IPATHVERBS=${WITH_IPATHVERBS:-"no"} > WITH_EHCA=${WITH_EHCA:-"no"} > +WITH_CXGB3=${WITH_CXGB3:-"no"} > WITH_CM=${WITH_CM:-"no"} > WITH_SDP=${WITH_SDP:-"no"} > WITH_DAPL=${WITH_DAPL:-"no"} > @@ -501,6 +513,7 @@ WITH_IBVERBS=${WITH_IBVERBS} > WITH_MTHCA=${WITH_MTHCA} > WITH_IPATHVERBS=${WITH_IPATHVERBS} > WITH_EHCA=${WITH_EHCA} > +WITH_CXGB3=${WITH_CXGB3} > WITH_CM=${WITH_CM} > WITH_SDP=${WITH_SDP} > WITH_DAPL=${WITH_DAPL} > @@ -539,6 +552,9 @@ EOFCONFIG > if [ "${WITH_EHCA}" == "yes" ]; then > ullibs="${ullibs} libehca" > fi > + if [ "${WITH_CXGB3}" == "yes" ]; then > + ullibs="${ullibs} libcxgb3" > + fi > if [ "${WITH_CM}" == "yes" ]; then > ullibs="${ullibs} libibcm" > fi > > > > _______________________________________________ > 
openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mst at mellanox.co.il Wed Jan 10 11:10:37 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 21:10:37 +0200 Subject: [openib-general] ofed_1_2 / Chelsio Question In-Reply-To: <1168455622.7471.19.camel@stevo-desktop> References: <1168455622.7471.19.camel@stevo-desktop> Message-ID: <20070110191037.GK18953@mellanox.co.il> > Quoting Steve Wise : > Subject: ofed_1_2 / Chelsio Question > > OFED team, > > Will the ofed_1_2 git tree ever pull in 2.6.21? Or will a new ofed_1_3 > tree be created for 2.6.21 support? I want to just commit the Chelsio > drivers into ofed_1_2, but that might cause collision problems later. > The chelsio drivers are going to be merged into kernel.org 2.6.21. So > if the ofed_1_2 repository will pull in 2.6.21 at some point then we > might have collision problems. But if ofed_1_2 is really just for > 2.6.20, then I can commit directly and keep things simpler... Linus indicated that he wants to release 2.6.20 after LCA. So for now, it looks like 2.6.21-rc1 won't be out by the end of January (OFED code freeze), and I don't think we should pull an intermediate snapshot. Worst case, if the core changes (besides maybe Makefile/Kconfig) are in a separate patch, the rest of conflicts (adding new files basically) will be easy for us to resolve. -- MST From mst at mellanox.co.il Wed Jan 10 11:25:09 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 10 Jan 2007 21:25:09 +0200 Subject: [openib-general] [Fwd: ofed_1_2 / Chelsio Question] In-Reply-To: <1168456137.7471.29.camel@stevo-desktop> References: <1168456137.7471.29.camel@stevo-desktop> Message-ID: <20070110192509.GL18953@mellanox.co.il> We kind of do it together :) I try to review everything, Vlad does the final build/integration. 
The Cc does not matter much, I read openib-general. Quoting Steve Wise : Subject: [Fwd: [openib-general] ofed_1_2 / Chelsio Question] Hey Michael, I didn't CC you directly on this. I perhaps should have? Can you tell me who is the official maintainer for the 3 packaging repositories? ofed_1_2, ofascripts, and ofabuild. Is it you or Vlad or who? I just want to make sure I involve the correct folks. Thanks, Steve. -------- Forwarded Message -------- From: Steve Wise To: vlad at mellanox.co.il, mst at mellanox.co.il Cc: Roland Dreier , openib-general at openib.org Subject: [openib-general] ofed_1_2 / Chelsio Question Date: Wed, 10 Jan 2007 13:00:21 -0600 OFED team, Will the ofed_1_2 git tree ever pull in 2.6.21? Or will a new ofed_1_3 tree be created for 2.6.21 support? I want to just commit the Chelsio drivers into ofed_1_2, but that might cause collision problems later. The chelsio drivers are going to be merged into kernel.org 2.6.21. So if the ofed_1_2 repository will pull in 2.6.21 at some point then we might have collision problems. But if ofed_1_2 is really just for 2.6.20, then I can commit directly and keep things simpler... Thoughts? Thanks, Steve. _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From or.gerlitz at gmail.com Wed Jan 10 11:31:25 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 10 Jan 2007 21:31:25 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <20070110185757.GJ18953@mellanox.co.il> References: <15ddcffd0701101052h120f6b45l45e4bc5016625d6e@mail.gmail.com> <20070110185757.GJ18953@mellanox.co.il> Message-ID: <15ddcffd0701101131l2e7c3c7cwc719a626723f8759@mail.gmail.com> On 1/10/07, Michael S. Tsirkin wrote: > > On 1/10/07, Michael S. 
Tsirkin wrote: > > > > Quoting Or Gerlitz : > > > > Subject: Re: multicast code/merge status > > > > > Since Voltaire requested this code to be in OFED 1.2, could you please clarify > > > whether the code in Sean's branch is in the shape you want it to be in for OFED 1.2? > > > If rdma_cm multicast can't interoperate with IPoIB, is it still useful for you? > > > > Yes, we want this code in OFED 1.2 and we want it to be inter operable > > with IPoIB, there was no concrete objection for the interop > > requirement and the details seem quite simple, we have narrowed it > > now to the removal of the rdmacm signature bit and usage of the ipv4 > > broadcast group qkey by the rdmacm. > I am just trying to figure out when's the best time to import the multicast > branch into OFED. So, there will still be changes there? Pending on Sean's approval, basically my take is that the code should be queued to 2.6.21 and then integrated right away into OFED 1.2. So any ipoib testing would exercise the ipoib/ib_sa changes, there are very few small changes expected, mostly an "ipoib interop" patch of two lines removing from cma.c the setting of an "rdmacm signature bit" in the mgid and usage of its own qkey. Or.
From halr at voltaire.com Wed Jan 10 11:31:38 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 14:31:38 -0500 Subject: [openib-general] multicast code/merge status In-Reply-To: <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> Message-ID: <1168457495.18817.1746.camel@hal.voltaire.com> On Wed, 2007-01-10 at 13:47, Or Gerlitz wrote: > On 1/10/07, Sean Hefty wrote: > >> How about adding sendonly param to the ABI and having the ucma kernel > >> code returning -EINVAL if someone tries to set it to true. Such code can > >> be pushed to 2.6.21 and when you have the time to complete the > >> implementation you can complete this? > > > I don't think adding this is a huge deal; I just haven't gotten to it yet. > > However, I'd like to make sure there's enough time once the change is made to > > verify that we have the right result before pushing it upstream. > > Compared to the amount of functionality (ib_sa ref mcast counting and > rdma cm u/k mcast API) provided by this code, i really think it can be > done in two phases, first push a code that does not support sendonly > and later once its implemented push a patch that adds this. > > Anyway, per the best of my knowledge you would not be able to fully > verify the "right" result , i understand the opensm does not really > support a sendonly join (*) in the sense of that it does not support > configuring "one-way" MFTs for a subset of the branches/leaves of the > multicast group spanning tree, but this is orthogonal to your code. Yes, this is orthogonal and only affects strict conformance vis a vis the reception of packets for this group which would be discarded anyhow. 
This is an optimization provided by the spec. > (*) there are some more issues here which need to be addressed, see > for example the "Some SMs don't support send-only yet" weird comment > at ipoib_mcast_sendonly_join() It's more likely an SA issue but I'm only guessing... It may also be historical... -- Hal > > Woody has also seen this issue. And of course, I can't reproduce it on my > > systems, but I'm actively looking into the problem. It looks like some sort of > > issue with ipoib trying to join a non-existent multicast group. > > mmm, weird, are there active rdmacm multicast consumers involved in > this settings? > > >> Looking on the code, i understand that if an multicast consumer attempts > >> to join a group for which another consumer is already joined then it > >> just gets the group params, that is the mgid is your discriminator (with > >> the exception of an all zeros mgid which has a different treatment) > >> which makes much sense to me. > > > Not exactly. The rdma_cm consumer gets the group parameters for the ipoib > > broadcast group. It uses this information as a template for joining new groups. > > OK, got you at last (sorry but i have somehow ignored the call to > ib_addr_get_mgid() at the rdmacm code). So to achieve interop with > IPoIB all we need to do is remove the rdmacm signature bit and not to > over-write the rdmacm qkey on the the qkey of the ipoib ipv4 broadcast > group, are you ok with that? > > > One issue is that an rdma_cm consumer can first allocate a UD QP to use with UD > > traffic. When it later joins a multicast group, the qkey must be the same. How > > does ipoib handle this? > > Well, ipoib first sets a zero qkey into its qp in ipoib_init_qp() and > later in ipoib_mcast_attach() does another qp modify providing > priv->qkey which is the ipv4 broadcast group one, the rdma_cm can > mimic this behaviour for qps created with rdma_create_qp and also for > those created by the user. 
> > >> Since for our apps needs we do intend to join the 224.0.0.1 group, > >> resolving a) above is fine for us --> we will join 224.0.0.1 above, > >> provide the qkey to the rdma cm and it will join to the other group (eg > >> 224.5.5.5) with this qkey. > > > I'm not completely following you on this yet. > > forget this, there is no need in such tricks since the rdmacm has the > ipv4 broadcast group qkey. > > >> can you remind me what the idea/trick here, aren't you supposed to > >> generate an mgid for this case? > > > This either returns an existing MCMemberRecord that this node has joined, or it > > fills out an MCMemberRecord that can be used to join a new group. If the mgid > > is zero, the SA will assign one. > > thanks > > Or. > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From or.gerlitz at gmail.com Wed Jan 10 11:34:26 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 10 Jan 2007 21:34:26 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A5373C.2060306@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> Message-ID: <15ddcffd0701101134h35019f1br1eaee1839bc72cac@mail.gmail.com> On 1/10/07, Sean Hefty wrote: > > mmm, weird, are there active rdmacm multicast consumers involved in > > this settings? > > I believe the issue is restricted to ipoib only. What does it means in this context that ipoib attempts to join a non exisiting group, does it mean that ipoib assumes that the sm would create the group? 
> > OK, got you at last (sorry but i have somehow ignored the call to > > ib_addr_get_mgid() at the rdmacm code). So to achieve interop with > > IPoIB all we need to do is remove the rdmacm signature bit and not to > > over-write the rdmacm qkey on the the qkey of the ipoib ipv4 broadcast > > group, are you ok with that? > I believe this would achieve interop with ipoib. However, overwriting the qkey > may break any existing UD communication that the user may have. I just need to > think about this more, and see what we can come up with. Basically, since there is only one QKEY per UD QP, a user IB UD app should either set a qp per qkey or take the same approach as IPoIB does, one qp, one qkey, all unicast/mcast traffic and mcast joins use the same qkey, it sounds fine to me. Or. From or.gerlitz at gmail.com Wed Jan 10 11:36:14 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 10 Jan 2007 21:36:14 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <1168457495.18817.1746.camel@hal.voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> Message-ID: <15ddcffd0701101136q4f8a9baeg909aa55571afe7ce@mail.gmail.com> On 10 Jan 2007 14:31:38 -0500, Hal Rosenstock wrote: > On Wed, 2007-01-10 at 13:47, Or Gerlitz wrote: > > (*) there are some more issues here which need to be addressed, see > > for example the "Some SMs don't support send-only yet" weird comment > > at ipoib_mcast_sendonly_join() > It's more likely an SA issue but I'm only guessing... It may also be > historical... We are not a huge community, how about asking the person who put this comment to come and say "i did it" and "it was done b/c this or that" Or. 
From halr at voltaire.com Wed Jan 10 11:38:57 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 14:38:57 -0500 Subject: [openib-general] multicast code/merge status In-Reply-To: <15ddcffd0701101134h35019f1br1eaee1839bc72cac@mail.gmail.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <15ddcffd0701101134h35019f1br1eaee1839bc72cac@mail.gmail.com> Message-ID: <1168457935.18817.2199.camel@hal.voltaire.com> On Wed, 2007-01-10 at 14:34, Or Gerlitz wrote: > On 1/10/07, Sean Hefty wrote: > > > mmm, weird, are there active rdmacm multicast consumers involved in > > > this settings? > > > > I believe the issue is restricted to ipoib only. > > What does it means in this context that ipoib attempts to join a non > exisiting group, does it mean that ipoib assumes that the sm would > create the group? IPmc senders do send only joins and in IB this is insufficient to create the group. The group would need to be precreated or setup by the receiver prior to the sender. -- Hal > > > OK, got you at last (sorry but i have somehow ignored the call to > > > ib_addr_get_mgid() at the rdmacm code). So to achieve interop with > > > IPoIB all we need to do is remove the rdmacm signature bit and not to > > > over-write the rdmacm qkey on the the qkey of the ipoib ipv4 broadcast > > > group, are you ok with that? > > > I believe this would achieve interop with ipoib. However, overwriting the qkey > > may break any existing UD communication that the user may have. I just need to > > think about this more, and see what we can come up with. 
> > Basically, since there is only one QKEY per UD QP, a user IB UD app > should either set a qp per qkey or take the same approach as IPoIB > does, one qp, one qkey, all unicast/mcast traffic and mcast joins use > the same qkey, it sounds fine to me. > > Or. > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From halr at voltaire.com Wed Jan 10 11:44:12 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 14:44:12 -0500 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A3D9BB.2080408@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> Message-ID: <1168458252.18817.2502.camel@hal.voltaire.com> On Tue, 2007-01-09 at 13:06, Sean Hefty wrote: > > My thought re this was that since the rest of the original patch > > sequence specifically the rdma_cm UDP and ucma code are merged in > > 2.6.20-rcX which is the code OFED 1.2 is based on, the easy path for you > > would be to stage the multicast code for upstream push to 2.6.21 and > > then push the code as to OFED 1.2, what do you think? > > I think shooting for 2.6.21 is fine, but... > > > Other then that, as we discussed in SC06 there are some changes that > > need to be integrated in the code to allow for interoperability between > > a multicast rdma cm based app to IPoIB, specifically removing the RDMA > > CM signature from the mgid which generated from the ip addr and pkey, > > but not only. > > ...I have not completed these changes yet. Specifically, I have not added a > send only join parameter or changed the qkey. > > I have also not full examined an issue where the SM log fills up with bad > multicast join requests. Are these send only joins ? 
If so, it is because the group does not yet exist (you need a receiver first or it needs to be precreated). The latter is problematic for dynamic IPmc groups. > > The second change is related to the qkey, looking in the current code > > of cma_join_ib_multicast() (at the multicast-sa_cache branch of the > > rdma-dev git) i see that the qkey is the mc ip address, which is not > > consistent with what librdmacm is assuming (0x1234567 etc). > > This is a bug in the kernel code. It should be using the standard qkey of > 0x12345678 - for now anyway. Where does this standard qkey come from ? (I missed it). -- Hal > > Anyway, what we need here is to plug into the scheme of ipoib which uses > > the qkey associated with the ipv4 broadcast multicast group. It turns > > out that there is some twilight zone here which i am working to > > understand better. You can see that for the ipv4 brd group ipoib lets > > the SM to allocate the group and qkey (ie the create param of > > ipoib_mcast_join is zero), i will give it some thought and let you know > > how i think the rdma cm can plug into this scheme, will be happy to get > > other ideas as well. > > The rdma_cm knows the qkey that ipoib uses before it joins a multicast group. > See cma_join_ib_multicast() - call to ib_sa_get_mcmember_rec(). > > - Sean > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From vishal at endace.com Wed Jan 10 12:19:21 2007 From: vishal at endace.com (vishal) Date: Thu, 11 Jan 2007 09:19:21 +1300 Subject: [openib-general] NFS(v4)-RDMA In-Reply-To: References: <1168392546.5086.64.camel@julia.et.endace.com> Message-ID: <1168460361.5086.75.camel@julia.et.endace.com> Hi, NFSv4 does work over TCP..Any suggestions ? 
Thanks Vishal On Wed, 2007-01-10 at 12:07 -0500, James Lentini wrote: > On Wed, 10 Jan 2007, vishal wrote: > > > Hi, > > > > I am getting an invalid argument error when I try to mount using > > nfs(v4)-rdma. The mounts with nfs(v3)-rdma work fine. Following are the > > details:- > > > > 1. Command Issued on the client side: - > > ./nfsrdmamount -o rdma=10.0.0.2 -t nfs4 10.0.0.2:/ /data > > > > 2. /etc/exports on the client side > > /data 10.0.0.1(rw,fsid=0,insecure,no_subtree_check,async) > > > > 3. Error received:- > > nfsmount: Invalid argument > > > > 4. From strace:- > > > > mount("10.0.0.2:/", "/data", "nfs4", 0, "\1") = -1 EINVAL (Invalid > > argument) > > Do NFSv4 mounts work over TCP? > > If not, make sure you have v4 enabled on the client and server. From swise at opengridcomputing.com Wed Jan 10 13:12:06 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 10 Jan 2007 15:12:06 -0600 Subject: [openib-general] ofed_1_2 / Chelsio Question In-Reply-To: <20070110191037.GK18953@mellanox.co.il> References: <1168455622.7471.19.camel@stevo-desktop> <20070110191037.GK18953@mellanox.co.il> Message-ID: <1168463526.13836.5.camel@stevo-desktop> On Wed, 2007-01-10 at 21:10 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: ofed_1_2 / Chelsio Question > > > > OFED team, > > > > Will the ofed_1_2 git tree ever pull in 2.6.21? Or will a new ofed_1_3 > > tree be created for 2.6.21 support? I want to just commit the Chelsio > > drivers into ofed_1_2, but that might cause collision problems later. > > The chelsio drivers are going to be merged into kernel.org 2.6.21. So > > if the ofed_1_2 repository will pull in 2.6.21 at some point then we > > might have collision problems. But if ofed_1_2 is really just for > > 2.6.20, then I can commit directly and keep things simpler... > > Linus indicated that he wants to release 2.6.20 after LCA. 
> So for now, it looks like 2.6.21-rc1 won't be out by the end > of January (OFED code freeze), and I don't think we should pull > an intermediate snapshot. > > Worst case, if the core changes (besides maybe Makefile/Kconfig) are in a > separate patch, the rest of conflicts (adding new files basically) will be > easy for us to resolve. > With my latest patch for the cq-index thingy, there are no core changes. So I'll proceed then assuming the chelsio drivers will just be commits into the ofed_1_2 repos. Steve. From rdreier at cisco.com Wed Jan 10 13:36:54 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 13:36:54 -0800 Subject: [openib-general] [PATCH] IB/mthca: Fix off-by-one in FMR handling on memfree Message-ID: From: Michael S. Tsirkin mthca_table_find() will return the wrong address when the table entry being searched for is exactly at the beginning of a sglist entry (other than the first), because it uses >= when it should use >. Example: assume we have 2 entries in scatterlist, 4K each, offset is 4K. The current code will return first entry + 4K when we really want the second entry. In particular this means mapping an FMR on a memfree HCA may end up writing the page table into the wrong place, leading to memory corruption and also causing the HCA to use an incorrect address translation table. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- This is upstream, and fixes a data corruption/crash bug with storage over SRP. 
drivers/infiniband/hw/mthca/mthca_memfree.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 15cc2f6..6b19645 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -232,7 +232,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj) list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { - if (chunk->mem[i].length >= offset) { + if (chunk->mem[i].length > offset) { page = chunk->mem[i].page; goto out; } -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:12:44 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 0/7] userspace verbs changes Message-ID: <20071101412.muzmkz6JrWEte4a8@cisco.com> Here is a set of patches that finally update the libibverbs driver loading method (for the 1.1 development tree) as described before, and fix up all the device-specific libraries to match the new libibverbs. I've pushed out the changes to libibverbs itself and libmthca too. From rolandd at cisco.com Wed Jan 10 14:12:44 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 2/7] libmthca: Update libmthca for new libibverbs driver handling In-Reply-To: <20071101412.hi60tzlrr0zcdMdG@cisco.com> Message-ID: <20071101412.tsmK45TBRLr34Cws@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libmthca-rdmav2.so in the ordinary library path, rather than infiniband/mthca.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. 
This patch adds autoconf tests to libmthca to detect whether it is being built against a stable libibverbs 1.0 tree or the new libibverbs development tree. Then based on the result, it builds a library with the appropriate name and with the correct driver initialization handling. Signed-off-by: Roland Dreier --- Makefile.am | 25 +++++++++++++++++-------- configure.in | 16 ++++++++++++++-- mthca.driver | 1 + src/mthca.c | 23 ++++++++++++----------- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/Makefile.am b/Makefile.am index d98bf05..e9be461 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,20 +1,29 @@ -mthcalibdir = $(libdir)/infiniband - -mthcalib_LTLIBRARIES = src/mthca.la - -src_mthca_la_CFLAGS = -g -Wall -D_GNU_SOURCE +AM_CFLAGS = -g -Wall -D_GNU_SOURCE mthca_version_script = @MTHCA_VERSION_SCRIPT@ -src_mthca_la_SOURCES = src/ah.c src/buf.c src/cq.c src/memfree.c src/mthca.c \ +MTHCA_SOURCES = src/ah.c src/buf.c src/cq.c src/memfree.c src/mthca.c \ src/qp.c src/srq.c src/verbs.c -src_mthca_la_LDFLAGS = -avoid-version -module $(mthca_version_script) + +if HAVE_IBV_DEVICE_LIBRARY_EXTENSION + lib_LTLIBRARIES = src/libmthca.la + src_libmthca_la_SOURCES = $(MTHCA_SOURCES) + src_libmthca_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ + $(mthca_version_script) + mthcaconfdir = $(sysconfdir)/libibverbs.d + mthcaconf_DATA = mthca.driver +else + mthcalibdir = $(libdir)/infiniband + mthcalib_LTLIBRARIES = src/mthca.la + src_mthca_la_SOURCES = $(MTHCA_SOURCES) + src_mthca_la_LDFLAGS = -avoid-version -module $(mthca_version_script) +endif DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ debian/libmthca1.install debian/libmthca-dev.install debian/rules EXTRA_DIST = src/doorbell.h src/mthca.h src/mthca-abi.h src/wqe.h \ - src/mthca.map libmthca.spec.in + src/mthca.map libmthca.spec.in mthca.driver dist-hook: libmthca.spec cp libmthca.spec $(distdir) diff --git a/configure.in b/configure.in index 475fd56..a749a93 
100644 --- a/configure.in +++ b/configure.in @@ -32,7 +32,6 @@ dnl Checks for header files. AC_CHECK_HEADER(infiniband/driver.h, [], AC_MSG_ERROR([ not found. libmthca requires libibverbs.])) AC_HEADER_STDC -AC_CHECK_HEADERS(sysfs/libsysfs.h) AC_CHECK_HEADER(valgrind/memcheck.h, memcheck_ok=yes, memcheck_ok=no) if test $want_valgrind = yes && test $memcheck_ok = no; then @@ -44,7 +43,20 @@ AC_C_CONST AC_CHECK_SIZEOF(long) dnl Checks for library functions -AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_dontfork_range ibv_dofork_range) +AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_dontfork_range ibv_dofork_range \ + ibv_register_driver) + +dnl Now check if for libibverbs 1.0 vs 1.1 +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION, + test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION) +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, [if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then diff --git a/mthca.driver b/mthca.driver new file mode 100644 index 0000000..5880a47 --- /dev/null +++ b/mthca.driver @@ -0,0 +1 @@ +driver mthca diff --git a/src/mthca.c b/src/mthca.c index 27a72a4..81ac891 100644 --- a/src/mthca.c +++ b/src/mthca.c @@ -43,7 +43,7 @@ #include #include -#ifdef HAVE_SYSFS_LIBSYSFS_H +#ifndef HAVE_IBV_REGISTER_DRIVER #include #endif @@ -256,8 +256,8 @@ static int ibv_read_sysfs_file(const char *dir, const char *file, } #endif /* HAVE_IBV_READ_SYSFS_FILE */ -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *mthca_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[8]; struct mthca_device *dev; @@ -296,15 +296,16 @@ found: return &dev->ibv_dev; } +#ifdef HAVE_IBV_REGISTER_DRIVER +static 
__attribute__((constructor)) void mthca_register_driver(void) +{ + ibv_register_driver("mthca", mthca_driver_init); +} +#else /* * Export the old libsysfs sysfs_class_device-based driver entry point - * if libsysfs headers are installed. It doesn't hurt to export it, - * even if libibverbs is new enough not to use it; but if libsysfs - * headers are not installed, we can assume that the version of - * libibverbs we are building against is new enough not to use - * openib_driver_init(). + * if libibverbs does not export an ibv_register_driver() function. */ -#ifdef HAVE_SYSFS_LIBSYSFS_H struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { int abi_ver = 0; @@ -314,6 +315,6 @@ struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) value, sizeof value) > 0) abi_ver = strtol(value, NULL, 10); - return ibv_driver_init(sysdev->path, abi_ver); + return mthca_driver_init(sysdev->path, abi_ver); } -#endif /* HAVE_SYSFS_LIBSYSFS_H */ +#endif /* HAVE_IBV_REGISTER_DRIVER */ -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:12:44 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 4/7] libehca: Update to match new ibv_cmd_reg_mr() prototype In-Reply-To: <20071101412.FetAL0OLWbEXlqFX@cisco.com> Message-ID: <20071101412.1s8o418vcxoAEq4S@cisco.com> ibv_cmd_reg_mr() now takes extra parameters to allow low-level drivers to pass back a response. Add a test for the preprocessor define IBV_CMD_REG_MR_HAS_RESP_PARAMS, and if it exists, have libehca match the new prototype. 
Signed-off-by: Roland Dreier --- src/ehca_u_mrmw.c | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/src/ehca_u_mrmw.c b/src/ehca_u_mrmw.c index a10017d..72ffbc6 100644 --- a/src/ehca_u_mrmw.c +++ b/src/ehca_u_mrmw.c @@ -65,8 +65,18 @@ struct ibv_mr *ehcau_reg_mr(struct ibv_pd *pd, return NULL; } +#ifdef IBV_CMD_REG_MR_HAS_RESP_PARAMS + { + struct ibv_reg_mr_resp resp; + + ret = ibv_cmd_reg_mr(pd, addr, length, (u64)(unsigned long)addr, + access, mr, &cmd, sizeof(struct ibv_reg_mr), + &resp, sizeof(struct ibv_reg_mr_resp)); + } +#else ret = ibv_cmd_reg_mr(pd, addr, length, (u64)(unsigned long)addr, access, mr, &cmd, sizeof(struct ibv_reg_mr)); +#endif if (ret) { EDEB_ERR(4, "ibv_cmd_reg_mr ret=%x", ret); free(mr); -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:12:44 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 3/7] libipathverbs: Update libipathverbs for new libibverbs driver handling In-Reply-To: <20071101412.tsmK45TBRLr34Cws@cisco.com> Message-ID: <20071101412.FetAL0OLWbEXlqFX@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libipathverbs-rdmav2.so in the ordinary library path, rather than infiniband/ipathverbs.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. Since the current libipathverbs tree is targeted only for libibverbs development trees and won't work with libibverbs 1.0, this patch changes libipathverbs to work with the new libibverbs way of loading drivers without any autoconf tests. 
Signed-off-by: Roland Dreier --- Makefile.am | 15 ++++++++------- configure.in | 13 +++++++++++++ ipathverbs.driver | 1 + libipathverbs.spec.in | 5 +++-- src/ipathverbs.c | 9 +++++++-- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/Makefile.am b/Makefile.am index f86e1e2..c1ef44f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,11 +32,9 @@ # combinations of this program with other software, or any other # product whatsoever. -ipathverbslibdir = $(libdir)/infiniband +lib_LTLIBRARIES = src/libipathverbs.la -ipathverbslib_LTLIBRARIES = src/ipathverbs.la - -src_ipathverbs_la_CFLAGS = -Wall -D_GNU_SOURCE +AM_CFLAGS = -Wall -D_GNU_SOURCE if HAVE_LD_VERSION_SCRIPT ipathverbs_version_script = -Wl,--version-script=$(srcdir)/src/ipathverbs.map @@ -44,14 +42,17 @@ else ipathverbs_version_script = endif -src_ipathverbs_la_SOURCES = src/ipathverbs.c src/verbs.c -src_ipathverbs_la_LDFLAGS = -avoid-version -module \ +src_libipathverbs_la_SOURCES = src/ipathverbs.c src/verbs.c +src_libipathverbs_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ $(ipathverbs_version_script) +ipathverbsconfdir = $(sysconfdir)/libibverbs.d +ipathverbsconf_DATA = ipathverbs.driver EXTRA_DIST = src/ipathverbs.h \ src/ipath-abi.h \ src/ipathverbs.map \ - libipathverbs.spec.in + libipathverbs.spec.in \ + ipathverbs.driver dist-hook: libipathverbs.spec cp libipathverbs.spec $(distdir) diff --git a/configure.in b/configure.in index 727cd65..ab1a86e 100644 --- a/configure.in +++ b/configure.in @@ -56,6 +56,19 @@ dnl Checks for typedefs, structures, and compiler characteristics. 
AC_C_CONST AC_CHECK_SIZEOF(long) +dnl Now check if for libibverbs device library extension +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +if test $IBV_DEVICE_LIBRARY_EXTENSION = IBV_DEVICE_LIBRARY_EXTENSION; then + AC_MSG_ERROR([IBV_DEVICE_LIBRARY_EXTENSION not defined. Is libibverbs new enough?]) +fi +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) + AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then ac_cv_version_script=yes diff --git a/ipathverbs.driver b/ipathverbs.driver new file mode 100644 index 0000000..d212578 --- /dev/null +++ b/ipathverbs.driver @@ -0,0 +1 @@ +driver ipathverbs diff --git a/libipathverbs.spec.in b/libipathverbs.spec.in index 4726102..1a4b2bd 100644 --- a/libipathverbs.spec.in +++ b/libipathverbs.spec.in @@ -77,9 +77,10 @@ rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root) -%{_libdir}/infiniband/ipathverbs.so +%{_libdir}/libipathverbs*.so %doc AUTHORS COPYING +%config %{_sysconfdir}/libibverbs.d/ipathverbs.driver %files devel %defattr(-,root,root,-) -%{_libdir}/infiniband/ipathverbs.a +%{_libdir}/infiniband/libipathverbs*.a diff --git a/src/ipathverbs.c b/src/ipathverbs.c index d9fadca..22f2f48 100644 --- a/src/ipathverbs.c +++ b/src/ipathverbs.c @@ -165,8 +165,8 @@ static struct ibv_device_ops ipath_dev_ops = { .free_context = ipath_free_context }; -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *ipathverbs_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[8]; struct ipath_device *dev; @@ -204,3 +204,8 @@ found: return &dev->ibv_dev; } + +static __attribute__((constructor)) void ipathverbs_register_driver(void) +{ + ibv_register_driver("ipathverbs", ipathverbs_driver_init); +} -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:12:44 
2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <20071101412.UL3KlXfzzPdPpGRy@cisco.com> Message-ID: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libcxgb3-rdmav2.so in the ordinary library path, rather than infiniband/cxgb3.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. Since the current libcxgb3 tree is targeted only for libibverbs development trees and won't work with libibverbs 1.0, this patch changes libcxgb3 to work with the new libibverbs way of loading drivers without any autoconf tests. Signed-off-by: Roland Dreier --- Makefile.am | 14 +++++++------- configure.in | 13 +++++++++++++ cxgb3.driver | 1 + libcxgb3.spec.in | 7 ++++--- src/iwch.c | 27 ++++----------------------- 5 files changed, 29 insertions(+), 33 deletions(-) diff --git a/Makefile.am b/Makefile.am index 8f3a679..3dab25e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,10 +1,8 @@ # $Id: Makefile.am 3802 2005-10-18 18:32:06Z tom $ -cxgb3libdir = $(libdir)/infiniband +lib_LTLIBRARIES = src/libcxgb3.la -cxgb3lib_LTLIBRARIES = src/cxgb3.la - -src_cxgb3_la_CFLAGS = -g -Wall -D_GNU_SOURCE +AM_CFLAGS = -g -Wall -D_GNU_SOURCE if HAVE_LD_VERSION_SCRIPT cxgb3_version_script = -Wl,--version-script=$(srcdir)/src/iwch.map @@ -12,16 +10,18 @@ else cxgb3_version_script = endif -src_cxgb3_la_SOURCES = src/cq.c src/iwch.c src/qp.c \ +src_libcxgb3_la_SOURCES = src/cq.c src/iwch.c src/qp.c \ src/verbs.c -src_cxgb3_la_LDFLAGS = -avoid-version -module \ +src_libcxgb3_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ $(cxgb3_version_script) +cxgb3confdir = $(sysconfdir)/libibverbs.d +cxgb3conf_DATA = 
cxgb3.driver #DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ # debian/libcxgb31.install debian/libcxgb3-dev.install debian/rules EXTRA_DIST = src/iwch.h src/iwch-abi.h \ - src/iwch.map libcxgb3.spec.in $(DEBIAN) + src/iwch.map libcxgb3.spec.in cxgb3.driver dist-hook: libcxgb3.spec cp libcxgb3.spec $(distdir) diff --git a/configure.in b/configure.in index d113da9..81b5a30 100644 --- a/configure.in +++ b/configure.in @@ -28,6 +28,19 @@ AC_C_CONST dnl Checks for library functions AC_CHECK_FUNCS(ibv_read_sysfs_file) +dnl Now check if for libibverbs device library extension +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +if test $IBV_DEVICE_LIBRARY_EXTENSION = IBV_DEVICE_LIBRARY_EXTENSION; then + AC_MSG_ERROR([IBV_DEVICE_LIBRARY_EXTENSION not defined. Is libibverbs new enough?]) +fi +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) + AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then ac_cv_version_script=yes diff --git a/cxgb3.driver b/cxgb3.driver new file mode 100644 index 0000000..cfa6186 --- /dev/null +++ b/cxgb3.driver @@ -0,0 +1 @@ +driver cxgb3 diff --git a/libcxgb3.spec.in b/libcxgb3.spec.in index e710808..959f659 100644 --- a/libcxgb3.spec.in +++ b/libcxgb3.spec.in @@ -39,18 +39,19 @@ make %{?_smp_mflags} rm -rf $RPM_BUILD_ROOT %makeinstall # remove unpackaged files from the buildroot -rm -f $RPM_BUILD_ROOT%{_libdir}/infiniband/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la %clean rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root,-) -%{_libdir}/infiniband/cxgb3.so +%{_libdir}/libcxgb3*.so %doc AUTHORS COPYING ChangeLog README +%config %{_sysconfdir}/libibverbs.d/cxgb3.driver %files devel %defattr(-,root,root,-) -%{_libdir}/infiniband/cxgb3.a +%{_libdir}/libcxgb3*.a %changelog diff --git a/src/iwch.c b/src/iwch.c index 
17f1d61..e972460 100644 --- a/src/iwch.c +++ b/src/iwch.c @@ -41,16 +41,6 @@ #include #include -#ifdef HAVE_SYSFS_LIBSYSFS_H -#include -#endif - -#ifndef HAVE_IBV_READ_SYSFS_FILE -#include -#include -#include -#endif - #include "iwch.h" #include "iwch-abi.h" @@ -173,8 +163,8 @@ static struct ibv_device_ops iwch_dev_ops = { .free_context = iwch_free_context }; -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *cxgb3_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[16]; char s[32]; @@ -251,16 +241,7 @@ err1: return NULL; } -#ifdef HAVE_SYSFS_LIBSYSFS_H -struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) +static __attribute__((constructor)) void cxgb3_register_driver(void) { - int abi_ver = 0; - char value[8]; - - if (ibv_read_sysfs_file(sysdev->path, "abi_version", - value, sizeof value) > 0) - abi_ver = strtol(value, NULL, 10); - - return ibv_driver_init(sysdev->path, abi_ver); + ibv_register_driver("cxgb3", cxgb3_driver_init); } -#endif /* HAVE_SYSFS_LIBSYSFS_H */ -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:12:44 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 5/7] libehca: Update libehca for new libibverbs driver handling In-Reply-To: <20071101412.1s8o418vcxoAEq4S@cisco.com> Message-ID: <20071101412.4gqKNyWQfmYJHI0N@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libehca-rdmav2.so in the ordinary library path, rather than infiniband/libehca.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. This patch adds autoconf tests to libehca to detect whether it is being built against a stable libibverbs 1.0 tree or the new libibverbs development tree. 
Then based on the result, it builds a library with the appropriate name and with the correct driver initialization handling. Signed-off-by: Roland Dreier --- Makefile.am | 30 +++++++++++++++++++----------- config.h.in | 6 +++--- configure.in | 15 +++++++++++++-- ehca.driver | 1 + src/ehca_uinit.c | 12 +++++++++--- 5 files changed, 45 insertions(+), 19 deletions(-) diff --git a/Makefile.am b/Makefile.am index 46d639c..ca44a45 100644 --- a/Makefile.am +++ b/Makefile.am @@ -37,18 +37,25 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +AM_CFLAGS = -O2 -g -Wall -D_GNU_SOURCE -DP_SERIES -Isrc -ehcalibdir = $(libdir)/infiniband +EHCA_SOURCES = src/ehca_umain.c src/ehca_u_mrmw.c src/ehca_uinit.c \ + src/ehca_ureqs.c src/hcp_phyp.c -ehcalib_LTLIBRARIES = src/libehca.la - -src_libehca_la_SOURCES = src/ehca_umain.c src/ehca_u_mrmw.c src/ehca_uinit.c src/ehca_ureqs.c src/hcp_phyp.c - -src_libehca_la_CFLAGS = -O2 -g -Wall -D_GNU_SOURCE -DP_SERIES -I../libibverbs/include -Isrc - -src_libehca_la_LDFLAGS = -version-info 1 -export-dynamic \ - -Wl,--version-script=$(srcdir)/src/libehca.map \ - -lpthread -libverbs -nostdlib +if HAVE_IBV_DEVICE_LIBRARY_EXTENSION + lib_LTLIBRARIES = src/libehca.la + src_libehca_la_SOURCES = $(EHCA_SOURCES) + src_libehca_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ + -Wl,--version-script=$(srcdir)/src/libehca.map \ + -lpthread -libverbs -nostdlib +else + ehcalibdir = $(libdir)/infiniband + ehcalib_LTLIBRARIES = src/libehca.la + src_libehca_la_SOURCES = $(EHCA_SOURCES) + src_libehca_la_LDFLAGS = -version-info 1 -export-dynamic \ + -Wl,--version-script=$(srcdir)/src/libehca.map \ + -lpthread -libverbs -nostdlib +endif EXTRA_DIST = src/ehca_asm.h \ src/ehca_galpa.h \ @@ -59,7 +66,8 @@ EXTRA_DIST = src/ehca_asm.h \ src/ehca_qes.h \ src/ehca_utools.h \ src/hipz_hw.h \ - src/libehca.map + src/libehca.map \ + src/ehca.driver # dist-hook: libehca.spec # cp libehca.spec 
$(distdir) diff --git a/config.h.in b/config.h.in index 03adb6e..add7607 100644 --- a/config.h.in +++ b/config.h.in @@ -6,6 +6,9 @@ /* Define to 1 if you have the `ibv_read_sysfs_file' function. */ #undef HAVE_IBV_READ_SYSFS_FILE +/* Define to 1 if you have the `ibv_register_driver' function. */ +#undef HAVE_IBV_REGISTER_DRIVER + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H @@ -27,9 +30,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H -/* Define to 1 if you have the header file. */ -#undef HAVE_SYSFS_LIBSYSFS_H - /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H diff --git a/configure.in b/configure.in index 21f97ff..591b9b0 100644 --- a/configure.in +++ b/configure.in @@ -30,9 +30,20 @@ AC_CHECK_HEADER(infiniband/driver.h, [], AC_MSG_ERROR([ not found. libehca requires libibverbs.])) dnl Checks for library functions -AC_CHECK_FUNCS(ibv_read_sysfs_file) +AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_register_driver) + +dnl Now check if for libibverbs 1.0 vs 1.1 +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION, + test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION) +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) fi -AC_CHECK_HEADERS(sysfs/libsysfs.h) dnl Checks for programs. 
AC_PROG_CC diff --git a/ehca.driver b/ehca.driver new file mode 100644 index 0000000..b0000e4 --- /dev/null +++ b/ehca.driver @@ -0,0 +1 @@ +driver ehca diff --git a/src/ehca_uinit.c b/src/ehca_uinit.c index 4618601..47866cf 100644 --- a/src/ehca_uinit.c +++ b/src/ehca_uinit.c @@ -184,8 +184,8 @@ static int ibv_read_sysfs_file(const char *dir, const char *file, } #endif /* HAVE_IBV_READ_SYSFS_FILE */ -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *ehca_driver_init(const char *uverbs_sys_path, + int abi_version) { struct ehcau_device *my_dev = NULL; char value[64]; @@ -225,6 +225,7 @@ struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, return &my_dev->ibv_dev; } +#ifndef HAVE_IBV_REGISTER_DRIVER struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { int abi_ver = 0; @@ -234,8 +235,9 @@ struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) value, sizeof value) > 0) abi_ver = strtol(value, NULL, 10); - return ibv_driver_init(sysdev->path, abi_ver); + return ehca_driver_init(sysdev->path, abi_ver); } +#endif /* HAVE_IBV_REGISTER_DRIVER */ /** @brief module initialization */ @@ -328,6 +330,10 @@ void __attribute__ ((constructor)) ehcau_init(void) } else { fprintf(libehca_fh, "tracelevel is:%i\n", libehca_trlevel); } + +#ifdef HAVE_IBV_REGISTER_DRIVER + ibv_register_driver("ehca", ehca_driver_init); +#endif } /* eof ehca_uinit.c */ -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:12:44 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 6/7] libamso: Update libamso for new libibverbs driver handling In-Reply-To: <20071101412.4gqKNyWQfmYJHI0N@cisco.com> Message-ID: <20071101412.UL3KlXfzzPdPpGRy@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. 
The driver must be in a shared object like libamso-rdmav2.so in the ordinary library path, rather than infiniband/amso.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. This patch adds autoconf tests to libamso to detect whether it is being built against a stable libibverbs 1.0 tree or the new libibverbs development tree. Then based on the result, it builds a library with the appropriate name and with the correct driver initialization handling. Signed-off-by: Roland Dreier --- Makefile.am | 25 +++++++++++++++---------- amso.driver | 1 + configure.in | 15 +++++++++++++-- src/amso.c | 16 +++++++++++----- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/Makefile.am b/Makefile.am index 9e2cbc1..d1749fd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,10 +1,6 @@ # $Id: $ -amsolibdir = $(libdir)/infiniband - -amsolib_LTLIBRARIES = src/amso.la - -src_amso_la_CFLAGS = -g -Wall -D_GNU_SOURCE +AM_CFLAGS = -g -Wall -D_GNU_SOURCE if HAVE_LD_VERSION_SCRIPT amso_version_script = -Wl,--version-script=$(srcdir)/src/amso.map @@ -12,16 +8,25 @@ else amso_version_script = endif -src_amso_la_SOURCES = src/cq.c src/amso.c src/qp.c \ - src/verbs.c -src_amso_la_LDFLAGS = -avoid-version -module \ - $(amso_version_script) +AMSO_SOURCES = src/cq.c src/amso.c src/qp.c src/verbs.c + +if HAVE_IBV_DEVICE_LIBRARY_EXTENSION + lib_LTLIBRARIES = src/libamso.la + src_libamso_la_SOURCES = $(AMSO_SOURCES) + src_libamso_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ + $(amso_version_script) +else + amsolibdir = $(libdir)/infiniband + amsolib_LTLIBRARIES = src/amso.la + src_amso_la_SOURCES = $(AMSO_SOURCES) + src_amso_la_LDFLAGS = -avoid-version -module $(amso_version_script) +endif #DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ # debian/libamso1.install debian/libamso-dev.install debian/rules EXTRA_DIST = src/amso.h 
src/amso-abi.h \ - src/amso.map libamso.spec.in $(DEBIAN) + src/amso.map libamso.spec.in amso.driver dist-hook: libamso.spec cp libamso.spec $(distdir) diff --git a/amso.driver b/amso.driver new file mode 100644 index 0000000..272dcc2 --- /dev/null +++ b/amso.driver @@ -0,0 +1 @@ +driver amso diff --git a/configure.in b/configure.in index 4a920c4..d3344d2 100644 --- a/configure.in +++ b/configure.in @@ -16,7 +16,6 @@ AC_CHECK_LIB(ibverbs, ibv_get_device_list, [], AC_MSG_ERROR([ibv_get_device_list() not found. libmthca requires libibverbs.])) dnl Checks for header files. -AC_CHECK_HEADERS(sysfs/libsysfs.h) AC_CHECK_HEADER(infiniband/driver.h, [], AC_MSG_ERROR([ not found. Is libibverbs installed?])) AC_HEADER_STDC @@ -26,7 +25,19 @@ AC_C_CONST AC_CHECK_SIZEOF(long) dnl Checks for library functions -AC_CHECK_FUNCS(ibv_read_sysfs_file) +AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_register_driver) + +dnl Now check if for libibverbs 1.0 vs 1.1 +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION, + test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION) +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then diff --git a/src/amso.c b/src/amso.c index 60a44d1..bae4220 100644 --- a/src/amso.c +++ b/src/amso.c @@ -40,7 +40,7 @@ #include #include -#ifdef HAVE_SYSFS_LIBSYSFS_H +#ifndef HAVE_IBV_REGISTER_DRIVER #include #endif @@ -135,8 +135,8 @@ static struct ibv_device_ops amso_dev_ops = { .free_context = amso_free_context }; -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *amso_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[8]; struct amso_device *dev; @@ -174,7 +174,13 @@ 
found: return &dev->ibv_dev; } -#ifdef HAVE_SYSFS_LIBSYSFS_H + +#ifdef HAVE_IBV_REGISTER_DRIVER +static __attribute__((constructor)) void amso_register_driver(void) +{ + ibv_register_driver("amso", amso_driver_init); +} +#else struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { int abi_ver = 0; @@ -186,4 +192,4 @@ struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) return ibv_driver_init(sysdev->path, abi_ver); } -#endif /* HAVE_SYSFS_LIBSYSFS_H */ +#endif /* HAVE_IBV_REGISTER_DRIVER */ -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:12:44 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:12:44 -0800 Subject: [openib-general] [PATCH 1/7] libibverbs: Implement new method for finding and loading device-specific drivers In-Reply-To: <20071101412.muzmkz6JrWEte4a8@cisco.com> Message-ID: <20071101412.hi60tzlrr0zcdMdG@cisco.com> Export an ibv_register_driver() entry point, and expect plugins to call it from __attribute__((constructor)) code. This will allow multiple drivers to be statically linked in. Also read config files and then use dlopen() with a relative path to find drivers (rather than searching a specific lib/infiniband/ directory for .so files). This allows multiple versions of a driver to be installed in parallel, and also allows for optimized drivers in places like /lib/i686. Drivers should no longer export an ibv_driver_init() function. Instead, they should add a function (which can be static) with __attribute__((constructor)) that calls ibv_register_driver() to register the driver's probe function. Also, drivers should install a file with a line "driver <name>" under ${sysconfdir}/libibverbs.d.
Signed-off-by: Roland Dreier --- Makefile.am | 2 +- README | 2 +- include/infiniband/driver.h | 19 +- src/ibverbs.h | 5 - src/init.c | 489 ++++++++++++++++++++++++++++++------------- src/libibverbs.map | 1 + 6 files changed, 350 insertions(+), 168 deletions(-) diff --git a/Makefile.am b/Makefile.am index 391c522..35f4468 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,7 +4,7 @@ lib_LTLIBRARIES = src/libibverbs.la AM_CFLAGS = -g -Wall -D_GNU_SOURCE -src_libibverbs_la_CFLAGS = -g -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\" +src_libibverbs_la_CFLAGS = $(AM_CFLAGS) -DIBV_CONFIG_DIR=\"$(sysconfdir)/libibverbs.d\" libibverbs_version_script = @LIBIBVERBS_VERSION_SCRIPT@ diff --git a/README b/README index 6190895..634fc3b 100644 --- a/README +++ b/README @@ -80,7 +80,7 @@ uninitialized" warnings. This code adds trivial overhead to the critical performance path, so it is disabled by default. The intent is that production users can use a "normal" build of libibverbs and developers can use the "valgrind debug" build by simply switching -their LD_LIBRARY_PATH and/or OPENIB_DRIVER_PATH environment variables. +their LD_LIBRARY_PATH environment variables. Libibverbs needs some header files from Valgrind in order to compile this support; it is important to use the header files from the same diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h index f759d52..8e2418c 100644 --- a/include/infiniband/driver.h +++ b/include/infiniband/driver.h @@ -47,21 +47,16 @@ #endif /* __cplusplus */ /* - * Device-specific drivers should declare their device init function - * as below (the name must be "openib_driver_init"): - * - * struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - * int abi_version); - * - * libibverbs will call each driver's ibv_driver_init() function once - * for each InfiniBand device. If the device is one that the driver - * can support, it should return a struct ibv_device * with the ops - * member filled in. 
If the driver does not support the device, it - * should return NULL from openib_driver_init(). + * Extension that low-level drivers should add to their .so filename + * (probably via libtool "-release" option). For example a low-level + * driver named "libfoo" should build a plug-in named "libfoo-rdmav2.so". */ +#define IBV_DEVICE_LIBRARY_EXTENSION rdmav2 -typedef struct ibv_device *(*ibv_driver_init_func)(const char *, int); +typedef struct ibv_device *(*ibv_driver_init_func)(const char *uverbs_sys_path, + int abi_version); +void ibv_register_driver(const char *name, ibv_driver_init_func init_func); int ibv_cmd_get_context(struct ibv_context *context, struct ibv_get_context *cmd, size_t cmd_size, struct ibv_get_context_resp *resp, size_t resp_size); diff --git a/src/ibverbs.h b/src/ibverbs.h index 4c6c07e..14330f8 100644 --- a/src/ibverbs.h +++ b/src/ibverbs.h @@ -58,11 +58,6 @@ #define PFX "libibverbs: " -struct ibv_driver { - ibv_driver_init_func init_func; - struct ibv_driver *next; -}; - struct ibv_abi_compat_v2 { struct ibv_comp_channel channel; pthread_mutex_t in_use; diff --git a/src/init.c b/src/init.c index fe70852..3505733 100644 --- a/src/init.c +++ b/src/init.c @@ -46,150 +46,314 @@ #include "ibverbs.h" -#ifndef OPENIB_DRIVER_PATH_ENV -# define OPENIB_DRIVER_PATH_ENV "OPENIB_DRIVER_PATH" -#endif - HIDDEN int abi_ver; -static const char default_path[] = DRIVER_PATH; -static const char *user_path; - -static struct ibv_driver *driver_list; - -static void load_driver(char *so_path) +struct ibv_sysfs_dev { + char sysfs_name[IBV_SYSFS_NAME_MAX]; + char ibdev_name[IBV_SYSFS_NAME_MAX]; + char sysfs_path[IBV_SYSFS_PATH_MAX]; + char ibdev_path[IBV_SYSFS_PATH_MAX]; + struct ibv_sysfs_dev *next; + int abi_ver; + int have_driver; +}; + +struct ibv_driver_name { + char *name; + struct ibv_driver_name *next; +}; + +struct ibv_driver { + const char *name; + ibv_driver_init_func init_func; + struct ibv_driver *next; +}; + +static struct ibv_sysfs_dev *sysfs_dev_list; 
+static struct ibv_driver_name *driver_name_list; +static struct ibv_driver *head_driver, *tail_driver; + +static void find_sysfs_devs(void) { - void *dlhandle; - ibv_driver_init_func init_func; - struct ibv_driver *driver; + char class_path[IBV_SYSFS_PATH_MAX]; + DIR *class_dir; + struct dirent *dent; + struct ibv_sysfs_dev *sysfs_dev; + char value[8]; - dlhandle = dlopen(so_path, RTLD_NOW); - if (!dlhandle) { - fprintf(stderr, PFX "Warning: couldn't load driver %s: %s\n", - so_path, dlerror()); + snprintf(class_path, sizeof class_path, "%s/class/infiniband_verbs", + ibv_get_sysfs_path()); + + class_dir = opendir(class_path); + if (!class_dir) { + fprintf(stderr, PFX "Fatal: couldn't open sysfs class " + "directory '%s'.\n", class_path); return; } - dlerror(); - init_func = dlsym(dlhandle, "ibv_driver_init"); - if (dlerror() != NULL || !init_func) { - dlclose(dlhandle); - return; + while ((dent = readdir(class_dir))) { + if (dent->d_name[0] == '.' || dent->d_type == DT_REG) + continue; + + sysfs_dev = malloc(sizeof *sysfs_dev); + if (!sysfs_dev) { + fprintf(stderr, PFX "Warning: couldn't allocate sysfs dev " + "for '%s'.\n", dent->d_name); + continue; + } + + snprintf(sysfs_dev->sysfs_name, sizeof sysfs_dev->sysfs_name, + "%s", dent->d_name); + snprintf(sysfs_dev->sysfs_path, sizeof sysfs_dev->sysfs_path, + "%s/%s", class_path, dent->d_name); + + if (ibv_read_sysfs_file(sysfs_dev->sysfs_path, "ibdev", + sysfs_dev->ibdev_name, + sizeof sysfs_dev->ibdev_name) < 0) { + fprintf(stderr, PFX "Warning: no ibdev class attr for '%s'.\n", + dent->d_name); + free(sysfs_dev); + continue; + } + + snprintf(sysfs_dev->ibdev_path, sizeof sysfs_dev->ibdev_path, + "%s/class/infiniband/%s", ibv_get_sysfs_path(), + sysfs_dev->ibdev_name); + + sysfs_dev->next = NULL; + sysfs_dev->have_driver = 0; + if (ibv_read_sysfs_file(sysfs_dev->sysfs_path, "abi_version", + value, sizeof value) > 0) + sysfs_dev->abi_ver = strtol(value, NULL, 10); + else + sysfs_dev->abi_ver = 0; + + sysfs_dev_list 
= sysfs_dev; } + closedir(class_dir); +} + +void ibv_register_driver(const char *name, ibv_driver_init_func init_func) +{ + struct ibv_driver *driver; + driver = malloc(sizeof *driver); if (!driver) { - fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n", so_path); - dlclose(dlhandle); + fprintf(stderr, PFX "Warning: couldn't allocate driver for %s\n", name); return; } + driver->name = name; driver->init_func = init_func; - driver->next = driver_list; - driver_list = driver; + driver->next = NULL; + + if (tail_driver) + tail_driver->next = driver; + else + head_driver = driver; + tail_driver = driver; } -static void find_drivers(const char *dir) +static void load_driver(const char *name) { - size_t len = strlen(dir); - glob_t so_glob; - char *pat; - int ret; - int i; + char *so_name; + void *dlhandle; - if (!len) +#define __IBV_QUOTE(x) #x +#define IBV_QUOTE(x) __IBV_QUOTE(x) + + if (asprintf(&so_name, + "lib%s-" IBV_QUOTE(IBV_DEVICE_LIBRARY_EXTENSION) ".so", + name) < 0) { + fprintf(stderr, PFX "Warning: couldn't load driver '%s'.\n", + name); return; + } + + dlhandle = dlopen(so_name, RTLD_NOW); + if (!dlhandle) { + fprintf(stderr, PFX "Warning: couldn't load driver '%s': %s\n", + name, dlerror()); + goto out; + } + +out: + free(so_name); +} - while (len && dir[len - 1] == '/') - --len; +static void load_drivers(void) +{ + struct ibv_driver_name *name, *next_name; + const char *env; + char *list, *env_name; - asprintf(&pat, "%.*s/*.so", (int) len, dir); + /* + * Only use drivers passed in through the calling user's + * environment if we're not running setuid. 
+ */ + if (getuid() == geteuid()) { + if ((env = getenv("RDMAV_DRIVERS"))) { + list = strdupa(env); + while ((env_name = strsep(&list, ":;"))) + load_driver(env_name); + } else if ((env = getenv("IBV_DRIVERS"))) { + list = strdupa(env); + while ((env_name = strsep(&list, ":;"))) + load_driver(env_name); + } + } - ret = glob(pat, 0, NULL, &so_glob); - free(pat); + for (name = driver_name_list, next_name = name ? name->next : NULL; + name; + name = next_name, next_name = name ? name->next : NULL) { + load_driver(name->name); + free(name->name); + free(name); + } +} - if (ret) { - if (ret != GLOB_NOMATCH) - fprintf(stderr, PFX "Warning: couldn't search %s\n", pat); +static void read_config_file(const char *dir, const char *name) +{ + char *path; + FILE *conf; + char *line = NULL; + char *config; + char *field; + size_t buflen = 0; + ssize_t len; + + if (asprintf(&path, "%s/%s", dir, name) < 0) { + fprintf(stderr, PFX "Warning: couldn't read config file %s/%s.\n", + dir, name); return; } - for (i = 0; i < so_glob.gl_pathc; ++i) - load_driver(so_glob.gl_pathv[i]); + conf = fopen(path, "r"); + if (!conf) { + fprintf(stderr, PFX "Warning: couldn't read config file %s.\n", + path); + goto out; + } + + while ((len = getline(&line, &buflen, conf)) != -1) { + config = line + strspn(line, "\t "); + if (config[0] == '\n' || config[0] == '#') + continue; + + field = strsep(&config, "\n\t "); + + if (strcmp(field, "driver") == 0) { + struct ibv_driver_name *driver_name; + + config += strspn(config, "\t "); + field = strsep(&config, "\n\t "); + + driver_name = malloc(sizeof *driver_name); + if (!driver_name) { + fprintf(stderr, PFX "Warning: couldn't allocate " + "driver name '%s'.\n", field); + continue; + } + + driver_name->name = strdup(field); + if (!driver_name->name) { + fprintf(stderr, PFX "Warning: couldn't allocate " + "driver name '%s'.\n", field); + free(driver_name); + continue; + } + + driver_name->next = driver_name_list; + driver_name_list = driver_name; + } else + 
fprintf(stderr, PFX "Warning: ignoring bad config directive " + "'%s' in file '%s'.\n", field, path); + } + + if (line) + free(line); + fclose(conf); - globfree(&so_glob); +out: + free(path); } -static struct ibv_device *init_drivers(const char *class_path, - const char *dev_name) +static void read_config(void) { - struct ibv_driver *driver; - struct ibv_device *dev; - int abi_ver = 0; - char sys_path[IBV_SYSFS_PATH_MAX]; - char ibdev_name[IBV_SYSFS_NAME_MAX]; - char ibdev_path[IBV_SYSFS_PATH_MAX]; - char value[8]; - enum ibv_node_type node_type; + DIR *conf_dir; + struct dirent *dent; - snprintf(sys_path, sizeof sys_path, "%s/%s", - class_path, dev_name); + conf_dir = opendir(IBV_CONFIG_DIR); + if (!conf_dir) { + fprintf(stderr, PFX "Warning: couldn't open config directory '%s'.\n", + IBV_CONFIG_DIR); + return; + } - if (ibv_read_sysfs_file(sys_path, "abi_version", value, sizeof value) > 0) - abi_ver = strtol(value, NULL, 10); + while ((dent = readdir(conf_dir))) { + if (dent->d_type != DT_REG) + continue; - if (ibv_read_sysfs_file(sys_path, "ibdev", ibdev_name, sizeof ibdev_name) < 0) { - fprintf(stderr, PFX "Warning: no ibdev class attr for %s\n", - sys_path); - return NULL; + read_config_file(IBV_CONFIG_DIR, dent->d_name); } - snprintf(ibdev_path, IBV_SYSFS_PATH_MAX, "%s/class/infiniband/%s", - ibv_get_sysfs_path(), ibdev_name); + closedir(conf_dir); +} - if (ibv_read_sysfs_file(ibdev_path, "node_type", value, sizeof value) < 0) { - fprintf(stderr, PFX "Warning: no node_type attr for %s\n", - ibdev_path); +static struct ibv_device *try_driver(struct ibv_driver *driver, + struct ibv_sysfs_dev *sysfs_dev) +{ + struct ibv_device *dev; + char value[8]; + enum ibv_node_type node_type; + + dev = driver->init_func(sysfs_dev->sysfs_path, sysfs_dev->abi_ver); + if (!dev) return NULL; + + if (ibv_read_sysfs_file(sysfs_dev->ibdev_path, "node_type", value, sizeof value) < 0) { + fprintf(stderr, PFX "Warning: no node_type attr under %s.\n", + sysfs_dev->ibdev_path); + 
node_type = IBV_NODE_UNKNOWN; + } else { + node_type = strtol(value, NULL, 10); + if (node_type < IBV_NODE_CA || node_type > IBV_NODE_RNIC) + node_type = IBV_NODE_UNKNOWN; } - node_type = strtol(value, NULL, 10); - if (node_type < IBV_NODE_CA || node_type > IBV_NODE_RNIC) - node_type = IBV_NODE_UNKNOWN; - for (driver = driver_list; driver; driver = driver->next) { - dev = driver->init_func(sys_path, abi_ver); - if (!dev) - continue; + switch (node_type) { + case IBV_NODE_CA: + case IBV_NODE_SWITCH: + case IBV_NODE_ROUTER: + dev->transport_type = IBV_TRANSPORT_IB; + break; + case IBV_NODE_RNIC: + dev->transport_type = IBV_TRANSPORT_IWARP; + break; + default: + dev->transport_type = IBV_TRANSPORT_UNKNOWN; + break; + } - dev->node_type = node_type; - - switch (node_type) { - case IBV_NODE_CA: - case IBV_NODE_SWITCH: - case IBV_NODE_ROUTER: - dev->transport_type = IBV_TRANSPORT_IB; - break; - case IBV_NODE_RNIC: - dev->transport_type = IBV_TRANSPORT_IWARP; - break; - default: - dev->transport_type = IBV_TRANSPORT_UNKNOWN; - break; - } + strcpy(dev->dev_name, sysfs_dev->sysfs_name); + strcpy(dev->dev_path, sysfs_dev->sysfs_path); + strcpy(dev->name, sysfs_dev->ibdev_name); + strcpy(dev->ibdev_path, sysfs_dev->ibdev_path); - strcpy(dev->dev_path, sys_path); - strcpy(dev->dev_name, dev_name); - strcpy(dev->name, ibdev_name); - strcpy(dev->ibdev_path, ibdev_path); + return dev; +} - return dev; - } +static struct ibv_device *try_drivers(struct ibv_sysfs_dev *sysfs_dev) +{ + struct ibv_driver *driver; + struct ibv_device *dev; - fprintf(stderr, PFX "Warning: no userspace device-specific driver found for %s\n" - " driver search path: ", dev_name); - if (user_path) - fprintf(stderr, "%s:", user_path); - fprintf(stderr, "%s\n", default_path); + for (driver = head_driver; driver; driver = driver->next) { + dev = try_driver(driver, sysfs_dev); + if (dev) + return dev; + } return NULL; } @@ -217,17 +381,33 @@ static int check_abi_version(const char *path) return 0; } +static void 
add_device(struct ibv_device *dev, + struct ibv_device ***dev_list, + int *num_devices, + int *list_size) +{ + struct ibv_device **new_list; + + if (*list_size <= *num_devices) { + *list_size = *list_size ? *list_size * 2 : 1; + new_list = realloc(*dev_list, *list_size * sizeof (struct ibv_device *)); + if (!new_list) + return; + *dev_list = new_list; + } + + (*dev_list)[(*num_devices)++] = dev; +} + HIDDEN int ibverbs_init(struct ibv_device ***list) { const char *sysfs_path; - char *wr_path, *dir; - char class_path[IBV_SYSFS_PATH_MAX]; - DIR *class_dir; - struct dirent *dent; + struct ibv_sysfs_dev *sysfs_dev, *next_dev; struct ibv_device *device; - struct ibv_device **new_list; int num_devices = 0; int list_size = 0; + int statically_linked = 0; + int no_driver = 0; *list = NULL; @@ -236,28 +416,6 @@ HIDDEN int ibverbs_init(struct ibv_device ***list) fprintf(stderr, PFX "Warning: fork()-safety requested " "but init failed\n"); - find_drivers(default_path); - - /* - * Only follow use path passed in through the calling user's - * environment if we're not running SUID. - */ - if (getuid() == geteuid()) { - user_path = getenv(OPENIB_DRIVER_PATH_ENV); - if (user_path) { - wr_path = strdupa(user_path); - while ((dir = strsep(&wr_path, ";:"))) - find_drivers(dir); - } - } - - /* - * Now check if a driver is statically linked. Since we push - * drivers onto our driver list, the last driver we find will - * be the first one we try. 
- */ - load_driver(NULL); - sysfs_path = ibv_get_sysfs_path(); if (!sysfs_path) { fprintf(stderr, PFX "Fatal: couldn't find sysfs mount.\n"); @@ -267,36 +425,69 @@ HIDDEN int ibverbs_init(struct ibv_device ***list) if (check_abi_version(sysfs_path)) return 0; - snprintf(class_path, sizeof class_path, "%s/class/infiniband_verbs", - sysfs_path); - class_dir = opendir(class_path); - if (!class_dir) { - fprintf(stderr, PFX "Fatal: couldn't open sysfs class " - "directory '%s'.\n", class_path); - return 0; + read_config(); + + find_sysfs_devs(); + + for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = sysfs_dev->next) { + device = try_drivers(sysfs_dev); + if (device) { + add_device(device, list, &num_devices, &list_size); + sysfs_dev->have_driver = 1; + } else + no_driver = 1; } - while ((dent = readdir(class_dir))) { - if (dent->d_name[0] == '.' || dent->d_type == DT_REG) - continue; + if (!no_driver) + goto out; + + /* + * Check if we can dlopen() ourselves. If this fails, + * libibverbs is probably statically linked into the + * executable, and we should just give up, since trying to + * dlopen() a driver module will fail spectacularly (loading a + * driver .so will bring in dynamic copies of libibverbs and + * libdl to go along with the static copies the executable + * has, which quickly leads to a crash. + */ + { + void *hand = dlopen(NULL, RTLD_NOW); + if (!hand) { + fprintf(stderr, PFX "Warning: dlopen(NULL) failed, " + "assuming static linking.\n"); + statically_linked = 1; + goto out; + } + dlclose(hand); + } + + load_drivers(); - device = init_drivers(class_path, dent->d_name); - if (!device) + for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = sysfs_dev->next) { + if (sysfs_dev->have_driver) continue; - if (list_size <= num_devices) { - list_size = list_size ? 
list_size * 2 : 1; - new_list = realloc(*list, list_size * sizeof (struct ibv_device *)); - if (!new_list) - goto out; - *list = new_list; + device = try_drivers(sysfs_dev); + if (device) { + add_device(device, list, &num_devices, &list_size); + sysfs_dev->have_driver = 1; } - - (*list)[num_devices++] = device; } - closedir(class_dir); - out: + for (sysfs_dev = sysfs_dev_list, + next_dev = sysfs_dev ? sysfs_dev->next : NULL; + sysfs_dev; + sysfs_dev = next_dev, next_dev = sysfs_dev ? sysfs_dev->next : NULL) { + if (!sysfs_dev->have_driver) { + fprintf(stderr, PFX "Warning: no userspace device-specific " + "driver found for %s\n", sysfs_dev->sysfs_path); + if (statically_linked) + fprintf(stderr, " When linking libibverbs statically, " + "driver must be statically linked too.\n"); + } + free(sysfs_dev); + } + return num_devices; } diff --git a/src/libibverbs.map b/src/libibverbs.map index aeb707a..795dd55 100644 --- a/src/libibverbs.map +++ b/src/libibverbs.map @@ -77,6 +77,7 @@ IBVERBS_1.0 { ibv_fork_init; ibv_dontfork_range; ibv_dofork_range; + ibv_register_driver; local: *; }; -- 1.4.4.1 From swise at opengridcomputing.com Wed Jan 10 14:20:50 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 10 Jan 2007 16:20:50 -0600 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> Message-ID: <1168467650.13836.7.camel@stevo-desktop> So libibverbs 1.1 will be part of ofed 1.2? On Wed, 2007-01-10 at 14:12 -0800, Roland Dreier wrote: > The latest libibverbs development tree changes how low-level drivers > are found. The driver must be in a shared object like > libcxgb3-rdmav2.so in the ordinary library path, rather than > infiniband/cxgb3.so as for libibverbs 1.0. 
In addition, the driver > must call ibv_register_driver() to pass its entry point to libibverbs, > rather than exporting an ibv_driver_init() function. > > Since the current libcxgb3 tree is targeted only for libibverbs > development trees and won't work with libibverbs 1.0, this patch > changes libcxgb3 to work with the new libibverbs way of loading > drivers without any autoconf tests. > > Signed-off-by: Roland Dreier > --- > Makefile.am | 14 +++++++------- > configure.in | 13 +++++++++++++ > cxgb3.driver | 1 + > libcxgb3.spec.in | 7 ++++--- > src/iwch.c | 27 ++++----------------------- > 5 files changed, 29 insertions(+), 33 deletions(-) > > diff --git a/Makefile.am b/Makefile.am > index 8f3a679..3dab25e 100644 > --- a/Makefile.am > +++ b/Makefile.am > @@ -1,10 +1,8 @@ > # $Id: Makefile.am 3802 2005-10-18 18:32:06Z tom $ > > -cxgb3libdir = $(libdir)/infiniband > +lib_LTLIBRARIES = src/libcxgb3.la > > -cxgb3lib_LTLIBRARIES = src/cxgb3.la > - > -src_cxgb3_la_CFLAGS = -g -Wall -D_GNU_SOURCE > +AM_CFLAGS = -g -Wall -D_GNU_SOURCE > > if HAVE_LD_VERSION_SCRIPT > cxgb3_version_script = -Wl,--version-script=$(srcdir)/src/iwch.map > @@ -12,16 +10,18 @@ else > cxgb3_version_script = > endif > > -src_cxgb3_la_SOURCES = src/cq.c src/iwch.c src/qp.c \ > +src_libcxgb3_la_SOURCES = src/cq.c src/iwch.c src/qp.c \ > src/verbs.c > -src_cxgb3_la_LDFLAGS = -avoid-version -module \ > +src_libcxgb3_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ > $(cxgb3_version_script) > +cxgb3confdir = $(sysconfdir)/libibverbs.d > +cxgb3conf_DATA = cxgb3.driver > > #DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ > # debian/libcxgb31.install debian/libcxgb3-dev.install debian/rules > > EXTRA_DIST = src/iwch.h src/iwch-abi.h \ > - src/iwch.map libcxgb3.spec.in $(DEBIAN) > + src/iwch.map libcxgb3.spec.in cxgb3.driver > > dist-hook: libcxgb3.spec > cp libcxgb3.spec $(distdir) > diff --git a/configure.in b/configure.in > index d113da9..81b5a30 100644 > 
--- a/configure.in > +++ b/configure.in > @@ -28,6 +28,19 @@ AC_C_CONST > dnl Checks for library functions > AC_CHECK_FUNCS(ibv_read_sysfs_file) > > +dnl Now check if for libibverbs device library extension > +dummy=if$$ > +cat < $dummy.c > +#include > +IBV_DEVICE_LIBRARY_EXTENSION > +IBV_VERSION > +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` > +rm -f $dummy.c > +if test $IBV_DEVICE_LIBRARY_EXTENSION = IBV_DEVICE_LIBRARY_EXTENSION; then > + AC_MSG_ERROR([IBV_DEVICE_LIBRARY_EXTENSION not defined. Is libibverbs new enough?]) > +fi > +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) > + > AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, > if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then > ac_cv_version_script=yes > diff --git a/cxgb3.driver b/cxgb3.driver > new file mode 100644 > index 0000000..cfa6186 > --- /dev/null > +++ b/cxgb3.driver > @@ -0,0 +1 @@ > +driver cxgb3 > diff --git a/libcxgb3.spec.in b/libcxgb3.spec.in > index e710808..959f659 100644 > --- a/libcxgb3.spec.in > +++ b/libcxgb3.spec.in > @@ -39,18 +39,19 @@ make %{?_smp_mflags} > rm -rf $RPM_BUILD_ROOT > %makeinstall > # remove unpackaged files from the buildroot > -rm -f $RPM_BUILD_ROOT%{_libdir}/infiniband/*.la > +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la > > %clean > rm -rf $RPM_BUILD_ROOT > > %files > %defattr(-,root,root,-) > -%{_libdir}/infiniband/cxgb3.so > +%{_libdir}/libcxgb3*.so > %doc AUTHORS COPYING ChangeLog README > +%config %{_sysconfdir}/libibverbs.d/cxgb3.driver > > %files devel > %defattr(-,root,root,-) > -%{_libdir}/infiniband/cxgb3.a > +%{_libdir}/libcxgb3*.a > > %changelog > diff --git a/src/iwch.c b/src/iwch.c > index 17f1d61..e972460 100644 > --- a/src/iwch.c > +++ b/src/iwch.c > @@ -41,16 +41,6 @@ > #include > #include > > -#ifdef HAVE_SYSFS_LIBSYSFS_H > -#include > -#endif > - > -#ifndef HAVE_IBV_READ_SYSFS_FILE > -#include > -#include > -#include > -#endif > - > #include "iwch.h" > #include "iwch-abi.h" 
> > @@ -173,8 +163,8 @@ static struct ibv_device_ops iwch_dev_ops = { > .free_context = iwch_free_context > }; > > -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, > - int abi_version) > +static struct ibv_device *cxgb3_driver_init(const char *uverbs_sys_path, > + int abi_version) > { > char value[16]; > char s[32]; > @@ -251,16 +241,7 @@ err1: > return NULL; > } > > -#ifdef HAVE_SYSFS_LIBSYSFS_H > -struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) > +static __attribute__((constructor)) void cxgb3_register_driver(void) > { > - int abi_ver = 0; > - char value[8]; > - > - if (ibv_read_sysfs_file(sysdev->path, "abi_version", > - value, sizeof value) > 0) > - abi_ver = strtol(value, NULL, 10); > - > - return ibv_driver_init(sysdev->path, abi_ver); > + ibv_register_driver("cxgb3", cxgb3_driver_init); > } > -#endif /* HAVE_SYSFS_LIBSYSFS_H */ From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 2/7] libmthca: Update libmthca for new libibverbs driver handling In-Reply-To: <20071101421.G6zer7drP3SHahok@cisco.com> Message-ID: <20071101421.gBvhtdE56t6K1h8P@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libmthca-rdmav2.so in the ordinary library path, rather than infiniband/mthca.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. This patch adds autoconf tests to libmthca to detect whether it is being built against a stable libibverbs 1.0 tree or the new libibverbs development tree. Then based on the result, it builds a library with the appropriate name and with the correct driver initialization handling. 
Signed-off-by: Roland Dreier --- Makefile.am | 25 +++++++++++++++++-------- configure.in | 16 ++++++++++++++-- mthca.driver | 1 + src/mthca.c | 23 ++++++++++++----------- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/Makefile.am b/Makefile.am index d98bf05..e9be461 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,20 +1,29 @@ -mthcalibdir = $(libdir)/infiniband - -mthcalib_LTLIBRARIES = src/mthca.la - -src_mthca_la_CFLAGS = -g -Wall -D_GNU_SOURCE +AM_CFLAGS = -g -Wall -D_GNU_SOURCE mthca_version_script = @MTHCA_VERSION_SCRIPT@ -src_mthca_la_SOURCES = src/ah.c src/buf.c src/cq.c src/memfree.c src/mthca.c \ +MTHCA_SOURCES = src/ah.c src/buf.c src/cq.c src/memfree.c src/mthca.c \ src/qp.c src/srq.c src/verbs.c -src_mthca_la_LDFLAGS = -avoid-version -module $(mthca_version_script) + +if HAVE_IBV_DEVICE_LIBRARY_EXTENSION + lib_LTLIBRARIES = src/libmthca.la + src_libmthca_la_SOURCES = $(MTHCA_SOURCES) + src_libmthca_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ + $(mthca_version_script) + mthcaconfdir = $(sysconfdir)/libibverbs.d + mthcaconf_DATA = mthca.driver +else + mthcalibdir = $(libdir)/infiniband + mthcalib_LTLIBRARIES = src/mthca.la + src_mthca_la_SOURCES = $(MTHCA_SOURCES) + src_mthca_la_LDFLAGS = -avoid-version -module $(mthca_version_script) +endif DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ debian/libmthca1.install debian/libmthca-dev.install debian/rules EXTRA_DIST = src/doorbell.h src/mthca.h src/mthca-abi.h src/wqe.h \ - src/mthca.map libmthca.spec.in + src/mthca.map libmthca.spec.in mthca.driver dist-hook: libmthca.spec cp libmthca.spec $(distdir) diff --git a/configure.in b/configure.in index 475fd56..a749a93 100644 --- a/configure.in +++ b/configure.in @@ -32,7 +32,6 @@ dnl Checks for header files. AC_CHECK_HEADER(infiniband/driver.h, [], AC_MSG_ERROR([ not found. 
libmthca requires libibverbs.])) AC_HEADER_STDC -AC_CHECK_HEADERS(sysfs/libsysfs.h) AC_CHECK_HEADER(valgrind/memcheck.h, memcheck_ok=yes, memcheck_ok=no) if test $want_valgrind = yes && test $memcheck_ok = no; then @@ -44,7 +43,20 @@ AC_C_CONST AC_CHECK_SIZEOF(long) dnl Checks for library functions -AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_dontfork_range ibv_dofork_range) +AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_dontfork_range ibv_dofork_range \ + ibv_register_driver) + +dnl Now check if for libibverbs 1.0 vs 1.1 +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION, + test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION) +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, [if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then diff --git a/mthca.driver b/mthca.driver new file mode 100644 index 0000000..5880a47 --- /dev/null +++ b/mthca.driver @@ -0,0 +1 @@ +driver mthca diff --git a/src/mthca.c b/src/mthca.c index 27a72a4..81ac891 100644 --- a/src/mthca.c +++ b/src/mthca.c @@ -43,7 +43,7 @@ #include #include -#ifdef HAVE_SYSFS_LIBSYSFS_H +#ifndef HAVE_IBV_REGISTER_DRIVER #include #endif @@ -256,8 +256,8 @@ static int ibv_read_sysfs_file(const char *dir, const char *file, } #endif /* HAVE_IBV_READ_SYSFS_FILE */ -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *mthca_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[8]; struct mthca_device *dev; @@ -296,15 +296,16 @@ found: return &dev->ibv_dev; } +#ifdef HAVE_IBV_REGISTER_DRIVER +static __attribute__((constructor)) void mthca_register_driver(void) +{ + ibv_register_driver("mthca", mthca_driver_init); +} +#else /* * Export the old libsysfs sysfs_class_device-based 
driver entry point - * if libsysfs headers are installed. It doesn't hurt to export it, - * even if libibverbs is new enough not to use it; but if libsysfs - * headers are not installed, we can assume that the version of - * libibverbs we are building against is new enough not to use - * openib_driver_init(). + * if libibverbs does not export an ibv_register_driver() function. */ -#ifdef HAVE_SYSFS_LIBSYSFS_H struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { int abi_ver = 0; @@ -314,6 +315,6 @@ struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) value, sizeof value) > 0) abi_ver = strtol(value, NULL, 10); - return ibv_driver_init(sysdev->path, abi_ver); + return mthca_driver_init(sysdev->path, abi_ver); } -#endif /* HAVE_SYSFS_LIBSYSFS_H */ +#endif /* HAVE_IBV_REGISTER_DRIVER */ -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 0/7] userspace verbs changes Message-ID: <20071101421.hNJYLZrMYBxmo1ah@cisco.com> Here is a set of patches that finally update the libibverbs driver loading method (for the 1.1 development tree) as described before, and fix up all the device-specific libraries to match the new libibverbs. I've pushed out the changes to libibverbs itself and libmthca too. From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 4/7] libehca: Update to match new ibv_cmd_reg_mr() prototype In-Reply-To: <20071101421.rBs4ILCYFLryStPj@cisco.com> Message-ID: <20071101421.ppvNxwuXku4yj4IF@cisco.com> ibv_cmd_reg_mr() now takes extra parameters to allow low-level drivers to pass back a response. Add a test for the preprocessor define IBV_CMD_REG_MR_HAS_RESP_PARAMS, and if it exists, have libehca match the new prototype. 
Signed-off-by: Roland Dreier --- src/ehca_u_mrmw.c | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/src/ehca_u_mrmw.c b/src/ehca_u_mrmw.c index a10017d..72ffbc6 100644 --- a/src/ehca_u_mrmw.c +++ b/src/ehca_u_mrmw.c @@ -65,8 +65,18 @@ struct ibv_mr *ehcau_reg_mr(struct ibv_pd *pd, return NULL; } +#ifdef IBV_CMD_REG_MR_HAS_RESP_PARAMS + { + struct ibv_reg_mr_resp resp; + + ret = ibv_cmd_reg_mr(pd, addr, length, (u64)(unsigned long)addr, + access, mr, &cmd, sizeof(struct ibv_reg_mr), + &resp, sizeof(struct ibv_reg_mr_resp)); + } +#else ret = ibv_cmd_reg_mr(pd, addr, length, (u64)(unsigned long)addr, access, mr, &cmd, sizeof(struct ibv_reg_mr)); +#endif if (ret) { EDEB_ERR(4, "ibv_cmd_reg_mr ret=%x", ret); free(mr); -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 6/7] libamso: Update libamso for new libibverbs driver handling In-Reply-To: <20071101421.3YBqlT2XXId4NxdY@cisco.com> Message-ID: <20071101421.ImYuNVBtRs7hYXkl@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libamso-rdmav2.so in the ordinary library path, rather than infiniband/amso.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. This patch adds autoconf tests to libamso to detect whether it is being built against a stable libibverbs 1.0 tree or the new libibverbs development tree. Then based on the result, it builds a library with the appropriate name and with the correct driver initialization handling. 
Signed-off-by: Roland Dreier --- Makefile.am | 25 +++++++++++++++---------- amso.driver | 1 + configure.in | 15 +++++++++++++-- src/amso.c | 16 +++++++++++----- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/Makefile.am b/Makefile.am index 9e2cbc1..d1749fd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,10 +1,6 @@ # $Id: $ -amsolibdir = $(libdir)/infiniband - -amsolib_LTLIBRARIES = src/amso.la - -src_amso_la_CFLAGS = -g -Wall -D_GNU_SOURCE +AM_CFLAGS = -g -Wall -D_GNU_SOURCE if HAVE_LD_VERSION_SCRIPT amso_version_script = -Wl,--version-script=$(srcdir)/src/amso.map @@ -12,16 +8,25 @@ else amso_version_script = endif -src_amso_la_SOURCES = src/cq.c src/amso.c src/qp.c \ - src/verbs.c -src_amso_la_LDFLAGS = -avoid-version -module \ - $(amso_version_script) +AMSO_SOURCES = src/cq.c src/amso.c src/qp.c src/verbs.c + +if HAVE_IBV_DEVICE_LIBRARY_EXTENSION + lib_LTLIBRARIES = src/libamso.la + src_libamso_la_SOURCES = $(AMSO_SOURCES) + src_libamso_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ + $(amso_version_script) +else + amsolibdir = $(libdir)/infiniband + amsolib_LTLIBRARIES = src/amso.la + src_amso_la_SOURCES = $(AMSO_SOURCES) + src_amso_la_LDFLAGS = -avoid-version -module $(amso_version_script) +endif #DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ # debian/libamso1.install debian/libamso-dev.install debian/rules EXTRA_DIST = src/amso.h src/amso-abi.h \ - src/amso.map libamso.spec.in $(DEBIAN) + src/amso.map libamso.spec.in amso.driver dist-hook: libamso.spec cp libamso.spec $(distdir) diff --git a/amso.driver b/amso.driver new file mode 100644 index 0000000..272dcc2 --- /dev/null +++ b/amso.driver @@ -0,0 +1 @@ +driver amso diff --git a/configure.in b/configure.in index 4a920c4..d3344d2 100644 --- a/configure.in +++ b/configure.in @@ -16,7 +16,6 @@ AC_CHECK_LIB(ibverbs, ibv_get_device_list, [], AC_MSG_ERROR([ibv_get_device_list() not found. 
libmthca requires libibverbs.])) dnl Checks for header files. -AC_CHECK_HEADERS(sysfs/libsysfs.h) AC_CHECK_HEADER(infiniband/driver.h, [], AC_MSG_ERROR([ not found. Is libibverbs installed?])) AC_HEADER_STDC @@ -26,7 +25,19 @@ AC_C_CONST AC_CHECK_SIZEOF(long) dnl Checks for library functions -AC_CHECK_FUNCS(ibv_read_sysfs_file) +AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_register_driver) + +dnl Now check if for libibverbs 1.0 vs 1.1 +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION, + test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION) +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then diff --git a/src/amso.c b/src/amso.c index 60a44d1..bae4220 100644 --- a/src/amso.c +++ b/src/amso.c @@ -40,7 +40,7 @@ #include #include -#ifdef HAVE_SYSFS_LIBSYSFS_H +#ifndef HAVE_IBV_REGISTER_DRIVER #include #endif @@ -135,8 +135,8 @@ static struct ibv_device_ops amso_dev_ops = { .free_context = amso_free_context }; -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *amso_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[8]; struct amso_device *dev; @@ -174,7 +174,13 @@ found: return &dev->ibv_dev; } -#ifdef HAVE_SYSFS_LIBSYSFS_H + +#ifdef HAVE_IBV_REGISTER_DRIVER +static __attribute__((constructor)) void amso_register_driver(void) +{ + ibv_register_driver("amso", amso_driver_init); +} +#else struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { int abi_ver = 0; @@ -186,4 +192,4 @@ struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) return ibv_driver_init(sysdev->path, abi_ver); } -#endif /* HAVE_SYSFS_LIBSYSFS_H */ +#endif /* 
HAVE_IBV_REGISTER_DRIVER */ -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 3/7] libipathverbs: Update libipathverbs for new libibverbs driver handling In-Reply-To: <20071101421.gBvhtdE56t6K1h8P@cisco.com> Message-ID: <20071101421.rBs4ILCYFLryStPj@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libipathverbs-rdmav2.so in the ordinary library path, rather than infiniband/ipathverbs.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. Since the current libipathverbs tree is targeted only for libibverbs development trees and won't work with libibverbs 1.0, this patch changes libipathverbs to work with the new libibverbs way of loading drivers without any autoconf tests. Signed-off-by: Roland Dreier --- Makefile.am | 15 ++++++++------- configure.in | 13 +++++++++++++ ipathverbs.driver | 1 + libipathverbs.spec.in | 5 +++-- src/ipathverbs.c | 9 +++++++-- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/Makefile.am b/Makefile.am index f86e1e2..c1ef44f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,11 +32,9 @@ # combinations of this program with other software, or any other # product whatsoever. 
-ipathverbslibdir = $(libdir)/infiniband +lib_LTLIBRARIES = src/libipathverbs.la -ipathverbslib_LTLIBRARIES = src/ipathverbs.la - -src_ipathverbs_la_CFLAGS = -Wall -D_GNU_SOURCE +AM_CFLAGS = -Wall -D_GNU_SOURCE if HAVE_LD_VERSION_SCRIPT ipathverbs_version_script = -Wl,--version-script=$(srcdir)/src/ipathverbs.map @@ -44,14 +42,17 @@ else ipathverbs_version_script = endif -src_ipathverbs_la_SOURCES = src/ipathverbs.c src/verbs.c -src_ipathverbs_la_LDFLAGS = -avoid-version -module \ +src_libipathverbs_la_SOURCES = src/ipathverbs.c src/verbs.c +src_libipathverbs_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ $(ipathverbs_version_script) +ipathverbsconfdir = $(sysconfdir)/libibverbs.d +ipathverbsconf_DATA = ipathverbs.driver EXTRA_DIST = src/ipathverbs.h \ src/ipath-abi.h \ src/ipathverbs.map \ - libipathverbs.spec.in + libipathverbs.spec.in \ + ipathverbs.driver dist-hook: libipathverbs.spec cp libipathverbs.spec $(distdir) diff --git a/configure.in b/configure.in index 727cd65..ab1a86e 100644 --- a/configure.in +++ b/configure.in @@ -56,6 +56,19 @@ dnl Checks for typedefs, structures, and compiler characteristics. AC_C_CONST AC_CHECK_SIZEOF(long) +dnl Now check if for libibverbs device library extension +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +if test $IBV_DEVICE_LIBRARY_EXTENSION = IBV_DEVICE_LIBRARY_EXTENSION; then + AC_MSG_ERROR([IBV_DEVICE_LIBRARY_EXTENSION not defined. 
Is libibverbs new enough?]) +fi +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) + AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then ac_cv_version_script=yes diff --git a/ipathverbs.driver b/ipathverbs.driver new file mode 100644 index 0000000..d212578 --- /dev/null +++ b/ipathverbs.driver @@ -0,0 +1 @@ +driver ipathverbs diff --git a/libipathverbs.spec.in b/libipathverbs.spec.in index 4726102..1a4b2bd 100644 --- a/libipathverbs.spec.in +++ b/libipathverbs.spec.in @@ -77,9 +77,10 @@ rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root) -%{_libdir}/infiniband/ipathverbs.so +%{_libdir}/libipathverbs*.so %doc AUTHORS COPYING +%config %{_sysconfdir}/libibverbs.d/ipathverbs.driver %files devel %defattr(-,root,root,-) -%{_libdir}/infiniband/ipathverbs.a +%{_libdir}/infiniband/libipathverbs*.a diff --git a/src/ipathverbs.c b/src/ipathverbs.c index d9fadca..22f2f48 100644 --- a/src/ipathverbs.c +++ b/src/ipathverbs.c @@ -165,8 +165,8 @@ static struct ibv_device_ops ipath_dev_ops = { .free_context = ipath_free_context }; -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *ipathverbs_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[8]; struct ipath_device *dev; @@ -204,3 +204,8 @@ found: return &dev->ibv_dev; } + +static __attribute__((constructor)) void ipathverbs_register_driver(void) +{ + ibv_register_driver("ipathverbs", ipathverbs_driver_init); +} -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 1/7] libibverbs: Implement new method for finding and loading device-specific drivers In-Reply-To: <20071101421.hNJYLZrMYBxmo1ah@cisco.com> Message-ID: <20071101421.G6zer7drP3SHahok@cisco.com> Export an ibv_register_driver() entry point, and expect plugins to call it from 
__attribute__((constructor)) code. This will allow multiple drivers to be statically linked in. Also read config files and then use dlopen() with a relative path to find drivers (rather than searching a specific lib/infiniband/ directory for .so files). This allows multiple versions of a driver to be installed in parallel, and also allows for optimized drivers in places like /lib/i686. Drivers should no longer export an ibv_driver_init() function. Instead, they should add a function (which can be static) with __attribute__((constructor)) that calls ibv_register_driver() to register the driver's probe function. Also, drivers should install a file with a line "driver <name>" under ${sysconfdir}/libibverbs.d. Signed-off-by: Roland Dreier --- Makefile.am | 2 +- README | 2 +- include/infiniband/driver.h | 19 +- src/ibverbs.h | 5 - src/init.c | 489 ++++++++++++++++++++++++++++++------------- src/libibverbs.map | 1 + 6 files changed, 350 insertions(+), 168 deletions(-) diff --git a/Makefile.am b/Makefile.am index 391c522..35f4468 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,7 +4,7 @@ lib_LTLIBRARIES = src/libibverbs.la AM_CFLAGS = -g -Wall -D_GNU_SOURCE -src_libibverbs_la_CFLAGS = -g -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\" +src_libibverbs_la_CFLAGS = $(AM_CFLAGS) -DIBV_CONFIG_DIR=\"$(sysconfdir)/libibverbs.d\" libibverbs_version_script = @LIBIBVERBS_VERSION_SCRIPT@ diff --git a/README b/README index 6190895..634fc3b 100644 --- a/README +++ b/README @@ -80,7 +80,7 @@ uninitialized" warnings. This code adds trivial overhead to the critical performance path, so it is disabled by default. The intent is that production users can use a "normal" build of libibverbs and developers can use the "valgrind debug" build by simply switching -their LD_LIBRARY_PATH and/or OPENIB_DRIVER_PATH environment variables. +their LD_LIBRARY_PATH environment variables.
Libibverbs needs some header files from Valgrind in order to compile this support; it is important to use the header files from the same diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h index f759d52..8e2418c 100644 --- a/include/infiniband/driver.h +++ b/include/infiniband/driver.h @@ -47,21 +47,16 @@ #endif /* __cplusplus */ /* - * Device-specific drivers should declare their device init function - * as below (the name must be "openib_driver_init"): - * - * struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - * int abi_version); - * - * libibverbs will call each driver's ibv_driver_init() function once - * for each InfiniBand device. If the device is one that the driver - * can support, it should return a struct ibv_device * with the ops - * member filled in. If the driver does not support the device, it - * should return NULL from openib_driver_init(). + * Extension that low-level drivers should add to their .so filename + * (probably via libtool "-release" option). For example a low-level + * driver named "libfoo" should build a plug-in named "libfoo-rdmav2.so". 
*/ +#define IBV_DEVICE_LIBRARY_EXTENSION rdmav2 -typedef struct ibv_device *(*ibv_driver_init_func)(const char *, int); +typedef struct ibv_device *(*ibv_driver_init_func)(const char *uverbs_sys_path, + int abi_version); +void ibv_register_driver(const char *name, ibv_driver_init_func init_func); int ibv_cmd_get_context(struct ibv_context *context, struct ibv_get_context *cmd, size_t cmd_size, struct ibv_get_context_resp *resp, size_t resp_size); diff --git a/src/ibverbs.h b/src/ibverbs.h index 4c6c07e..14330f8 100644 --- a/src/ibverbs.h +++ b/src/ibverbs.h @@ -58,11 +58,6 @@ #define PFX "libibverbs: " -struct ibv_driver { - ibv_driver_init_func init_func; - struct ibv_driver *next; -}; - struct ibv_abi_compat_v2 { struct ibv_comp_channel channel; pthread_mutex_t in_use; diff --git a/src/init.c b/src/init.c index fe70852..3505733 100644 --- a/src/init.c +++ b/src/init.c @@ -46,150 +46,314 @@ #include "ibverbs.h" -#ifndef OPENIB_DRIVER_PATH_ENV -# define OPENIB_DRIVER_PATH_ENV "OPENIB_DRIVER_PATH" -#endif - HIDDEN int abi_ver; -static const char default_path[] = DRIVER_PATH; -static const char *user_path; - -static struct ibv_driver *driver_list; - -static void load_driver(char *so_path) +struct ibv_sysfs_dev { + char sysfs_name[IBV_SYSFS_NAME_MAX]; + char ibdev_name[IBV_SYSFS_NAME_MAX]; + char sysfs_path[IBV_SYSFS_PATH_MAX]; + char ibdev_path[IBV_SYSFS_PATH_MAX]; + struct ibv_sysfs_dev *next; + int abi_ver; + int have_driver; +}; + +struct ibv_driver_name { + char *name; + struct ibv_driver_name *next; +}; + +struct ibv_driver { + const char *name; + ibv_driver_init_func init_func; + struct ibv_driver *next; +}; + +static struct ibv_sysfs_dev *sysfs_dev_list; +static struct ibv_driver_name *driver_name_list; +static struct ibv_driver *head_driver, *tail_driver; + +static void find_sysfs_devs(void) { - void *dlhandle; - ibv_driver_init_func init_func; - struct ibv_driver *driver; + char class_path[IBV_SYSFS_PATH_MAX]; + DIR *class_dir; + struct dirent *dent; + 
struct ibv_sysfs_dev *sysfs_dev; + char value[8]; - dlhandle = dlopen(so_path, RTLD_NOW); - if (!dlhandle) { - fprintf(stderr, PFX "Warning: couldn't load driver %s: %s\n", - so_path, dlerror()); + snprintf(class_path, sizeof class_path, "%s/class/infiniband_verbs", + ibv_get_sysfs_path()); + + class_dir = opendir(class_path); + if (!class_dir) { + fprintf(stderr, PFX "Fatal: couldn't open sysfs class " + "directory '%s'.\n", class_path); return; } - dlerror(); - init_func = dlsym(dlhandle, "ibv_driver_init"); - if (dlerror() != NULL || !init_func) { - dlclose(dlhandle); - return; + while ((dent = readdir(class_dir))) { + if (dent->d_name[0] == '.' || dent->d_type == DT_REG) + continue; + + sysfs_dev = malloc(sizeof *sysfs_dev); + if (!sysfs_dev) { + fprintf(stderr, PFX "Warning: couldn't allocate sysfs dev " + "for '%s'.\n", dent->d_name); + continue; + } + + snprintf(sysfs_dev->sysfs_name, sizeof sysfs_dev->sysfs_name, + "%s", dent->d_name); + snprintf(sysfs_dev->sysfs_path, sizeof sysfs_dev->sysfs_path, + "%s/%s", class_path, dent->d_name); + + if (ibv_read_sysfs_file(sysfs_dev->sysfs_path, "ibdev", + sysfs_dev->ibdev_name, + sizeof sysfs_dev->ibdev_name) < 0) { + fprintf(stderr, PFX "Warning: no ibdev class attr for '%s'.\n", + dent->d_name); + free(sysfs_dev); + continue; + } + + snprintf(sysfs_dev->ibdev_path, sizeof sysfs_dev->ibdev_path, + "%s/class/infiniband/%s", ibv_get_sysfs_path(), + sysfs_dev->ibdev_name); + + sysfs_dev->next = NULL; + sysfs_dev->have_driver = 0; + if (ibv_read_sysfs_file(sysfs_dev->sysfs_path, "abi_version", + value, sizeof value) > 0) + sysfs_dev->abi_ver = strtol(value, NULL, 10); + else + sysfs_dev->abi_ver = 0; + + sysfs_dev_list = sysfs_dev; } + closedir(class_dir); +} + +void ibv_register_driver(const char *name, ibv_driver_init_func init_func) +{ + struct ibv_driver *driver; + driver = malloc(sizeof *driver); if (!driver) { - fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n", so_path); - dlclose(dlhandle); + 
fprintf(stderr, PFX "Warning: couldn't allocate driver for %s\n", name); return; } + driver->name = name; driver->init_func = init_func; - driver->next = driver_list; - driver_list = driver; + driver->next = NULL; + + if (tail_driver) + tail_driver->next = driver; + else + head_driver = driver; + tail_driver = driver; } -static void find_drivers(const char *dir) +static void load_driver(const char *name) { - size_t len = strlen(dir); - glob_t so_glob; - char *pat; - int ret; - int i; + char *so_name; + void *dlhandle; - if (!len) +#define __IBV_QUOTE(x) #x +#define IBV_QUOTE(x) __IBV_QUOTE(x) + + if (asprintf(&so_name, + "lib%s-" IBV_QUOTE(IBV_DEVICE_LIBRARY_EXTENSION) ".so", + name) < 0) { + fprintf(stderr, PFX "Warning: couldn't load driver '%s'.\n", + name); return; + } + + dlhandle = dlopen(so_name, RTLD_NOW); + if (!dlhandle) { + fprintf(stderr, PFX "Warning: couldn't load driver '%s': %s\n", + name, dlerror()); + goto out; + } + +out: + free(so_name); +} - while (len && dir[len - 1] == '/') - --len; +static void load_drivers(void) +{ + struct ibv_driver_name *name, *next_name; + const char *env; + char *list, *env_name; - asprintf(&pat, "%.*s/*.so", (int) len, dir); + /* + * Only use drivers passed in through the calling user's + * environment if we're not running setuid. + */ + if (getuid() == geteuid()) { + if ((env = getenv("RDMAV_DRIVERS"))) { + list = strdupa(env); + while ((env_name = strsep(&list, ":;"))) + load_driver(env_name); + } else if ((env = getenv("IBV_DRIVERS"))) { + list = strdupa(env); + while ((env_name = strsep(&list, ":;"))) + load_driver(env_name); + } + } - ret = glob(pat, 0, NULL, &so_glob); - free(pat); + for (name = driver_name_list, next_name = name ? name->next : NULL; + name; + name = next_name, next_name = name ? 
name->next : NULL) { + load_driver(name->name); + free(name->name); + free(name); + } +} - if (ret) { - if (ret != GLOB_NOMATCH) - fprintf(stderr, PFX "Warning: couldn't search %s\n", pat); +static void read_config_file(const char *dir, const char *name) +{ + char *path; + FILE *conf; + char *line = NULL; + char *config; + char *field; + size_t buflen = 0; + ssize_t len; + + if (asprintf(&path, "%s/%s", dir, name) < 0) { + fprintf(stderr, PFX "Warning: couldn't read config file %s/%s.\n", + dir, name); return; } - for (i = 0; i < so_glob.gl_pathc; ++i) - load_driver(so_glob.gl_pathv[i]); + conf = fopen(path, "r"); + if (!conf) { + fprintf(stderr, PFX "Warning: couldn't read config file %s.\n", + path); + goto out; + } + + while ((len = getline(&line, &buflen, conf)) != -1) { + config = line + strspn(line, "\t "); + if (config[0] == '\n' || config[0] == '#') + continue; + + field = strsep(&config, "\n\t "); + + if (strcmp(field, "driver") == 0) { + struct ibv_driver_name *driver_name; + + config += strspn(config, "\t "); + field = strsep(&config, "\n\t "); + + driver_name = malloc(sizeof *driver_name); + if (!driver_name) { + fprintf(stderr, PFX "Warning: couldn't allocate " + "driver name '%s'.\n", field); + continue; + } + + driver_name->name = strdup(field); + if (!driver_name->name) { + fprintf(stderr, PFX "Warning: couldn't allocate " + "driver name '%s'.\n", field); + free(driver_name); + continue; + } + + driver_name->next = driver_name_list; + driver_name_list = driver_name; + } else + fprintf(stderr, PFX "Warning: ignoring bad config directive " + "'%s' in file '%s'.\n", field, path); + } + + if (line) + free(line); + fclose(conf); - globfree(&so_glob); +out: + free(path); } -static struct ibv_device *init_drivers(const char *class_path, - const char *dev_name) +static void read_config(void) { - struct ibv_driver *driver; - struct ibv_device *dev; - int abi_ver = 0; - char sys_path[IBV_SYSFS_PATH_MAX]; - char ibdev_name[IBV_SYSFS_NAME_MAX]; - char 
ibdev_path[IBV_SYSFS_PATH_MAX]; - char value[8]; - enum ibv_node_type node_type; + DIR *conf_dir; + struct dirent *dent; - snprintf(sys_path, sizeof sys_path, "%s/%s", - class_path, dev_name); + conf_dir = opendir(IBV_CONFIG_DIR); + if (!conf_dir) { + fprintf(stderr, PFX "Warning: couldn't open config directory '%s'.\n", + IBV_CONFIG_DIR); + return; + } - if (ibv_read_sysfs_file(sys_path, "abi_version", value, sizeof value) > 0) - abi_ver = strtol(value, NULL, 10); + while ((dent = readdir(conf_dir))) { + if (dent->d_type != DT_REG) + continue; - if (ibv_read_sysfs_file(sys_path, "ibdev", ibdev_name, sizeof ibdev_name) < 0) { - fprintf(stderr, PFX "Warning: no ibdev class attr for %s\n", - sys_path); - return NULL; + read_config_file(IBV_CONFIG_DIR, dent->d_name); } - snprintf(ibdev_path, IBV_SYSFS_PATH_MAX, "%s/class/infiniband/%s", - ibv_get_sysfs_path(), ibdev_name); + closedir(conf_dir); +} - if (ibv_read_sysfs_file(ibdev_path, "node_type", value, sizeof value) < 0) { - fprintf(stderr, PFX "Warning: no node_type attr for %s\n", - ibdev_path); +static struct ibv_device *try_driver(struct ibv_driver *driver, + struct ibv_sysfs_dev *sysfs_dev) +{ + struct ibv_device *dev; + char value[8]; + enum ibv_node_type node_type; + + dev = driver->init_func(sysfs_dev->sysfs_path, sysfs_dev->abi_ver); + if (!dev) return NULL; + + if (ibv_read_sysfs_file(sysfs_dev->ibdev_path, "node_type", value, sizeof value) < 0) { + fprintf(stderr, PFX "Warning: no node_type attr under %s.\n", + sysfs_dev->ibdev_path); + node_type = IBV_NODE_UNKNOWN; + } else { + node_type = strtol(value, NULL, 10); + if (node_type < IBV_NODE_CA || node_type > IBV_NODE_RNIC) + node_type = IBV_NODE_UNKNOWN; } - node_type = strtol(value, NULL, 10); - if (node_type < IBV_NODE_CA || node_type > IBV_NODE_RNIC) - node_type = IBV_NODE_UNKNOWN; - for (driver = driver_list; driver; driver = driver->next) { - dev = driver->init_func(sys_path, abi_ver); - if (!dev) - continue; + switch (node_type) { + case 
IBV_NODE_CA: + case IBV_NODE_SWITCH: + case IBV_NODE_ROUTER: + dev->transport_type = IBV_TRANSPORT_IB; + break; + case IBV_NODE_RNIC: + dev->transport_type = IBV_TRANSPORT_IWARP; + break; + default: + dev->transport_type = IBV_TRANSPORT_UNKNOWN; + break; + } - dev->node_type = node_type; - - switch (node_type) { - case IBV_NODE_CA: - case IBV_NODE_SWITCH: - case IBV_NODE_ROUTER: - dev->transport_type = IBV_TRANSPORT_IB; - break; - case IBV_NODE_RNIC: - dev->transport_type = IBV_TRANSPORT_IWARP; - break; - default: - dev->transport_type = IBV_TRANSPORT_UNKNOWN; - break; - } + strcpy(dev->dev_name, sysfs_dev->sysfs_name); + strcpy(dev->dev_path, sysfs_dev->sysfs_path); + strcpy(dev->name, sysfs_dev->ibdev_name); + strcpy(dev->ibdev_path, sysfs_dev->ibdev_path); - strcpy(dev->dev_path, sys_path); - strcpy(dev->dev_name, dev_name); - strcpy(dev->name, ibdev_name); - strcpy(dev->ibdev_path, ibdev_path); + return dev; +} - return dev; - } +static struct ibv_device *try_drivers(struct ibv_sysfs_dev *sysfs_dev) +{ + struct ibv_driver *driver; + struct ibv_device *dev; - fprintf(stderr, PFX "Warning: no userspace device-specific driver found for %s\n" - " driver search path: ", dev_name); - if (user_path) - fprintf(stderr, "%s:", user_path); - fprintf(stderr, "%s\n", default_path); + for (driver = head_driver; driver; driver = driver->next) { + dev = try_driver(driver, sysfs_dev); + if (dev) + return dev; + } return NULL; } @@ -217,17 +381,33 @@ static int check_abi_version(const char *path) return 0; } +static void add_device(struct ibv_device *dev, + struct ibv_device ***dev_list, + int *num_devices, + int *list_size) +{ + struct ibv_device **new_list; + + if (*list_size <= *num_devices) { + *list_size = *list_size ? 
*list_size * 2 : 1; + new_list = realloc(*dev_list, *list_size * sizeof (struct ibv_device *)); + if (!new_list) + return; + *dev_list = new_list; + } + + (*dev_list)[(*num_devices)++] = dev; +} + HIDDEN int ibverbs_init(struct ibv_device ***list) { const char *sysfs_path; - char *wr_path, *dir; - char class_path[IBV_SYSFS_PATH_MAX]; - DIR *class_dir; - struct dirent *dent; + struct ibv_sysfs_dev *sysfs_dev, *next_dev; struct ibv_device *device; - struct ibv_device **new_list; int num_devices = 0; int list_size = 0; + int statically_linked = 0; + int no_driver = 0; *list = NULL; @@ -236,28 +416,6 @@ HIDDEN int ibverbs_init(struct ibv_device ***list) fprintf(stderr, PFX "Warning: fork()-safety requested " "but init failed\n"); - find_drivers(default_path); - - /* - * Only follow use path passed in through the calling user's - * environment if we're not running SUID. - */ - if (getuid() == geteuid()) { - user_path = getenv(OPENIB_DRIVER_PATH_ENV); - if (user_path) { - wr_path = strdupa(user_path); - while ((dir = strsep(&wr_path, ";:"))) - find_drivers(dir); - } - } - - /* - * Now check if a driver is statically linked. Since we push - * drivers onto our driver list, the last driver we find will - * be the first one we try. 
- */ - load_driver(NULL); - sysfs_path = ibv_get_sysfs_path(); if (!sysfs_path) { fprintf(stderr, PFX "Fatal: couldn't find sysfs mount.\n"); @@ -267,36 +425,69 @@ HIDDEN int ibverbs_init(struct ibv_device ***list) if (check_abi_version(sysfs_path)) return 0; - snprintf(class_path, sizeof class_path, "%s/class/infiniband_verbs", - sysfs_path); - class_dir = opendir(class_path); - if (!class_dir) { - fprintf(stderr, PFX "Fatal: couldn't open sysfs class " - "directory '%s'.\n", class_path); - return 0; + read_config(); + + find_sysfs_devs(); + + for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = sysfs_dev->next) { + device = try_drivers(sysfs_dev); + if (device) { + add_device(device, list, &num_devices, &list_size); + sysfs_dev->have_driver = 1; + } else + no_driver = 1; } - while ((dent = readdir(class_dir))) { - if (dent->d_name[0] == '.' || dent->d_type == DT_REG) - continue; + if (!no_driver) + goto out; + + /* + * Check if we can dlopen() ourselves. If this fails, + * libibverbs is probably statically linked into the + * executable, and we should just give up, since trying to + * dlopen() a driver module will fail spectacularly (loading a + * driver .so will bring in dynamic copies of libibverbs and + * libdl to go along with the static copies the executable + * has, which quickly leads to a crash. + */ + { + void *hand = dlopen(NULL, RTLD_NOW); + if (!hand) { + fprintf(stderr, PFX "Warning: dlopen(NULL) failed, " + "assuming static linking.\n"); + statically_linked = 1; + goto out; + } + dlclose(hand); + } + + load_drivers(); - device = init_drivers(class_path, dent->d_name); - if (!device) + for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = sysfs_dev->next) { + if (sysfs_dev->have_driver) continue; - if (list_size <= num_devices) { - list_size = list_size ? 
list_size * 2 : 1; - new_list = realloc(*list, list_size * sizeof (struct ibv_device *)); - if (!new_list) - goto out; - *list = new_list; + device = try_drivers(sysfs_dev); + if (device) { + add_device(device, list, &num_devices, &list_size); + sysfs_dev->have_driver = 1; } - - (*list)[num_devices++] = device; } - closedir(class_dir); - out: + for (sysfs_dev = sysfs_dev_list, + next_dev = sysfs_dev ? sysfs_dev->next : NULL; + sysfs_dev; + sysfs_dev = next_dev, next_dev = sysfs_dev ? sysfs_dev->next : NULL) { + if (!sysfs_dev->have_driver) { + fprintf(stderr, PFX "Warning: no userspace device-specific " + "driver found for %s\n", sysfs_dev->sysfs_path); + if (statically_linked) + fprintf(stderr, " When linking libibverbs statically, " + "driver must be statically linked too.\n"); + } + free(sysfs_dev); + } + return num_devices; } diff --git a/src/libibverbs.map b/src/libibverbs.map index aeb707a..795dd55 100644 --- a/src/libibverbs.map +++ b/src/libibverbs.map @@ -77,6 +77,7 @@ IBVERBS_1.0 { ibv_fork_init; ibv_dontfork_range; ibv_dofork_range; + ibv_register_driver; local: *; }; -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 5/7] libehca: Update libehca for new libibverbs driver handling In-Reply-To: <20071101421.ppvNxwuXku4yj4IF@cisco.com> Message-ID: <20071101421.3YBqlT2XXId4NxdY@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. The driver must be in a shared object like libehca-rdmav2.so in the ordinary library path, rather than infiniband/libehca.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. This patch adds autoconf tests to libehca to detect whether it is being built against a stable libibverbs 1.0 tree or the new libibverbs development tree. 
Then based on the result, it builds a library with the appropriate name and with the correct driver initialization handling. Signed-off-by: Roland Dreier --- Makefile.am | 30 +++++++++++++++++++----------- config.h.in | 6 +++--- configure.in | 15 +++++++++++++-- ehca.driver | 1 + src/ehca_uinit.c | 12 +++++++++--- 5 files changed, 45 insertions(+), 19 deletions(-) diff --git a/Makefile.am b/Makefile.am index 46d639c..ca44a45 100644 --- a/Makefile.am +++ b/Makefile.am @@ -37,18 +37,25 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +AM_CFLAGS = -O2 -g -Wall -D_GNU_SOURCE -DP_SERIES -Isrc -ehcalibdir = $(libdir)/infiniband +EHCA_SOURCES = src/ehca_umain.c src/ehca_u_mrmw.c src/ehca_uinit.c \ + src/ehca_ureqs.c src/hcp_phyp.c -ehcalib_LTLIBRARIES = src/libehca.la - -src_libehca_la_SOURCES = src/ehca_umain.c src/ehca_u_mrmw.c src/ehca_uinit.c src/ehca_ureqs.c src/hcp_phyp.c - -src_libehca_la_CFLAGS = -O2 -g -Wall -D_GNU_SOURCE -DP_SERIES -I../libibverbs/include -Isrc - -src_libehca_la_LDFLAGS = -version-info 1 -export-dynamic \ - -Wl,--version-script=$(srcdir)/src/libehca.map \ - -lpthread -libverbs -nostdlib +if HAVE_IBV_DEVICE_LIBRARY_EXTENSION + lib_LTLIBRARIES = src/libehca.la + src_libehca_la_SOURCES = $(EHCA_SOURCES) + src_libehca_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ + -Wl,--version-script=$(srcdir)/src/libehca.map \ + -lpthread -libverbs -nostdlib +else + ehcalibdir = $(libdir)/infiniband + ehcalib_LTLIBRARIES = src/libehca.la + src_libehca_la_SOURCES = $(EHCA_SOURCES) + src_libehca_la_LDFLAGS = -version-info 1 -export-dynamic \ + -Wl,--version-script=$(srcdir)/src/libehca.map \ + -lpthread -libverbs -nostdlib +endif EXTRA_DIST = src/ehca_asm.h \ src/ehca_galpa.h \ @@ -59,7 +66,8 @@ EXTRA_DIST = src/ehca_asm.h \ src/ehca_qes.h \ src/ehca_utools.h \ src/hipz_hw.h \ - src/libehca.map + src/libehca.map \ + src/ehca.driver # dist-hook: libehca.spec # cp libehca.spec 
$(distdir) diff --git a/config.h.in b/config.h.in index 03adb6e..add7607 100644 --- a/config.h.in +++ b/config.h.in @@ -6,6 +6,9 @@ /* Define to 1 if you have the `ibv_read_sysfs_file' function. */ #undef HAVE_IBV_READ_SYSFS_FILE +/* Define to 1 if you have the `ibv_register_driver' function. */ +#undef HAVE_IBV_REGISTER_DRIVER + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H @@ -27,9 +30,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H -/* Define to 1 if you have the header file. */ -#undef HAVE_SYSFS_LIBSYSFS_H - /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H diff --git a/configure.in b/configure.in index 21f97ff..591b9b0 100644 --- a/configure.in +++ b/configure.in @@ -30,9 +30,20 @@ AC_CHECK_HEADER(infiniband/driver.h, [], AC_MSG_ERROR([ not found. libehca requires libibverbs.])) dnl Checks for library functions -AC_CHECK_FUNCS(ibv_read_sysfs_file) +AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_register_driver) + +dnl Now check if for libibverbs 1.0 vs 1.1 +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION, + test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION) +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) fi -AC_CHECK_HEADERS(sysfs/libsysfs.h) dnl Checks for programs. 
AC_PROG_CC diff --git a/ehca.driver b/ehca.driver new file mode 100644 index 0000000..b0000e4 --- /dev/null +++ b/ehca.driver @@ -0,0 +1 @@ +driver ehca diff --git a/src/ehca_uinit.c b/src/ehca_uinit.c index 4618601..47866cf 100644 --- a/src/ehca_uinit.c +++ b/src/ehca_uinit.c @@ -184,8 +184,8 @@ static int ibv_read_sysfs_file(const char *dir, const char *file, } #endif /* HAVE_IBV_READ_SYSFS_FILE */ -struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *ehca_driver_init(const char *uverbs_sys_path, + int abi_version) { struct ehcau_device *my_dev = NULL; char value[64]; @@ -225,6 +225,7 @@ struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, return &my_dev->ibv_dev; } +#ifndef HAVE_IBV_REGISTER_DRIVER struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { int abi_ver = 0; @@ -234,8 +235,9 @@ struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) value, sizeof value) > 0) abi_ver = strtol(value, NULL, 10); - return ibv_driver_init(sysdev->path, abi_ver); + return ehca_driver_init(sysdev->path, abi_ver); } +#endif /* HAVE_IBV_REGISTER_DRIVER */ /** @brief module initialization */ @@ -328,6 +330,10 @@ void __attribute__ ((constructor)) ehcau_init(void) } else { fprintf(libehca_fh, "tracelevel is:%i\n", libehca_trlevel); } + +#ifdef HAVE_IBV_REGISTER_DRIVER + ibv_register_driver("ehca", ehca_driver_init); +#endif } /* eof ehca_uinit.c */ -- 1.4.4.1 From rolandd at cisco.com Wed Jan 10 14:21:50 2007 From: rolandd at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:21:50 -0800 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <20071101421.ImYuNVBtRs7hYXkl@cisco.com> Message-ID: <20071101421.5u8V9SqfYQDm90K9@cisco.com> The latest libibverbs development tree changes how low-level drivers are found. 
The driver must be in a shared object like libcxgb3-rdmav2.so in the ordinary library path, rather than infiniband/cxgb3.so as for libibverbs 1.0. In addition, the driver must call ibv_register_driver() to pass its entry point to libibverbs, rather than exporting an ibv_driver_init() function. Since the current libcxgb3 tree is targeted only for libibverbs development trees and won't work with libibverbs 1.0, this patch changes libcxgb3 to work with the new libibverbs way of loading drivers without any autoconf tests. Signed-off-by: Roland Dreier --- Makefile.am | 14 +++++++------- configure.in | 13 +++++++++++++ cxgb3.driver | 1 + libcxgb3.spec.in | 7 ++++--- src/iwch.c | 27 ++++----------------------- 5 files changed, 29 insertions(+), 33 deletions(-) diff --git a/Makefile.am b/Makefile.am index 8f3a679..3dab25e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,10 +1,8 @@ # $Id: Makefile.am 3802 2005-10-18 18:32:06Z tom $ -cxgb3libdir = $(libdir)/infiniband +lib_LTLIBRARIES = src/libcxgb3.la -cxgb3lib_LTLIBRARIES = src/cxgb3.la - -src_cxgb3_la_CFLAGS = -g -Wall -D_GNU_SOURCE +AM_CFLAGS = -g -Wall -D_GNU_SOURCE if HAVE_LD_VERSION_SCRIPT cxgb3_version_script = -Wl,--version-script=$(srcdir)/src/iwch.map @@ -12,16 +10,18 @@ else cxgb3_version_script = endif -src_cxgb3_la_SOURCES = src/cq.c src/iwch.c src/qp.c \ +src_libcxgb3_la_SOURCES = src/cq.c src/iwch.c src/qp.c \ src/verbs.c -src_cxgb3_la_LDFLAGS = -avoid-version -module \ +src_libcxgb3_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ $(cxgb3_version_script) +cxgb3confdir = $(sysconfdir)/libibverbs.d +cxgb3conf_DATA = cxgb3.driver #DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ # debian/libcxgb31.install debian/libcxgb3-dev.install debian/rules EXTRA_DIST = src/iwch.h src/iwch-abi.h \ - src/iwch.map libcxgb3.spec.in $(DEBIAN) + src/iwch.map libcxgb3.spec.in cxgb3.driver dist-hook: libcxgb3.spec cp libcxgb3.spec $(distdir) diff --git a/configure.in b/configure.in 
index d113da9..81b5a30 100644 --- a/configure.in +++ b/configure.in @@ -28,6 +28,19 @@ AC_C_CONST dnl Checks for library functions AC_CHECK_FUNCS(ibv_read_sysfs_file) +dnl Now check if for libibverbs device library extension +dummy=if$$ +cat < $dummy.c +#include +IBV_DEVICE_LIBRARY_EXTENSION +IBV_VERSION +IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` +rm -f $dummy.c +if test $IBV_DEVICE_LIBRARY_EXTENSION = IBV_DEVICE_LIBRARY_EXTENSION; then + AC_MSG_ERROR([IBV_DEVICE_LIBRARY_EXTENSION not defined. Is libibverbs new enough?]) +fi +AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) + AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then ac_cv_version_script=yes diff --git a/cxgb3.driver b/cxgb3.driver new file mode 100644 index 0000000..cfa6186 --- /dev/null +++ b/cxgb3.driver @@ -0,0 +1 @@ +driver cxgb3 diff --git a/libcxgb3.spec.in b/libcxgb3.spec.in index e710808..959f659 100644 --- a/libcxgb3.spec.in +++ b/libcxgb3.spec.in @@ -39,18 +39,19 @@ make %{?_smp_mflags} rm -rf $RPM_BUILD_ROOT %makeinstall # remove unpackaged files from the buildroot -rm -f $RPM_BUILD_ROOT%{_libdir}/infiniband/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la %clean rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root,-) -%{_libdir}/infiniband/cxgb3.so +%{_libdir}/libcxgb3*.so %doc AUTHORS COPYING ChangeLog README +%config %{_sysconfdir}/libibverbs.d/cxgb3.driver %files devel %defattr(-,root,root,-) -%{_libdir}/infiniband/cxgb3.a +%{_libdir}/libcxgb3*.a %changelog diff --git a/src/iwch.c b/src/iwch.c index 17f1d61..e972460 100644 --- a/src/iwch.c +++ b/src/iwch.c @@ -41,16 +41,6 @@ #include #include -#ifdef HAVE_SYSFS_LIBSYSFS_H -#include -#endif - -#ifndef HAVE_IBV_READ_SYSFS_FILE -#include -#include -#include -#endif - #include "iwch.h" #include "iwch-abi.h" @@ -173,8 +163,8 @@ static struct ibv_device_ops iwch_dev_ops = { .free_context = iwch_free_context }; -struct 
ibv_device *ibv_driver_init(const char *uverbs_sys_path, - int abi_version) +static struct ibv_device *cxgb3_driver_init(const char *uverbs_sys_path, + int abi_version) { char value[16]; char s[32]; @@ -251,16 +241,7 @@ err1: return NULL; } -#ifdef HAVE_SYSFS_LIBSYSFS_H -struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) +static __attribute__((constructor)) void cxgb3_register_driver(void) { - int abi_ver = 0; - char value[8]; - - if (ibv_read_sysfs_file(sysdev->path, "abi_version", - value, sizeof value) > 0) - abi_ver = strtol(value, NULL, 10); - - return ibv_driver_init(sysdev->path, abi_ver); + ibv_register_driver("cxgb3", cxgb3_driver_init); } -#endif /* HAVE_SYSFS_LIBSYSFS_H */ -- 1.4.4.1 From rdreier at cisco.com Wed Jan 10 14:24:18 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:24:18 -0800 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <1168467650.13836.7.camel@stevo-desktop> (Steve Wise's message of "Wed, 10 Jan 2007 16:20:50 -0600") References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> <1168467650.13836.7.camel@stevo-desktop> Message-ID: > So libibverbs 1.1 will be part of ofed 1.2? That's the goal, and I guess you're counting on it for libcxg3.... I hope I can get to a good point to freeze things again for a while, although I think I'll probably drop memory windows at least and possibly the reregister MR operation too. - R. From rdreier at cisco.com Wed Jan 10 14:24:44 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 14:24:44 -0800 Subject: [openib-general] [PATCH 0/7] userspace verbs changes In-Reply-To: <20071101421.hNJYLZrMYBxmo1ah@cisco.com> (Roland Dreier's message of "Wed, 10 Jan 2007 14:21:50 -0800") References: <20071101421.hNJYLZrMYBxmo1ah@cisco.com> Message-ID: oops ... sorry for all the dupes... 
From rdreier at cisco.com Wed Jan 10 15:10:15 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 15:10:15 -0800 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <20070110183916.GG18953@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 10 Jan 2007 20:39:16 +0200") References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> Message-ID: > I am thinking about detecting that we are getting a completion each > couple of microseconds, and just polling a bit more (even if CQ is empty) > simply to avoid paying 10s micro for the next interrupt. > This is just a fuzzy idea, but maybe this can work? It's a good area to look at (assuming it's worth spending yet more time on IPoIB performance), but I think it's quite difficult to come up with a good solution. For example search for Jamal's paper "When NAPI Comes to Town" to see some related work that was a failure in the end. From sashak at voltaire.com Wed Jan 10 15:19:17 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 11 Jan 2007 01:19:17 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45A49A18.8050702@dev.mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> Message-ID: <20070110231917.GD17240@sashak.voltaire.com> Hi Yevgeny, On 09:47 Wed 10 Jan , Yevgeny Kliteynik wrote: > Hi Sasha, > > Sasha Khapyorsky wrote: > > Hi Yevgeny, > > > > On 12:05 Tue 09 Jan , Yevgeny Kliteynik wrote: > >> Hi Sasha. > >> Thanks for spotting this issue - I missed it when I reviewed the patch. 
> >> Here's how things look in windows: > >> * openlog() and closelog() are redefined to nothing in windows config header > >> * cl_log_event() implementation on windows does all the work - gets handler > >> to event log, logs the message, and closes the handler. > >> It does *not* uses syslog(). > >> > >> To use syslog/sysopen/sysclose on windows, a user have to install some > >> package (e.g. "Interix") that provides various daemons (such as syslogd), > >> similar to Linux/Unix: > >> http://msdn2.microsoft.com/en-us/library/ms811897.aspx#ucmgch10_topic12 > >> http://www.interix.com/ > > > > And this is not done for WinOpenSM now. Is this would be useful option? I > > guess this could solve a lot of compatibility problems and simplify the > > porting dramatically, right? > > I'll address this question to the windows guys. But I seriously doubt that > they would want to make OSM depend on some third-party product, whether it > is commercial or free. I doubt too :(. But ask them if it is not too hard. Interesting, how WinOSM development works? AFAIK it is open source and published on the net. Who is the maintainer? Is there any active mailing list? > >> So replacing cl_log_event() with syslog() would definitely cause a problem. > >> > >> A (pretty ugly) solution can be writing a wrapper function to cl_log_event() > >> on windows and redefining syslog() to use this wrapper. > > > > Like #define syslog() cl_log_event() ? Not so ugly (better than > > combination of openlog()/closelog() and "unrelated" cl_log_event()), > > and perfectly simple IMO. > > "#define syslog() cl_log_event()" will not do the job, since these functions > have different definitios: > syslog(int priority, char *message, ...); > cl_log_event(char*, cl_log_type_t, char*, void*, uint32_t); > so there should be wrapper just for windows that will prepare the message > for cl_log_event() as a single string. Understood. 
And what the purpose of: #ifdef WIN32 OsmReportState(buffer); #endif /* WIN32 */ in osm_log() (right after cl_log_event()/syslog())? Sasha From rdreier at cisco.com Wed Jan 10 15:11:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 10 Jan 2007 15:11:04 -0800 Subject: [openib-general] [RFC] Support for Additional Scopes for IPoIB MC Groups In-Reply-To: <1168445329.14647.147877.camel@hal.voltaire.com> (Hal Rosenstock's message of "10 Jan 2007 11:08:52 -0500") References: <1168445329.14647.147877.camel@hal.voltaire.com> Message-ID: > In preparation for IB routers, other scopes should be supported. > > Is an acceptable approach to supporting additional scopes to add a > module parameter for this ? I don't think that's a good solution, since it forces every IPoIB interface to use the same scope. I think there should be some per-interface way to set the scope, either via sysfs or an ethtool interface. From vatsa at veritas.com Wed Jan 10 17:18:39 2007 From: vatsa at veritas.com (vatsa at veritas.com) Date: Wed, 10 Jan 2007 17:18:39 -0800 Subject: [openib-general] Stale CM callbacks In-Reply-To: <45A528AF.2050105@ichips.intel.com> References: <45A4583B.8050807@veritas.com> <45A528AF.2050105@ichips.intel.com> Message-ID: <45A5906F.4000107@veritas.com> Hi, Sean Hefty wrote: >> I am using IB as a cluster interconnect. If a node which had >> established several connections >> with a remote node was reset (not rebooted) and it came back up >> quickly is it possible for >> the node to get stale REQ/DREQ callbacks ? If yes, is there an API >> to purge stale states >> in the CM or should it be detected by the module getting the callback ? > > It's possible for stale REQ/DREQ messages to appear at the reset node, > but I don't see any problem with that occurring. The DREQs should be > dropped, since there's no connections to match them with. The REQs > should be rejected without a matching listen. 
If the listen occurs > before the REQ appears, then a new connection would result. I don't > see a problem in either case. Our code isn't handling stale callbacks. Thanks for clarifying it. > > As for purging stale states, I'm not sure what you mean. The reset > node will have purged the local CM state. This is what I meant, but please note that I have yet to confirm the behaviour. If a node which has established several connections, - reboots (goes down and comes back up gracefully) then it seems there is no problem establishing connections the next time. - resets (goes down abruptly and comes back up) then it seems it is more likely to get stale callbacks from the CM. In the above scenario the node comes back up quickly in the reset case than the reboot case. So, i was just wondering if the extra delay in the reboot case was causing the problem to not occur. In other words, does the switch cache the reset node state and discards it after some fixed amount of time. Also, should a remote node with which the reset node had established connections call ib_destroy_cm_id() during its disconnect processing ? Currently, our code only destroys the QPs (by calling ib_destroy_cq() and ib_destroy_qp()). Thanks, Sreevatsa > > - Sean From halr at voltaire.com Wed Jan 10 19:28:04 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 10 Jan 2007 22:28:04 -0500 Subject: [openib-general] OpenSM core dump - file size exceeded In-Reply-To: References: Message-ID: <1168486082.18817.30694.camel@hal.voltaire.com> On Fri, 2006-12-15 at 17:05, Woodruff, Robert J wrote: > Hal wrote, > >Any idea what filled up the log ? but that's a side issue. > > Yes we were getting a bunch of multicast errors, Sean is investigating > this. > > >This has been discussed on the list before. This is one option which > can > >help with this issue: > > > -L, --log_limit > > This option defines maximal log file size in MB. When > specified > > the log file will be truncated upon reaching this limit. 
> > Ok, thanks. Logrotation (based on Ira's original patch with some modifications) is now supported in the latest management git tree (as of this AM). -- Hal > > woody From eitan at sw053.yok.mtl.com Wed Jan 10 21:26:33 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Thu, 11 Jan 2007 07:26:33 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-11:normal completion Message-ID: <200701110526.l0B5QXrD031048@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Wed_Jan_10_09:04:26_2007 0979c2 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From mst at mellanox.co.il Wed Jan 10 23:04:17 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 11 Jan 2007 09:04:17 +0200 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> <1168467650.13836.7.camel@stevo-desktop> Message-ID: <20070111070417.GC24724@mellanox.co.il> > Subject: Re: [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling > > > So libibverbs 1.1 will be part of ofed 1.2? > > That's the goal, and I guess you're counting on it for libcxg3.... BTW, you can now drop d5b9ab3d7009b77ee45e98827e803205d322ce7d since the latest version of the Chelsio driver does not need to pass driver data in req notify cq anymore. I guess this means libcxgb3 can be made to work with libibverbs 1.0 if desired. -- MST From ogerlitz at voltaire.com Wed Jan 10 23:25:57 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 11 Jan 2007 09:25:57 +0200 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <20070111070417.GC24724@mellanox.co.il> References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> <1168467650.13836.7.camel@stevo-desktop> <20070111070417.GC24724@mellanox.co.il> Message-ID: <45A5E685.5070904@voltaire.com> Michael S. Tsirkin wrote: >> > So libibverbs 1.1 will be part of ofed 1.2? >> That's the goal, and I guess you're counting on it for libcxg3.... > I guess this means libcxg3 can be made to work with libibverbs 1.0 if > desired. Just a reminder of the importance of including libibverbs 1.1 in OFED 1.2 ---> to have the ***fork*** support merged at last into an official release. Or. 
From vlad at mellanox.co.il Thu Jan 11 00:28:02 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 11 Jan 2007 10:28:02 +0200 Subject: [openib-general] [PATCH] ~vlad/ofascripts.git - Add libcxgb3to user kit In-Reply-To: <1168453249.27060.23.camel@stevo-desktop> References: <1168453249.27060.23.camel@stevo-desktop> Message-ID: <1168504082.5111.6.camel@vladsk-laptop> Applied, Regards, Vladimir On Wed, 2007-01-10 at 12:20 -0600, Steve Wise wrote: > Vlad, > > Here are the changes to ofa userspace scripts for adding Chelsio's RDMA library, libcxgb3. > > Steve. > > > ----- > > Support for libcxgb3. > > From: Steve Wise > > - added libcxgb3 to configure > - added libcxgb3 to Makefile > > Signed-off-by: Steve Wise > --- > > Makefile | 17 ++++++++++++++++- > configure | 16 ++++++++++++++++ > 2 files changed, 32 insertions(+), 1 deletions(-) > > diff --git a/Makefile b/Makefile > index b164ee0..4d57d82 100644 > --- a/Makefile > +++ b/Makefile > @@ -5,7 +5,8 @@ all: > .PHONY: libibverbs libmthca libibcm perftest mstflint libsdp libibcommon libibumad libibmad osm diags mpi > .PHONY: librdmacm install_librdmacm dapl dat tvflash install_tvflash > .PHONY: install_libibverbs install_libmthca install_libibcm install_libsdp libibcommon > -.PHONY: libipathverbs install_libipathverbs libehca install_libehca > +.PHONY: libipathverbs install_libipathverbs libehca install_libehca > +.PHONY: libcxgb3 install_libcxgb3 > .PHONY: install_libibumad install_libibmad install_osm install_diags install_mstflint install_perftest > .PHONY: srptools install_srptools ipoibtools install_ipoibtools > > @@ -49,6 +50,11 @@ user: libehca > install_user: install_libehca > endif > > +ifeq ($(WITH_CXGB3),yes) > +user: libcxgb3 > +install_user: install_libcxgb3 > +endif > + > ifeq ($(WITH_CM),yes) > user: libibcm > install_user: install_libibcm > @@ -139,6 +145,11 @@ libehca: libibverbs > AM_LDFLAGS="-L../libibverbs/src -libverbs" > @echo "Make libehca done" > > +libcxgb3: libibverbs > + 
$(MAKE) -C $(SRC)userspace/libcxgb3 \ > + AM_LDFLAGS="-L../libibverbs/src -libverbs" > + @echo "Make libcxgb3 done" > + > libibcm: libibverbs > $(MAKE) -C $(SRC)userspace/libibcm \ > CPPFLAGS="-I../libibverbs/include" \ > @@ -281,6 +292,9 @@ install_libipathverbs: install_libibverb > install_libehca: install_libibverbs > $(MAKE) -C $(SRC)userspace/libehca install > > +install_libcxgb3: install_libibverbs > + $(MAKE) -C $(SRC)userspace/libcxgb3 install > + > install_libibcm: install_libmthca > $(MAKE) -C $(SRC)userspace/libibcm install > > @@ -385,6 +399,7 @@ clean_user: > $(MAKE) -C $(SRC)userspace/libmthca clean > $(MAKE) -C $(SRC)userspace/libipathverbs clean > $(MAKE) -C $(SRC)userspace/libehca clean > + $(MAKE) -C $(SRC)userspace/libcxgb3 clean > $(MAKE) -C $(SRC)userspace/libibcm clean > # $(MAKE) -C $(SRC)userspace/useraccess-lib clean > $(MAKE) -C $(SRC)userspace/perftest clean > diff --git a/configure b/configure > index 5f8253e..616ef5c 100755 > --- a/configure > +++ b/configure > @@ -58,6 +58,9 @@ Usage: `basename $0` [options] > --with-libehca include libehca package [yes] > --without-libehca include libehca package [no] > > + --with-libcxgb3 include libcxgb3 package [yes] > + --without-libcxgb3 include libcxgb3 package [no] > + > --with-libibcm include CM package [no] > --without-libibcm don't include CM package [yes] > > @@ -277,6 +280,13 @@ main() > --without-libehca) > WITH_EHCA="no" > ;; > + --with-libcxgb3) > + WITH_CXGB3="yes" > + WITH_IBVERBS="yes" > + ;; > + --without-libcxgb3) > + WITH_CXGB3="no" > + ;; > --with-libibcm) > WITH_CM="yes" > WITH_MTHCA="yes" > @@ -410,6 +420,7 @@ main() > # WITH_DAPL="yes" > WITH_DIAGS="yes" > # WITH_EHCA="yes" > + WITH_CXGB3="yes" > WITH_IBCOMMON="yes" > WITH_IBMAD="yes" > WITH_IBUMAD="yes" > @@ -460,6 +471,7 @@ WITH_IBVERBS=${WITH_IBVERBS:-"no"} > WITH_MTHCA=${WITH_MTHCA:-"no"} > WITH_IPATHVERBS=${WITH_IPATHVERBS:-"no"} > WITH_EHCA=${WITH_EHCA:-"no"} > +WITH_CXGB3=${WITH_CXGB3:-"no"} > WITH_CM=${WITH_CM:-"no"} > 
WITH_SDP=${WITH_SDP:-"no"} > WITH_DAPL=${WITH_DAPL:-"no"} > @@ -501,6 +513,7 @@ WITH_IBVERBS=${WITH_IBVERBS} > WITH_MTHCA=${WITH_MTHCA} > WITH_IPATHVERBS=${WITH_IPATHVERBS} > WITH_EHCA=${WITH_EHCA} > +WITH_CXGB3=${WITH_CXGB3} > WITH_CM=${WITH_CM} > WITH_SDP=${WITH_SDP} > WITH_DAPL=${WITH_DAPL} > @@ -539,6 +552,9 @@ EOFCONFIG > if [ "${WITH_EHCA}" == "yes" ]; then > ullibs="${ullibs} libehca" > fi > + if [ "${WITH_CXGB3}" == "yes" ]; then > + ullibs="${ullibs} libcxgb3" > + fi > if [ "${WITH_CM}" == "yes" ]; then > ullibs="${ullibs} libibcm" > fi > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: From vlad at mellanox.co.il Thu Jan 11 00:28:13 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 11 Jan 2007 10:28:13 +0200 Subject: [openib-general] [PATCH] ~vlad/ofabuild - Chelsio Support In-Reply-To: <1168453370.27060.26.camel@stevo-desktop> References: <1168453370.27060.26.camel@stevo-desktop> Message-ID: <1168504093.5111.8.camel@vladsk-laptop> Applied, Regards, Vladimir On Wed, 2007-01-10 at 12:22 -0600, Steve Wise wrote: > Here are the changes to the user build script to pull in libcxgb3. > > > Steve. > > > Chelsio T3 Library Support. > > From: Steve Wise > > Signed-off-by: Steve Wise > --- > > build_ofa_user.sh | 7 +++++-- > 1 files changed, 5 insertions(+), 2 deletions(-) > > diff --git a/build_ofa_user.sh b/build_ofa_user.sh > index b6eee43..acb2676 100755 > --- a/build_ofa_user.sh > +++ b/build_ofa_user.sh > @@ -17,7 +17,7 @@ LOG=${tmpdir}/${proj}-${revision}.build. 
> > git_extra_flags=${git_extra_flags:-"-q"} > > -ullibs="libibverbs libmthca libehca libipathverbs tvflash libibcm libsdp mstflint perftest srptools ipoibtools librdmacm dapl imgen management" > +ullibs="libibverbs libmthca libehca libipathverbs tvflash libibcm libsdp mstflint perftest srptools ipoibtools librdmacm dapl imgen management libcxgb3" > > # User level git packages > scripts_git="git://git.openfabrics.org/~vlad/ofascripts.git" > @@ -54,6 +54,8 @@ ibutils_git="git://git.openfabrics.org/~ > ibutils_branch="origin" > management_git="git://git.openfabrics.org/~halr/management.git" > management_branch="origin" > +libcxgb3_git="git://git.openfabrics.org/~swise/libcxgb3.git" > +libcxgb3_branch="origin" > > # Execute command w/ echo and exit if it fail > ex() > @@ -317,7 +319,8 @@ check_build() > --with-perftest \ > --with-mstflint \ > --with-srptools \ > - --with-ipoibtools" > + --with-ipoibtools \ > + --with-libcxgb3" > > cd ${tmpdir} > ex tar xzf ${dest}/${proj}/${projdir}.tgz > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Thu Jan 11 01:51:20 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 11:51:20 +0200 Subject: [openib-general] Fwd: Re: [PATCH] [MINOR] perftest: send_bw: fix dangling else Message-ID: <20070111095120.GC602@mellanox.co.il> ----- Forwarded message from "Michael S. Tsirkin" ----- Date: Thu, 11 Jan 2007 11:32:24 +0200 From: "Michael S. Tsirkin" Subject: Re: [PATCH] [MINOR] perftest: send_bw: fix dangling else Reply-To: "Michael S. 
Tsirkin" References: <45A2823F.5000302 at voltaire.com> In-Reply-To: <45A2823F.5000302 at voltaire.com> > Symptom: ib_send_bw reports 'inf' bandwidth > Cause: dangling else > > Signed-off-by: Yosef Etigin Applied, thanks. -- MST ----- End forwarded message ----- -- MST From mst at mellanox.co.il Thu Jan 11 02:01:41 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 12:01:41 +0200 Subject: [openib-general] this is a test, please ignore Message-ID: <20070111100141.GA1759@mellanox.co.il> sorry -- MST From pasquale.davide at gmail.com Thu Jan 11 02:08:43 2007 From: pasquale.davide at gmail.com (Davide Pasquale) Date: Thu, 11 Jan 2007 11:08:43 +0100 Subject: [openib-general] OFED 1.1 build issue Message-ID: Hi all! We are working on a HP Blade cluster using Rocks Cluster 4.2.1. version. The infiniband card inside the blade are identified as Mellanox MT25204. We need to build Ibadm, for fabric management, wich prerequisite is the OFED stack. We have problems compiling the source package, the build process hangs giving this errors: flint.cpp: In member function `bool Operations::checkGen(FBase&, u_int32_t, u_int32_t, u_int32_t&, const char*)': flint.cpp:4172: error: no matching function for call to `_GLIBCXX_STD::_Bit_iterator::_Bit_iterator(u_int8_t*)' /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_bvector.h:185: note: candidates are: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(const _GLIBCXX_STD::_Bit_iterator&) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_bvector.h:192: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(_GLIBCXX_STD::_Bit_type*, unsigned int) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_bvector.h:190: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator() flint.cpp:4173: error: no matching function for call to `_GLIBCXX_STD::_Bit_iterator::_Bit_iterator(u_int8_t*)' 
/usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_bvector.h:185: note: candidates are: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(const _GLIBCXX_STD::_Bit_iterator&) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_bvector.h:192: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(_GLIBCXX_STD::_Bit_type*, unsigned int) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_bvector.h:190: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator() flint.cpp: In member function `bool Operations::DumpConf(const char*)': flint.cpp:4287: error: `_fw_conf_sect' was not declared in this scope flint.cpp:4287: warning: unused variable '_fw_conf_sect' flint.cpp:4292: error: `_fw_conf_sect' was not declared in this scope flint.cpp:4295: error: `_fw_conf_sect' was not declared in this scope flint.cpp:4299: warning: taking address of temporary flint.cpp:4309: warning: taking address of temporary flint.cpp: In function `int main(int, char**)': Our kernel is 2.6.9-42.0.2.ELsmp and gcc -v gives out: gcc -v Reading specs from /usr/lib/gcc/x86_64-redhat-linux/3.4.6/specs Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --enable-shared --enable-threads=posix --disable-checking --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-java-awt=gtk --host=x86_64-redhat-linux Thread model: posix gcc version 3.4.6 20060404 (Red Hat 3.4.6-3) On a second cluster node we have installed with success openib-usermode.rpmand using openib utilities I obtain this informations: ibstatus Infiniband device 'mthca0' port 1 status: default gid: fe80:0000:0000:0000:0018:feff:ff8c:7a8d base lid: 0x0 sm lid: 0x0 state: 1: DOWN phys state: 2: Polling rate: 10 Gb/sec (4X) Thanks, Davide. -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Thu Jan 11 02:18:22 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 11 Jan 2007 12:18:22 +0200 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: References: Message-ID: <20070111101822.GC1759@mellanox.co.il> Looks like g++ headers are not installed properly. Quoting Davide Pasquale : Subject: OFED 1.1 build issue Hi all! We are working on a HP Blade cluster using Rocks Cluster 4.2.1. version. The infiniband card inside the blade are identified as Mellanox MT25204. We need to build Ibadm, for fabric management, wich prerequisite is the OFED stack. We have problems compiling the source package, the build process hangs giving this errors: flint.cpp: In member function `bool Operations::checkGen(FBase&, u_int32_t, u_int32_t, u_int32_t&, const char*)': flint.cpp:4172: error: no matching function for call to `_GLIBCXX_STD::_Bit_iterator::_Bit_iterator(u_int8_t*)' /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/ stl_bvector.h:185: note: candidates are: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(const _GLIBCXX_STD::_Bit_iterator&) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/ stl_bvector.h:192: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(_GLIBCXX_STD::_Bit_type*, unsigned int) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/ stl_bvector.h:190: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator() flint.cpp:4173: error: no matching function for call to `_GLIBCXX_STD::_Bit_iterator::_Bit_iterator(u_int8_t*)' /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/ stl_bvector.h:185: note: candidates are: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(const _GLIBCXX_STD::_Bit_iterator&) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/ stl_bvector.h:192: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator(_GLIBCXX_STD::_Bit_type*, unsigned int) /usr/lib/gcc/x86_64-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/ stl_bvector.h:190: note: _GLIBCXX_STD::_Bit_iterator::_Bit_iterator() 
flint.cpp: In member function `bool Operations::DumpConf(const char*)': flint.cpp:4287: error: `_fw_conf_sect' was not declared in this scope flint.cpp:4287: warning: unused variable '_fw_conf_sect' flint.cpp:4292: error: `_fw_conf_sect' was not declared in this scope flint.cpp:4295: error: `_fw_conf_sect' was not declared in this scope flint.cpp:4299: warning: taking address of temporary flint.cpp:4309: warning: taking address of temporary flint.cpp: In function `int main(int, char**)': Our kernel is 2.6.9-42.0.2.ELsmp and gcc -v gives out: gcc -v Reading specs from /usr/lib/gcc/x86_64-redhat-linux/3.4.6/specs Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/ usr/share/info --enable-shared --enable-threads=posix --disable-checking --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-java-awt=gtk --host=x86_64-redhat-linux Thread model: posix gcc version 3.4.6 20060404 (Red Hat 3.4.6-3) On a second cluster node we have installed with success openib-usermode.rpm and using openib utilities I obtain this informations: ibstatus Infiniband device 'mthca0' port 1 status: default gid: fe80:0000:0000:0000:0018:feff:ff8c:7a8d base lid: 0x0 sm lid: 0x0 state: 1: DOWN phys state: 2: Polling rate: 10 Gb/sec (4X) Thanks, Davide. _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From pasquale.davide at gmail.com Thu Jan 11 02:29:36 2007 From: pasquale.davide at gmail.com (Davide Pasquale) Date: Thu, 11 Jan 2007 11:29:36 +0100 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: <20070111101822.GC1759@mellanox.co.il> References: <20070111101822.GC1759@mellanox.co.il> Message-ID: On 1/11/07, Michael S. Tsirkin wrote: > > Looks like g++ headers are not installed properly. Thanks Michael! 
I have installed these packages from the yum repository: gcc-c++-3.4.6-3 compat-libgcc-296-2.96-132.7.2 libgcc-3.4.6-3 gcc-3.4.6-3 libgcc-3.4.6-3 glibc-headers-2.3.4-2.25 glibc-common-2.3.4-2.25 glibc-2.3.4-2.25 glibc-kernheaders-2.4-9.1.98.EL glibc-devel-2.3.4-2.25 glibc-2.3.4-2.25 glibc-devel-2.3.4-2.25 Can you tell me if I have missed something? Thanks, Davide. -------------- next part -------------- An HTML attachment was scrubbed... URL: From k_mahesh85 at yahoo.co.in Thu Jan 11 03:40:20 2007 From: k_mahesh85 at yahoo.co.in (keshetti mahesh) Date: Thu, 11 Jan 2007 11:40:20 +0000 (GMT) Subject: [openib-general] [query]maximum no. of connections per session in iser initiator Message-ID: <564007.88233.qm@web8322.mail.in.yahoo.com> The max_conn (maximum no. of connections allowed per session?) field in the iscsi_transport structure is not initialised at all while registering with the iSCSI layer in the iSER initiator code. Is that variable initialised to any default value? (like iSCSI/TCP has max_conn in iscsi_transport initialised to 1) What is the maximum no. of connections allowed per session in the iSER initiator? regards, Mahesh keshetti mahesh wrote: >I guess that you're talking about this piece of code: >#define ISCSI_ISER_MAX_CONN 8 >#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ >ISER_QP_MAX_REQ_DTOS) * \ >ISCSI_ISER_MAX_CONN) >It is related to the cq used by iSER. We use it in order to define the >max len of the cq (the same cq is used for the sq & rq). It means that >per HCA, 8 connections may be opened. Yes, I was talking about this part only. Isn't this variable equivalent to the maximum no. of connections allowed per session? What is the constraint for choosing this value as 8? >Where do you see that in iscsi_tcp? In the structure iscsi_transport there is a variable max_conn and it is set to 1 (in the open-iscsi initiator, file name iscsi_tcp.c) > whether this limit is imposed by iSCSI or by transport layer(TCP > or iSER) ?
thanks and regards, Mahesh Send free SMS to your Friends on Mobile from your Yahoo! Messenger. Download Now! http://messenger.yahoo.com/download.php --------------------------------- Here’s a new way to find what you're looking for - Yahoo! Answers -------------- next part -------------- An HTML attachment was scrubbed... URL: From tziporet at dev.mellanox.co.il Thu Jan 11 04:15:28 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Thu, 11 Jan 2007 14:15:28 +0200 Subject: [openib-general] Reminder: OFED 1.2 coordination meeting next Monday at 9am PST Message-ID: <45A62A60.7000503@dev.mellanox.co.il> Hi All, After a long holidays break we are going to have our next OFED 1.2 coordination meeting on Monday Jan-15 at 9am PST (Jeff sent bridge info) The only agenda item I have is reviewing components' readiness for the end of month code freeze. If you have other items for the agenda please let me know Thanks, Tziporet From mst at mellanox.co.il Thu Jan 11 04:21:05 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 14:21:05 +0200 Subject: [openib-general] [Bug 320] test: does this work? Message-ID: <20070111122105.GE1759@mellanox.co.il> -- MST From mst at mellanox.co.il Thu Jan 11 04:22:27 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 14:22:27 +0200 Subject: [openib-general] bugzilla email gateway Message-ID: <20070111122227.GF1759@mellanox.co.il> Was bugzilla migrated to staging? Is the bugzilla email gateway functional in bugzilla? -- MST From mst at mellanox.co.il Thu Jan 11 04:28:01 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 14:28:01 +0200 Subject: [openib-general] [Bug 320] In-Reply-To: <20070111122105.GE1759@mellanox.co.il> References: <20070111122105.GE1759@mellanox.co.il> Message-ID: <20070111122801.GG1759@mellanox.co.il> test, pls ignore. -- MST From mst at mellanox.co.il Thu Jan 11 04:29:51 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 11 Jan 2007 14:29:51 +0200 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: References: <20070111101822.GC1759@mellanox.co.il> Message-ID: <20070111122951.GH1759@mellanox.co.il> > On 1/11/07, Michael S. Tsirkin wrote: > > Looks like g++ headers are not installed properly. > > > > Thanks Michael! > > I have installed this packages from yum repository: > > gcc-c++-3.4.6-3 > compat-libgcc-296-2.96-132.7.2 > libgcc-3.4.6-3 > gcc-3.4.6-3 > libgcc-3.4.6-3 > glibc-headers-2.3.4-2.25 > glibc-common-2.3.4-2.25 > glibc-2.3.4-2.25 > glibc-kernheaders-2.4-9.1.98.EL > glibc-devel-2.3.4-2.25 > glibc-2.3.4-2.25 > glibc-devel-2.3.4-2.25 > > Can you suggest me if I have missed something ? No idea. Try running a preprocessor on flint.cpp (add -E flag to g++ line) find the relevant line in the output and see why does it try to pull in bitvector. -- MST From mst at mellanox.co.il Thu Jan 11 04:42:42 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 14:42:42 +0200 Subject: [openib-general] patch for -stable Message-ID: <20070111124242.GI1759@mellanox.co.il> Roland, I think commit 39798695b4bcc7b145f8910ca56195808d3a7637 shoul go into -stable. It fixes crash triggered by ipoib_ha script when bringing interface up/down in a loop. Could you forward it pls? Or if not, OK if I do so? -- MST From jsquyres at cisco.com Thu Jan 11 04:58:09 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 11 Jan 2007 07:58:09 -0500 Subject: [openib-general] bugzilla email gateway In-Reply-To: <20070111122227.GF1759@mellanox.co.il> References: <20070111122227.GF1759@mellanox.co.il> Message-ID: <296ACF11-7596-4D9D-8F9A-BB0A19C89939@cisco.com> On Jan 11, 2007, at 7:22 AM, Michael S. Tsirkin wrote: > Was bugzilla migrated to staging? If you're using bugs.openfabrics.org, then yes. I believe that Michael shut down the bugzilla on the old server (right, Michael?). > Is the bugzilla email gateway functional in bugzilla? 
None of the e-mail for openfabrics or openib have been moved to the new server; we're still sorting out DNS issues. -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mst at mellanox.co.il Thu Jan 11 05:01:25 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 15:01:25 +0200 Subject: [openib-general] bugzilla email gateway In-Reply-To: <296ACF11-7596-4D9D-8F9A-BB0A19C89939@cisco.com> References: <296ACF11-7596-4D9D-8F9A-BB0A19C89939@cisco.com> Message-ID: <20070111130125.GL1759@mellanox.co.il> > Quoting Jeff Squyres : > > Is the bugzilla email gateway functional in bugzilla? > > None of the e-mail for openfabrics or openib have been moved to the > new server; we're still sorting out DNS issues. So, can you make bugzilla email gateway work with old e-mail addresses, or is this broken for now? -- MST From steve.apo at googlemail.com Thu Jan 11 05:14:42 2007 From: steve.apo at googlemail.com (Steven Wooding) Date: Thu, 11 Jan 2007 13:14:42 +0000 Subject: [openib-general] WC Error code question Message-ID: <2cfcf21e0701110514t3a8b5954s760f26a0ab0f7321@mail.gmail.com> Hi, I'm getting an IBV_WC_LOC_ACCESS_ERR when getting a work completion item related to an RDMA with ImmData transfer. What does this error actually mean? Thanks, Steve. -------------- next part -------------- An HTML attachment was scrubbed... URL: From jsquyres at cisco.com Thu Jan 11 05:19:53 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 11 Jan 2007 08:19:53 -0500 Subject: [openib-general] bugzilla email gateway In-Reply-To: <20070111130125.GL1759@mellanox.co.il> References: <296ACF11-7596-4D9D-8F9A-BB0A19C89939@cisco.com> <20070111130125.GL1759@mellanox.co.il> Message-ID: Michael will have to answer that. My assumption is that it's going to be broken for now. We took the approach of moving everything else first, and then moving all mail-related services second. Perhaps that was a mistake. 
:-\ Depending on how much longer it takes to migrate all the rest of the mail services, this problem could go away soon anyway. On Jan 11, 2007, at 8:01 AM, Michael S. Tsirkin wrote: >> Quoting Jeff Squyres : >>> Is the bugzilla email gateway functional in bugzilla? >> >> None of the e-mail for openfabrics or openib have been moved to the >> new server; we're still sorting out DNS issues. > > So, can you make bugzilla email gateway work with old e-mail > addresses, > or is this broken for now? > > -- > MST -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From or at os.is Thu Jan 11 05:27:13 2007 From: or at os.is (=?ISO-8859-1?Q?=D3lafur_R=F6gnvaldsson?=) Date: Thu, 11 Jan 2007 13:27:13 +0000 Subject: [openib-general] Building OFED-1.1 w. PGI compilers Message-ID: <45A63B31.3070409@os.is> Hello all, I'm currently running into troubles when I try to create the mpich rpm package using the PGI compiler suite on a 64 bit AMD machine (RH Enterprise 4). The error message is:- ===== or at svarkur ~/ofed/OFED-1.1.1 > tail /tmp/OFED.6180.log ipoibtools ################################################## mstflint ################################################## srptools ################################################## Preparing... ################################################## ofed-docs ################################################## Preparing... ################################################## ofed-scripts ################################################## ERROR: Failed dependencies: libpgc.so()(64bit) is needed by mpich_mlx_pgi-0.9.7_mlx2.2.0-1.x86_64 ERROR: Failed executing "/bin/rpm -Uhv /home/or/ofed/OFED-1.1.1/RPMS/redhat-release-4ES-5.5/mpich_mlx_pgi-0.9.7_mlx2.2.0-1.x86_64.rpm" or at svarkur ~/ofed/OFED-1.1.1 ===== I've contacted the PGI support and added /opt/pgi/linux86-64/6.2/libso/pgi.ld to LDFLAGS as suggested but to no avail. I was wondering if anyone out there has encountered this problem and solved it. Best regards, Ólafur. Ps. 
I have no problems building the rpm packages for the GCC compilers, just the PGI (which I need to compile and run the MM5 / WRF weather models). -------------- next part -------------- begin:vcard fn;quoted-printable:=C3=93lafur R=C3=B6gnvaldsson n;quoted-printable;quoted-printable:R=C3=B6gnvaldsson;=C3=93lafur org;quoted-printable:Reiknistofa =C3=AD ve=C3=B0urfr=C3=A6=C3=B0i adr;quoted-printable;quoted-printable;quoted-printable:Grens=C3=A1svegur 9;;Orkugar=C3=B0ur;Reykjav=C3=ADk;;IS-150;ICELAND email;internet:or at os.is tel;work:+354 528 1500 tel;fax:+354 528 1699 tel;home:+354 528 1690 tel;cell:+354 864 4943 x-mozilla-html:FALSE url:http://www.belgingur.is version:2.1 end:vcard From dotanb at dev.mellanox.co.il Thu Jan 11 05:54:38 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Thu, 11 Jan 2007 15:54:38 +0200 Subject: [openib-general] WC Error code question In-Reply-To: <2cfcf21e0701110514t3a8b5954s760f26a0ab0f7321@mail.gmail.com> References: <2cfcf21e0701110514t3a8b5954s760f26a0ab0f7321@mail.gmail.com> Message-ID: <45A6419E.5000701@dev.mellanox.co.il> Steven Wooding wrote: > Hi, > > I'm getting an IBV_WC_LOC_ACCESS_ERR when getting a work completion > item related to an RDMA with ImmData transfer. > > What does this error actually mean? > > Thanks, > > Steve. in which side do you get this completion? My guess is that you are trying to send an RDMA Write with immediate which has bad rkey (rkey which doesn't match the remote address), and you get this status at the responder side. If you still have this problem, i will need some more info .. 
Dotan From monis at voltaire.com Thu Jan 11 06:00:48 2007 From: monis at voltaire.com (Moni Shoua) Date: Thu, 11 Jan 2007 16:00:48 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <20070110183916.GG18953@mellanox.co.il> References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> Message-ID: <45A64310.504@voltaire.com> > Thinking about this more - why does this patch help some benchmarks? > The amount of work it takes for the hardware to generate a completion > is likely negligeable, and we still are scanning the same amount > of TX WRs in a loop to unmap/free them. This makes sense but I think you should also consider the fact that the tx_lock is taken once per per tx_completion so, with the patch, the driver spends less time under lock. > If you think about it this way, it becomes clear that your workload, > for some reason, hits a path where you get an event very fast > after the first completion and there is only a small number of completions > to handle. So your patch helps just by delaying the event handler until > there's more work to do. And I expect it wouldn't help TCP much if at all > as there are RX WRs per each couple of TX WRs. > This is a good point to check. I hope I can get to it and spend time over it next week. From swise at opengridcomputing.com Thu Jan 11 06:13:56 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 08:13:56 -0600 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <20070111070417.GC24724@mellanox.co.il> References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> <1168467650.13836.7.camel@stevo-desktop> <20070111070417.GC24724@mellanox.co.il> Message-ID: <1168524836.3055.2.camel@stevo-desktop> On Thu, 2007-01-11 at 09:04 +0200, Michael S. 
Tsirkin wrote: > > Subject: Re: [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling > > > > > So libibverbs 1.1 will be part of ofed 1.2? > > > > That's the goal, and I guess you're counting on it for libcxgb3.... > > BTW, you can now drop d5b9ab3d7009b77ee45e98827e803205d322ce7d > since latest version of chelsio driver does not need to pass driver data > in req notify cq anymore. > > I guess this means libcxgb3 can be made to work with libibverbs 1.0 if > desired. libcxgb3 needs other changes that are post-1.0: d1c048705a4effda43f8a74290ea3bc89870c463 11822dad4437ab580cc3d0ee99c5115482a80367 From swise at opengridcomputing.com Thu Jan 11 06:23:00 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 08:23:00 -0600 Subject: [openib-general] Reminder: OFED 1.2 coordination meeting next Monday at 9am PST In-Reply-To: <45A62A60.7000503@dev.mellanox.co.il> References: <45A62A60.7000503@dev.mellanox.co.il> Message-ID: <1168525380.3055.12.camel@stevo-desktop> Could someone email me the bridge information? I didn't see any email from Jeff. Thanks, Steve. On Thu, 2007-01-11 at 14:15 +0200, Tziporet Koren wrote: > Hi All, > After a long holidays break we are going to have our next OFED 1.2 > coordination meeting on Monday Jan-15 at 9am PST (Jeff sent bridge info) > > The only agenda item I have is reviewing components' readiness for the > end of month code freeze.
> If you have other items for the agenda please let me know > > Thanks, > Tziporet > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From jsquyres at cisco.com Thu Jan 11 06:27:13 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 11 Jan 2007 09:27:13 -0500 Subject: [openib-general] Reminder: OFED 1.2 coordination meeting next Monday at 9am PST In-Reply-To: <1168525380.3055.12.camel@stevo-desktop> References: <45A62A60.7000503@dev.mellanox.co.il> <1168525380.3055.12.camel@stevo-desktop> Message-ID: It's an e-mail from a long time ago -- the information hasn't changed (initially scheduled for Dec 4, even 2 weeks, for 10 occurrences): Scroll down to the very bottom for world-wide access numbers. ________________________________________________________________________ ______ Jeffrey Squyres has invited you to a Cisco MeetingPlace Conference Date/Time: DEC 4, 2006 at 12:00PM America/New_York Length: 60 Frequency: 10 Meeting ID: 2106670 Meeting Password: Global Access Numbers: http://cisco.com/en/US/about/doing_business/conferencing/index.html US/Canada: +1.866.432.9903 United Kingdom: +44.20.8824.0117 India: +91.80.4103.3979 Germany: +49.619.6773.9002 Japan: +81.3.5763.9394 China: +86.10.8515.5666 TO ATTEND A WEB AND VOICE CONFERENCE CISCO INTRANET ATTENDEES Join the Web & Voice Conference* 1. Go to http://meetingplaceinternal.cisco.com/join.asp?2106670 2. Enter your CEC User ID & Password then click OK - Accept any security warnings you receive and wait for the Meeting Room to initialize 3. Click on CONNECT from the Meeting Room to join the Voice Conference portion of the meeting EXTERNAL ATTENDEES - Outside the Cisco Intranet** Join the Web & Voice Conference* 1. Go to http://meetingplace.cisco.com/join.asp?2106670 2. 
Fill in the My Name is field then click Attend Meeting - If you have a CEC User ID, click on the Cisco icon - Accept any security warnings you receive and wait for the Meeting Room to initialize 3. Click on CONNECT from the Meeting Room to join the Voice Conference portion of the meeting *If this is your first time attending a Web Conference, disable any pop-up blockers and visit http://meetingplace.cisco.com/mpweb/scripts/ browsertestupper.asp to test your web browser for compatibility with the Web Conference. **Not all meetings are scheduled to allow external attendees into the Web Conference portion of the meeting, if the URL does not work, please follow the Voice only Conference instructions below to attend. TO ATTEND A VOICE ONLY CONFERENCE 1. Dial into Cisco MeetingPlace (view the Access Numbers and link above) 2. Press 1 to attend the meeting 3. Follow the prompts to enter the Meeting ID 2106670 and join the meeting SUPPORT Information about this Conference: Contact Jeffrey Squyres, 914085250971 Cisco IT Support Center: Attend the Voice Conference and then press #0 on your phone keypad GLOBAL ACCESS NUMBERS COUNTRY LOCATION LOCAL NUMBER TOLL FREE-FREEFONE AMERICAS United States East +1.919.392.3330 1.866.349.3520 West +1.408.525.6800 1.866.432.9903 Argentina Buenos Aires +54.11.4341.0101 Brazil Brasilia +55.613.424.0220 Rio de Janeiro +55.21.2483.6302 Sao Paulo +55.11.5508.6311 Canada Calgary +1.403.514.2435 Edmonton +1.780.441.3715 Halifax +1.902.474.0214 Kanata +1.613.254.0005 Markham +1.905.470.4810 Montreal +1.514.847.6875 Ottawa +1.613.788.7250 Quebec +1.418.634.5645 Regina +1.306.566.6410 Toronto +1.416.306.7230 Vancouver +1.604.647.2350 Winnipeg +1.204.336.6610 Chile Santiago +56.2.431.4936 Costa Rica San Jose +506.201.3617 Columbia Bogota +57.1.325.6065 Dominican Republic Santo Domingo +1.809.683.1222 Mexico Guadalajara +52.33.3770.1206 Mexico City +52.55.5267.1800 Monterrey +52.818.221.2462 Peru Lima +51.1.215.5101 Puerto Rico San Juan 
+1.787.620.1865 Venezuela Caracas +58.212.902.0210 EUROPE Austria Vienna +43.12.4030.6022 Belgium Diegem +32.2.704.5072 Bulgaria Sofia +359.2.937.5938 Croatia Zagreb +385.1.462.8908 Czech Republic Prague +420.22.143.5100 Denmark Aabyhoj +45.8.939.7131 Copenhagen +45.3.958.5010 Estonia Tallinn +358.204.70.6227 Finland Espoo +358.204.70.6227 France Paris +33.15.804.3116 Germany Eschborn +49.619.6773.9002 Hallbergmoos +49.811.554.3016 Greece Athens +30.210.638.1303 Hungary Budapest +36.1.225.4621 Ireland Dublin +358.1.819.2717 Israel Netanya +972.9.892.7026 Italy Rome +39.06.5164.4006 Latvia Riga +358.204.70.6227 Netherlands Amsterdam +31.20.357.1487 Norway Oslo +47.23.27.3647 Poland Warsaw +48.22.572.2615 Portugal Lisbon +351.21.446.8756 Slovakia Bratislava +421.2.5825.5309 South Africa Cape Town +27.21.413.4502 Johannesburg +27.11.267.1011 Pretoria +27.12.844.7401 Spain Barcelona +34.93.393.4037 Madrid +34.91.201.2149 Sweden Gothenburg +46.31.63.4409 Stockholm +46.8.685.9035 Switzerland Glattzentrum +41.44.878.7335 Turkey Istanbul +90.212.335.0208 United Arab Emirates (UAE) Dubai +971.4.390.7840 United Kingdom Bedfont Lakes +44.20.8824.0117 Edinburgh +44.131.561.3643 London City +44.20.7496.3743 ASIA PACIFIC Australia Melbourne +61.3.9659.4173 North Sydney +61.2.8446.5260 China Beijing +86.10.8515.5666 Shanghai +86.21.2302.4200 Hong Kong Hong Kong +852.3414.1802 India Bangalore +91.80.4103.3979 Hyderabad +91.40.4022.3450 Mumbai IL & FS +91.22.4043.4030 New Delhi +91.11.4261.1088 Indonesia Jakarta +62.21.7854.7476 Japan Tokyo Akasaka +81.3.5763.9394 South Korea Seoul Asem +82.2.3429.8102 Malaysia Kuala Lumpur +60.3.7723.8620 Penang +60.4.631.5125 New Zealand Auckland +64.9.355.1968 Wellington +64.4.496.5554 Philippines Makati (Manila) +63.2.750.5886 Singapore Singapore Capital +65.6317.7088 Taiwan Taipei +886.2.8758.7088 Thailand Bangkok +66.2.263.7008 Vietnam Hanoi +84.4.974.6250 Ho Chi Minh City +84.8.823.3418 (Saigon) 
________________________________________________________________________ _______ On Jan 11, 2007, at 9:23 AM, Steve Wise wrote: > Could someone email me the bridge information? I didn't see any email > from Jeff. > > Thanks, > > Steve. > > > On Thu, 2007-01-11 at 14:15 +0200, Tziporet Koren wrote: >> Hi All, >> After a long holidays break we are going to have our next OFED 1.2 >> coordination meeting on Monday Jan-15 at 9am PST (Jeff sent bridge >> info) >> >> The only agenda item I have is reviewing components' readiness for >> the >> end of month code freeze. >> If you have other items for the agenda please let me know >> >> Thanks, >> Tziporet >> >> _______________________________________________ >> openib-general mailing list >> openib-general at openib.org >> http://openib.org/mailman/listinfo/openib-general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/ >> openib-general >> > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From monis at voltaire.com Thu Jan 11 06:47:00 2007 From: monis at voltaire.com (Moni Shoua) Date: Thu, 11 Jan 2007 16:47:00 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: References: <45A367D0.3060206@voltaire.com> Message-ID: <45A64DE4.50707@voltaire.com> > Unless you can come up with a way that makes sure that all skbs are > completed even in low-traffic situations, I don't think this is > mergeable -- it's just too much of a usability nightmare to have a > flag that is essentially "break some workloads in a mysterious way to > make some benchmarks run a little faster." Thanks for the comment. 
My thinking on how to address this issue is: add a periodic task that checks if there are uncompleted sends beyond some threshold. If there are such, it sets a flag that causes the ipoib tx logic to enforce a signal on the next post and sends a packet which is practically a NO-OP. This packet can be for example a unicast arp (reply) with src and dst being this interface IP. From ogerlitz at voltaire.com Thu Jan 11 07:03:13 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 11 Jan 2007 17:03:13 +0200 Subject: [openib-general] [RFC] [PATCH V2 0/3] bonding support for operation over IPoIB In-Reply-To: References: Message-ID: <45A651B1.9090402@voltaire.com> Or Gerlitz wrote: > This patch series is a second version (see below link to V1) of the suggested > changes to the bonding driver such that it would be able to support non ARPHRD_ETHER > netdevices for its High-Availability (active-backup) mode. > > The motivation is to enable the bonding driver on its HA mode to work with the > IP over Infiniband (IPoIB) driver. With these patches I was able to enslave > IPoIB netdevices and run TCP, UDP, IP (UDP) Multicast and ICMP traffic with > fail-over and fail-back working fine. My working env was the net-2.6.20 git. > These patches are not enough for configuration of IPoIB bonding through tools > (eg /sbin/ifenslave and /sbin/ifup) provided by packages such as sysconfig and > initscripts, specifically since these tools sets the bonding device to be UP > before enslaving anything. Once this patchset gets positive/feedback the next step > would be to look how to enhance the tools/packages so it would be possible to > bond/enslave with the modified code. 
As suggested by the bonding maintainer, this > step can potentially involve converting ifenslave to be a script based on the bonding > sysfs infrastructure rather on the somehow obsoleted Documentation/networking/ifenslave.c Jay, I would like to move forward and push the V2 patch series upstream through netdev and then start working on the configuration tools etc changes needed to support bonding IPoIB devices through non direct bonding sysfs scripts... are you OK with that? If you agree to the push, who is doing this nowadays, is it Jeff Garzik or David Miller? Roland - any other comments/concerns that you might have are very much appreciated. Or. From rdreier at cisco.com Thu Jan 11 07:11:43 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 07:11:43 -0800 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <45A64310.504@voltaire.com> (Moni Shoua's message of "Thu, 11 Jan 2007 16:00:48 +0200") References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> <45A64310.504@voltaire.com> Message-ID: > This makes sense but I think you should also consider the fact that > the tx_lock is taken once per per tx_completion so, with the patch, > the driver spends less time under lock. I think we could get rid of the tx lock on completion entirely... I have the old patch below lying around. It no longer applies, and it needs some careful thought to make sure there are no memory ordering problems (which need to be addressed with appropriate barriers when looking at tx_head/tx_tail) --- infiniband/ulp/ipoib/ipoib_main.c (revision 7542) +++ infiniband/ulp/ipoib/ipoib_main.c (working copy) @@ -634,6 +634,14 @@ static int ipoib_start_xmit(struct sk_bu return NETDEV_TX_BUSY; } + /* + * Because tx_lock is not held when updating tx_tail in the + * send completion handler, we may receive a spurious wakeup + * that starts our queue when there really isn't space yet. 
+ */ + if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size)) + return NETDEV_TX_BUSY; + if (skb->dst && skb->dst->neighbour) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); @@ -703,6 +711,21 @@ static struct net_device_stats *ipoib_ge static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned long flags; + int lost_wakeup = 0; + + spin_lock_irqsave(&priv->tx_lock, flags); + if (netif_queue_stopped(dev) && + priv->tx_head - priv->tx_tail < ipoib_sendq_size) { + ipoib_dbg(priv, "lost wakeup, head %u, tail %u\n", + priv->tx_head, priv->tx_tail); + lost_wakeup = 1; + netif_wake_queue(dev); + } + spin_unlock_irqrestore(&priv->tx_lock, flags); + + if (lost_wakeup) + return; ipoib_warn(priv, "transmit timeout: latency %d msecs\n", jiffies_to_msecs(jiffies - dev->trans_start)); --- infiniband/ulp/ipoib/ipoib_ib.c (revision 7542) +++ infiniband/ulp/ipoib/ipoib_ib.c (working copy) @@ -244,7 +244,6 @@ static void ipoib_ib_handle_wc(struct ne } else { struct ipoib_tx_buf *tx_req; - unsigned long flags; if (wr_id >= ipoib_sendq_size) { ipoib_warn(priv, "completion event with wrid %d (> %d)\n", @@ -266,12 +265,17 @@ static void ipoib_ib_handle_wc(struct ne dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); ++priv->tx_tail; + + /* + * Since we don't hold tx_lock here, this may lead to + * both lost wakeups (which we deal with in our + * watchdog) and spurious wakeups (which we deal with + * by handling TX ring overflows in the xmit function). 
+ */ if (netif_queue_stopped(dev) && priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) From rdreier at cisco.com Thu Jan 11 07:18:32 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 07:18:32 -0800 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <20070111070417.GC24724@mellanox.co.il> (Michael S. Tsirkin's message of "Thu, 11 Jan 2007 09:04:17 +0200") References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> <1168467650.13836.7.camel@stevo-desktop> <20070111070417.GC24724@mellanox.co.il> Message-ID: > BTW, you can now drop d5b9ab3d7009b77ee45e98827e803205d322ce7d > since latest version of chelsio driver does not need to pass driver data > in req notify cq anymore. Good point -- I'll revert that since the kernel side doesn't work anyway. From rf at q-leap.de Thu Jan 11 07:26:14 2007 From: rf at q-leap.de (Roland Fehrenbacher) Date: Thu, 11 Jan 2007 16:26:14 +0100 Subject: [openib-general] Trouble with 2.6.20 and QLE7140/ipath driver Message-ID: <17830.22294.792058.979306@gargle.gargle.HOWL> Hi, trying to load the ib_ipath module with a 2.6.20-rc4 kernel, I obtain: --------------------------------------------------------------------------- [ 57.208017] PCI: Found disabled HT MSI Mapping on 0000:00:0b.0 [ 57.213886] PCI: MSI quirk detected. MSI disabled on chipset 0000:00:0b.0. [ 57.220792] PCI: Found disabled HT MSI Mapping on 0000:00:0c.0 [ 57.226655] PCI: MSI quirk detected. MSI disabled on chipset 0000:00:0c.0. [ 57.233562] PCI: Found disabled HT MSI Mapping on 0000:00:0d.0 [ 57.239427] PCI: MSI quirk detected. MSI disabled on chipset 0000:00:0d.0. [ 57.246333] PCI: Found disabled HT MSI Mapping on 0000:00:0e.0 [ 57.252198] PCI: MSI quirk detected. MSI disabled on chipset 0000:00:0e.0. 
[ 57.259300] PCI: Setting latency timer of device 0000:00:0b.0 to 64 [ 57.259318] assign_interrupt_mode Found MSI capability [ 57.264528] Allocate Port Service[0000:00:0b.0:pcie00] [ 57.264602] PCI: Setting latency timer of device 0000:00:0c.0 to 64 [ 57.264618] assign_interrupt_mode Found MSI capability [ 57.269795] Allocate Port Service[0000:00:0c.0:pcie00] [ 57.269858] PCI: Setting latency timer of device 0000:00:0d.0 to 64 [ 57.269873] assign_interrupt_mode Found MSI capability [ 57.275059] Allocate Port Service[0000:00:0d.0:pcie00] [ 57.275119] PCI: Setting latency timer of device 0000:00:0e.0 to 64 [ 57.275133] assign_interrupt_mode Found MSI capability [ 57.280310] Allocate Port Service[0000:00:0e.0:pcie00] ......... [ 63.462486] ib_ipath 0000:04:00.0: infinipath0: pci_enable_msi failed: -22, interrupts may not work [ 63.471589] ib_ipath 0000:04:00.0: infinipath0: irq is 0, BIOS error? Interrupts won't work --------------------------------------------------------------------------- and the link of the QLE7140 remains down. I looked at the Pathscale patches for kernel 2.6.16, but it seems these fixes are already integrated into 2.6.20. lspci shows that the PCIe slot supports MSI: --------------------------------------------------------------------------- 0000:04:00.0 InfiniBand: Unknown device 1fc1:0010 (rev 01) Subsystem: Unknown device 1fc1:0010 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- (Roland Fehrenbacher's message of "Thu, 11 Jan 2007 16:26:14 +0100") References: <17830.22294.792058.979306@gargle.gargle.HOWL> Message-ID: Does the patch in http://lkml.org/lkml/2007/1/5/215 help? - R. 
From swise at opengridcomputing.com Thu Jan 11 07:38:06 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 09:38:06 -0600 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> <1168467650.13836.7.camel@stevo-desktop> <20070111070417.GC24724@mellanox.co.il> Message-ID: <1168529886.3055.31.camel@stevo-desktop> On Thu, 2007-01-11 at 07:18 -0800, Roland Dreier wrote: > > BTW, you can now drop d5b9ab3d7009b77ee45e98827e803205d322ce7d > > since latest version of chelsio driver does not need to pass driver data > > in req notify cq anymore. > > Good point -- I'll revert that since the kernel side doesn't work anyway. And I'll update libcxgb3 once the libibverbs change is committed. From rdreier at cisco.com Thu Jan 11 08:03:59 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 08:03:59 -0800 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <1168529886.3055.31.camel@stevo-desktop> (Steve Wise's message of "Thu, 11 Jan 2007 09:38:06 -0600") References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> <1168467650.13836.7.camel@stevo-desktop> <20070111070417.GC24724@mellanox.co.il> <1168529886.3055.31.camel@stevo-desktop> Message-ID: OK, I pushed it out so it should be there pending mirroring delays... 
From halr at voltaire.com Thu Jan 11 08:12:23 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 11 Jan 2007 11:12:23 -0500 Subject: [openib-general] [PATCH][MINOR]OpenSM/osm_sa_path_record.c: Reorder some code in __osm_pr_rcv_get_path_parms Message-ID: <1168531940.18817.75788.camel@hal.voltaire.com> OpenSM/osm_sa_path_record.c: Reorder some code in __osm_pr_rcv_get_path_parms Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index 22cbe0c..e5e6381 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -637,10 +637,6 @@ __osm_pr_rcv_get_path_parms( if (status != IB_SUCCESS) goto Exit; - p_parms->mtu = mtu; - p_parms->rate = rate; - p_parms->pkt_life = pkt_life; - if( comp_mask & IB_PR_COMPMASK_RAWTRAFFIC && cl_ntoh32( p_pr->hop_flow_raw ) & ( 1<<31 ) ) pkey = osm_physp_find_common_pkey( p_physp, p_dest_physp ); @@ -698,6 +694,9 @@ __osm_pr_rcv_get_path_parms( goto Exit; } + p_parms->mtu = mtu; + p_parms->rate = rate; + p_parms->pkt_life = pkt_life; p_parms->pkey = pkey; p_parms->sl = sl; From swise at opengridcomputing.com Thu Jan 11 08:19:37 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 10:19:37 -0600 Subject: [openib-general] [PATCH 1/7] libibverbs: Implement new method for finding and loading device-specific drivers In-Reply-To: <20071101421.G6zer7drP3SHahok@cisco.com> References: <20071101421.G6zer7drP3SHahok@cisco.com> Message-ID: <1168532377.3055.36.camel@stevo-desktop> Roland, I'm having problems with these changes. I built and installed libibverbs, libmthca, libcxgb3, and libamso. I rebuild librdmacm, but I don't think I need to. 
Trying rping gets this: vic13:/home/swise/git/libibverbs # rping -c -VvC10 -a 192.168.71.144 -p 9999 libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 The files look like they got installed ok: vic13:/etc/sysconfig # ls -l /usr/local/lib total 14830 -rw-r--r-- 1 root root 145744 2007-01-11 10:06 libamso.a -rwxr-xr-x 1 root root 813 2007-01-11 10:06 libamso.la -rwxr-xr-x 1 root root 68436 2007-01-11 10:06 libamso-rdmav2.so lrwxrwxrwx 1 root root 17 2007-01-11 10:06 libamso.so -> libamso-rdmav2.so -rw-r--r-- 1 root root 228964 2007-01-11 10:01 libcxgb3.a -rwxr-xr-x 1 root root 820 2007-01-11 10:01 libcxgb3.la -rwxr-xr-x 1 root root 114238 2007-01-11 10:01 libcxgb3-rdmav2.so lrwxrwxrwx 1 root root 18 2007-01-11 10:01 libcxgb3.so -> libcxgb3-rdmav2.s o -rw-r--r-- 1 root root 310170 2007-01-11 10:08 libibverbs.a -rwxr-xr-x 1 root root 828 2007-01-11 10:08 libibverbs.la lrwxrwxrwx 1 root root 19 2007-01-11 10:08 libibverbs.so -> libibverbs.so.2 .0.0 lrwxrwxrwx 1 root root 19 2007-01-11 10:08 libibverbs.so.2 -> libibverbs.so .2.0.0 -rwxr-xr-x 1 root root 169793 2007-01-11 10:08 libibverbs.so.2.0.0 -rw-r--r-- 1 root root 330322 2007-01-11 10:01 libmthca.a -rwxr-xr-x 1 root root 820 2007-01-11 10:01 libmthca.la -rwxr-xr-x 1 root root 166124 2007-01-11 10:01 libmthca-rdmav2.so lrwxrwxrwx 1 root root 18 2007-01-11 10:01 libmthca.so -> libmthca-rdmav2.s o I'm debugging, but thought I'd pass this along... From mst at mellanox.co.il Thu Jan 11 08:19:38 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 11 Jan 2007 18:19:38 +0200 Subject: [openib-general] [PATCHv5] IPoIB CM Experimental support Message-ID: <20070111161938.GP1759@mellanox.co.il> The following patch adds experimental support for IPoIB connected mode. The idea is to increase performance by increasing the MTU from the maximum of 2K (theoretically 4K) supported by IPoIB on top of UD. With this code, I'm able to get 800MByte/sec or more with netperf without options on a Mellanox 4x back-to-back DDR system. Signed-off-by: Michael S. Tsirkin --- Please review, and put in -mm. This applies on top of the following patch: IB/verbs: return qp pointer as part of ib_wc qp pointer is used on receive side to detect stale passive side connections Changes from PATCHv4: - Fix TX ring full recovery when TX ring is destroyed (bug 320) Changes from PATCHv3: - Fix TX ring full recovery - Whitespace fix Changes from PATCHv2: - Using path MTU discovery, multicast and UDP traffic to UD mode now work; only a small number of packets is dropped. - Use timer to clean up stale RX connections - Make CM use same CQ IPoIB uses for UD (good for mixed UD/CM traffic and for NAPI if we ever enable it) - Tone down warning messages - only some packets are now dropped in CM/UD setup I still kept the sysfs flag to enable/disable CM - is this still a good idea, or is it better to go back to only looking at the device MTU now that multicast works? CM support is also still labeled as experimental, and I have set it to disabled by default, although it's been very stable for me, and the code is complete as far as I'm concerned. Would it be easier to merge it this way in the future? Note that the connected mode support adds very little overhead when not activated at run time, and zero data-path overhead when not activated at compile time. Here's a short description of what the patch does: a.
The code's here: git://staging.openfabrics.org/~mst/linux-2.6/.git ipoib_cm_branch This is based on 2.6.19, so ~>git diff v2.6.19..ipoib_cm_branch will show what I have done so far. b. How to activate: Server: #modprobe ib_ipoib #echo connected > /sys/class/net/ib0/mode #/sbin/ifconfig ib0 mtu 65520 #./netperf-2.4.2/src/netserver Client: #modprobe ib_ipoib #echo connected > /sys/class/net/ib0/mode #/sbin/ifconfig ib0 mtu 65520 #./netperf-2.4.2/src/netperf -H 11.4.3.68 -f M TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.4.3.68 (11.4.3.68) port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. MBytes/sec 87380 16384 16384 10.01 891.21 c. TODO list (Optional) Send side S/G support d. Limitations With MTU > 2044, UDP multicast and UDP connections to IPoIB UD mode currently will drop some packets since we sometimes get packets that are too large to send over a UD QP. Typically a single packet will be dropped every several minutes until path MTU discovery kicks in and lowers the path MTU to this destination. e. Some notes on code 1. SRQ is used for scalability to large cluster sizes 2. Only RC connections are used (UC does not support SRQ now) 3. Retry count is set to 0 since spec draft warns against retries 4. Each connection is used for data transfers in only 1 direction, so each connection is either active (TX) or passive (RX). 2 sides that want to communicate create 2 connections. 5.
Each active (TX) connection has a separate CQ for send completions - this keeps the code simple without CQ resize and other tricks diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index c75322d..0ffca11 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -8,6 +8,20 @@ config INFINIBAND_IPOIB See Documentation/infiniband/ipoib.txt for more information +config INFINIBAND_IPOIB_CM + bool "IP-over-InfiniBand Connected Mode support" + depends on INFINIBAND_IPOIB && EXPERIMENTAL + default n + ---help--- + This option enables experimental support for IPoIB connected mode. + After enabling this option, you need to switch to connected mode through + /sys/class/net/ibXXX/mode to actually create connections, and then increase + the interface MTU with e.g. ifconfig ib0 mtu 65520. + + WARNING: Enabling connected mode will trigger some + packet drops for multicast and UD mode traffic from this interface, + unless you limit mtu for these destinations to 2044. 
+ config INFINIBAND_IPOIB_DEBUG bool "IP-over-InfiniBand debugging" if EMBEDDED depends on INFINIBAND_IPOIB diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile index 8935e74..98ee38e 100644 --- a/drivers/infiniband/ulp/ipoib/Makefile +++ b/drivers/infiniband/ulp/ipoib/Makefile @@ -5,5 +5,6 @@ ib_ipoib-y := ipoib_main.o \ ipoib_multicast.o \ ipoib_verbs.o \ ipoib_vlan.o +ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 07deee8..7bac722 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -62,6 +62,10 @@ enum { IPOIB_ENCAP_LEN = 4, + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ + IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, + IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, + IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, IPOIB_RX_RING_SIZE = 128, IPOIB_TX_RING_SIZE = 64, IPOIB_MAX_QUEUE_SIZE = 8192, @@ -81,6 +85,8 @@ enum { IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, IPOIB_MCAST_STARTED = 8, + IPOIB_FLAG_NETIF_STOPPED = 9, + IPOIB_FLAG_ADMIN_CM = 10, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -90,6 +96,14 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, }; + +#define IPOIB_OP_RECV (1ul << 31) +#ifdef CONFIG_INFINIBAND_IPOIB_CM +#define IPOIB_CM_OP_SRQ (1ul << 30) +#else +#define IPOIB_CM_OP_SRQ (0) +#endif + /* structs */ struct ipoib_header { @@ -113,6 +127,61 @@ struct ipoib_tx_buf { u64 mapping; }; +#ifdef CONFIG_INFINIBAND_IPOIB_CM +struct ib_cm_id; + +struct ipoib_cm_data { + __be32 qpn; /* High byte MUST be ignored on receive */ + __be32 mtu; +}; + +struct ipoib_cm_rx { + struct ib_cm_id *id; + struct ib_qp *qp; + struct list_head list; + struct net_device *dev; + unsigned long jiffies; +}; + +struct ipoib_cm_tx { + struct ib_cm_id *id; + struct ib_cq *cq; + struct ib_qp *qp; + struct 
list_head list; + struct net_device *dev; + struct ipoib_neigh *neigh; + struct ipoib_path *path; + struct ipoib_tx_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + unsigned long flags; + u32 mtu; + struct ib_wc ibwc[IPOIB_NUM_WC]; +}; + +struct ipoib_cm_rx_buf { + struct sk_buff *skb; + u64 mapping[IPOIB_CM_RX_SG]; +}; + +struct ipoib_cm_dev_priv { + struct ib_srq *srq; + struct ipoib_cm_rx_buf *srq_ring; + struct ib_cm_id *id; + struct list_head passive_ids; + struct work_struct start_task; + struct work_struct reap_task; + struct work_struct skb_task; + struct delayed_work stale_task; + struct sk_buff_head skb_queue; + struct list_head start_list; + struct list_head reap_list; + struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_sge rx_sge[IPOIB_CM_RX_SG]; + struct ib_recv_wr rx_wr; +}; + +#endif /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). @@ -179,6 +248,10 @@ struct ipoib_dev_priv { struct list_head child_intfs; struct list_head list; +#ifdef CONFIG_INFINIBAND_IPOIB_CM + struct ipoib_cm_dev_priv cm; +#endif + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct list_head fs_list; struct dentry *mcg_dentry; @@ -212,6 +285,9 @@ struct ipoib_path { struct ipoib_neigh { struct ipoib_ah *ah; +#ifdef CONFIG_INFINIBAND_IPOIB_CM + struct ipoib_cm_tx *cm; +#endif union ib_gid dgid; struct sk_buff_head queue; @@ -315,6 +391,145 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); +#ifdef CONFIG_INFINIBAND_IPOIB_CM + +#define IPOIB_FLAGS_RC 0x80 +#define IPOIB_FLAGS_UC 0x40 + +#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC | IPOIB_FLAGS_UC)) + +static inline int ipoib_cm_admin_enabled(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return IPOIB_CM_SUPPORTED(dev->dev_addr) && + test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); +} + 
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return IPOIB_CM_SUPPORTED(n->ha) && + test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); +} + +static inline int ipoib_cm_up(struct ipoib_neigh *neigh) + +{ + return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags); +} + +static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) +{ + return neigh->cm; +} + +static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) +{ + neigh->cm = tx; +} + +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); +int ipoib_cm_dev_open(struct net_device *dev); +void ipoib_cm_dev_stop(struct net_device *dev); +int ipoib_cm_dev_init(struct net_device *dev); +int ipoib_cm_add_mode_attr(struct net_device *dev); +void ipoib_cm_dev_cleanup(struct net_device *dev); +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh); +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx); +void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu); +void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); +#else + +struct ipoib_cm_tx; + +static inline int ipoib_cm_admin_enabled(struct net_device *dev) +{ + return 0; +} +static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) + +{ + return 0; +} + +static inline int ipoib_cm_up(struct ipoib_neigh *neigh) + +{ + return 0; +} + +static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) +{ + return NULL; +} + +static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) +{ +} + +static inline +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) +{ + return; +} + +static inline +int ipoib_cm_dev_open(struct net_device *dev) +{ + return 0; +} + +static inline +void ipoib_cm_dev_stop(struct net_device *dev) +{ + 
return; +} + +static inline +int ipoib_cm_dev_init(struct net_device *dev) +{ + return -ENOSYS; +} + +static inline +void ipoib_cm_dev_cleanup(struct net_device *dev) +{ + return; +} + +static inline +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh *neigh) +{ + return NULL; +} + +static inline +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) +{ + return; +} + +static inline +int ipoib_cm_add_mode_attr(struct net_device *dev) +{ + return 0; +} + +static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu) +{ + dev_kfree_skb_any(skb); +} + +static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +{ +} + +#endif + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG void ipoib_create_debug_files(struct net_device *dev); void ipoib_delete_debug_files(struct net_device *dev); @@ -392,4 +607,6 @@ extern int ipoib_debug_level; #define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw) +#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c new file mode 100644 index 0000000..bb6a9b1 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -0,0 +1,1233 @@ +/* + * Copyright (c) 2006 Mellanox Technologies. All rights reserved + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include +#include +#include + +#ifdef CONFIG_IPV6 +#include +#endif + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA +static int data_debug_level; + +module_param_named(cm_data_debug_level, data_debug_level, int, 0644); +MODULE_PARM_DESC(cm_data_debug_level, + "Enable data path debug tracing for connected mode if > 0"); +#endif + +#include "ipoib.h" + +#define IPOIB_CM_IETF_ID 0x1000000000000000ULL + +#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) +#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ) +#define IPOIB_CM_RX_DELAY (3 * 256 * HZ) +#define IPOIB_CM_RX_UPDATE_MASK (0x3) + +struct ipoib_cm_id { + struct ib_cm_id *id; + int flags; + u32 remote_qpn; + u32 remote_mtu; +}; + +static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event); + +static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_CM_RX_SG]) +{ + int i; + + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); + + for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i) + ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); +} + +static int ipoib_cm_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int i, ret; + + 
priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ; + + for (i = 0; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; + + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[id].skb); + priv->cm.srq_ring[id].skb = NULL; + } + + return ret; +} + +static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, + u64 mapping[IPOIB_CM_RX_SG]) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + int i; + + skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12); + if (unlikely(!skb)) + return -ENOMEM; + + /* + * IPoIB adds a 4 byte header. So we need 12 more bytes to align the + * IP header to a multiple of 16. + */ + skb_reserve(skb, 12); + + mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { + dev_kfree_skb_any(skb); + return -EIO; + } + + for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) { + struct page *page = alloc_page(GFP_ATOMIC); + + if (!page) + goto partial_error; + skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); + + mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page, + 0, PAGE_SIZE, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) + goto partial_error; + } + + priv->cm.srq_ring[id].skb = skb; + return 0; + +partial_error: + + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); + + for (; i >= 0; --i) + ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); + + kfree_skb(skb); + return -ENOMEM; +} + +static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, + struct ipoib_cm_rx *p) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_init_attr attr = { + .send_cq = priv->cq, /* does not matter, we never 
send anything */ + .recv_cq = priv->cq, + .srq = priv->cm.srq, + .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */ + .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ + .sq_sig_type = IB_SIGNAL_ALL_WR, + .qp_type = IB_QPT_RC, + .qp_context = p, + }; + return ib_create_qp(priv->pd, &attr); +} + +static int ipoib_cm_modify_rx_rts(struct net_device *dev, + struct ib_cm_id *cm_id, struct ib_qp *qp) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret); + return ret; + } + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret); + return ret; + } + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); + return ret; + } + qp_attr.rq_psn = 0 /* FIXME */; + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); + return ret; + } + return 0; +} + +static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, + struct ib_qp *qp, struct ib_cm_req_event_param *req) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_data data = {}; + struct ib_cm_rep_param rep = {}; + + data.qpn = cpu_to_be32(priv->qp->qp_num); + data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); + + rep.private_data = &data; + rep.private_data_len = sizeof data; + rep.flow_control = 0; + rep.rnr_retry_count = req->rnr_retry_count; + rep.target_ack_delay = 20; /* FIXME */ + rep.srq = 1; + rep.qp_num = qp->qp_num; + rep.starting_psn = 0 /* FIXME */; + return ib_send_cm_rep(cm_id, &rep); +} + +static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct net_device *dev = 
cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx *p; + unsigned long flags; + int ret; + + ipoib_dbg(priv, "REQ arrived\n"); + p = kzalloc(sizeof *p, GFP_KERNEL); + if (!p) + return -ENOMEM; + p->dev = dev; + p->id = cm_id; + p->qp = ipoib_cm_create_rx_qp(dev, p); + if (IS_ERR(p->qp)) { + ret = PTR_ERR(p->qp); + goto err_qp; + } + + ret = ipoib_cm_modify_rx_rts(dev, cm_id, p->qp); + if (ret) + goto err_modify; + + ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd); + if (ret) { + ipoib_warn(priv, "failed to send REP: %d\n", ret); + goto err_rep; + } + + cm_id->context = p; + p->jiffies = jiffies; + spin_lock_irqsave(&priv->lock, flags); + list_add(&p->list, &priv->cm.passive_ids); + spin_unlock_irqrestore(&priv->lock, flags); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + return 0; + +err_rep: +err_modify: + ib_destroy_qp(p->qp); +err_qp: + kfree(p); + return ret; +} + +static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ipoib_cm_rx *p; + struct ipoib_dev_priv *priv; + unsigned long flags; + int ret; + + switch (event->event) { + case IB_CM_REQ_RECEIVED: + return ipoib_cm_req_handler(cm_id, event); + case IB_CM_DREQ_RECEIVED: + p = cm_id->context; + ib_send_cm_drep(cm_id, NULL, 0); + /* Fall through */ + case IB_CM_REJ_RECEIVED: + p = cm_id->context; + priv = netdev_priv(p->dev); + spin_lock_irqsave(&priv->lock, flags); + if (list_empty(&p->list)) + ret = 0; /* Connection is going away already. 
*/ + else { + list_del_init(&p->list); + ret = -ECONNRESET; + } + spin_unlock_irqrestore(&priv->lock, flags); + if (ret) { + ib_destroy_qp(p->qp); + kfree(p); + return ret; + } + return 0; + default: + return 0; + } +} +/* Adjust length of skb with fragments to match received data */ +static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, + unsigned int length) +{ + int i, num_frags; + unsigned int size; + + /* put header into skb */ + size = min(length, hdr_space); + skb->tail += size; + skb->len += size; + length -= size; + + num_frags = skb_shinfo(skb)->nr_frags; + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (length == 0) { + /* don't need this page */ + __free_page(frag->page); + --skb_shinfo(skb)->nr_frags; + } else { + size = min(length, (unsigned) PAGE_SIZE); + + frag->size = size; + skb->data_len += size; + skb->truesize += size; + skb->len += size; + length -= size; + } + } +} + +void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ; + struct sk_buff *skb; + struct ipoib_cm_rx *p; + unsigned long flags; + u64 mapping[IPOIB_CM_RX_SG]; + + ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (unlikely(wr_id >= ipoib_recvq_size)) { + ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", + wr_id, ipoib_recvq_size); + return; + } + + skb = priv->cm.srq_ring[wr_id].skb; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + ipoib_dbg(priv, "cm recv error " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + ++priv->stats.rx_dropped; + goto repost; + } + + if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) { + p = wc->qp->qp_context; + if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { + spin_lock_irqsave(&priv->lock, flags); + p->jiffies = jiffies; + /* Move this entry to list 
head, but do + * not re-add it if it has been removed. */ + if (!list_empty(&p->list)) + list_move(&p->list, &priv->cm.passive_ids); + spin_unlock_irqrestore(&priv->lock, flags); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + } + } + + if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) { + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); + ++priv->stats.rx_dropped; + goto repost; + } + + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping); + memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping); + + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", + wc->byte_len, wc->slid); + + skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len); + + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + skb->mac.raw = skb->data; + skb_pull(skb, IPOIB_ENCAP_LEN); + + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; + + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_rx_ni(skb); + +repost: + if (unlikely(ipoib_cm_post_receive(dev, wr_id))) + ipoib_warn(priv, "ipoib_cm_post_receive failed " + "for buf %d\n", wr_id); +} + +static inline int post_send(struct ipoib_dev_priv *priv, + struct ipoib_cm_tx *tx, + unsigned int wr_id, + u64 addr, int len) +{ + struct ib_send_wr *bad_wr; + + priv->tx_sge.addr = addr; + priv->tx_sge.length = len; + + priv->tx_wr.wr_id = wr_id; + + return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); +} + +void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_tx_buf *tx_req; + u64 addr; + + if (unlikely(skb->len > tx->mtu)) { + ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", + skb->len, tx->mtu); + ++priv->stats.tx_dropped; + ++priv->stats.tx_errors; + 
ipoib_cm_skb_too_long(dev, skb, tx->mtu - INFINIBAND_ALEN); + return; + } + + ipoib_dbg_data(priv, "sending packet %p, head %d length=%d connection=%p\n", + skb, tx->tx_head, skb->len, tx); + + /* + * We put the skb into the tx_ring _before_ we call post_send() + * because it's entirely possible that the completion handler will + * run before we execute anything after the post_send(). That + * means we have to make sure everything is properly recorded and + * our state is consistent before we call post_send(). + */ + tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; + tx_req->skb = skb; + addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { + ++priv->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + + tx_req->mapping = addr; + + if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), + addr, skb->len))) { + ipoib_warn(priv, "post_send failed\n"); + ++priv->stats.tx_errors; + ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + } else { + dev->trans_start = jiffies; + ++tx->tx_head; + + if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) { + ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); + netif_stop_queue(dev); + set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); + } + } +} + +static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx, + struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id; + struct ipoib_tx_buf *tx_req; + unsigned long flags; + + ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (unlikely(wr_id >= ipoib_sendq_size)) { + ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", + wr_id, ipoib_sendq_size); + return; + } + + tx_req = &tx->tx_ring[wr_id]; + + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); + + /* FIXME: is 
this right? Shouldn't we only increment on success? */ + ++priv->stats.tx_packets; + priv->stats.tx_bytes += tx_req->skb->len; + + dev_kfree_skb_any(tx_req->skb); + + spin_lock_irqsave(&priv->tx_lock, flags); + ++tx->tx_tail; + if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) && + tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) { + clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); + netif_wake_queue(dev); + } + + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) { + struct ipoib_neigh *neigh; + + ipoib_dbg(priv, "failed cm send event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + + spin_lock(&priv->lock); + neigh = tx->neigh; + + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + + tx->neigh = NULL; + } + + /* queue would be re-started anyway when TX is destroyed, + * but it makes sense to do it ASAP here. */ + if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) + netif_wake_queue(dev); + + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + } + + clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); + + spin_unlock(&priv->lock); + } + + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr) +{ + struct ipoib_cm_tx *tx = tx_ptr; + int n, i; + + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + do { + n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc); + for (i = 0; i < n; ++i) + ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i); + } while (n == IPOIB_NUM_WC); +} + +int ipoib_cm_dev_open(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int ret; + + if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) + return 0; + + priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); + if (IS_ERR(priv->cm.id)) { + printk(KERN_WARNING "%s: failed 
to create CM ID\n", priv->ca->name); + return IS_ERR(priv->cm.id); + } + + ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), + 0, NULL); + if (ret) { + printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, + IPOIB_CM_IETF_ID | priv->qp->qp_num); + ib_destroy_cm_id(priv->cm.id); + return ret; + } + return 0; +} + +void ipoib_cm_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx *p; + unsigned long flags; + + if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) + return; + + ib_destroy_cm_id(priv->cm.id); + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.passive_ids)) { + p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); + list_del_init(&p->list); + spin_unlock_irqrestore(&priv->lock, flags); + ib_destroy_cm_id(p->id); + ib_destroy_qp(p->qp); + kfree(p); + spin_lock_irqsave(&priv->lock, flags); + } + spin_unlock_irqrestore(&priv->lock, flags); + + cancel_delayed_work(&priv->cm.stale_task); +} + +static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct ipoib_cm_tx *p = cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_cm_data *data = event->private_data; + struct sk_buff_head skqueue; + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + struct sk_buff *skb; + unsigned long flags; + + p->mtu = be32_to_cpu(data->mtu); + + if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) { + ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n", + p->mtu, priv->dev->mtu); + return -EINVAL; + } + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); + return ret; + } + + qp_attr.rq_psn = 0 /* FIXME */; + ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); + return ret; + } + + 
qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); + return ret; + } + ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); + return ret; + } + + skb_queue_head_init(&skqueue); + + spin_lock_irqsave(&priv->lock, flags); + set_bit(IPOIB_FLAG_OPER_UP, &p->flags); + if (p->neigh) + while ((skb = __skb_dequeue(&p->neigh->queue))) + __skb_queue_tail(&skqueue, skb); + spin_unlock_irqrestore(&priv->lock, flags); + + while ((skb = __skb_dequeue(&skqueue))) { + skb->dev = p->dev; + if (dev_queue_xmit(skb)) + ipoib_warn(priv, "dev_queue_xmit failed " + "to requeue packet\n"); + } + + ret = ib_send_cm_rtu(cm_id, NULL, 0); + if (ret) { + ipoib_warn(priv, "failed to send RTU: %d\n", ret); + return ret; + } + return 0; +} + +static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_init_attr attr = {}; + attr.recv_cq = priv->cq; + attr.srq = priv->cm.srq; + attr.cap.max_send_wr = ipoib_sendq_size; + attr.cap.max_send_sge = 1; + attr.sq_sig_type = IB_SIGNAL_ALL_WR; + attr.qp_type = IB_QPT_RC; + attr.send_cq = cq; + return ib_create_qp(priv->pd, &attr); +} + +static int ipoib_cm_send_req(struct net_device *dev, + struct ib_cm_id *id, struct ib_qp *qp, + u32 qpn, + struct ib_sa_path_rec *pathrec) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_data data = {}; + struct ib_cm_req_param req = {}; + + data.qpn = cpu_to_be32(priv->qp->qp_num); + data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); + + req.primary_path = pathrec; + req.alternate_path = NULL; + req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); + req.qp_num = qp->qp_num; + req.qp_type = qp->qp_type; + req.private_data = &data; + req.private_data_len = sizeof data; + req.flow_control = 0; + + req.starting_psn = 0; /* FIXME */ + 
+ /* + * Pick some arbitrary defaults here; we could make these + * module parameters if anyone cared about setting them. + */ + req.responder_resources = 4; + req.remote_cm_response_timeout = 20; + req.local_cm_response_timeout = 20; + req.retry_count = 0; /* RFC draft warns against retries */ + req.rnr_retry_count = 0; /* RFC draft warns against retries */ + req.max_cm_retries = 15; + req.srq = 15; + return ib_send_cm_req(id, &req); +} + +static int ipoib_cm_modify_tx_init(struct net_device *dev, + struct ib_cm_id *cm_id, struct ib_qp *qp) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index); + if (ret) { + ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret); + return ret; + } + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr.port_num = priv->port; + qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; + + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret); + return ret; + } + return 0; +} + +static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, + struct ib_sa_path_rec *pathrec) +{ + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + int ret; + + ipoib_dbg(priv, "Request connection %p for gid " IPOIB_GID_FMT " qpn 0x%x\n", + p, IPOIB_GID_ARG(pathrec->dgid), qpn); + + p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, + GFP_KERNEL); + if (!p->tx_ring) { + ipoib_warn(priv, "failed to allocate tx ring\n"); + ret = -ENOMEM; + goto err_tx; + } + + p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p, + ipoib_sendq_size + 1); + if (IS_ERR(p->cq)) { + ret = PTR_ERR(p->cq); + ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret); + goto err_cq; + } + + ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP); + if (ret) { + ipoib_warn(priv, "failed to 
request completion notification: %d\n", ret); + goto err_req_notify; + } + + p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq); + if (IS_ERR(p->qp)) { + ret = PTR_ERR(p->qp); + ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); + goto err_qp; + } + + p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p); + if (IS_ERR(p->id)) { + ret = PTR_ERR(p->id); + ipoib_warn(priv, "failed to create tx cm id: %d\n", ret); + goto err_id; + } + + ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp); + if (ret) { + ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret); + goto err_modify; + } + + ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec); + if (ret) { + ipoib_warn(priv, "failed to send cm req: %d\n", ret); + goto err_send_cm; + } + return 0; + +err_send_cm: +err_modify: + ib_destroy_cm_id(p->id); +err_id: + p->id = NULL; + ib_destroy_qp(p->qp); +err_req_notify: +err_qp: + p->qp = NULL; + ib_destroy_cq(p->cq); +err_cq: + p->cq = NULL; +err_tx: + return ret; +} + +static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) +{ + struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_tx_buf *tx_req; + + ipoib_dbg(priv, "Destroy active connection %p. 
head 0x%x tail 0x%x\n", + p, p->tx_head, p->tx_tail); + + if (p->id) + ib_destroy_cm_id(p->id); + + if (p->qp) + ib_destroy_qp(p->qp); + + if (p->cq) + ib_destroy_cq(p->cq); + + if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags)) + netif_wake_queue(p->dev); + + if (p->tx_ring) { + while ((int) p->tx_tail - (int) p->tx_head < 0) { + tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, + DMA_TO_DEVICE); + dev_kfree_skb_any(tx_req->skb); + ++p->tx_tail; + } + + kfree(p->tx_ring); + } + + kfree(p); +} + +static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ipoib_cm_tx *tx = cm_id->context; + struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + struct net_device *dev = priv->dev; + struct ipoib_neigh *neigh; + unsigned long flags; + int ret; + + switch (event->event) { + case IB_CM_DREQ_RECEIVED: + ipoib_dbg(priv, "DREQ received.\n"); + ib_send_cm_drep(cm_id, NULL, 0); + break; + case IB_CM_REP_RECEIVED: + ipoib_dbg(priv, "REP received.\n"); + ret = ipoib_cm_rep_handler(cm_id, event); + if (ret) + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + break; + case IB_CM_REQ_ERROR: + case IB_CM_REJ_RECEIVED: + case IB_CM_TIMEWAIT_EXIT: + ipoib_dbg(priv, "CM error %d.\n", event->event); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + neigh = tx->neigh; + + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + + tx->neigh = NULL; + } + + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + } + + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + break; + default: + break; + } + + return 0; +} + +struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, + struct ipoib_neigh 
*neigh) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_tx *tx; + + tx = kzalloc(sizeof *tx, GFP_ATOMIC); + if (!tx) + return NULL; + + neigh->cm = tx; + tx->neigh = neigh; + tx->path = path; + tx->dev = dev; + list_add(&tx->list, &priv->cm.start_list); + set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); + queue_work(ipoib_workqueue, &priv->cm.start_task); + return tx; +} + +void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) +{ + struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + list_move(&tx->list, &priv->cm.reap_list); + queue_work(ipoib_workqueue, &priv->cm.reap_task); + ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(tx->neigh->dgid)); + tx->neigh = NULL; + } +} + +static void ipoib_cm_tx_start(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.start_task); + struct net_device *dev = priv->dev; + struct ipoib_neigh *neigh; + struct ipoib_cm_tx *p; + unsigned long flags; + int ret; + + struct ib_sa_path_rec pathrec; + u32 qpn; + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while (!list_empty(&priv->cm.start_list)) { + p = list_entry(priv->cm.start_list.next, typeof(*p), list); + list_del_init(&p->list); + neigh = p->neigh; + qpn = IPOIB_QPN(neigh->neighbour->ha); + memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + ret = ipoib_cm_tx_init(p, qpn, &pathrec); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + if (ret) { + neigh = p->neigh; + if (neigh) { + neigh->cm = NULL; + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + } + list_del(&p->list); + kfree(p); + } + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_tx_reap(struct work_struct *work) +{ + struct 
ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.reap_task); + struct ipoib_cm_tx *p; + unsigned long flags; + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while (!list_empty(&priv->cm.reap_list)) { + p = list_entry(priv->cm.reap_list.next, typeof(*p), list); + list_del(&p->list); + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + ipoib_cm_tx_destroy(p); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +static void ipoib_cm_skb_reap(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + cm.skb_task); + struct net_device *dev = priv->dev; + struct sk_buff *skb; + unsigned long flags; + + __be32 mtu = cpu_to_be32(priv->mcast_mtu); + + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + while ((skb = skb_dequeue(&priv->cm.skb_queue))) { + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); + if (skb->protocol == htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); +#ifdef CONFIG_IPV6 + else if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); +#endif + dev_kfree_skb_any(skb); + spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock(&priv->lock); + } + spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, + unsigned int mtu) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int e = skb_queue_empty(&priv->cm.skb_queue); + + if (skb->dst) + skb->dst->ops->update_pmtu(skb->dst, mtu); + + skb_queue_tail(&priv->cm.skb_queue, skb); + if (e) + queue_work(ipoib_workqueue, &priv->cm.skb_task); +} + +static void ipoib_cm_stale_task(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + 
cm.stale_task.work); + struct ipoib_cm_rx *p; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.passive_ids)) { + /* List if sorted by LRU, start from tail, + * stop when we see a recently used entry */ + p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); + if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) + break; + list_del_init(&p->list); + spin_unlock_irqrestore(&priv->lock, flags); + ib_destroy_cm_id(p->id); + ib_destroy_qp(p->qp); + kfree(p); + spin_lock_irqsave(&priv->lock, flags); + } + spin_unlock_irqrestore(&priv->lock, flags); +} + + +static ssize_t show_mode(struct class_device *cdev, char *buf) +{ + struct net_device *dev = container_of(cdev, struct net_device, class_dev); + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) + return sprintf(buf, "connected\n"); + else + return sprintf(buf, "datagram\n"); +} + +static ssize_t set_mode(struct class_device *cdev, + const char *buf, size_t count) +{ + struct net_device *dev = container_of(cdev, struct net_device, class_dev); + struct ipoib_dev_priv *priv = netdev_priv(dev); + + /* flush paths if we switch modes so that connections are restarted */ + if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { + set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + ipoib_warn(priv, "enabling connected mode " + "will cause multicast packet drops\n"); + ipoib_flush_paths(dev); + return count; + } + + if (!strcmp(buf, "datagram\n")) { + clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + ipoib_flush_paths(dev); + return count; + } + + return -EINVAL; +} + +static CLASS_DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode); + +int ipoib_cm_add_mode_attr(struct net_device *dev) +{ + return class_device_create_file(&dev->class_dev, &class_device_attr_mode); +} + +int ipoib_cm_dev_init(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_srq_init_attr 
srq_init_attr = { + .attr = { + .max_wr = ipoib_recvq_size, + .max_sge = IPOIB_CM_RX_SG + } + }; + int ret, i; + + INIT_LIST_HEAD(&priv->cm.passive_ids); + INIT_LIST_HEAD(&priv->cm.reap_list); + INIT_LIST_HEAD(&priv->cm.start_list); + INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); + INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); + INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); + INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); + + skb_queue_head_init(&priv->cm.skb_queue); + + priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); + if (IS_ERR(priv->cm.srq)) { + ret = PTR_ERR(priv->cm.srq); + priv->cm.srq = NULL; + return ret; + } + + priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, + GFP_KERNEL); + if (!priv->cm.srq_ring) { + printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n", + priv->ca->name, ipoib_recvq_size); + ipoib_cm_dev_cleanup(dev); + return -ENOMEM; + } + + for (i = 0; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].lkey = priv->mr->lkey; + + priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].length = PAGE_SIZE; + priv->cm.rx_wr.next = NULL; + priv->cm.rx_wr.sg_list = priv->cm.rx_sge; + priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; + + for (i = 0; i < ipoib_recvq_size; ++i) { + if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -ENOMEM; + } + if (ipoib_cm_post_receive(dev, i)) { + ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -EIO; + } + } + + priv->dev->dev_addr[0] = IPOIB_FLAGS_RC; + return 0; +} + +void ipoib_cm_dev_cleanup(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i, ret; + + if (!priv->cm.srq) + return; + + ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); + + ret = ib_destroy_srq(priv->cm.srq); + if (ret) + 
ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); + + priv->cm.srq = NULL; + if (!priv->cm.srq_ring) + return; + for (i = 0; i < ipoib_recvq_size; ++i) + if (priv->cm.srq_ring[i].skb) { + ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[i].skb); + priv->cm.srq_ring[i].skb = NULL; + } + kfree(priv->cm.srq_ring); + priv->cm.srq_ring = NULL; +} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 59d9594..f2aa923 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level, "Enable data path debug tracing if > 0"); #endif -#define IPOIB_OP_RECV (1ul << 31) - static DEFINE_MUTEX(pkey_mutex); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, @@ -268,10 +266,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) spin_lock_irqsave(&priv->tx_lock, flags); ++priv->tx_tail; - if (netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && - priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) + if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) && + priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) { + clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); netif_wake_queue(dev); + } spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && @@ -283,7 +282,9 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) { - if (wc->wr_id & IPOIB_OP_RECV) + if (wc->wr_id & IPOIB_CM_OP_SRQ) + ipoib_cm_handle_rx_wc(dev, wc); + else if (wc->wr_id & IPOIB_OP_RECV) ipoib_ib_handle_rx_wc(dev, wc); else ipoib_ib_handle_tx_wc(dev, wc); @@ -327,12 +328,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; u64 addr; - if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { + if 
(unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", - skb->len, dev->mtu + INFINIBAND_ALEN); + skb->len, priv->mcast_mtu + INFINIBAND_ALEN); ++priv->stats.tx_dropped; ++priv->stats.tx_errors; - dev_kfree_skb_any(skb); + ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu); return; } @@ -372,6 +373,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); netif_stop_queue(dev); + set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); } } } @@ -424,6 +426,13 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } + ret = ipoib_cm_dev_open(dev); + if (ret) { + ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); + ipoib_ib_dev_stop(dev); + return -1; + } + clear_bit(IPOIB_STOP_REAPER, &priv->flags); queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); @@ -509,6 +518,8 @@ int ipoib_ib_dev_stop(struct net_device *dev) clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_cm_dev_stop(dev); + /* * Move our QP to the error state and then reinitialize in * when all work requests have completed or have been flushed. diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 705eb1d..19e82db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,8 +49,6 @@ #include -#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) - MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -145,6 +143,8 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); + clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); + /* * Now flush workqueue to make sure a scheduled task doesn't * bring our internal state back up. 
@@ -178,8 +178,18 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) { struct ipoib_dev_priv *priv = netdev_priv(dev); - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + /* dev->mtu > 2K ==> connected mode */ + if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) { + if (new_mtu > priv->mcast_mtu) + ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", + priv->mcast_mtu); + dev->mtu = new_mtu; + return 0; + } + + if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) { return -EINVAL; + } priv->admin_mtu = new_mtu; @@ -414,6 +424,20 @@ static void path_rec_completion(int status, memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, sizeof(union ib_gid)); + if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (!ipoib_cm_get(neigh)) + ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, + path, + neigh)); + if (!ipoib_cm_get(neigh)) { + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + continue; + } + } + while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } @@ -520,7 +544,25 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, sizeof(union ib_gid)); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); + if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (!ipoib_cm_get(neigh)) + ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); + if (!ipoib_cm_get(neigh)) { + list_del(&neigh->list); + if (neigh->ah) + ipoib_put_ah(neigh->ah); + ipoib_neigh_free(dev, neigh); + goto err_drop; + } + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) + __skb_queue_tail(&neigh->queue, skb); + else { + ipoib_warn(priv, "queue length limit %d. 
Packet drop.\n", + skb_queue_len(&neigh->queue)); + goto err_drop; + } + } else + ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); } else { neigh->ah = NULL; @@ -538,6 +580,7 @@ err_list: err_path: ipoib_neigh_free(dev, neigh); +err_drop: ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -640,7 +683,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) neigh = *to_ipoib_neigh(skb->dst->neighbour); - if (likely(neigh->ah)) { + if (ipoib_cm_get(neigh)) { + if (ipoib_cm_up(neigh)) { + ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); + goto out; + } + } else if (neigh->ah) { if (unlikely(memcmp(&neigh->dgid.raw, skb->dst->neighbour->ha + 4, sizeof(union ib_gid)))) { @@ -805,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) neigh->neighbour = neighbour; *to_ipoib_neigh(neighbour) = neigh; skb_queue_head_init(&neigh->queue); + ipoib_cm_set(neigh, NULL); return neigh; } @@ -818,6 +867,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); } + if (ipoib_cm_get(neigh)) + ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); kfree(neigh); } @@ -1081,6 +1132,8 @@ static struct net_device *ipoib_add_port(const char *format, ipoib_create_debug_files(priv->dev); + if (ipoib_cm_add_mode_attr(priv->dev)) + goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; if (class_device_create_file(&priv->dev->class_dev, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b04b72c..fea737f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -597,7 +597,9 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - IPOIB_ENCAP_LEN; - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + + if (!ipoib_cm_admin_enabled(dev)) + dev->mtu = min(priv->mcast_mtu, 
priv->admin_mtu); ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 7b717c6..3cb551b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .qp_type = IB_QPT_UD }; + int ret, size; + priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; } - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, - ipoib_sendq_size + ipoib_recvq_size + 1); + priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(priv->mr)) { + printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); + goto out_free_pd; + } + + size = ipoib_sendq_size + ipoib_recvq_size + 1; + ret = ipoib_cm_dev_init(dev); + if (!ret) + size += ipoib_recvq_size; + + priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size); if (IS_ERR(priv->cq)) { printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); - goto out_free_pd; + goto out_free_mr; } if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) goto out_free_cq; - priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(priv->mr)) { - printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); - goto out_free_cq; - } - init_attr.send_cq = priv->cq; init_attr.recv_cq = priv->cq, priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_mr; + goto out_free_cq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) return 0; -out_free_mr: - ib_dereg_mr(priv->mr); - out_free_cq: ib_destroy_cq(priv->cq); +out_free_mr: + ib_dereg_mr(priv->mr); + out_free_pd: 
ib_dealloc_pd(priv->pd); return -ENODEV; @@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_dereg_mr(priv->mr)) - ipoib_warn(priv, "ib_dereg_mr failed\n"); - if (ib_destroy_cq(priv->cq)) ipoib_warn(priv, "ib_cq_destroy failed\n"); + ipoib_cm_dev_cleanup(dev); + + if (ib_dereg_mr(priv->mr)) + ipoib_warn(priv, "ib_dereg_mr failed\n"); + if (ib_dealloc_pd(priv->pd)) ipoib_warn(priv, "ib_dealloc_pd failed\n"); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index f887780..d9fd82d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -115,6 +115,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ipoib_create_debug_files(priv->dev); + if (ipoib_cm_add_mode_attr(priv->dev)) + goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; -- MST From kliteyn at dev.mellanox.co.il Thu Jan 11 08:24:44 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 11 Jan 2007 18:24:44 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070110231917.GD17240@sashak.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> Message-ID: <45A664CC.8050200@dev.mellanox.co.il> Hi Sasha, Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 09:47 Wed 10 Jan , Yevgeny Kliteynik wrote: >> Hi Sasha, >> >> Sasha Khapyorsky wrote: >>> Hi Yevgeny, >>> >>> On 12:05 Tue 09 Jan , Yevgeny Kliteynik wrote: >>>> Hi Sasha. >>>> Thanks for spotting this issue - I missed it when I reviewed the patch. 
>>>> Here's how things look in windows: >>>> * openlog() and closelog() are redefined to nothing in windows config header >>>> * cl_log_event() implementation on windows does all the work - gets handler >>>> to event log, logs the message, and closes the handler. >>>> It does *not* use syslog(). >>>> >>>> To use syslog/sysopen/sysclose on windows, a user has to install some >>>> package (e.g. "Interix") that provides various daemons (such as syslogd), >>>> similar to Linux/Unix: >>>> http://msdn2.microsoft.com/en-us/library/ms811897.aspx#ucmgch10_topic12 >>>> http://www.interix.com/ >>> And this is not done for WinOpenSM now. Would this be a useful option? I >>> guess this could solve a lot of compatibility problems and simplify the >>> porting dramatically, right? >> I'll address this question to the windows guys. But I seriously doubt that >> they would want to make OSM depend on some third-party product, whether it >> is commercial or free. > > I doubt it too :(. But ask them if it is not too hard. Interesting: how does > WinOSM development work? AFAIK it is open source and published on the > net. Who is the maintainer? Is there any active mailing list? If the product is commercial, then the answer is no. If it is free, then there might be some license issues that may cause problems. But theoretically, if there were some high-quality free product that would not pose any legal problems in distributing it, and if it were *really* useful (not just for logging functions), then it's possible. As for the mailing list, it's openib-windows at openib.org. You can access it here: http://openib.org/mailman/listinfo/openib-windows >>>> So replacing cl_log_event() with syslog() would definitely cause a problem. >>>> >>>> A (pretty ugly) solution can be writing a wrapper function to cl_log_event() >>>> on windows and redefining syslog() to use this wrapper. >>> Like #define syslog() cl_log_event() ?
Not so ugly (better than >>> the combination of openlog()/closelog() and "unrelated" cl_log_event()), >>> and perfectly simple IMO. >> "#define syslog() cl_log_event()" will not do the job, since these functions >> have different definitions: >> syslog(int priority, char *message, ...); >> cl_log_event(char*, cl_log_type_t, char*, void*, uint32_t); >> so there should be a wrapper just for windows that will prepare the message >> for cl_log_event() as a single string. > > Understood. > > And what is the purpose of: > > #ifdef WIN32 > OsmReportState(buffer); > #endif /* WIN32 */ > > in osm_log() (right after cl_log_event()/syslog())? It reports SM states when SM is running as a Windows service. -- Yevgeny > Sasha > From mst at mellanox.co.il Thu Jan 11 08:22:36 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 18:22:36 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> <45A64310.504@voltaire.com> Message-ID: <20070111162236.GQ1759@mellanox.co.il> > > This makes sense but I think you should also consider the fact that > > the tx_lock is taken once per tx_completion so, with the patch, > > the driver spends less time under lock. > > I think we could get rid of the tx lock on completion entirely... > > I have the old patch below lying around. It no longer applies, and it > needs some careful thought to make sure there are no memory ordering > problems (which need to be addressed with appropriate barriers when > looking at tx_head/tx_tail) netif_queue_stopped is an atomic, I think that would be sufficient, no?
-- MST From rf at q-leap.de Thu Jan 11 08:20:36 2007 From: rf at q-leap.de (Roland Fehrenbacher) Date: Thu, 11 Jan 2007 17:20:36 +0100 Subject: [openib-general] Trouble with 2.6.20 and QLE7140/ipath driver In-Reply-To: References: <17830.22294.792058.979306@gargle.gargle.HOWL> Message-ID: <17830.25556.684669.91503@gargle.gargle.HOWL> >>>>> "Roland" == Roland Dreier writes: Roland> Does the patch in http://lkml.org/lkml/2007/1/5/215 help? That was fast, and it even fixed the problem ;-). I guess the patch really should go into rc5 if still possible Thanks, Roland From mst at mellanox.co.il Thu Jan 11 08:24:15 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 18:24:15 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> Message-ID: <20070111162415.GR1759@mellanox.co.il> > > I am thinking about detecting that we are getting a completion each > > couple of microseconds, and just polling a bit more (even if CQ is empty) > > simply to avoid paying 10s micro for the next interrupt. > > This is just a fuzzy idea, but maybe this can work? > > It's a good area to look at (assuming it's worth spending yet more > time on IPoIB performance), but I think it's quite difficult to come > up with a good solution. For example search for Jamal's paper "When > NAPI Comes to Town" to see some related work that was a failure in the > end. Thanks for the link. What's a bit different here is that we mostly want to do this for TX interrupts. The article speaks about RX interrupts. -- MST From mst at mellanox.co.il Thu Jan 11 08:33:05 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 11 Jan 2007 18:33:05 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <45A64310.504@voltaire.com> References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> <45A64310.504@voltaire.com> Message-ID: <20070111163305.GT1759@mellanox.co.il> > > Thinking about this more - why does this patch help some benchmarks? > > The amount of work it takes for the hardware to generate a completion > > is likely negligeable, and we still are scanning the same amount > > of TX WRs in a loop to unmap/free them. > > This makes sense but I think you should also consider the fact that > the tx_lock is taken once per per tx_completion so, with the patch, > the driver spends less time under lock. Try removing tx_lock from completion path just for the fun of it. I think you'll find it gains you 5% tops. -- MST From dotanb at dev.mellanox.co.il Thu Jan 11 08:39:34 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Thu, 11 Jan 2007 18:39:34 +0200 Subject: [openib-general] [PATCH] [libibverbs] Adding acks to all of the CQ events in the pingpong examples Message-ID: <1168533575.1220.0.camel@mtls05.yok.mtl.com> Adding acks to all of the CQ events in the pingpong examples to show how completion events should be used. 
Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-11 13:50:34.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-11 13:53:23.000000000 +0200 @@ -621,6 +621,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "Failed to get cq_event\n"); return 1; } + ibv_ack_cq_events(ev_cq, 1); if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); Index: gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-10 17:02:24.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-11 13:53:37.000000000 +0200 @@ -709,6 +709,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "Failed to get cq_event\n"); return 1; } + ibv_ack_cq_events(ev_cq, 1); if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); Index: gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-10 17:02:24.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-11 13:53:45.000000000 +0200 @@ -609,6 +609,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "Failed to get cq_event\n"); return 1; } + ibv_ack_cq_events(ev_cq, 1); if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); Index: gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-10 17:02:24.000000000 
+0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-11 13:53:30.000000000 +0200 @@ -607,6 +607,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "Failed to get cq_event\n"); return 1; } + ibv_ack_cq_events(ev_cq, 1); if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); From swise at opengridcomputing.com Thu Jan 11 08:49:51 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 10:49:51 -0600 Subject: [openib-general] [PATCH 1/7] libibverbs: Implement new method for finding and loading device-specific drivers In-Reply-To: <1168532377.3055.36.camel@stevo-desktop> References: <20071101421.G6zer7drP3SHahok@cisco.com> <1168532377.3055.36.camel@stevo-desktop> Message-ID: <1168534191.8149.4.camel@stevo-desktop> For some reason, the config entries don't show up as type DT_REG?? Their type is 0 (DT_UNKNOWN)... 294 while ((dent = readdir(conf_dir))) { (gdb) n 295 if (dent->d_type != DT_REG) (gdb) p *dent $10 = {d_ino = 446638, d_off = 1541991424, d_reclen = 32, d_type = 0 '\0', d_name = "mthca.driver\000\b\uffff\006\000\000\000\000\000\001\uffff\uffff[\000\000\000\000 \000\000cxgb3.driver", '\0' } (gdb) n 294 while ((dent = readdir(conf_dir))) { (gdb) n 295 if (dent->d_type != DT_REG) (gdb) p *dent $11 = {d_ino = 447496, d_off = 1541991425, d_reclen = 32, d_type = 0 '\0', d_name = "cxgb3.driver", '\0' } (gdb) n 294 while ((dent = readdir(conf_dir))) { (gdb) n 301 closedir(conf_dir); (gdb) n 302 } (gdb) vic13:/home/swise/git/librdmacm # file /usr/local/etc/libibverbs.d/* /usr/local/etc/libibverbs.d/cxgb3.driver: ASCII text /usr/local/etc/libibverbs.d/mthca.driver: ASCII text vic13:/home/swise/git/librdmacm # cat /usr/local/etc/libibverbs.d/* driver cxgb3 driver mthca On Thu, 2007-01-11 at 10:19 -0600, Steve Wise wrote: > Roland, > > I'm having problems with these changes. I built and installed > libibverbs, libmthca, libcxgb3, and libamso. I rebuild librdmacm, but I > don't think I need to. 
> > Trying rping gets this: > > vic13:/home/swise/git/libibverbs # rping -c -VvC10 -a 192.168.71.144 -p 9999 > libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 > libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 > libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 > > > The files look like they got installed ok: > > vic13:/etc/sysconfig # ls -l /usr/local/lib > total 14830 > -rw-r--r-- 1 root root 145744 2007-01-11 10:06 libamso.a > -rwxr-xr-x 1 root root 813 2007-01-11 10:06 libamso.la > -rwxr-xr-x 1 root root 68436 2007-01-11 10:06 libamso-rdmav2.so > lrwxrwxrwx 1 root root 17 2007-01-11 10:06 libamso.so -> libamso-rdmav2.so > -rw-r--r-- 1 root root 228964 2007-01-11 10:01 libcxgb3.a > -rwxr-xr-x 1 root root 820 2007-01-11 10:01 libcxgb3.la > -rwxr-xr-x 1 root root 114238 2007-01-11 10:01 libcxgb3-rdmav2.so > lrwxrwxrwx 1 root root 18 2007-01-11 10:01 libcxgb3.so -> libcxgb3-rdmav2.so > -rw-r--r-- 1 root root 310170 2007-01-11 10:08 libibverbs.a > -rwxr-xr-x 1 root root 828 2007-01-11 10:08 libibverbs.la > lrwxrwxrwx 1 root root 19 2007-01-11 10:08 libibverbs.so -> libibverbs.so.2.0.0 > lrwxrwxrwx 1 root root 19 2007-01-11 10:08 libibverbs.so.2 -> libibverbs.so.2.0.0 > -rwxr-xr-x 1 root root 169793 2007-01-11 10:08 libibverbs.so.2.0.0 > -rw-r--r-- 1 root root 330322 2007-01-11 10:01 libmthca.a > -rwxr-xr-x 1 root root 820 2007-01-11 10:01 libmthca.la > -rwxr-xr-x 1 root root 166124 2007-01-11 10:01 libmthca-rdmav2.so > lrwxrwxrwx 1 root root 18 2007-01-11 10:01 libmthca.so -> libmthca-rdmav2.so > > > I'm debugging, but thought I'd pass this along... 
> > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Thu Jan 11 09:00:39 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 09:00:39 -0800 Subject: [openib-general] [PATCH 1/7] libibverbs: Implement new method for finding and loading device-specific drivers In-Reply-To: <1168534191.8149.4.camel@stevo-desktop> (Steve Wise's message of "Thu, 11 Jan 2007 10:49:51 -0600") References: <20071101421.G6zer7drP3SHahok@cisco.com> <1168532377.3055.36.camel@stevo-desktop> <1168534191.8149.4.camel@stevo-desktop> Message-ID: OK, I think I get it. Out of curiousity what filesystem is the config directory on? - R. From swise at opengridcomputing.com Thu Jan 11 09:04:58 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 11:04:58 -0600 Subject: [openib-general] [PATCH 1/7] libibverbs: Implement new method for finding and loading device-specific drivers In-Reply-To: References: <20071101421.G6zer7drP3SHahok@cisco.com> <1168532377.3055.36.camel@stevo-desktop> <1168534191.8149.4.camel@stevo-desktop> Message-ID: <1168535098.8149.8.camel@stevo-desktop> On Thu, 2007-01-11 at 09:00 -0800, Roland Dreier wrote: > OK, I think I get it. Out of curiousity what filesystem is the config > directory on? > > - R. reiserfs. SUSE 10.1. 2.6.20-rc4 + chelsio stuff. From hnguyen at linux.vnet.ibm.com Thu Jan 11 09:07:08 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 18:07:08 +0100 Subject: [openib-general] [PATCH 2.6.21 0/8] ehca: remove use of do_mmap() from kernel space and minor cleanup Message-ID: <200701111807.08593.hnguyen@linux.vnet.ibm.com> Hello Roland and Christoph H.! 
Here is a set of patches for ehca, whose main purpose is to remove unproper use of do_mmap() in ehca kernel space as suggested by Christoph H. Other "small" changes are: * Remove "dead" prototype declarations (those without code implementation) * Use SLAB_ defines instead GFP_ ones when allocating memory from slab cache Actually I should separate those patches for more clarity. Unfortunately that code cleanup above has been incorporated much earlier in our repository, and I had not paid attention on when I started to rework the mmap() stuff. Sorry for this inconvenience! Now more detail on mmap() rework: - For eHCA hardware register block we use remap_pfn_range() as previously. - For queue pages we call pattern vm_insert_page() to register each allocated kernel page. - For each mmap-ed resource (hardware register block, send/recv and completion queue) we introduce a use counter that is incremented and decremented by the call-backs open()/close(). Destroying a completion queue or queue pair will succeed only if all associated counters are zero. That means those resources must be mmap-ed resp. munmap-ed properly by user space. Thanks Nam From mshefty at ichips.intel.com Thu Jan 11 09:34:22 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 11 Jan 2007 09:34:22 -0800 Subject: [openib-general] Stale CM callbacks In-Reply-To: <45A5906F.4000107@veritas.com> References: <45A4583B.8050807@veritas.com> <45A528AF.2050105@ichips.intel.com> <45A5906F.4000107@veritas.com> Message-ID: <45A6751E.6080707@ichips.intel.com> > Our code isn't handling stale callbacks. Thanks for clarifying it. I don't believe that your code needs to do anything special. Most of this should be handled by the ib_cm. > In the above scenario the node comes back up quickly in the reset case than > the reboot case. So, i was just wondering if the extra delay in the reboot > case was causing the problem to not occur. 
In other words, does the switch > cache the reset node state and discards it after some fixed amount of time. The extra delay could result in messages timing out, which may not be the case if a fast reset occurs. > Also, should a remote node with which the reset node had established > connections call ib_destroy_cm_id() during its disconnect processing ? > Currently, our code only destroys the QPs (by calling ib_destroy_cq() and > ib_destroy_qp()). This depends on whether you want to reuse the cm_id. If not, it should be destroyed, but there shouldn't be any real harm in keeping it around. If you are seeing any issues with stale connections, please let me know. It's possible that the cm is not handling things correctly. - Sean From mst at mellanox.co.il Thu Jan 11 10:15:23 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 20:15:23 +0200 Subject: [openib-general] [PATCH 0/7] userspace verbs changes In-Reply-To: <20071101421.hNJYLZrMYBxmo1ah@cisco.com> References: <20071101421.hNJYLZrMYBxmo1ah@cisco.com> Message-ID: <20070111181523.GE17999@mellanox.co.il> > Quoting Roland Dreier : > Subject: [PATCH 0/7] userspace verbs changes > > Here is a set of patches that finally update the libibverbs driver > loading method (for the 1.1 development tree) as described before, and > fix up all the device-specific libraries to match the new libibverbs. > > I've pushed out the changes to libibverbs itself and libmthca too. We took this up to OFED. All low level driver maintainers, pls update your trees. 
-- MST From hnguyen at linux.vnet.ibm.com Thu Jan 11 10:22:27 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 19:22:27 +0100 Subject: [openib-general] [PATCH 2.6.21 0/8] ehca: remove use of do_mmap() from kernel space and minor cleanup In-Reply-To: <200701111807.08593.hnguyen@linux.vnet.ibm.com> References: <200701111807.08593.hnguyen@linux.vnet.ibm.com> Message-ID: <200701111922.27474.hnguyen@linux.vnet.ibm.com> Please ignore this. Pushed the send button to fast again. Regards Nam On Thursday 11 January 2007 18:07, Hoang-Nam Nguyen wrote: > Hello Roland and Christoph H.! > Here is a set of patches for ehca, whose main purpose is to remove unproper use of > do_mmap() in ehca kernel space as suggested by Christoph H. Other "small" changes > are: > * Remove "dead" prototype declarations (those without code implementation) > * Use SLAB_ defines instead GFP_ ones when allocating memory from slab cache > > Actually I should separate those patches for more clarity. Unfortunately that > code cleanup above has been incorporated much earlier in our repository, and > I had not paid attention on when I started to rework the mmap() stuff. Sorry > for this inconvenience! > > Now more detail on mmap() rework: > - For eHCA hardware register block we use remap_pfn_range() as previously. > - For queue pages we call pattern vm_insert_page() to register each allocated > kernel page. > - For each mmap-ed resource (hardware register block, send/recv and completion > queue) we introduce a use counter that is incremented and decremented by > the call-backs open()/close(). Destroying a completion queue or queue pair > will succeed only if all associated counters are zero. That means those resources > must be mmap-ed resp. munmap-ed properly by user space. 
> > Thanks > Nam > From mshefty at ichips.intel.com Thu Jan 11 10:38:14 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 11 Jan 2007 10:38:14 -0800 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <000001c7346b$e18d5730$81c8180a@amr.corp.intel.com> References: <000001c7346b$e18d5730$81c8180a@amr.corp.intel.com> Message-ID: <45A68416.1080707@ichips.intel.com> Sean Hefty wrote: > Adding this functionality to the existing ib_umad module would add an extra > dependency of ib_umad on the ib_sa module. Multicast join / leave operations > could be done by adding additional IOCTLs, by embedding the request as a > send_mad call, or by modifying the ib_umad send interface. Given that the ibibumad interface is intended to send and receive MADs, I would rather not abuse the interface by changing the behavior of umad_send/umad_recv. These calls map directly to ib_umad write and read. Would we be okay with extending the IOCTL interface to allow multicast joins, notice registration, and event reporting? Or would it be acceptable to change the ib_umad read/write interface to add a command? > As an alternative, a new kernel userspace SA module could be created to > explicitly interface with the kernel ib_sa. Or do people preferred this approach over changing the ib_umad interface? I'm looking for something that will be acceptable to merge upstream. - Sean From hnguyen at linux.vnet.ibm.com Thu Jan 11 11:07:33 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 20:07:33 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 0/5] ehca: remove use of do_mmap() from kernel space Message-ID: <200701112007.33854.hnguyen@linux.vnet.ibm.com> Hello Roland and Christoph H.! Here is a set of patches for ehca, whose main purpose is to remove unproper use of do_mmap() in ehca kernel space as suggested by Christoph H. 
Other "small" changes are: * Remove "dead" prototype declarations (those without code implementation) Now detail on mmap() rework: - For eHCA hardware register block we use remap_pfn_range() as previously. - For queue pages we call pattern vm_insert_page() to register each allocated kernel page. - For each mmap-ed resource (hardware register block, send/recv and completion queue) we introduce a use counter that is incremented and decremented by the call-backs open()/close(). Destroying a completion queue or queue pair will succeed only if all associated counters are zero. That means those resources must be mmap-ed and munmap-ed properly in user space. The actual calls of mmap64() and munmap() are done then in ehca user space lib, ie. libehca, for which I will send a separate patch for another review (by ofed group). Thanks Nam ehca_classes.h | 29 +----- ehca_cq.c | 65 +++----------- ehca_iverbs.h | 10 -- ehca_main.c | 6 - ehca_qp.c | 78 +++-------------- ehca_uverbs.c | 253 ++++++++++++++++++--------------------------------------- 6 files changed, 121 insertions(+), 320 deletions(-) From hnguyen at linux.vnet.ibm.com Thu Jan 11 11:07:49 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 20:07:49 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 1/5] ehca: declaration of queue structures Message-ID: <200701112007.49620.hnguyen@linux.vnet.ibm.com> Hello Roland and Christoph H.! This is a patch for ehca structs. It enhances completion queue and queue pair with use counters for associated mmap-ed resources, ie. hardware register block and queue pages. Furthermore it removes redundant prototypes. 
Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_classes.h | 29 +++++++---------------------- ehca_iverbs.h | 10 +--------- 2 files changed, 8 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1c72203..cf95ee4 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -119,13 +119,14 @@ struct ehca_qp { struct ipz_qp_handle ipz_qp_handle; struct ehca_pfqp pf; struct ib_qp_init_attr init_attr; - u64 uspace_squeue; - u64 uspace_rqueue; - u64 uspace_fwh; struct ehca_cq *send_cq; struct ehca_cq *recv_cq; unsigned int sqerr_purgeflag; struct hlist_node list_entries; + /* mmap counter for resources mapped into user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; }; /* must be power of 2 */ @@ -142,13 +143,14 @@ struct ehca_cq { struct ipz_cq_handle ipz_cq_handle; struct ehca_pfcq pf; spinlock_t cb_lock; - u64 uspace_queue; - u64 uspace_fwh; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; u32 nr_callbacks; spinlock_t task_lock; u32 ownpid; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; }; enum ehca_mr_flag { @@ -248,20 +250,6 @@ struct ehca_ucontext { struct ib_ucontext ib_ucontext; }; -struct ehca_module *ehca_module_new(void); - -int ehca_module_delete(struct ehca_module *me); - -int ehca_eq_ctor(struct ehca_eq *eq); - -int ehca_eq_dtor(struct ehca_eq *eq); - -struct ehca_shca *ehca_shca_new(void); - -int ehca_shca_delete(struct ehca_shca *me); - -struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor); - int ehca_init_pd_cache(void); void ehca_cleanup_pd_cache(void); int ehca_init_cq_cache(void); @@ -283,7 +271,6 @@ extern int ehca_port_act_time; extern int ehca_use_hp_mr; struct ipzu_queue_resp { - u64 queue; /* points to first queue entry */ u32 qe_size; /* queue entry size */ u32 act_nr_of_sg; u32 queue_length; /* queue 
length allocated in bytes */ @@ -296,7 +283,6 @@ struct ehca_create_cq_resp { u32 cq_number; u32 token; struct ipzu_queue_resp ipz_queue; - struct h_galpas galpas; }; struct ehca_create_qp_resp { @@ -309,7 +295,6 @@ struct ehca_create_qp_resp { u32 dummy; /* padding for 8 byte alignment */ struct ipzu_queue_resp ipz_squeue; struct ipzu_queue_resp ipz_rqueue; - struct h_galpas galpas; }; struct ehca_alloc_cq_parms { diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index cd7789f..85e7916 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -171,19 +171,11 @@ int ehca_mmap(struct ib_ucontext *contex void ehca_poll_eqs(unsigned long data); -int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped, - struct vm_area_struct **vma); - -int ehca_mmap_register(u64 physical,void **mapped, - struct vm_area_struct **vma); - -int ehca_munmap(unsigned long addr, size_t len); - #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) +#define ehca_alloc_fw_ctrlblock(flags) ((void*) get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif From hnguyen at linux.vnet.ibm.com Thu Jan 11 11:08:15 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 20:08:15 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 2/5] ehca: ehca_uverbs.c: "proper" use of mmap Message-ID: <200701112008.15841.hnguyen@linux.vnet.ibm.com> Hello Roland and Christoph H.! This is a patch for ehca_uverbs.c. It implements ehca-specific mmap in the following way (as recommended by Christoph H.): - Call remap_pfn_range() for hardware register block - Use vm_insert_page() to register memory allocated for completion queues and queue pairs - The actual mmap() call/trigger is now controlled by user space, ie. 
libehca, for which I will send a separate patch for later review This patch also removes superfluous resp. obsolete functions. Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_uverbs.c | 253 ++++++++++++++++++---------------------------------------- 1 files changed, 80 insertions(+), 173 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index e08764e..250eac6 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -68,105 +68,59 @@ int ehca_dealloc_ucontext(struct ib_ucon return 0; } -struct page *ehca_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static void mm_open(struct vm_area_struct *vma) { - struct page *mypage = NULL; - u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; - u32 idr_handle = fileoffset >> 32; - u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ - u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ - u32 cur_pid = current->tgid; - unsigned long flags; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ehca_pd *pd; - u64 offset; - void *vaddr; - - switch (q_type) { - case 1: /* CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq = idr_find(&ehca_cq_idr, idr_handle); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - - /* make sure this mmap really belongs to the authorized user */ - if (!cq) { - ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; - } - - if (cq->ownpid != cur_pid) { - ehca_err(cq->ib_cq.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, cq->ownpid); - return NOPAGE_SIGBUS; - } - - if (rsrc_type == 2) { - ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&cq->ipz_queue, offset); - ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); - } - break; - - case 2: /* QP */ - spin_lock_irqsave(&ehca_qp_idr_lock, flags); - qp = 
idr_find(&ehca_qp_idr, idr_handle); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); - - /* make sure this mmap really belongs to the authorized user */ - if (!qp) { - ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; - } - - pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); - if (pd->ownpid != cur_pid) { - ehca_err(qp->ib_qp.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, pd->ownpid); - return NOPAGE_SIGBUS; - } - - if (rsrc_type == 2) { /* rqueue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); - } else if (rsrc_type == 3) { /* squeue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); - } - break; - - default: - ehca_gen_err("bad queue type %x", q_type); - return NOPAGE_SIGBUS; + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} - if (!mypage) { - ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; +static void mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; } - get_page(mypage); - - return mypage; + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); } -static struct vm_operations_struct 
ehcau_vm_ops = { - .nopage = ehca_nopage, +static struct vm_operations_struct vm_ops = { + .open = mm_open, + .close = mm_close, }; +static int ehca_mmap_qpages(struct vm_area_struct *vma, struct ipz_queue *queue) +{ + u64 start, ofs; + struct page *page; + int rc = 0; + start = vma->vm_start; + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); + page = virt_to_page(virt_addr); + rc = vm_insert_page(vma, start, page); + if (unlikely(rc)) { + ehca_gen_err("vm_insert_page() failed rc=%x", rc); + return rc; + } + start += PAGE_SIZE; + } + return 0; +} + int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; @@ -204,7 +158,6 @@ int ehca_mmap(struct ib_ucontext *contex switch (rsrc_type) { case 1: /* galpa fw handle */ ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq); - vma->vm_flags |= VM_RESERVED; vsize = vma->vm_end - vma->vm_start; if (vsize != EHCA_PAGESIZE) { ehca_err(cq->ib_cq.device, "invalid vsize=%lx", @@ -214,25 +167,34 @@ int ehca_mmap(struct ib_ucontext *contex physical = cq->galpas.user.fw_handle; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; ehca_dbg(cq->ib_cq.device, "vsize=%lx physical=%lx", vsize, physical); + /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, vsize, vma->vm_page_prot); - if (ret) { + if (unlikely(ret)) { ehca_err(cq->ib_cq.device, "remap_pfn_range() failed ret=%x", ret); return -ENOMEM; } + vma->vm_private_data = &cq->mm_count_galpa; + cq->mm_count_galpa++; break; case 2: /* cq queue_addr */ ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq); vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; + ret = ehca_mmap_qpages(vma, &cq->ipz_queue); + if (unlikely(ret)) { + ehca_gen_err("ehca_mmap_qpages() failed rc=%x " + "cq_num=%x", ret, cq->cq_number); + return ret; + } + 
vma->vm_private_data = &cq->mm_count_queue; + cq->mm_count_queue++; break; default: @@ -265,7 +227,6 @@ int ehca_mmap(struct ib_ucontext *contex switch (rsrc_type) { case 1: /* galpa fw handle */ ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp); - vma->vm_flags |= VM_RESERVED; vsize = vma->vm_end - vma->vm_start; if (vsize != EHCA_PAGESIZE) { ehca_err(qp->ib_qp.device, "invalid vsize=%lx", @@ -275,31 +236,49 @@ int ehca_mmap(struct ib_ucontext *contex physical = qp->galpas.user.fw_handle; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx", vsize, physical); + /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, vsize, vma->vm_page_prot); - if (ret) { + if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "remap_pfn_range() failed ret=%x", ret); return -ENOMEM; } + vma->vm_private_data = &qp->mm_count_galpa; + qp->mm_count_galpa++; break; case 2: /* qp rqueue_addr */ ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp); vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; + ret = ehca_mmap_qpages(vma, &qp->ipz_rqueue); + if (unlikely(ret)) { + ehca_gen_err("ehca_mmap_qpages(rq) failed " + "rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + vma->vm_private_data = &qp->mm_count_rqueue; + qp->mm_count_rqueue++; break; case 3: /* qp squeue_addr */ ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp); vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; + ret = ehca_mmap_qpages(vma, &qp->ipz_squeue); + if (unlikely(ret)) { + ehca_gen_err("ehca_mmap_qpages(sq) failed " + "rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + vma->vm_private_data = &qp->mm_count_squeue; + qp->mm_count_squeue++; break; default: @@ -314,79 +293,7 @@ int ehca_mmap(struct ib_ucontext *contex return -EINVAL; } - return 0; -} - -int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped, 
- struct vm_area_struct **vma) -{ - down_write(&current->mm->mmap_sem); - *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - foffset); - up_write(&current->mm->mmap_sem); - if (!(*mapped)) { - ehca_gen_err("couldn't mmap foffset=%lx length=%lx", - foffset, length); - return -EINVAL; - } - - *vma = find_vma(current->mm, (u64)*mapped); - if (!(*vma)) { - down_write(&current->mm->mmap_sem); - do_munmap(current->mm, 0, length); - up_write(&current->mm->mmap_sem); - ehca_gen_err("couldn't find vma queue=%p", *mapped); - return -EINVAL; - } - (*vma)->vm_flags |= VM_RESERVED; - (*vma)->vm_ops = &ehcau_vm_ops; + vma->vm_ops = &vm_ops; return 0; } - -int ehca_mmap_register(u64 physical, void **mapped, - struct vm_area_struct **vma) -{ - int ret; - unsigned long vsize; - /* ehca hw supports only 4k page */ - ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma); - if (ret) { - ehca_gen_err("could'nt mmap physical=%lx", physical); - return ret; - } - - (*vma)->vm_flags |= VM_RESERVED; - vsize = (*vma)->vm_end - (*vma)->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", - (*vma)->vm_end - (*vma)->vm_start); - return -EINVAL; - } - - (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot); - (*vma)->vm_flags |= VM_IO | VM_RESERVED; - - ret = remap_pfn_range((*vma), (*vma)->vm_start, - physical >> PAGE_SHIFT, vsize, - (*vma)->vm_page_prot); - if (ret) { - ehca_gen_err("remap_pfn_range() failed ret=%x", ret); - return -ENOMEM; - } - - return 0; - -} - -int ehca_munmap(unsigned long addr, size_t len) { - int ret = 0; - struct mm_struct *mm = current->mm; - if (mm) { - down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); - up_write(&mm->mmap_sem); - } - return ret; -} From hnguyen at linux.vnet.ibm.com Thu Jan 11 11:08:36 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 20:08:36 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() Message-ID: 
<200701112008.37236.hnguyen@linux.vnet.ibm.com> Hello Roland and Christoph H.! This is a patch for ehca_cq.c. It removes all direct calls of do_mmap()/munmap() when creating and destroying a completion queue respectively. Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_cq.c | 65 +++++++++++++++----------------------------------------------- 1 files changed, 16 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 93995b6..e86585a 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -267,7 +267,6 @@ struct ib_cq *ehca_create_cq(struct ib_d if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; - struct vm_area_struct *vma; memset(&resp, 0, sizeof(resp)); resp.cq_number = my_cq->cq_number; resp.token = my_cq->token; @@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_d resp.ipz_queue.queue_length = ipz_queue->queue_length; resp.ipz_queue.pagesize = ipz_queue->pagesize; resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000, - ipz_queue->queue_length, - (void**)&resp.ipz_queue.queue, - &vma); - if (ret) { - ehca_err(device, "Could not mmap queue pages"); - cq = ERR_PTR(ret); - goto create_cq_exit4; - } - my_cq->uspace_queue = resp.ipz_queue.queue; - resp.galpas = my_cq->galpas; - ret = ehca_mmap_register(my_cq->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(device, "Could not mmap fw_handle"); - cq = ERR_PTR(ret); - goto create_cq_exit5; - } - my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); - goto create_cq_exit6; + goto create_cq_exit4; } } return cq; -create_cq_exit6: - ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - -create_cq_exit5: - ehca_munmap(my_cq->uspace_queue, 
my_cq->ipz_queue.queue_length); - create_cq_exit4: ipz_queue_dtor(&my_cq->ipz_queue); @@ -333,7 +306,6 @@ create_cq_exit1: int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; - int ret; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int cq_num = my_cq->cq_number; struct ib_device *device = cq->device; @@ -343,6 +315,20 @@ int ehca_destroy_cq(struct ib_cq *cq) u32 cur_pid = current->tgid; unsigned long flags; + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + if (my_cq->ownpid != cur_pid) { + ehca_err(device, "Invalid caller pid=%x ownpid=%x " + "cq_num=%x", + cur_pid, my_cq->ownpid, my_cq->cq_number); + return -EINVAL; + } + } + spin_lock_irqsave(&ehca_cq_idr_lock, flags); while (my_cq->nr_callbacks) yield(); @@ -350,25 +336,6 @@ int ehca_destroy_cq(struct ib_cq *cq) idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { - ehca_err(device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_cq->ownpid); - return -EINVAL; - } - - /* un-mmap if vma alloc */ - if (my_cq->uspace_queue ) { - ret = ehca_munmap(my_cq->uspace_queue, - my_cq->ipz_queue.queue_length); - if (ret) - ehca_err(device, "Could not munmap queue ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(device, "Could not munmap fwh ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - } - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ @@ -397,7 +364,7 @@ int ehca_resize_cq(struct ib_cq *cq, int struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); u32 cur_pid = current->tgid; - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + if (cq->uobject && my_cq->ownpid != cur_pid) { ehca_err(cq->device, 
"Invalid caller pid=%x ownpid=%x", cur_pid, my_cq->ownpid); return -EINVAL; From hnguyen at linux.vnet.ibm.com Thu Jan 11 11:09:08 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 20:09:08 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 4/5] ehca: queue pair: remove use of do_mmap() Message-ID: <200701112009.08712.hnguyen@linux.vnet.ibm.com> Hello Roland and Christoph H.! This is a patch for ehca_qp.c. It removes all direct calls of do_mmap()/munmap() when creating and destroying a queue pair respectively. Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_qp.c | 78 +++++++++++--------------------------------------------------- 1 files changed, 14 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 34b8555..95efef9 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -637,7 +637,6 @@ struct ib_qp *ehca_create_qp(struct ib_p struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; struct ehca_create_qp_resp resp; - struct vm_area_struct * vma; memset(&resp, 0, sizeof(resp)); resp.qp_num = my_qp->real_qp_num; @@ -651,59 +650,21 @@ struct ib_qp *ehca_create_qp(struct ib_p resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000, - ipz_rqueue->queue_length, - (void**)&resp.ipz_rqueue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap rqueue pages"); - goto create_qp_exit3; - } - my_qp->uspace_rqueue = resp.ipz_rqueue.queue; /* squeue properties */ resp.ipz_squeue.qe_size = ipz_squeue->qe_size; resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; resp.ipz_squeue.queue_length = ipz_squeue->queue_length; resp.ipz_squeue.pagesize = ipz_squeue->pagesize; resp.ipz_squeue.toggle_state = 
ipz_squeue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000, - ipz_squeue->queue_length, - (void**)&resp.ipz_squeue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap squeue pages"); - goto create_qp_exit4; - } - my_qp->uspace_squeue = resp.ipz_squeue.queue; - /* fw_handle */ - resp.galpas = my_qp->galpas; - ret = ehca_mmap_register(my_qp->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap fw_handle"); - goto create_qp_exit5; - } - my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit6; + goto create_qp_exit3; } } return &my_qp->ib_qp; -create_qp_exit6: - ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - -create_qp_exit5: - ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length); - -create_qp_exit4: - ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length); - create_qp_exit3: ipz_queue_dtor(&my_qp->ipz_rqueue); ipz_queue_dtor(&my_qp->ipz_squeue); @@ -931,7 +892,7 @@ static int internal_modify_qp(struct ib_ my_qp->qp_type == IB_QPT_SMI) && statetrans == IB_QPST_SQE2RTS) { /* mark next free wqe if kernel */ - if (my_qp->uspace_squeue == 0) { + if (!ibqp->uobject) { struct ehca_wqe *wqe; /* lock send queue */ spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); @@ -1417,11 +1378,18 @@ int ehca_destroy_qp(struct ib_qp *ibqp) enum ib_qp_type qp_type; unsigned long flags; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; + if (ibqp->uobject) { + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(ibqp->device, "Resources still referenced in " + "user space qp_num=%x", ibqp->qp_num); + return -EINVAL; + } + if 
(my_pd->ownpid != cur_pid) { + ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } } if (my_qp->send_cq) { @@ -1439,24 +1407,6 @@ int ehca_destroy_qp(struct ib_qp *ibqp) idr_remove(&ehca_qp_idr, my_qp->token); spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); - /* un-mmap if vma alloc */ - if (my_qp->uspace_rqueue) { - ret = ehca_munmap(my_qp->uspace_rqueue, - my_qp->ipz_rqueue.queue_length); - if (ret) - ehca_err(ibqp->device, "Could not munmap rqueue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_squeue, - my_qp->ipz_squeue.queue_length); - if (ret) - ehca_err(ibqp->device, "Could not munmap squeue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x", - qp_num); - } - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " From hnguyen at linux.vnet.ibm.com Thu Jan 11 11:09:30 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Thu, 11 Jan 2007 20:09:30 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 5/5] ehca: ehca_main.c: version numbering Message-ID: <200701112009.31068.hnguyen@linux.vnet.ibm.com> Hello Roland! This is a patch for ehca_main.c. It mainly updates ehca version appropriately. Also the abi_version is increased in order to distinguish this from earlier releases. 
Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_main.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 6574fbb..839beaa 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ #include "hcp_if.h" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0019"); +MODULE_VERSION("SVNEHCA_0020"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -288,7 +287,7 @@ int ehca_init_device(struct ehca_shca *s strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 5; + shca->ib_device.uverbs_abi_ver = 6; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -790,7 +789,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0019)\n"); + "(Rel.: SVNEHCA_0020)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); From rdreier at cisco.com Thu Jan 11 11:14:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 11:14:30 -0800 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: <20070111162236.GQ1759@mellanox.co.il> (Michael S. Tsirkin's message of "Thu, 11 Jan 2007 18:22:36 +0200") References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> <45A64310.504@voltaire.com> <20070111162236.GQ1759@mellanox.co.il> Message-ID: > netif_queue_stopped is an atomic, I think that would be sufficient, no? netif_queue_stopped does test_bit(), which is an implicit memory barrier. 
But I think it's pretty subtle -- you have to look at every place tx_head and tx_tail are updated and decide if there needs to be a corresponding barrier. For example, in: + if (netif_queue_stopped(dev) && + priv->tx_head - priv->tx_tail < ipoib_sendq_size) { there is a barrier in netif_queue_stopped() but no control in what order tx_head and tx_tail are read... - R. From rdreier at cisco.com Thu Jan 11 11:15:55 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 11:15:55 -0800 Subject: [openib-general] [PATCH] [libibverbs] Adding acks to all of the CQ events in the pingpong examples In-Reply-To: <1168533575.1220.0.camel@mtls05.yok.mtl.com> (Dotan Barak's message of "Thu, 11 Jan 2007 18:39:34 +0200") References: <1168533575.1220.0.camel@mtls05.yok.mtl.com> Message-ID: OK, this is correct -- but since the examples don't destroy the CQ, is there any point in acking the events? - R. From hch at infradead.org Thu Jan 11 11:14:25 2007 From: hch at infradead.org (Christoph Hellwig) Date: Thu, 11 Jan 2007 19:14:25 +0000 Subject: [openib-general] [PATCH/RFC 2.6.21 1/5] ehca: declaration of queue structures In-Reply-To: <200701112007.49620.hnguyen@linux.vnet.ibm.com> References: <200701112007.49620.hnguyen@linux.vnet.ibm.com> Message-ID: <20070111191425.GA24623@infradead.org> On Thu, Jan 11, 2007 at 08:07:49PM +0100, Hoang-Nam Nguyen wrote: > -#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) > +#define ehca_alloc_fw_ctrlblock(flags) ((void*) get_zeroed_page(flags)) This indentation change moves away from the preferred form. Except for that the patch looks fine. 
From rdreier at cisco.com Thu Jan 11 11:17:21 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 11:17:21 -0800 Subject: [openib-general] [PATCH/RFC 2.6.21 1/5] ehca: declaration of queue structures In-Reply-To: <20070111191425.GA24623@infradead.org> (Christoph Hellwig's message of "Thu, 11 Jan 2007 19:14:25 +0000") References: <200701112007.49620.hnguyen@linux.vnet.ibm.com> <20070111191425.GA24623@infradead.org> Message-ID: > This indentation changes moves away from the preffered form. I will fix when I merge it -- no need to resend. > Except for that the patch looks fine. Christoph, did you look over all 5 or just this one so far? Thanks, Roland From hch at infradead.org Thu Jan 11 11:20:56 2007 From: hch at infradead.org (Christoph Hellwig) Date: Thu, 11 Jan 2007 19:20:56 +0000 Subject: [openib-general] [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() In-Reply-To: <200701112008.37236.hnguyen@linux.vnet.ibm.com> References: <200701112008.37236.hnguyen@linux.vnet.ibm.com> Message-ID: <20070111192056.GB24623@infradead.org> On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote: > Hello Roland and Christoph H.! > This is a patch for ehca_cq.c. It removes all direct calls of do_mmap()/munmap() > when creating and destroying a completion queue respectively. > Thanks > Nam This patch looks good, but there are some issues with the surrounding code: > + if (my_cq->ownpid != cur_pid) { > + ehca_err(device, "Invalid caller pid=%x ownpid=%x " > + "cq_num=%x", > + cur_pid, my_cq->ownpid, my_cq->cq_number); > + return -EINVAL; > + } (for other reviewers: this is not new code, just moved around) Owner tracking by pid is really dangerous. File descriptors can be passed around by unix sockets, a single process can have files open more than once, etc.. It seems ehca wants to prevent threads other than the creating one from performing most operations. Can you explain the reason for this? 
> spin_lock_irqsave(&ehca_cq_idr_lock, flags); > while (my_cq->nr_callbacks) > yield(); Calling yield is a very bad idea in general. You should probably add a waitqueue that gets woken when nr_callbacks reaches zero to sleep effectively here. From hch at infradead.org Thu Jan 11 11:21:24 2007 From: hch at infradead.org (Christoph Hellwig) Date: Thu, 11 Jan 2007 19:21:24 +0000 Subject: [openib-general] [PATCH/RFC 2.6.21 4/5] ehca: queue pair: remove use of do_mmap() In-Reply-To: <200701112009.08712.hnguyen@linux.vnet.ibm.com> References: <200701112009.08712.hnguyen@linux.vnet.ibm.com> Message-ID: <20070111192124.GC24623@infradead.org> On Thu, Jan 11, 2007 at 08:09:08PM +0100, Hoang-Nam Nguyen wrote: > Hello Roland and Christoph H.! > This is a patch for ehca_qp.c. It removes all direct calls of do_mmap()/munmap() > when creating and destroying a queue pair respectively. Looks good. From hch at infradead.org Thu Jan 11 11:22:29 2007 From: hch at infradead.org (Christoph Hellwig) Date: Thu, 11 Jan 2007 19:22:29 +0000 Subject: [openib-general] [PATCH/RFC 2.6.21 1/5] ehca: declaration of queue structures In-Reply-To: References: <200701112007.49620.hnguyen@linux.vnet.ibm.com> <20070111191425.GA24623@infradead.org> Message-ID: <20070111192229.GD24623@infradead.org> On Thu, Jan 11, 2007 at 11:17:21AM -0800, Roland Dreier wrote: > > This indentation changes moves away from the preffered form. > > I will fix when I merge it -- no need to resend. > > > Except for that the patch looks fine. > > Christoph, did you look over all 5 or just this one so far? I've looked over all briefly, but I need a few more minutes to understand everything that's going on in patch 2. 
From ntl at pobox.com Thu Jan 11 11:40:54 2007 From: ntl at pobox.com (Nathan Lynch) Date: Thu, 11 Jan 2007 13:40:54 -0600 Subject: [openib-general] [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() In-Reply-To: <20070111192056.GB24623@infradead.org> References: <200701112008.37236.hnguyen@linux.vnet.ibm.com> <20070111192056.GB24623@infradead.org> Message-ID: <20070111194054.GA11770@localdomain> Christoph Hellwig wrote: > On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote: > > > spin_lock_irqsave(&ehca_cq_idr_lock, flags); > > while (my_cq->nr_callbacks) > > yield(); > > Calling yield is a very bad idea in general. You should probably > add a waitqueue that gets woken when nr_callbacks reaches zero to > sleep effectively here. Isn't that code outright buggy? Calling into the scheduler with a spinlock held and local interrupts disabled... From rdreier at cisco.com Thu Jan 11 11:41:18 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 11:41:18 -0800 Subject: [openib-general] patch for -stable In-Reply-To: <20070111124242.GI1759@mellanox.co.il> (Michael S. Tsirkin's message of "Thu, 11 Jan 2007 14:42:42 +0200") References: <20070111124242.GI1759@mellanox.co.il> Message-ID: > Roland, I think commit 39798695b4bcc7b145f8910ca56195808d3a7637 > shoul go into -stable. It fixes crash triggered by ipoib_ha script > when bringing interface up/down in a loop. makes sense -- will do. 
From hch at infradead.org Thu Jan 11 11:40:00 2007 From: hch at infradead.org (Christoph Hellwig) Date: Thu, 11 Jan 2007 19:40:00 +0000 Subject: [openib-general] [PATCH/RFC 2.6.21 2/5] ehca: ehca_uverbs.c: "proper" use of mmap In-Reply-To: <200701112008.15841.hnguyen@linux.vnet.ibm.com> References: <200701112008.15841.hnguyen@linux.vnet.ibm.com> Message-ID: <20070111194000.GE24623@infradead.org> On Thu, Jan 11, 2007 at 08:08:15PM +0100, Hoang-Nam Nguyen wrote: > +static void mm_open(struct vm_area_struct *vma) This should be named ehca_vma_open, ditto for mm_close/ehca_vma_close and vm_ops/ehca_vm_ops. > + u32 *count = (u32*)vma->vm_private_data; No need for the cast here (both in the open and close routine) > + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { > + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); > + page = virt_to_page(virt_addr); > + rc = vm_insert_page(vma, start, page); > + if (unlikely(rc)) { > + ehca_gen_err("vm_insert_page() failed rc=%x", rc); > + return rc; > + } > + start += PAGE_SIZE; Not required for now, but long term you really should rework your whole queue abstraction to operate on an array of struct pages, that makes things like this and various other bits in ipz_pt_fn.[ch] a lot simpler. > int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) > { Can you split this monster routine into individual functions for each type of mmap please? With two helpers to get and verify the cq/qp shared by the individual sub-variants, that would also help to get rid of all those magic offsets. Actually, this routine directly comes from ib_device.mmap - Roland, can you shed some light on what's going on here? Also after applying this patch I have a prototype and various callers for ehca_mmap_nopage but no actual implementation. Could it be that there are some bits missing? 
From hch at infradead.org Thu Jan 11 11:43:46 2007 From: hch at infradead.org (Christoph Hellwig) Date: Thu, 11 Jan 2007 19:43:46 +0000 Subject: [openib-general] [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() In-Reply-To: <20070111194054.GA11770@localdomain> References: <200701112008.37236.hnguyen@linux.vnet.ibm.com> <20070111192056.GB24623@infradead.org> <20070111194054.GA11770@localdomain> Message-ID: <20070111194346.GA29607@infradead.org> On Thu, Jan 11, 2007 at 01:40:54PM -0600, Nathan Lynch wrote: > Christoph Hellwig wrote: > > On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote: > > > > > spin_lock_irqsave(&ehca_cq_idr_lock, flags); > > > while (my_cq->nr_callbacks) > > > yield(); > > > > Calling yield is a very bad idea in general. You should probably > > add a waitqueue that gets woken when nr_callbacks reaches zero to > > sleep effectively here. > > Isn't that code outright buggy? Calling into the scheduler with a > spinlock held and local interrupts disabled... Umm, yes - of course. I missed the spin_lock_irqsave line just above. From rdreier at cisco.com Thu Jan 11 11:42:49 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 11:42:49 -0800 Subject: [openib-general] [PATCH] IB/mad: Fix race between cancel and receive completion Message-ID: When ib_cancel_mad() is called, it puts the canceled send on a list and schedules a "flushed" callback from process context. However, this leaves a window where a receive completion could be processed before the send is fully flushed. This is fine, except that ib_find_send_mad() will find the MAD and return it to the receive processing, which results in the sender getting both a successful receive and a "flushed" send completion for the same request. Understandably, this confuses the sender, which is expecting only one of these two callbacks, and leads to grief such as a use-after-free in IPoIB. 
Fix this by changing ib_find_send_mad() to return a send struct only if the status is still successful (and not "flushed"). The search of the send_list already had this check, so this patch just adds the same check to the search of the wait_list. Signed-off-by: Roland Dreier --- This fixes a crash seen in production when switching between IPoIB interfaces in a HA setup. drivers/infiniband/core/mad.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 493f4c6..a72bcea 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1750,7 +1750,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, */ (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) - return wr; + return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } /* -- 1.4.4.1 From rdreier at cisco.com Thu Jan 11 11:54:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 11:54:58 -0800 Subject: [openib-general] [PATCH/RFC 2.6.21 2/5] ehca: ehca_uverbs.c: "proper" use of mmap In-Reply-To: <20070111194000.GE24623@infradead.org> (Christoph Hellwig's message of "Thu, 11 Jan 2007 19:40:00 +0000") References: <200701112008.15841.hnguyen@linux.vnet.ibm.com> <20070111194000.GE24623@infradead.org> Message-ID: > > int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) > > { > > Can you split this monster routine into individual functions for > each type of mmap please? With two helpers to get and verify the cq/qp > shared by the individual sub-variants, that would also help to get rid > of all those magic offsets. > > Actually, this routine directly comes from ib_device.mmap - Roland, > can you shed some light on what's going on here? Each userspace-accessible IB device gets a single device node like /dev/infiniband/uverbsX. Opening that gives userspace a "context". 
One of the things userspace can do with that fd is mmap() on it -- that was originally envisioned as a way to map a page of hardware registers directly in to the userspace process. It seems ehca needs to allocate lots of different things in the kernel via mmap(). What you're saying I guess is that ideally each of these would be mmap() on a different fd rather than using different offsets. It's a little awkward to open multiple device nodes to get multiple fds, since there's not a good way to attach them all to the same context. I guess we could create some hack to return more file handles, but I think that cure is worse than the disease of using magic offsets... Maybe longer term we need to look at a scheme like cell's spufs but I'm still not confident we have the RDMA interface quite ready to freeze at the system call level. - R. From rdreier at cisco.com Thu Jan 11 11:56:18 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 11:56:18 -0800 Subject: [openib-general] [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() In-Reply-To: <20070111194054.GA11770@localdomain> (Nathan Lynch's message of "Thu, 11 Jan 2007 13:40:54 -0600") References: <200701112008.37236.hnguyen@linux.vnet.ibm.com> <20070111192056.GB24623@infradead.org> <20070111194054.GA11770@localdomain> Message-ID: > > spin_lock_irqsave(&ehca_cq_idr_lock, flags); > > while (my_cq->nr_callbacks) > > yield(); > Isn't that code outright buggy? Calling into the scheduler with a > spinlock held and local interrupts disabled... Yes, absolutely -- if nr_callbacks is ever nonzero then this will obviously crash instantly. - R. From mst at mellanox.co.il Thu Jan 11 12:55:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 11 Jan 2007 22:55:30 +0200 Subject: [openib-general] [PATCH/RFC] IB/ipoib: add selective tx signaling In-Reply-To: References: <45A367D0.3060206@voltaire.com> <20070110183916.GG18953@mellanox.co.il> <45A64310.504@voltaire.com> <20070111162236.GQ1759@mellanox.co.il> Message-ID: <20070111205530.GF17999@mellanox.co.il> > Subject: Re: [PATCH/RFC] IB/ipoib: add selective tx signaling > > > netif_queue_stopped is an atomic, I think that would be sufficient, no? > > netif_queue_stopped does test_bit(), which is an implicit memory > barrier. But I think it's pretty subtle -- you have to look at every > place tx_head and tx_tail are updated and decide if there needs to be > a corresponding barrier. I certainly agree it is subtle. > For example, in: > > + if (netif_queue_stopped(dev) && > + priv->tx_head - priv->tx_tail < ipoib_sendq_size) { > > there is a barrier in netif_queue_stopped() but no control in what > order tx_head and tx_tail are read... In this case, I think it's fine since tx_tail is only updated in the completion handler. -- MST From rdreier at cisco.com Thu Jan 11 13:42:12 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 13:42:12 -0800 Subject: [openib-general] [PATCHv5] IPoIB CM Experimental support In-Reply-To: <20070111161938.GP1759@mellanox.co.il> (Michael S. Tsirkin's message of "Thu, 11 Jan 2007 18:19:38 +0200") References: <20070111161938.GP1759@mellanox.co.il> Message-ID: OK, I pulled this into my tree, but I had to fix this up: Space in indent is followed by a tab. /users/rdreier/ipoib-cm.txt:316: test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); Space in indent is followed by a tab. /users/rdreier/ipoib-cm.txt:323: test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); Adds trailing whitespace. /users/rdreier/ipoib-cm.txt:398: return; Space in indent is followed by a tab. /users/rdreier/ipoib-cm.txt:752: ret = 0; /* Connection is going away already. */ Space in indent is followed by a tab. 
/users/rdreier/ipoib-cm.txt:1369: neigh = tx->neigh; Space in indent is followed by a tab. /users/rdreier/ipoib-cm.txt:1402: tx = kzalloc(sizeof *tx, GFP_ATOMIC); fatal: 6 lines add trailing whitespaces. stg import: Patch does not apply cleanly please fix in future revisions. From swise at opengridcomputing.com Thu Jan 11 13:47:47 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 15:47:47 -0600 Subject: [openib-general] [PATCHv5] IPoIB CM Experimental support In-Reply-To: References: <20070111161938.GP1759@mellanox.co.il> Message-ID: <1168552067.8149.43.camel@stevo-desktop> What's the easy way to remove trailing spaces? I seem to fat-finger them into my patches too. You would think since its such a fatal error, that the stg would have an option to nuke them when importing. ;-) On Thu, 2007-01-11 at 13:42 -0800, Roland Dreier wrote: > OK, I pulled this into my tree, but I had to fix this up: > > Space in indent is followed by a tab. > /users/rdreier/ipoib-cm.txt:316: test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); > Space in indent is followed by a tab. > /users/rdreier/ipoib-cm.txt:323: test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); > Adds trailing whitespace. > /users/rdreier/ipoib-cm.txt:398: return; > Space in indent is followed by a tab. > /users/rdreier/ipoib-cm.txt:752: ret = 0; /* Connection is going away already. */ > Space in indent is followed by a tab. > /users/rdreier/ipoib-cm.txt:1369: neigh = tx->neigh; > Space in indent is followed by a tab. > /users/rdreier/ipoib-cm.txt:1402: tx = kzalloc(sizeof *tx, GFP_ATOMIC); > fatal: 6 lines add trailing whitespaces. > stg import: Patch does not apply cleanly > > please fix in future revisions. 
> > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Thu Jan 11 13:51:48 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 13:51:48 -0800 Subject: [openib-general] [PATCHv5] IPoIB CM Experimental support In-Reply-To: <1168552067.8149.43.camel@stevo-desktop> (Steve Wise's message of "Thu, 11 Jan 2007 15:47:47 -0600") References: <20070111161938.GP1759@mellanox.co.il> <1168552067.8149.43.camel@stevo-desktop> Message-ID: > You would think since its such a fatal error, that the stg would have an > option to nuke them when importing. ;-) Not sure about stg import but git-apply has "--whitespace=strip" From rdreier at cisco.com Thu Jan 11 13:50:49 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 13:50:49 -0800 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: <20070110181158.GD18953@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 10 Jan 2007 20:11:58 +0200") References: <20070110181158.GD18953@mellanox.co.il> Message-ID: OK, I pushed out a new tree with both IPoIB NAPI and CM in the for-mm branch. From mst at mellanox.co.il Thu Jan 11 13:51:37 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 11 Jan 2007 23:51:37 +0200 Subject: [openib-general] [PATCHv5] IPoIB CM Experimental support In-Reply-To: References: Message-ID: <20070111215137.GJ17999@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCHv5] IPoIB CM Experimental support > > OK, I pulled this into my tree, but I had to fix this up: Sorry. BTW, git has some flag to check whitespace, does it not? -- MST From mst at mellanox.co.il Thu Jan 11 14:00:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Fri, 12 Jan 2007 00:00:30 +0200 Subject: [openib-general] [PATCHv5] IPoIB CM Experimental support In-Reply-To: References: Message-ID: <20070111220030.GL17999@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [openib-general] [PATCHv5] IPoIB CM Experimental support > > > You would think since its such a fatal error, that the stg would have an > > option to nuke them when importing. ;-) > > Not sure about stg import but git-apply has "--whitespace=strip" Actually, git commit is supposed to verify commits for whitespace. the man page says: --no-verify By default, the command looks for suspicious lines the commit introduces, and aborts committing if there is one. The definition of suspicious lines is currently the lines that has trailing whitespaces, and the lines whose indentation has a SP character immediately followed by a TAB character. This option turns off the check. So since I develop everything under git, I am really surprised I did not get this warning. -- MST From swise at opengridcomputing.com Thu Jan 11 14:14:52 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 16:14:52 -0600 Subject: [openib-general] [PATCH v2 0/2] ofed_1_2 - Chelsio T3 RDMA Support Message-ID: <20070111221452.9236.800.stgit@dell3.ogc.int> Michael/Vlad, Here is version 2 of the patch series for adding the Chelsio kernel drivers. Changes since version 1 / RFC: - removed core changes - no longer needed - only build drivers/net/cxgb3 instead of all of drivers/net - made the qpnum->qpptr change for T3 as its own patch file In addition to this patch series, the Chelsio driver code needs to be pulled from git://staging.openfabrics.org/~swise/cxgb3.git for-ofed_1_2 I've tested this on 2.6.20-rc4. I ask that you pull these patches in and pull the T3 drivers into vlad's ofed_1_2 git tree. That will give us a base package for Chelsio's drivers. Then I'll post additional patches for the backports as I get them implemented and tested. 
How's that sound? If you would rather a patch for the chelsio drivers, lemme know and I'll email it to you directly. Signed-off-by: Steve Wise From swise at opengridcomputing.com Thu Jan 11 14:14:56 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 16:14:56 -0600 Subject: [openib-general] [PATCH v2 2/2] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <20070111221452.9236.800.stgit@dell3.ogc.int> References: <20070111221452.9236.800.stgit@dell3.ogc.int> Message-ID: <20070111221456.9236.16285.stgit@dell3.ogc.int> - added cxgb3 and iw_cxgb3 config stuff - visit and build driver/net/cxgb3 to get the cxgb3 driver Signed-off-by: Steve Wise --- ofed_scripts/Makefile | 9 +++++++-- ofed_scripts/configure | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile index d63b1d2..8942385 100644 --- a/ofed_scripts/Makefile +++ b/ofed_scripts/Makefile @@ -46,8 +46,10 @@ kernel: @echo "Kernel sources: $(KSRC)" env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ -I$(CWD)/drivers/infiniband/ulp/ipoib \ - -I$(CWD)/drivers/infiniband/debug" \ - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ + -I$(CWD)/drivers/infiniband/debug \ + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ + -I$(CWD)/drivers/net/cxgb3 " \ + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ @@ -74,6 +76,9 @@ kernel: CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ + 
CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ LINUXINCLUDE=' \ $(BACKPORT_INCLUDES) \ -I$(CWD)/include \ diff --git a/ofed_scripts/configure b/ofed_scripts/configure index a0557e2..151b154 100755 --- a/ofed_scripts/configure +++ b/ofed_scripts/configure @@ -126,6 +126,12 @@ Usage: `basename $0` [options] --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] --without-vnic_stats-mod [yes] + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] + --without-cxgb3-mod [yes] + + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] + --without-cxgb3_debug-mod [yes] + --help - print out options @@ -607,6 +613,20 @@ main() --without-vnic_stats-mod) CONFIG_INFINIBAND_VNIC_STATS= ;; + --with-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3="m" + CONFIG_CHELSIO_T3="m" + ;; + --without-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3= + CONFIG_CHELSIO_T3= + ;; + --with-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG="y" + ;; + --without-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG= + ;; --with-modprobe|--without-modprobe) ;; -h | --help) @@ -679,6 +699,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} @@ -689,6 +711,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} # Check for minimal supported kernel version if ! 
check_kerver ${KVERSION} ${MIN_KVERSION}; then @@ -742,6 +765,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} @@ -752,6 +777,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} EOFCONFIG echo "Created ${CONFIG}:" @@ -887,6 +913,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" else DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" fi +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" +else + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" +fi +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" +else + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" +fi +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" +else + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" +fi cat >> ${AUTOCONF_H} << EOFAUTOCONF #undef CONFIG_INFINIBAND #undef CONFIG_INFINIBAND_IPOIB @@ -908,6 +949,9 @@ #undef CONFIG_INFINIBAND_MADEYE #undef CONFIG_INFINIBAND_VNIC #undef CONFIG_INFINIBAND_VNIC_DEBUG #undef CONFIG_INFINIBAND_VNIC_STATS +#undef CONFIG_INFINIBAND_CXGB3 +#undef CONFIG_INFINIBAND_CXGB3_DEBUG +#undef CONFIG_CHELSIO_T3 #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY @@ -927,6 +971,8 @@ #undef 
CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_ADDR_TRANS} ${DEFINE_INFINIBAND_MTHCA} ${DEFINE_INFINIBAND_VNIC} +${DEFINE_INFINIBAND_CXGB3} +${DEFINE_CHELSIO_T3} ${DEFINE_INFINIBAND_IPOIB_DEBUG} ${DEFINE_INFINIBAND_ISER} @@ -937,6 +983,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_RDS_DEBUG} ${DEFINE_INFINIBAND_VNIC_DEBUG} ${DEFINE_INFINIBAND_VNIC_STATS} +${DEFINE_INFINIBAND_CXGB3_DEBUG} ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} From swise at opengridcomputing.com Thu Jan 11 14:14:54 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 16:14:54 -0600 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070111221452.9236.800.stgit@dell3.ogc.int> References: <20070111221452.9236.800.stgit@dell3.ogc.int> Message-ID: <20070111221454.9236.80109.stgit@dell3.ogc.int> - genalloc backport. - qp_num -> qp ptr patch for cxgb3. Signed-off-by: Steve Wise --- kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch | 23 + kernel_patches/fixes/genalloc.patch | 392 +++++++++++++++++++++++++ 2 files changed, 415 insertions(+), 0 deletions(-) diff --git a/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch new file mode 100644 index 0000000..0763f70 --- /dev/null +++ b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch @@ -0,0 +1,23 @@ +Update T3 driver: qp_num no longer in ib_wc. 
+ +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/iwch_cq.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c +index ff09509..122f7b4 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c +@@ -80,7 +80,7 @@ int iwch_poll_cq_one(struct iwch_dev *rh + ret = 1; + + wc->wr_id = cookie; +- wc->qp_num = qhp->wq.qpid; ++ wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " diff --git a/kernel_patches/fixes/genalloc.patch b/kernel_patches/fixes/genalloc.patch new file mode 100644 index 0000000..c44a98f --- /dev/null +++ b/kernel_patches/fixes/genalloc.patch @@ -0,0 +1,392 @@ +Backport of the Linux 2.6.20 generic allocator. + +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/Kconfig | 1 + drivers/infiniband/hw/cxgb3/Makefile | 3 + drivers/infiniband/hw/cxgb3/core/cxio_hal.h | 4 + drivers/infiniband/hw/cxgb3/core/cxio_resource.c | 20 +- + drivers/infiniband/hw/cxgb3/core/cxio_resource.h | 2 + drivers/infiniband/hw/cxgb3/core/genalloc.c | 196 ++++++++++++++++++++++ + drivers/infiniband/hw/cxgb3/core/genalloc.h | 36 ++++ + 7 files changed, 247 insertions(+), 15 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig +index d3db264..0361a72 100644 +--- a/drivers/infiniband/hw/cxgb3/Kconfig ++++ b/drivers/infiniband/hw/cxgb3/Kconfig +@@ -1,7 +1,6 @@ + config INFINIBAND_CXGB3 + tristate "Chelsio RDMA Driver" + depends on CHELSIO_T3 && INFINIBAND +- select GENERIC_ALLOCATOR + ---help--- + This is an iWARP/RDMA driver for the Chelsio T3 1GbE and + 10GbE adapters. 
+diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile +index 7a89f6d..12e7a94 100644 +--- a/drivers/infiniband/hw/cxgb3/Makefile ++++ b/drivers/infiniband/hw/cxgb3/Makefile +@@ -4,7 +4,8 @@ EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/ + obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o + + iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ +- iwch_provider.o iwch.o core/cxio_hal.o core/cxio_resource.o ++ iwch_provider.o iwch.o core/cxio_hal.o core/cxio_resource.o \ ++ core/genalloc.o + + ifdef CONFIG_INFINIBAND_CXGB3_DEBUG + EXTRA_CFLAGS += -DDEBUG -g +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_hal.h b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h +index e5e702d..a9e8452 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_hal.h ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h +@@ -104,8 +104,8 @@ struct cxio_rdev { + u32 qpnr; + u32 qpmask; + struct cxio_ucontext uctx; +- struct gen_pool *pbl_pool; +- struct gen_pool *rqt_pool; ++ struct iwch_gen_pool *pbl_pool; ++ struct iwch_gen_pool *rqt_pool; + }; + + static inline int cxio_num_stags(struct cxio_rdev *rdev_p) +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +index d1d8722..cecb27b 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +@@ -265,7 +265,7 @@ #define PBL_CHUNK 2*1024*1024 + + u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) + { +- unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); ++ unsigned long addr = iwch_gen_pool_alloc(rdev_p->pbl_pool, size); + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size); + return (u32)addr; + } +@@ -273,24 +273,24 @@ u32 cxio_hal_pblpool_alloc(struct cxio_r + void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) + { + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size); +- gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); 
++ iwch_gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); + } + + int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) + { + unsigned long i; +- rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); ++ rdev_p->pbl_pool = iwch_gen_pool_create(MIN_PBL_SHIFT, -1); + if (rdev_p->pbl_pool) + for (i = rdev_p->rnic_info.pbl_base; + i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; + i += PBL_CHUNK) +- gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); ++ iwch_gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); + return rdev_p->pbl_pool ? 0 : -ENOMEM; + } + + void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) + { +- gen_pool_destroy(rdev_p->pbl_pool); ++ iwch_gen_pool_destroy(rdev_p->pbl_pool); + } + + /* +@@ -302,7 +302,7 @@ #define RQT_CHUNK 2*1024*1024 + + u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) + { +- unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); ++ unsigned long addr = iwch_gen_pool_alloc(rdev_p->rqt_pool, size << 6); + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6); + return (u32)addr; + } +@@ -310,22 +310,22 @@ u32 cxio_hal_rqtpool_alloc(struct cxio_r + void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) + { + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6); +- gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); ++ iwch_gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); + } + + int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) + { + unsigned long i; +- rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); ++ rdev_p->rqt_pool = iwch_gen_pool_create(MIN_RQT_SHIFT, -1); + if (rdev_p->rqt_pool) + for (i = rdev_p->rnic_info.rqt_base; + i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; + i += RQT_CHUNK) +- gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); ++ iwch_gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); + return rdev_p->rqt_pool ? 
0 : -ENOMEM; + } + + void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) + { +- gen_pool_destroy(rdev_p->rqt_pool); ++ iwch_gen_pool_destroy(rdev_p->rqt_pool); + } +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.h b/drivers/infiniband/hw/cxgb3/core/cxio_resource.h +index a6bbe83..06a8076 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.h ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.h +@@ -39,7 +39,7 @@ #include + #include + #include + #include +-#include ++#include "genalloc.h" + #include "cxio_hal.h" + + extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); +diff --git a/drivers/infiniband/hw/cxgb3/core/genalloc.c b/drivers/infiniband/hw/cxgb3/core/genalloc.c +new file mode 100644 +index 0000000..27ba8ec +--- /dev/null ++++ b/drivers/infiniband/hw/cxgb3/core/genalloc.c +@@ -0,0 +1,196 @@ ++/* ++ * Basic general purpose allocator for managing special purpose memory ++ * not managed by the regular kmalloc/kfree interface. ++ * Uses for this includes on-device special memory, uncached memory ++ * etc. ++ * ++ * Copyright 2005 (C) Jes Sorensen ++ * ++ * This source code is licensed under the GNU General Public License, ++ * Version 2. See the file COPYING for more details. ++ */ ++ ++#include ++#include "genalloc.h" ++ ++ ++/** ++ * iwch_gen_pool_create - create a new special memory pool ++ * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents ++ * @nid: node id of the node the pool structure should be allocated on, or -1 ++ * ++ * Create a new special memory pool that can be used to manage special purpose ++ * memory not managed by the regular kmalloc/kfree interface. 
++ */ ++struct iwch_gen_pool *iwch_gen_pool_create(int min_alloc_order, int nid) ++{ ++ struct iwch_gen_pool *pool; ++ ++ pool = kmalloc_node(sizeof(struct iwch_gen_pool), GFP_KERNEL, nid); ++ if (pool != NULL) { ++ rwlock_init(&pool->lock); ++ INIT_LIST_HEAD(&pool->chunks); ++ pool->min_alloc_order = min_alloc_order; ++ } ++ return pool; ++} ++ ++/** ++ * iwch_gen_pool_add - add a new chunk of special memory to the pool ++ * @pool: pool to add new memory chunk to ++ * @addr: starting address of memory chunk to add to pool ++ * @size: size in bytes of the memory chunk to add to pool ++ * @nid: node id of the node the chunk structure and bitmap should be ++ * allocated on, or -1 ++ * ++ * Add a new chunk of special memory to the specified pool. ++ */ ++int iwch_gen_pool_add(struct iwch_gen_pool *pool, unsigned long addr, size_t size, int nid) ++{ ++ struct iwch_gen_pool_chunk *chunk; ++ int nbits = size >> pool->min_alloc_order; ++ int nbytes = sizeof(struct iwch_gen_pool_chunk) + ++ (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; ++ ++ chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); ++ if (unlikely(chunk == NULL)) ++ return -1; ++ ++ memset(chunk, 0, nbytes); ++ spin_lock_init(&chunk->lock); ++ chunk->start_addr = addr; ++ chunk->end_addr = addr + size; ++ ++ write_lock(&pool->lock); ++ list_add(&chunk->next_chunk, &pool->chunks); ++ write_unlock(&pool->lock); ++ ++ return 0; ++} ++ ++/** ++ * iwch_gen_pool_destroy - destroy a special memory pool ++ * @pool: pool to destroy ++ * ++ * Destroy the specified special memory pool. Verifies that there are no ++ * outstanding allocations. 
++ */ ++void iwch_gen_pool_destroy(struct iwch_gen_pool *pool) ++{ ++ struct list_head *_chunk, *_next_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ int order = pool->min_alloc_order; ++ int bit, end_bit; ++ ++ ++ write_lock(&pool->lock); ++ list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ list_del(&chunk->next_chunk); ++ ++ end_bit = (chunk->end_addr - chunk->start_addr) >> order; ++ bit = find_next_bit(chunk->bits, end_bit, 0); ++ BUG_ON(bit < end_bit); ++ ++ kfree(chunk); ++ } ++ kfree(pool); ++ return; ++} ++ ++/** ++ * iwch_gen_pool_alloc - allocate special memory from the pool ++ * @pool: pool to allocate from ++ * @size: number of bytes to allocate from the pool ++ * ++ * Allocate the requested number of bytes from the specified pool. ++ * Uses a first-fit algorithm. ++ */ ++unsigned long iwch_gen_pool_alloc(struct iwch_gen_pool *pool, size_t size) ++{ ++ struct list_head *_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ unsigned long addr, flags; ++ int order = pool->min_alloc_order; ++ int nbits, bit, start_bit, end_bit; ++ ++ if (size == 0) ++ return 0; ++ ++ nbits = (size + (1UL << order) - 1) >> order; ++ ++ read_lock(&pool->lock); ++ list_for_each(_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ ++ end_bit = (chunk->end_addr - chunk->start_addr) >> order; ++ end_bit -= nbits + 1; ++ ++ spin_lock_irqsave(&chunk->lock, flags); ++ bit = -1; ++ while (bit + 1 < end_bit) { ++ bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); ++ if (bit >= end_bit) ++ break; ++ ++ start_bit = bit; ++ if (nbits > 1) { ++ bit = find_next_bit(chunk->bits, bit + nbits, ++ bit + 1); ++ if (bit - start_bit < nbits) ++ continue; ++ } ++ ++ addr = chunk->start_addr + ++ ((unsigned long)start_bit << order); ++ while (nbits--) ++ __set_bit(start_bit++, &chunk->bits); ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ 
read_unlock(&pool->lock); ++ return addr; ++ } ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ } ++ read_unlock(&pool->lock); ++ return 0; ++} ++ ++/** ++ * iwch_gen_pool_free - free allocated special memory back to the pool ++ * @pool: pool to free to ++ * @addr: starting address of memory to free back to pool ++ * @size: size in bytes of memory to free ++ * ++ * Free previously allocated special memory back to the specified pool. ++ */ ++void iwch_gen_pool_free(struct iwch_gen_pool *pool, unsigned long addr, ++ size_t size) ++{ ++ struct list_head *_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ unsigned long flags; ++ int order = pool->min_alloc_order; ++ int bit, nbits; ++ ++ nbits = (size + (1UL << order) - 1) >> order; ++ ++ read_lock(&pool->lock); ++ list_for_each(_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ ++ if (addr >= chunk->start_addr && addr < chunk->end_addr) { ++ BUG_ON(addr + size > chunk->end_addr); ++ spin_lock_irqsave(&chunk->lock, flags); ++ bit = (addr - chunk->start_addr) >> order; ++ while (nbits--) ++ __clear_bit(bit++, &chunk->bits); ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ break; ++ } ++ } ++ BUG_ON(nbits > 0); ++ read_unlock(&pool->lock); ++} +diff --git a/drivers/infiniband/hw/cxgb3/core/genalloc.h b/drivers/infiniband/hw/cxgb3/core/genalloc.h +new file mode 100644 +index 0000000..1dc336c +--- /dev/null ++++ b/drivers/infiniband/hw/cxgb3/core/genalloc.h +@@ -0,0 +1,36 @@ ++/* ++ * Basic general purpose allocator for managing special purpose memory ++ * not managed by the regular kmalloc/kfree interface. ++ * Uses for this includes on-device special memory, uncached memory ++ * etc. ++ * ++ * This source code is licensed under the GNU General Public License, ++ * Version 2. See the file COPYING for more details. ++ */ ++ ++ ++/* ++ * General purpose special memory pool descriptor. 
++ */ ++struct iwch_gen_pool { ++ rwlock_t lock; ++ struct list_head chunks; /* list of chunks in this pool */ ++ int min_alloc_order; /* minimum allocation order */ ++}; ++ ++/* ++ * General purpose special memory pool chunk descriptor. ++ */ ++struct iwch_gen_pool_chunk { ++ spinlock_t lock; ++ struct list_head next_chunk; /* next chunk in pool */ ++ unsigned long start_addr; /* starting address of memory chunk */ ++ unsigned long end_addr; /* ending address of memory chunk */ ++ unsigned long bits[0]; /* bitmap for allocating memory chunk */ ++}; ++ ++extern struct iwch_gen_pool *iwch_gen_pool_create(int, int); ++extern int iwch_gen_pool_add(struct iwch_gen_pool *, unsigned long, size_t, int); ++extern void iwch_gen_pool_destroy(struct iwch_gen_pool *); ++extern unsigned long iwch_gen_pool_alloc(struct iwch_gen_pool *, size_t); ++extern void iwch_gen_pool_free(struct iwch_gen_pool *, unsigned long, size_t); From swise at opengridcomputing.com Thu Jan 11 14:23:21 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 16:23:21 -0600 Subject: [openib-general] [PATCH] ~vlad/ofabuild.git - Add Cheslio into kernel build script Message-ID: <1168554201.8149.47.camel@stevo-desktop> This can be applied after you pull the chelsio support into ofed_1_2. Changes for Chelsio T3 Support. 
Signed-off-by: Steve Wise --- build_ofa_kernel.sh | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/build_ofa_kernel.sh b/build_ofa_kernel.sh index b44353c..ea1d2da 100755 --- a/build_ofa_kernel.sh +++ b/build_ofa_kernel.sh @@ -32,6 +32,7 @@ WITH_PACKAGES=" \ --with-user_access-mod \ --with-mthca-mod \ --with-core-mod \ + --with-cxgb3-mod \ --with-addr_trans-mod" arch=${arch:-"$(uname -m)"} @@ -76,6 +77,7 @@ get_src() include/scsi/iscsi_if.h \ include/scsi/libiscsi.h \ include/scsi/scsi_transport_iscsi.h \ + drivers/net \ ` echo ref: refs/heads/${git_branch} > .git/HEAD From swise at opengridcomputing.com Thu Jan 11 14:27:56 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 16:27:56 -0600 Subject: [openib-general] [PATCH 7/7] libcxgb3: Update libcxgb3 for new libibverbs driver handling In-Reply-To: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> References: <20071101412.mpr6i5wfIJUeMh3m@cisco.com> Message-ID: <1168554476.8149.49.camel@stevo-desktop> Applied. Thanks, Steve. From mst at mellanox.co.il Thu Jan 11 14:33:23 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 00:33:23 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070111221454.9236.80109.stgit@dell3.ogc.int> References: <20070111221452.9236.800.stgit@dell3.ogc.int> <20070111221454.9236.80109.stgit@dell3.ogc.int> Message-ID: <20070111223323.GN17999@mellanox.co.il> For genalloc, 1. It's a backport, why do you put it in fixes? 2. I think a better way would be to stick it under kernel_addons/backports rather than as a patch. Please see my earlier mail on how this works. It is *much* easier to maintain this way (you see the full file). It also benefits the whole subsystem, not just Chelsio.
We currently have infrastructure to add headers only, so what you do for C is stick it under backports//src/genpool.c and add a backport patch that just adds a small file pulling in the real code, like this: +#include "src/genpool.c" and also adds the relevant line to Makefile to stick it in core. Look at how we did this for kernel_addons/backport/2.6.5_sles9_sp3/include/src/stream.c Or, if you have an idea how to get rid of the two-line patch that would be fine too. Sticking all these things in core is ugly and creates Makefile patches that often trigger conflicts. Maybe we should have an ib_backports module built? This might be a good idea, I'll think about it next week. 3. Is there a real reason to call it iwch_gen_pool and not just gen_pool as in 2.6.20? The patch would be much smaller then. 4. I think you want to remove playing with EXTRA_CFLAGS for your driver: these things should be controlled centrally. Instead of -DDEBUG just do #ifdef CONFIG_INFINIBAND_CXGB3_DEBUG in your code. You really do not want to touch the global DEBUG macro. And -g is something the user should ask for explicitly. Quoting r. Steve Wise : Subject: [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. - genalloc backport. - qp_num -> qp ptr patch for cxgb3. Signed-off-by: Steve Wise --- kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch | 23 + kernel_patches/fixes/genalloc.patch | 392 +++++++++++++++++++++++++ 2 files changed, 415 insertions(+), 0 deletions(-) diff --git a/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch new file mode 100644 index 0000000..0763f70 --- /dev/null +++ b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch @@ -0,0 +1,23 @@ +Update T3 driver: qp_num no longer in ib_wc.
+ +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/iwch_cq.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c +index ff09509..122f7b4 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c +@@ -80,7 +80,7 @@ int iwch_poll_cq_one(struct iwch_dev *rh + ret = 1; + + wc->wr_id = cookie; +- wc->qp_num = qhp->wq.qpid; ++ wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " diff --git a/kernel_patches/fixes/genalloc.patch b/kernel_patches/fixes/genalloc.patch new file mode 100644 index 0000000..c44a98f --- /dev/null +++ b/kernel_patches/fixes/genalloc.patch @@ -0,0 +1,392 @@ +Backport of the Linux 2.6.20 generic allocator. + +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/Kconfig | 1 + drivers/infiniband/hw/cxgb3/Makefile | 3 + drivers/infiniband/hw/cxgb3/core/cxio_hal.h | 4 + drivers/infiniband/hw/cxgb3/core/cxio_resource.c | 20 +- + drivers/infiniband/hw/cxgb3/core/cxio_resource.h | 2 + drivers/infiniband/hw/cxgb3/core/genalloc.c | 196 ++++++++++++++++++++++ + drivers/infiniband/hw/cxgb3/core/genalloc.h | 36 ++++ + 7 files changed, 247 insertions(+), 15 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig +index d3db264..0361a72 100644 +--- a/drivers/infiniband/hw/cxgb3/Kconfig ++++ b/drivers/infiniband/hw/cxgb3/Kconfig +@@ -1,7 +1,6 @@ + config INFINIBAND_CXGB3 + tristate "Chelsio RDMA Driver" + depends on CHELSIO_T3 && INFINIBAND +- select GENERIC_ALLOCATOR + ---help--- + This is an iWARP/RDMA driver for the Chelsio T3 1GbE and + 10GbE adapters. 
+diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile +index 7a89f6d..12e7a94 100644 +--- a/drivers/infiniband/hw/cxgb3/Makefile ++++ b/drivers/infiniband/hw/cxgb3/Makefile +@@ -4,7 +4,8 @@ EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/ + obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o + + iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ +- iwch_provider.o iwch.o core/cxio_hal.o core/cxio_resource.o ++ iwch_provider.o iwch.o core/cxio_hal.o core/cxio_resource.o \ ++ core/genalloc.o + + ifdef CONFIG_INFINIBAND_CXGB3_DEBUG + EXTRA_CFLAGS += -DDEBUG -g +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_hal.h b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h +index e5e702d..a9e8452 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_hal.h ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h +@@ -104,8 +104,8 @@ struct cxio_rdev { + u32 qpnr; + u32 qpmask; + struct cxio_ucontext uctx; +- struct gen_pool *pbl_pool; +- struct gen_pool *rqt_pool; ++ struct iwch_gen_pool *pbl_pool; ++ struct iwch_gen_pool *rqt_pool; + }; + + static inline int cxio_num_stags(struct cxio_rdev *rdev_p) +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +index d1d8722..cecb27b 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +@@ -265,7 +265,7 @@ #define PBL_CHUNK 2*1024*1024 + + u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) + { +- unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); ++ unsigned long addr = iwch_gen_pool_alloc(rdev_p->pbl_pool, size); + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size); + return (u32)addr; + } +@@ -273,24 +273,24 @@ u32 cxio_hal_pblpool_alloc(struct cxio_r + void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) + { + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size); +- gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); 
++ iwch_gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); + } + + int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) + { + unsigned long i; +- rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); ++ rdev_p->pbl_pool = iwch_gen_pool_create(MIN_PBL_SHIFT, -1); + if (rdev_p->pbl_pool) + for (i = rdev_p->rnic_info.pbl_base; + i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; + i += PBL_CHUNK) +- gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); ++ iwch_gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); + return rdev_p->pbl_pool ? 0 : -ENOMEM; + } + + void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) + { +- gen_pool_destroy(rdev_p->pbl_pool); ++ iwch_gen_pool_destroy(rdev_p->pbl_pool); + } + + /* +@@ -302,7 +302,7 @@ #define RQT_CHUNK 2*1024*1024 + + u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) + { +- unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); ++ unsigned long addr = iwch_gen_pool_alloc(rdev_p->rqt_pool, size << 6); + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6); + return (u32)addr; + } +@@ -310,22 +310,22 @@ u32 cxio_hal_rqtpool_alloc(struct cxio_r + void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) + { + PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6); +- gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); ++ iwch_gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); + } + + int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) + { + unsigned long i; +- rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); ++ rdev_p->rqt_pool = iwch_gen_pool_create(MIN_RQT_SHIFT, -1); + if (rdev_p->rqt_pool) + for (i = rdev_p->rnic_info.rqt_base; + i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; + i += RQT_CHUNK) +- gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); ++ iwch_gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); + return rdev_p->rqt_pool ? 
0 : -ENOMEM; + } + + void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) + { +- gen_pool_destroy(rdev_p->rqt_pool); ++ iwch_gen_pool_destroy(rdev_p->rqt_pool); + } +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.h b/drivers/infiniband/hw/cxgb3/core/cxio_resource.h +index a6bbe83..06a8076 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.h ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.h +@@ -39,7 +39,7 @@ #include + #include + #include + #include +-#include ++#include "genalloc.h" + #include "cxio_hal.h" + + extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); +diff --git a/drivers/infiniband/hw/cxgb3/core/genalloc.c b/drivers/infiniband/hw/cxgb3/core/genalloc.c +new file mode 100644 +index 0000000..27ba8ec +--- /dev/null ++++ b/drivers/infiniband/hw/cxgb3/core/genalloc.c +@@ -0,0 +1,196 @@ ++/* ++ * Basic general purpose allocator for managing special purpose memory ++ * not managed by the regular kmalloc/kfree interface. ++ * Uses for this includes on-device special memory, uncached memory ++ * etc. ++ * ++ * Copyright 2005 (C) Jes Sorensen ++ * ++ * This source code is licensed under the GNU General Public License, ++ * Version 2. See the file COPYING for more details. ++ */ ++ ++#include ++#include "genalloc.h" ++ ++ ++/** ++ * iwch_gen_pool_create - create a new special memory pool ++ * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents ++ * @nid: node id of the node the pool structure should be allocated on, or -1 ++ * ++ * Create a new special memory pool that can be used to manage special purpose ++ * memory not managed by the regular kmalloc/kfree interface. 
++ */ ++struct iwch_gen_pool *iwch_gen_pool_create(int min_alloc_order, int nid) ++{ ++ struct iwch_gen_pool *pool; ++ ++ pool = kmalloc_node(sizeof(struct iwch_gen_pool), GFP_KERNEL, nid); ++ if (pool != NULL) { ++ rwlock_init(&pool->lock); ++ INIT_LIST_HEAD(&pool->chunks); ++ pool->min_alloc_order = min_alloc_order; ++ } ++ return pool; ++} ++ ++/** ++ * iwch_gen_pool_add - add a new chunk of special memory to the pool ++ * @pool: pool to add new memory chunk to ++ * @addr: starting address of memory chunk to add to pool ++ * @size: size in bytes of the memory chunk to add to pool ++ * @nid: node id of the node the chunk structure and bitmap should be ++ * allocated on, or -1 ++ * ++ * Add a new chunk of special memory to the specified pool. ++ */ ++int iwch_gen_pool_add(struct iwch_gen_pool *pool, unsigned long addr, size_t size, int nid) ++{ ++ struct iwch_gen_pool_chunk *chunk; ++ int nbits = size >> pool->min_alloc_order; ++ int nbytes = sizeof(struct iwch_gen_pool_chunk) + ++ (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; ++ ++ chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); ++ if (unlikely(chunk == NULL)) ++ return -1; ++ ++ memset(chunk, 0, nbytes); ++ spin_lock_init(&chunk->lock); ++ chunk->start_addr = addr; ++ chunk->end_addr = addr + size; ++ ++ write_lock(&pool->lock); ++ list_add(&chunk->next_chunk, &pool->chunks); ++ write_unlock(&pool->lock); ++ ++ return 0; ++} ++ ++/** ++ * iwch_gen_pool_destroy - destroy a special memory pool ++ * @pool: pool to destroy ++ * ++ * Destroy the specified special memory pool. Verifies that there are no ++ * outstanding allocations. 
++ */ ++void iwch_gen_pool_destroy(struct iwch_gen_pool *pool) ++{ ++ struct list_head *_chunk, *_next_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ int order = pool->min_alloc_order; ++ int bit, end_bit; ++ ++ ++ write_lock(&pool->lock); ++ list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ list_del(&chunk->next_chunk); ++ ++ end_bit = (chunk->end_addr - chunk->start_addr) >> order; ++ bit = find_next_bit(chunk->bits, end_bit, 0); ++ BUG_ON(bit < end_bit); ++ ++ kfree(chunk); ++ } ++ kfree(pool); ++ return; ++} ++ ++/** ++ * iwch_gen_pool_alloc - allocate special memory from the pool ++ * @pool: pool to allocate from ++ * @size: number of bytes to allocate from the pool ++ * ++ * Allocate the requested number of bytes from the specified pool. ++ * Uses a first-fit algorithm. ++ */ ++unsigned long iwch_gen_pool_alloc(struct iwch_gen_pool *pool, size_t size) ++{ ++ struct list_head *_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ unsigned long addr, flags; ++ int order = pool->min_alloc_order; ++ int nbits, bit, start_bit, end_bit; ++ ++ if (size == 0) ++ return 0; ++ ++ nbits = (size + (1UL << order) - 1) >> order; ++ ++ read_lock(&pool->lock); ++ list_for_each(_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ ++ end_bit = (chunk->end_addr - chunk->start_addr) >> order; ++ end_bit -= nbits + 1; ++ ++ spin_lock_irqsave(&chunk->lock, flags); ++ bit = -1; ++ while (bit + 1 < end_bit) { ++ bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); ++ if (bit >= end_bit) ++ break; ++ ++ start_bit = bit; ++ if (nbits > 1) { ++ bit = find_next_bit(chunk->bits, bit + nbits, ++ bit + 1); ++ if (bit - start_bit < nbits) ++ continue; ++ } ++ ++ addr = chunk->start_addr + ++ ((unsigned long)start_bit << order); ++ while (nbits--) ++ __set_bit(start_bit++, &chunk->bits); ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ 
read_unlock(&pool->lock); ++ return addr; ++ } ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ } ++ read_unlock(&pool->lock); ++ return 0; ++} ++ ++/** ++ * iwch_gen_pool_free - free allocated special memory back to the pool ++ * @pool: pool to free to ++ * @addr: starting address of memory to free back to pool ++ * @size: size in bytes of memory to free ++ * ++ * Free previously allocated special memory back to the specified pool. ++ */ ++void iwch_gen_pool_free(struct iwch_gen_pool *pool, unsigned long addr, ++ size_t size) ++{ ++ struct list_head *_chunk; ++ struct iwch_gen_pool_chunk *chunk; ++ unsigned long flags; ++ int order = pool->min_alloc_order; ++ int bit, nbits; ++ ++ nbits = (size + (1UL << order) - 1) >> order; ++ ++ read_lock(&pool->lock); ++ list_for_each(_chunk, &pool->chunks) { ++ chunk = list_entry(_chunk, struct iwch_gen_pool_chunk, ++ next_chunk); ++ ++ if (addr >= chunk->start_addr && addr < chunk->end_addr) { ++ BUG_ON(addr + size > chunk->end_addr); ++ spin_lock_irqsave(&chunk->lock, flags); ++ bit = (addr - chunk->start_addr) >> order; ++ while (nbits--) ++ __clear_bit(bit++, &chunk->bits); ++ spin_unlock_irqrestore(&chunk->lock, flags); ++ break; ++ } ++ } ++ BUG_ON(nbits > 0); ++ read_unlock(&pool->lock); ++} +diff --git a/drivers/infiniband/hw/cxgb3/core/genalloc.h b/drivers/infiniband/hw/cxgb3/core/genalloc.h +new file mode 100644 +index 0000000..1dc336c +--- /dev/null ++++ b/drivers/infiniband/hw/cxgb3/core/genalloc.h +@@ -0,0 +1,36 @@ ++/* ++ * Basic general purpose allocator for managing special purpose memory ++ * not managed by the regular kmalloc/kfree interface. ++ * Uses for this includes on-device special memory, uncached memory ++ * etc. ++ * ++ * This source code is licensed under the GNU General Public License, ++ * Version 2. See the file COPYING for more details. ++ */ ++ ++ ++/* ++ * General purpose special memory pool descriptor. 
++ */ ++struct iwch_gen_pool { ++ rwlock_t lock; ++ struct list_head chunks; /* list of chunks in this pool */ ++ int min_alloc_order; /* minimum allocation order */ ++}; ++ ++/* ++ * General purpose special memory pool chunk descriptor. ++ */ ++struct iwch_gen_pool_chunk { ++ spinlock_t lock; ++ struct list_head next_chunk; /* next chunk in pool */ ++ unsigned long start_addr; /* starting address of memory chunk */ ++ unsigned long end_addr; /* ending address of memory chunk */ ++ unsigned long bits[0]; /* bitmap for allocating memory chunk */ ++}; ++ ++extern struct iwch_gen_pool *iwch_gen_pool_create(int, int); ++extern int iwch_gen_pool_add(struct iwch_gen_pool *, unsigned long, size_t, int); ++extern void iwch_gen_pool_destroy(struct iwch_gen_pool *); ++extern unsigned long iwch_gen_pool_alloc(struct iwch_gen_pool *, size_t); ++extern void iwch_gen_pool_free(struct iwch_gen_pool *, unsigned long, size_t); _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From mst at mellanox.co.il Thu Jan 11 14:34:40 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 00:34:40 +0200 Subject: [openib-general] [PATCH] ~vlad/ofabuild.git - Add Cheslio into kernel build script In-Reply-To: <1168554201.8149.47.camel@stevo-desktop> References: <1168554201.8149.47.camel@stevo-desktop> Message-ID: <20070111223440.GO17999@mellanox.co.il> > arch=${arch:-"$(uname -m)"} > @@ -76,6 +77,7 @@ get_src() > include/scsi/iscsi_if.h \ > include/scsi/libiscsi.h \ > include/scsi/scsi_transport_iscsi.h \ > + drivers/net \ > ` > echo ref: refs/heads/${git_branch} > .git/HEAD Will this pull in the whole drivers/net? I think we only want the relevant drivers. 
-- MST From robert.j.woodruff at intel.com Thu Jan 11 14:45:11 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Thu, 11 Jan 2007 14:45:11 -0800 Subject: [openib-general] Request for assistance from DWG In-Reply-To: Message-ID: I have added the appropriate maintainers to this thread and sending it to the open fabrics email lists. Tziporet for OFED (mutex-backport.h file used by OFED) Bryan for ipath Vladimir for ipoib tools Steve for cxgb3 driver All please review the list of GPL only files that are (or were in your components before we moved to git) and please correct the code so that meets the BSD+GPL requirements that we all agreed to when we joined openfabrics, if you have not done so already. Thanks woody ________________________________ From: Chet Mehta [mailto:chetm at us.ibm.com] Sent: Thursday, January 11, 2007 2:17 PM To: Woodruff, Robert J Cc: openfabrics-lwg at openfabrics.org Subject: RE: Request for assistance from DWG Bob, Thanks for the reply. Attached below is a list of files that we believe have only a GPL license (the list was generated on 12/5/06). As I mentioned in my last note, the search wasn't exhaustive so there may be others. During the time we also noticed that there were some utility-type files (e.g. files used for extraction, make or build) that were GPL only. I'm assuming the DWG will decide if these should be dual licensed or be placed on the exception list. Lastly I suspect there are files that only have a BSD license - i.e. no GPL. While personally that's less worrisome, going strictly by the Bylaws I would suspect those should be corrected also. Thanks for your help to get this corrected! 
################################################################ #mutex-backport.h (Cisco Systems) ################################################################ ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/trunk/src/linux-kernel/infiniband/include/linux/mutex-backport.h ./gen2/trunk/src/linux-kernel/infiniband/include/linux/.svn/text-base/mu tex-backport.h.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/mutex-backport.h ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/mutex-ba ckport.h ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/.svn/tex t-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ 
.svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ################################################################ #ipath_eth.c (PathScale) ################################################################ ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/ipath_et h.c ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/.svn/tex t-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/.svn/text-bas e/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/i path_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/. 
svn/text-base/ipath_eth.c.svn-base ################################################################ #lnstat.c Linux network stats # Copyright (C) 2004 by Harald Welte # Development of this code was funded by Astaro AG, http://www.astaro.com/ # Based on original concept and ideas from predecessor rtstat.c: # Copyright 2001 by Robert Olsson # Uppsala University, Sweden ################################################################ ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/lnstat.c ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/.svn/text-base/lnsta t.c.svn-base ################################################################ #list.h ( kazutomo at mcs.anl.gov) ################################################################ ./gen2/trunk/src/userspace/ipoibtools/list.h ./gen2/trunk/src/userspace/ipoibtools/.svn/text-base/list.h.svn-base ################################################################ #cxgb3_main.c (Chelsio Communications) ################################################################ ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/cx gb3_main.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/.s vn/text-base/cxgb3_main.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/cxgb3_main.c ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/.svn/text-base/cxgb3_ma in.c.svn-base :Chet. From swise at opengridcomputing.com Thu Jan 11 14:49:08 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 16:49:08 -0600 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070111223323.GN17999@mellanox.co.il> References: <20070111221452.9236.800.stgit@dell3.ogc.int> <20070111221454.9236.80109.stgit@dell3.ogc.int> <20070111223323.GN17999@mellanox.co.il> Message-ID: <1168555748.8149.62.camel@stevo-desktop> On Fri, 2007-01-12 at 00:33 +0200, Michael S. Tsirkin wrote: > For genalloc, > 1. 
It's a backport, why do you put it in fixes? Because it's not exactly a backport. The exact file exists in 2.6.20 but it is only conditionally compiled into the kernel. But I can put it in backport if you want. That makes more sense I guess. > 2. I think a better way would be to stick it under kernel_addons/backports than > as a patch. Please see my earlier mail on how this works. > Um, what earlier mail? > It is *much* easier to maintain this way (you see the full file). > And also benefits all the subsystem and not just chelsio. > We currently have infrastructure to add headers only, so what you do > for C is stick iit under backports//src/genpool.c and > add a backport patch that just adds a small file pulling > in the real code, like this: > > +#include "src/genpool.c" > > and also adds the relevant line to Makefile to stick it in core. > Ok, so add this into ib_core then? > Look at how we did this for > kernel_addons/backport/2.6.5_sles9_sp3/include/src/stream.c > ok. > > Or, if you have an idea how to get rid of the two-line patch > that would be fine too. Sticking all these things in core is ugly > and creates Makefile patches that often trigger conflicts. > Maybe we should have an ib_backports module built? > This might be a good idea, I'll think about it next week. > A backports module sounds interesting... > 3. Is there a real reason to call it iwch_gen_pool and not just gen_pool > as in 2.6.20? The patch would be much smaller then. > What if it's already built in and exported in the kernel we're trying to load into? Will this cause a load problem? I was assuming it would...that's why I changed the names. Am I wrong? > 4. I think you want to remove playing with EXTRA_CFLAGS > for your driver: these things should be controlled centrally. > Instead of -DDEBUG just do > #ifdef CONFIG_INFINIBAND_CXGB3_DEBUG in your code. > you really do not want to touch the global DEBUG macro. > This was feedback we got from reviewers on the amso driver. They said use DEBUG.
> And -g is something user should ask explicitly. > The user asks explicitly by requesting to compile the debug module. Steve. From mst at mellanox.co.il Thu Jan 11 14:56:07 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 00:56:07 +0200 Subject: [openib-general] Request for assistance from DWG In-Reply-To: References: Message-ID: <20070111225607.GQ17999@mellanox.co.il> The BSD+GPL can only apply to code developed by us. Backport headers, list.h and iproute2 are not our code and can not be "corrected". Quoting Woodruff, Robert J : Subject: Request for assistance from DWG I have added the appropriate maintainers to this thread and sending it to the open fabrics email lists. Tziporet for OFED (mutex-backport.h file used by OFED) Bryan for ipath Vladimir for ipoib tools Steve for cxgb3 driver All please review the list of GPL only files that are (or were in your components before we moved to git) and please correct the code so that meets the BSD+GPL requirements that we all agreed to when we joined openfabrics, if you have not done so already. Thanks woody ________________________________ From: Chet Mehta [mailto:chetm at us.ibm.com] Sent: Thursday, January 11, 2007 2:17 PM To: Woodruff, Robert J Cc: openfabrics-lwg at openfabrics.org Subject: RE: Request for assistance from DWG Bob, Thanks for the reply. Attached below is a list of files that we believe have only a GPL license (the list was generated on 12/5/06). As I mentioned in my last note, the search wasn't exhaustive so there may be others. During the time we also noticed that there were some utility-type files (e.g. files used for extraction, make or build) that were GPL only. I'm assuming the DWG will decide if these should be dual licensed or be placed on the exception list. Lastly I suspect there are files that only have a BSD license - i.e. no GPL. While personally that's less worrisome, going strictly by the Bylaws I would suspect those should be corrected also. 
Thanks for your help to get this corrected! ################################################################ #mutex-backport.h (Cisco Systems) ################################################################ ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/trunk/src/linux-kernel/infiniband/include/linux/mutex-backport.h ./gen2/trunk/src/linux-kernel/infiniband/include/linux/.svn/text-base/mu tex-backport.h.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/mutex-backport.h ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/mutex-ba ckport.h ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/.svn/tex t-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ mutex-backport.h 
./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ################################################################ #ipath_eth.c (PathScale) ################################################################ ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/ipath_et h.c ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/.svn/tex t-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/.svn/text-bas e/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/i path_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/. 
svn/text-base/ipath_eth.c.svn-base ################################################################ #lnstat.c Linux network stats # Copyright (C) 2004 by Harald Welte # Development of this code was funded by Astaro AG, http://www.astaro.com/ # Based on original concept and ideas from predecessor rtstat.c: # Copyright 2001 by Robert Olsson # Uppsala University, Sweden ################################################################ ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/lnstat.c ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/.svn/text-base/lnsta t.c.svn-base ################################################################ #list.h ( kazutomo at mcs.anl.gov) ################################################################ ./gen2/trunk/src/userspace/ipoibtools/list.h ./gen2/trunk/src/userspace/ipoibtools/.svn/text-base/list.h.svn-base ################################################################ #cxgb3_main.c (Chelsio Communications) ################################################################ ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/cx gb3_main.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/.s vn/text-base/cxgb3_main.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/cxgb3_main.c ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/.svn/text-base/cxgb3_ma in.c.svn-base :Chet. 
_______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From swise at opengridcomputing.com Thu Jan 11 15:01:22 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 17:01:22 -0600 Subject: [openib-general] [PATCH] ~vlad/ofabuild.git - Add Cheslio into kernel build script In-Reply-To: <20070111223440.GO17999@mellanox.co.il> References: <1168554201.8149.47.camel@stevo-desktop> <20070111223440.GO17999@mellanox.co.il> Message-ID: <1168556482.8149.71.camel@stevo-desktop> On Fri, 2007-01-12 at 00:34 +0200, Michael S. Tsirkin wrote: > > arch=${arch:-"$(uname -m)"} > > @@ -76,6 +77,7 @@ get_src() > > include/scsi/iscsi_if.h \ > > include/scsi/libiscsi.h \ > > include/scsi/scsi_transport_iscsi.h \ > > + drivers/net \ > > ` > > echo ref: refs/heads/${git_branch} > .git/HEAD > > Will this pull in the whole drivers/net? > > I think we only want the relevant drivers. > yes. I missed this one. Thanks. From mst at mellanox.co.il Thu Jan 11 15:06:57 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:06:57 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <1168555748.8149.62.camel@stevo-desktop> References: <20070111221452.9236.800.stgit@dell3.ogc.int> <20070111221454.9236.80109.stgit@dell3.ogc.int> <20070111223323.GN17999@mellanox.co.il> <1168555748.8149.62.camel@stevo-desktop> Message-ID: <20070111230657.GR17999@mellanox.co.il> > Quoting Steve Wise : > Subject: Re: [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. > > On Fri, 2007-01-12 at 00:33 +0200, Michael S. Tsirkin wrote: > > For genalloc, > > 1. It's a backport, why do you put it in fixes? > > Because its not exactly a backport. 
The exact file exists in 2.6.20 but > it is only conditionally compiled into the kernel. I don't really understand. Maybe I am missing something? So you want it for older kernels, right? So how is it not a backport? > But I can put it > backport if you want. That makes more sense I guess. > > > 2. I think a better way would be to stick it under kernel_addons/backports than > > as a patch. Please see my earlier mail on how this works. > > > > Um, what earlier mail? > > > It is *much* easier to maintain this way (you see the full file). > > And also benefits all the subsystem and not just chelsio. > > We currently have infrastructure to add headers only, so what you do > > for C is stick iit under backports//src/genpool.c and > > add a backport patch that just adds a small file pulling > > in the real code, like this: > > > > +#include "src/genpool.c" > > > > and also adds the relevant line to Makefile to stick it in core. > > > > Ok, so add this into ib_core then? Yes. But the bulk of the code goes under kernel_addons/backports//include/src/genpool.c and kernel_addons//include/src/genpool.h > > Look at how we did this for > > kernel_addons/backport/2.6.5_sles9_sp3/include/src/stream.c > > > > ok. > > > > > Or, if you have an idea how to get rid of the two-line patch > > that would be fine too. Sticking all these things in core is ugly > > and creates Makefile patches that often trigger conflicts. > > Maybe we should have an ib_backports module built? > > This might be a good idea, I'll think about it next week. > > > > A backports modules sounds interesting... In a good way? > > 3. Is there a real reason to call it iwch_gen_pool and not just gen_pool > > as in 2.6.20? The patch would be much smaller then. > > > > What if its already built in and export in the kernel we're trying to > load into? Will this cause a load problem? I was assuming it > would...that's why I changed the names. Am I wrong? Yes. 
that's why kernel_addons/backports/ has lots of directories for each kernel version, and same for kernel_patches/backports. Backports (patches and addons) are per kernel. So for kernels which have genpool (e.g. 2.6.20) you do not add this file and so there will be no conflict. For older kernels you do. > > 4. I think you want to remove playing with EXTRA_CFLAGS > > for your driver: these things should be controlled centrally. > > Instead of -DDEBUG just do > > #ifdef CONFIG_INFINIBAND_CXGB3_DEBUG in your code. > > you really do not want to touch the global DEBUG macro. > > > > This was feedback we got from reviewers on the amso driver. They said > use DEBUG. Hmm, sounds weird, and that's not what we do for other modules. the question is not ofed specific, same applies to upstream. Roland? > > And -g is something user should ask explicitly. > > > > The user asks explicitly by requesting to compile the debug module. Kernel has a global option "compile with debug symbols" which adds -g. Why does your driver need a separate one? -- MST From mst at mellanox.co.il Thu Jan 11 15:14:54 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:14:54 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. In-Reply-To: <1168555748.8149.62.camel@stevo-desktop> References: <1168555748.8149.62.camel@stevo-desktop> Message-ID: <20070111231454.GS17999@mellanox.co.il> > What if its already built in and export in the kernel we're trying to > load into? Will this cause a load problem? I was assuming it > would...that's why I changed the names. Am I wrong?
BTW, if you want to change a definition of an existing symbol, you can use a macro trick like the following (this is from ./2.6.9_U2/include/net/sock.h): static inline void sock_init_data_new(struct socket *sock, struct sock *sk) { sock_init_data(sock, sk); sk->sk_owner = THIS_MODULE; } #define sock_init_data sock_init_data_new -- MST From swise at opengridcomputing.com Thu Jan 11 15:23:30 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 17:23:30 -0600 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070111230657.GR17999@mellanox.co.il> References: <20070111221452.9236.800.stgit@dell3.ogc.int> <20070111221454.9236.80109.stgit@dell3.ogc.int> <20070111223323.GN17999@mellanox.co.il> <1168555748.8149.62.camel@stevo-desktop> <20070111230657.GR17999@mellanox.co.il> Message-ID: <1168557810.8149.89.camel@stevo-desktop> On Fri, 2007-01-12 at 01:06 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: Re: [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. > > > > On Fri, 2007-01-12 at 00:33 +0200, Michael S. Tsirkin wrote: > > > For genalloc, > > > 1. It's a backport, why do you put it in fixes? > > > > Because its not exactly a backport. The exact file exists in 2.6.20 but > > it is only conditionally compiled into the kernel. > > I don't really understand. Maybe I am missing something? > So you want it for older kernels, right? > So how is it not a backport? > The issue is slightly different for genalloc. In 2.6.18 and later genalloc.c exists in lib/genalloc.c. But it is configured into the kernel build ONLY if somebody else in the kernel wants it by having a config dependency on CONFIG_GENERIC_ALLOCATOR. So on any given customer system with 2.6.19, for instance, they may or may not have built this in based on what other modules they've built that may or may not have a config dependency on CONFIG_GENERIC_ALLOCATOR.
Now, If they DID build it in, then we cannot add in our own genalloc functions with the same symbol names (I think) because they are exported from that kernel. But if they are _not_ exported on the customer's system/kernel, then we need the functionality for sure. I chose to simply always add the genalloc service _and_ change the function names so that I wouldn't collide with kernels that have it configured in. Does this make sense? So I'm claiming that under my current scheme/design (which is of course can change :) I need these services in every build regardless of which kernel we're building against. > > But I can put it > > backport if you want. That makes more sense I guess. > > > > > 2. I think a better way would be to stick it under kernel_addons/backports than > > > as a patch. Please see my earlier mail on how this works. > > > > > > > Um, what earlier mail? > > > > > It is *much* easier to maintain this way (you see the full file). > > > And also benefits all the subsystem and not just chelsio. > > > We currently have infrastructure to add headers only, so what you do > > > for C is stick iit under backports//src/genpool.c and > > > add a backport patch that just adds a small file pulling > > > in the real code, like this: > > > > > > +#include "src/genpool.c" > > > > > > and also adds the relevant line to Makefile to stick it in core. > > > > > > > Ok, so add this into ib_core then? > > Yes. But the bulk of the code goes under > kernel_addons/backports//include/src/genpool.c > and kernel_addons//include/src/genpool.h > > > > Look at how we did this for > > > kernel_addons/backport/2.6.5_sles9_sp3/include/src/stream.c > > > > > > > ok. > > > > > > > > Or, if you have an idea how to get rid of the two-line patch > > > that would be fine too. Sticking all these things in core is ugly > > > and creates Makefile patches that often trigger conflicts. > > > Maybe we should have an ib_backports module built? 
> > > This might be a good idea, I'll think about it next week. > > > > > > > A backports modules sounds interesting... > > In a good way? > Yea. :-) > > > 3. Is there a real reason to call it iwch_gen_pool and not just gen_pool > > > as in 2.6.20? The patch would be much smaller then. > > > > > > > What if its already built in and export in the kernel we're trying to > > load into? Will this cause a load problem? I was assuming it > > would...that's why I changed the names. Am I wrong? > > Yes. that's why kernel_addons/backports/ has lots of directories > for each kernel version, and same for kernel_patches/backports. > Backports (patches and addons) are per kernel. So for kernels which have genpool > (e.g. 2.6.20) you do not add this file and so there will be no conflict. > For older kernels you don't. > See my explanation above. > > > 4. I think you want to remove playing with EXTRA_CFLAGS > > > for your driver: these things should be controlled centrally. > > > Instead of -DDEBUG just do > > > #ifdef CONFIG_INFINIBAND_CXGB3_DEBUG in your code. > > > you really do not want to touch the global DEBUG macro. > > > > > > > This was feedback we got from reviewers on the amso driver. They said > > use DEBUG. > > Hmm, sounds weird, and that's not what we do for other modules. > the question is not ofed specific, same applies to upstream. > Roland? > This review comment came from lklm. They said "dont create new debug #defines...use -DDEBUG... > > > And -g is something user should ask explicitly. > > > > > > > The user asks explicitly by requesting to compile the debug module. > > Kernel has a global option "compile with debug symbols" which adds -g. > Why does your driver need a separate one? > Perhaps not. From mst at mellanox.co.il Thu Jan 11 15:25:05 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:25:05 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. 
In-Reply-To: <20070111231454.GS17999@mellanox.co.il> References: <1168555748.8149.62.camel@stevo-desktop> <20070111231454.GS17999@mellanox.co.il> Message-ID: <20070111232505.GT17999@mellanox.co.il> > Quoting Michael S. Tsirkin : > Subject: Re: [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. > > > > What if its already built in and export in the kernel we're trying to > > load into? Will this cause a load problem? I was assuming it > > would...that's why I changed the names. Am I wrong? > > BTW, if you want to change a definition of an existing symbol, > you can use macro trick like the folowing (this is from > ./2.6.9_U2/include/net/sock.h): > > static inline > void sock_init_data_new(struct socket *sock, struct sock *sk) > { > sock_init_data(sock, sk); > sk->sk_owner = THIS_MODULE; > } > > #define sock_init_data sock_init_data_new Another final note: I am not sure it makes sense to try supporting 2.6.20 with CONFIG_GENERIC_ALLOCATOR turned off. Distributions typically just enable everything so I think this won't be a problem in practice. What do you think? -- MST From swise at opengridcomputing.com Thu Jan 11 15:28:42 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 17:28:42 -0600 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. In-Reply-To: <20070111232505.GT17999@mellanox.co.il> References: <1168555748.8149.62.camel@stevo-desktop> <20070111231454.GS17999@mellanox.co.il> <20070111232505.GT17999@mellanox.co.il> Message-ID: <1168558122.8149.93.camel@stevo-desktop> On Fri, 2007-01-12 at 01:25 +0200, Michael S. Tsirkin wrote: > > Quoting Michael S. Tsirkin : > > Subject: Re: [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. > > > > > > > What if its already built in and export in the kernel we're trying to > > > load into? Will this cause a load problem? I was assuming it > > > would...that's why I changed the names. Am I wrong? 
> > > > BTW, if you want to change a definition of an existing symbol, > > you can use macro trick like the folowing (this is from > > ./2.6.9_U2/include/net/sock.h): > > > > static inline > > void sock_init_data_new(struct socket *sock, struct sock *sk) > > { > > sock_init_data(sock, sk); > > sk->sk_owner = THIS_MODULE; > > } > > > > #define sock_init_data sock_init_data_new > > Another final note: I am not sure it makes sense to try supporting > 2.6.20 with CONFIG_GENERIC_ALLOCATOR turned off. > Distributions typically just enable everything so I think this won't > be a problem in practice. > > What do you think? There is no way to enable this option via make menuconfig. It is only enabled if some other subsystem requires it by adding this to their Kconfig: select GENERIC_ALLOCATOR I think the best solution is to treat it as a backport, change the names (either explicitly or with a #define trick) and back-port it to every kernel version we're going to support. From swise at opengridcomputing.com Thu Jan 11 15:35:53 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 17:35:53 -0600 Subject: [openib-general] [PATCH 6/7] libamso: Update libamso for new libibverbs driver handling In-Reply-To: <20071101412.UL3KlXfzzPdPpGRy@cisco.com> References: <20071101412.UL3KlXfzzPdPpGRy@cisco.com> Message-ID: <1168558553.8149.95.camel@stevo-desktop> Applied with the following addition. Thanks. diff --git a/Makefile.am b/Makefile.am index d1749fd..efbcd59 100644 --- a/Makefile.am +++ b/Makefile.am @@ -15,6 +15,9 @@ if HAVE_IBV_DEVICE_LIBRARY_EXTENSION src_libamso_la_SOURCES = $(AMSO_SOURCES) src_libamso_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ $(amso_version_script) + amsoconfdir = $(sysconfdir)/libibverbs.d + amsoconf_DATA = amso.driver + else amsolibdir = $(libdir)/infiniband amsolib_LTLIBRARIES = src/amso.la From mst at mellanox.co.il Thu Jan 11 15:34:15 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Fri, 12 Jan 2007 01:34:15 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. In-Reply-To: <1168557810.8149.89.camel@stevo-desktop> References: <1168557810.8149.89.camel@stevo-desktop> Message-ID: <20070111233415.GU17999@mellanox.co.il> > > > Quoting Steve Wise : > > > Subject: Re: [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. > > > > > > On Fri, 2007-01-12 at 00:33 +0200, Michael S. Tsirkin wrote: > > > > For genalloc, > > > > 1. It's a backport, why do you put it in fixes? > > > > > > Because its not exactly a backport. The exact file exists in 2.6.20 but > > > it is only conditionally compiled into the kernel. > > > > I don't really understand. Maybe I am missing something? > > So you want it for older kernels, right? > > So how is it not a backport? > > > > The issue is slightly different for genalloc. In 2.6.18 and later > genalloc.c exists in lib/genalloc.c. But it is configured into the > kernel build ONLY if somebody else in the kernel wants it by having a > config dependency on CONFIG_GENERIC_ALLOCATOR. So on any given customer > system with 2.6.19, for instance, they may or may not have built this in > based on what other modules they've built that may or may not have a > config dependency on CONFIG_GENERIC_ALLOCATOR. Now, If they DID build > it in, then we cannot add in our own genalloc functions with the same > symbol names (I think) because they are exported from that kernel. But > if they are _not_ exported on the customer's system/kernel, then we need > the functionality for sure. > > I chose to simply always add the genalloc service _and_ change the > function names so that I wouldn't collide with kernels that have it > configured in. > > Does this make sense? > > So I'm claiming that under my current scheme/design (which is of course > can change :) I need these services in every build regardless of which > kernel we're building against. OK, I see. 
Also gen_pool_destroy is needed for older kernels - right? Some notes: - distros normally just enable everything, and customer that built his own kernel can be directed to just enable genalloc - no? In that case maybe its best to just rely on it being enabled. - Macro wrappers can be used to export functions like gen_pool_destroy_new but have code use gen_pool_destroy as it did previously. -- MST From mst at mellanox.co.il Thu Jan 11 15:37:02 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:37:02 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ forChelsioT3 Support. In-Reply-To: <1168558122.8149.93.camel@stevo-desktop> References: <1168558122.8149.93.camel@stevo-desktop> Message-ID: <20070111233702.GV17999@mellanox.co.il> > > > Quoting Michael S. Tsirkin : > > > Subject: Re: [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. > > > > > > > > > > What if its already built in and export in the kernel we're trying to > > > > load into? Will this cause a load problem? I was assuming it > > > > would...that's why I changed the names. Am I wrong? > > > > > > BTW, if you want to change a definition of an existing symbol, > > > you can use macro trick like the folowing (this is from > > > ./2.6.9_U2/include/net/sock.h): > > > > > > static inline > > > void sock_init_data_new(struct socket *sock, struct sock *sk) > > > { > > > sock_init_data(sock, sk); > > > sk->sk_owner = THIS_MODULE; > > > } > > > > > > #define sock_init_data sock_init_data_new > > > > Another final note: I am not sure it makes sense to try supporting > > 2.6.20 with CONFIG_GENERIC_ALLOCATOR turned off. > > Distributions typically just enable everything so I think this won't > > be a problem in practice. > > > > What do you think? > > There is no way to enable this option via make menuconfig. 
It is only > enabled if some other subsystem requires it by adding this to their > Kconfig: > > select GENERIC_ALLOCATOR I know, but what I'm saying is that distros tend to enable all drivers so in practice it will be enabled. > I think the best solution is to treat it as a backport, change the names > (either explicitly or with a #define trick) and back-port it to every > kernel version we're going to support. I'm not yet convinced the problem exists. I would suggest look at RHEL5. If that has genalloc enabled its a good indication that you don't need to put a copy of it in OFED. -- MST From swise at opengridcomputing.com Thu Jan 11 15:39:32 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 17:39:32 -0600 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. In-Reply-To: <20070111233415.GU17999@mellanox.co.il> References: <1168557810.8149.89.camel@stevo-desktop> <20070111233415.GU17999@mellanox.co.il> Message-ID: <1168558772.8149.99.camel@stevo-desktop> > > > > So I'm claiming that under my current scheme/design (which is of course > > can change :) I need these services in every build regardless of which > > kernel we're building against. > > OK, I see. Also gen_pool_destroy is needed for older kernels - right? > Yep. It came out in 2.6.18 I think. Without gen_pool_destroy(). Which I added in 2.6.19 if I remember correctly. > Some notes: > - distros normally just enable everything, and customer that built > his own kernel can be directed to just enable genalloc - no? > In that case maybe its best to just rely on it being enabled. > > I don't think distros can enable this (see my subsequent email explain this). > - Macro wrappers can be used to export functions like > gen_pool_destroy_new but have code use gen_pool_destroy as it did > previously. > Ok. 
From rdreier at cisco.com Thu Jan 11 15:40:50 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 11 Jan 2007 15:40:50 -0800 Subject: [openib-general] [PATCH 6/7] libamso: Update libamso for new libibverbs driver handling In-Reply-To: <1168558553.8149.95.camel@stevo-desktop> (Steve Wise's message of "Thu, 11 Jan 2007 17:35:53 -0600") References: <20071101412.UL3KlXfzzPdPpGRy@cisco.com> <1168558553.8149.95.camel@stevo-desktop> Message-ID: > + amsoconfdir = $(sysconfdir)/libibverbs.d > + amsoconf_DATA = amso.driver oops, sorry for leaving that out. From mst at mellanox.co.il Thu Jan 11 15:41:51 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:41:51 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ forChelsioT3 Support. In-Reply-To: <20070111233702.GV17999@mellanox.co.il> References: <1168558122.8149.93.camel@stevo-desktop> <20070111233702.GV17999@mellanox.co.il> Message-ID: <20070111234151.GW17999@mellanox.co.il> > I'm not yet convinced the problem exists. I would suggest look at RHEL5. > If that has genalloc enabled its a good indication that you don't > need to put a copy of it in OFED. Since genalloc is there since 2.6.13, you can also look at SLES10 - most distros have it in I imagine. -- MST From swise at opengridcomputing.com Thu Jan 11 15:44:36 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 17:44:36 -0600 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ forChelsioT3 Support. In-Reply-To: <20070111233702.GV17999@mellanox.co.il> References: <1168558122.8149.93.camel@stevo-desktop> <20070111233702.GV17999@mellanox.co.il> Message-ID: <1168559076.8149.104.camel@stevo-desktop> > > > > There is no way to enable this option via make menuconfig. 
It is only > > enabled if some other subsystem requires it by adding this to their > > Kconfig: > > > > select GENERIC_ALLOCATOR > > I know, but what I'm saying is that distros tend to enable all drivers > so in practice it will be enabled. > Perusing the 2.6.20 src, I only see one subsystem using this other than my driver. That's arch/ia64/kernel/uncached.c. So it will only be turned on for IA64 builds. Also, how will a user running 2.6.20 from kernel.org turn it on? There's no way via make menuconfig. Believe me, I'd love to get rid of this problem, but I haven't seen a good solution other than backporting it everywhere. > > I think the best solution is to treat it as a backport, change the names > > (either explicitly or with a #define trick) and back-port it to every > > kernel version we're going to support. > > I'm not yet convinced the problem exists. I would suggest look at RHEL5. > If that has genalloc enabled its a good indication that you don't > need to put a copy of it in OFED. > Is RHEL5 released? I don't have it. Steve. From mst at mellanox.co.il Thu Jan 11 15:47:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:47:48 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ for ChelsioT3 Support. In-Reply-To: <1168558772.8149.99.camel@stevo-desktop> References: <1168557810.8149.89.camel@stevo-desktop> <20070111233415.GU17999@mellanox.co.il> <1168558772.8149.99.camel@stevo-desktop> Message-ID: <20070111234748.GX17999@mellanox.co.il> > > Some notes: > > - distros normally just enable everything, and customer that built > > his own kernel can be directed to just enable genalloc - no? > > In that case maybe its best to just rely on it being enabled. > > > > > > I don't think distros can enable this (see my subsequent email explain > this). Hmm, I checked and it does seem you are right. So now I understand, and yes, this means a copy should go into kernel_addons.
-- MST From mst at mellanox.co.il Thu Jan 11 15:49:40 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:49:40 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ forChelsioT3 Support. In-Reply-To: <1168559076.8149.104.camel@stevo-desktop> References: <1168558122.8149.93.camel@stevo-desktop> <20070111233702.GV17999@mellanox.co.il> <1168559076.8149.104.camel@stevo-desktop> Message-ID: <20070111234940.GY17999@mellanox.co.il> > > > > > > There is no way to enable this option via make menuconfig. It is only > > > enabled if some other subsystem requires it by adding this to their > > > Kconfig: > > > > > > select GENERIC_ALLOCATOR > > > > I know, but what I'm saying is that distros tend to enable all drivers > > so in practice it will be enabled. > > > > Perusing the 2.6.20 src, I only see one subsystem using this other than > my driver. That's arch/ia64/kernel/uncached.c. So it will only be > turned on for IA64 builds. > > Also, how will a user running 2.6.20 from kernel.org turn it on? > There's no way via make menuconfig. > > Believe me, I'd love to get rid this problem, but I haven't seen a good > other than backporting it everywhere. Yes, you are right. OK, let's carry a copy in OFED then. > > > I think the best solution is to treat it as a backport, change the names > > > (either explicitly or with a #define trick) and back-port it to every > > > kernel version we're going to support. > > > > I'm not yet convinced the problem exists. I would suggest look at RHEL5. > > If that has genalloc enabled its a good indication that you don't > > need to put a copy of it in OFED. > > > > Is RHEL5 released? I don't have it. Not sure, someone at work plays with it but might be beta too. 
-- MST From swise at opengridcomputing.com Thu Jan 11 15:51:30 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 11 Jan 2007 17:51:30 -0600 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/ forChelsioT3 Support. In-Reply-To: <20070111234151.GW17999@mellanox.co.il> References: <1168558122.8149.93.camel@stevo-desktop> <20070111233702.GV17999@mellanox.co.il> <20070111234151.GW17999@mellanox.co.il> Message-ID: <1168559490.8149.111.camel@stevo-desktop> On Fri, 2007-01-12 at 01:41 +0200, Michael S. Tsirkin wrote: > > I'm not yet convinced the problem exists. I would suggest look at RHEL5. > > If that has genalloc enabled its a good indication that you don't > > need to put a copy of it in OFED. > > Since genalloc is there since 2.6.13, you can also look at SLES10 - > most distros have it in I imagine. > My stock SUSE 10.0 system doesn't have it built in... # zcat /proc/config.gz |grep GENERIC_ALLOC # uname -a Linux dell3 2.6.13-15.11-smp #1 SMP Mon Jul 17 09:43:01 UTC 2006 i686 i686 i386 GNU/Linux From mst at mellanox.co.il Thu Jan 11 15:53:32 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 01:53:32 +0200 Subject: [openib-general] [PATCH v2 1/2] ofed_1_2 Changes to kernel_patches/forChelsioT3 Support. In-Reply-To: <1168559490.8149.111.camel@stevo-desktop> References: <1168559490.8149.111.camel@stevo-desktop> Message-ID: <20070111235332.GZ17999@mellanox.co.il> > > > I'm not yet convinced the problem exists. I would suggest look at RHEL5. > > > If that has genalloc enabled its a good indication that you don't > > > need to put a copy of it in OFED. > > > > Since genalloc is there since 2.6.13, you can also look at SLES10 - > > most distros have it in I imagine. > > > > My stock SUSE 10.0 system doesn't have it built in... > > # zcat /proc/config.gz |grep GENERIC_ALLOC > # uname -a > Linux dell3 2.6.13-15.11-smp #1 SMP Mon Jul 17 09:43:01 UTC 2006 i686 i686 i386 GNU/Linux OK, OK, I'm convinced. 
-- MST From mshefty at ichips.intel.com Thu Jan 11 15:53:57 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 11 Jan 2007 15:53:57 -0800 Subject: [openib-general] OpenSM core dump - file size exceeded In-Reply-To: <4582FB97.6010304@ichips.intel.com> References: <4582FB97.6010304@ichips.intel.com> Message-ID: <45A6CE15.1020009@ichips.intel.com> > Looking at the log file, the problem appears to be related to: > > http://openib.org/pipermail/openib-general/2006-December/029962.html This should be fixed in my rdma-dev tree. The problem was that a patch got lost moving between svn and git that caused failed multicast requests to be retried. - Sean From chas at cmf.nrl.navy.mil Thu Jan 11 15:47:30 2007 From: chas at cmf.nrl.navy.mil (chas williams - CONTRACTOR) Date: Thu, 11 Jan 2007 18:47:30 -0500 Subject: [openib-general] [PATCHv5] IPoIB CM Experimental support In-Reply-To: <1168552067.8149.43.camel@stevo-desktop> Message-ID: <200701112347.l0BNlU7i004956@cmf.nrl.navy.mil> In message <1168552067.8149.43.camel at stevo-desktop>,"Steve Wise" writes: >What's the easy way to remove trailing spaces? I seem to fat-finger >them into my patches too. using vi, :%s/  *$//g -- there are two spaces between the "/" and the "*", so "  *$" means at least one space at the end of the line. From robert.j.woodruff at intel.com Thu Jan 11 16:02:09 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Thu, 11 Jan 2007 16:02:09 -0800 Subject: [openib-general] Request for assistance from DWG In-Reply-To: <20070111225607.GQ17999@mellanox.co.il> Message-ID: I guess the question is, should we be taking code that is GPL only from others and including it in our code base ? I also understand that it might be pretty difficult, if not impossible to code some backport without using some of the GPL only code from a newer kernel and porting it back to an older kernel. What do people think ? If we have to have some GPL only files for backport and such can we do that within openfabrics ?
Probably a question for Bill Boas, and the rest of the promoters. Bill ?? and/or can we just isolate the GPL only code from GPL/BSD code so that people will know what is GPL/BSD and what is truly GPL only. -----Original Message----- From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] Sent: Thursday, January 11, 2007 2:56 PM To: Woodruff, Robert J Cc: Chet Mehta; Steve Wise; Betsy Zeller; Tziporet Koren; vlad at mellanox.co.il; Bryan O'Sullivan; openib; openfabrics-lwg at openfabrics.org; openfabrics-ewg at openib.org Subject: Re: Request for assistance from DWG The BSD+GPL can only apply to code developed by us. Backport headers, list.h and iproute2 are not our code and can not be "corrected". Quoting Woodruff, Robert J : Subject: Request for assistance from DWG I have added the appropriate maintainers to this thread and sending it to the open fabrics email lists. Tziporet for OFED (mutex-backport.h file used by OFED) Bryan for ipath Vladimir for ipoib tools Steve for cxgb3 driver All please review the list of GPL only files that are (or were in your components before we moved to git) and please correct the code so that meets the BSD+GPL requirements that we all agreed to when we joined openfabrics, if you have not done so already. Thanks woody ________________________________ From: Chet Mehta [mailto:chetm at us.ibm.com] Sent: Thursday, January 11, 2007 2:17 PM To: Woodruff, Robert J Cc: openfabrics-lwg at openfabrics.org Subject: RE: Request for assistance from DWG Bob, Thanks for the reply. Attached below is a list of files that we believe have only a GPL license (the list was generated on 12/5/06). As I mentioned in my last note, the search wasn't exhaustive so there may be others. During the time we also noticed that there were some utility-type files (e.g. files used for extraction, make or build) that were GPL only. I'm assuming the DWG will decide if these should be dual licensed or be placed on the exception list.
Lastly I suspect there are files that only have a BSD license - i.e. no GPL. While personally that's less worrisome, going strictly by the Bylaws I would suspect those should be corrected also. Thanks for your help to get this corrected! ################################################################ #mutex-backport.h (Cisco Systems) ################################################################ ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/trunk/src/linux-kernel/infiniband/include/linux/mutex-backport.h ./gen2/trunk/src/linux-kernel/infiniband/include/linux/.svn/text-base/mu tex-backport.h.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/mutex-backport.h ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/mutex-ba ckport.h ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/.svn/tex t-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ mutex-backport.h 
./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ################################################################ #ipath_eth.c (PathScale) ################################################################ ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/ipath_et h.c ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/.svn/tex t-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/.svn/text-bas e/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/i path_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/. 
svn/text-base/ipath_eth.c.svn-base ################################################################ #lnstat.c Linux network stats # Copyright (C) 2004 by Harald Welte # Development of this code was funded by Astaro AG, http://www.astaro.com/ # Based on original concept and ideas from predecessor rtstat.c: # Copyright 2001 by Robert Olsson # Uppsala University, Sweden ################################################################ ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/lnstat.c ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/.svn/text-base/lnsta t.c.svn-base ################################################################ #list.h ( kazutomo at mcs.anl.gov) ################################################################ ./gen2/trunk/src/userspace/ipoibtools/list.h ./gen2/trunk/src/userspace/ipoibtools/.svn/text-base/list.h.svn-base ################################################################ #cxgb3_main.c (Chelsio Communications) ################################################################ ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/cx gb3_main.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/.s vn/text-base/cxgb3_main.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/cxgb3_main.c ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/.svn/text-base/cxgb3_ma in.c.svn-base :Chet. 
_______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From mshefty at ichips.intel.com Thu Jan 11 16:11:49 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 11 Jan 2007 16:11:49 -0800 Subject: [openib-general] ipoib ipv6 multicast joins, was: multicast code/merge status In-Reply-To: <1168457495.18817.1746.camel@hal.voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> Message-ID: <45A6D245.1020106@ichips.intel.com> Hal Rosenstock wrote: >>(*) there are some more issues here which need to be addressed, see >>for example the "Some SMs don't support send-only yet" weird comment >>at ipoib_mcast_sendonly_join() > > > It's more likely an SA issue but I'm only guessing... It may also be > historical... Based on observation, it looks like ipoib joins a couple of IPv6 multicast groups with send only membership. However it changes the join_state from 4 to 1 (send-only to full member). This results in the SA trying to create the multicast group, only the required MCMemberRecord components have not been set. I'm not sure if this indicates a serious problem, but I'm guessing not. The join request simply fails and returns an error back to ipoib. (Which would have happened for a send-only join if the group hadn't already been created.) - Sean From mst at mellanox.co.il Thu Jan 11 16:17:19 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Fri, 12 Jan 2007 02:17:19 +0200 Subject: [openib-general] Request for assistance from DWG In-Reply-To: References: Message-ID: <20070112001719.GB17999@mellanox.co.il> > What do people think ? OFED sources tarball can mix in GPL code without making all of OFED GPL-only under the "mere aggregation" clause in GPLv2: "In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License." > and/or can we just isolate the GPL only code from GPL/BSD code so > that people will know what is GPL/BSD and what is truely GPL only. That's why files have a license in the header, no? Quoting Woodruff, Robert J : Subject: RE: Request for assistance from DWG I guess the question is, should we be taking code that is GPL only from others and including it in our code base ? I also understand that it might be pretty difficult, if not impossible to code some backport without using some of the GPL only code from a newer kernel and porting it back to an older kernel. What do people think ? If we have to have some GPL only files for backport and such can we do that within openfabrics ? Probably a question for Bill Boas, and the rest of the promoters. Bill ?? and/or can we just isolate the GPL only code from GPL/BSD code so that people will know what is GPL/BSD and what is truely GPL only. -----Original Message----- From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] Sent: Thursday, January 11, 2007 2:56 PM To: Woodruff, Robert J Cc: Chet Mehta; Steve Wise; Betsy Zeller; Tziporet Koren; vlad at mellanox.co.il; Bryan O'Sullivan; openib; openfabrics-lwg at openfabrics.org; openfabrics-ewg at openib.org Subject: Re: Request for assistance from DWG The BSD+GPL can only apply to code developed by us. Backport headers, list.h and iproute2 are not our code and can not be "corrected". 
Quoting Woodruff, Robert J : Subject: Request for assistance from DWG I have added the appropriate maintainers to this thread and sending it to the open fabrics email lists. Tziporet for OFED (mutex-backport.h file used by OFED) Bryan for ipath Vladimir for ipoib tools Steve for cxgb3 driver All please review the list of GPL only files that are (or were in your components before we moved to git) and please correct the code so that meets the BSD+GPL requirements that we all agreed to when we joined openfabrics, if you have not done so already. Thanks woody ________________________________ From: Chet Mehta [mailto:chetm at us.ibm.com] Sent: Thursday, January 11, 2007 2:17 PM To: Woodruff, Robert J Cc: openfabrics-lwg at openfabrics.org Subject: RE: Request for assistance from DWG Bob, Thanks for the reply. Attached below is a list of files that we believe have only a GPL license (the list was generated on 12/5/06). As I mentioned in my last note, the search wasn't exhaustive so there may be others. During the time we also noticed that there were some utility-type files (e.g. files used for extraction, make or build) that were GPL only. I'm assuming the DWG will decide if these should be dual licensed or be placed on the exception list. Lastly I suspect there are files that only have a BSD license - i.e. no GPL. While personally that's less worrisome, going strictly by the Bylaws I would suspect those should be corrected also. Thanks for your help to get this corrected! 
################################################################ #mutex-backport.h (Cisco Systems) ################################################################ ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/mutex-backport.h ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include /linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/trunk/src/linux-kernel/infiniband/include/linux/mutex-backport.h ./gen2/trunk/src/linux-kernel/infiniband/include/linux/.svn/text-base/mu tex-backport.h.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/mutex-backport.h ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i nclude/linux/.svn/text-base/mutex-backport.h.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/mutex-ba ckport.h ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/.svn/tex t-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ 
.svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ mutex-backport.h ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ .svn/text-base/mutex-backport.h.svn-base ################################################################ #ipath_eth.c (PathScale) ################################################################ ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/ipath_et h.c ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/.svn/tex t-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/ipath_eth.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h w/ipath/ipath_ether/.svn/text-base/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/.svn/text-bas e/ipath_eth.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/i path_eth.c ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/. 
svn/text-base/ipath_eth.c.svn-base ################################################################ #lnstat.c Linux network stats # Copyright (C) 2004 by Harald Welte # Development of this code was funded by Astaro AG, http://www.astaro.com/ # Based on original concept and ideas from predecessor rtstat.c: # Copyright 2001 by Robert Olsson # Uppsala University, Sweden ################################################################ ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/lnstat.c ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/.svn/text-base/lnsta t.c.svn-base ################################################################ #list.h ( kazutomo at mcs.anl.gov) ################################################################ ./gen2/trunk/src/userspace/ipoibtools/list.h ./gen2/trunk/src/userspace/ipoibtools/.svn/text-base/list.h.svn-base ################################################################ #cxgb3_main.c (Chelsio Communications) ################################################################ ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/cx gb3_main.c ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/.s vn/text-base/cxgb3_main.c.svn-base ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/cxgb3_main.c ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/.svn/text-base/cxgb3_ma in.c.svn-base :Chet. _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST -- MST From mst at mellanox.co.il Thu Jan 11 16:19:56 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Fri, 12 Jan 2007 02:19:56 +0200 Subject: [openib-general] ipoib ipv6 multicast joins, was: multicast code/merge status In-Reply-To: <45A6D245.1020106@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> <45A6D245.1020106@ichips.intel.com> Message-ID: <20070112001956.GC17999@mellanox.co.il> > Quoting Sean Hefty : > Subject: ipoib ipv6 multicast joins, was: multicast code/merge status > > Hal Rosenstock wrote: > >>(*) there are some more issues here which need to be addressed, see > >>for example the "Some SMs don't support send-only yet" weird comment > >>at ipoib_mcast_sendonly_join() > > > > > > It's more likely an SA issue but I'm only guessing... It may also be > > historical... > > Based on observation, it looks like ipoib joins a couple of IPv6 multicast > groups with send only membership. However it changes the join_state from 4 to 1 > (send-only to full member). This results in the SA trying to create the > multicast group, only the required MCMemberRecord components have not been set. > > I'm not sure if this indicates a serious problem, but I'm guessing not. The > join request simply fails and returns an error back to ipoib. (Which would have > happened for a send-only join if the group hadn't already been created.) So, this looks like a work-around for some broken SM, does it not? 
-- MST From mshefty at ichips.intel.com Thu Jan 11 16:51:11 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 11 Jan 2007 16:51:11 -0800 Subject: [openib-general] ipoib ipv6 multicast joins, was: multicast code/merge status In-Reply-To: <20070112001956.GC17999@mellanox.co.il> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> <45A6D245.1020106@ichips.intel.com> <20070112001956.GC17999@mellanox.co.il> Message-ID: <45A6DB7F.3060800@ichips.intel.com> > So, this looks like a work-around for some broken SM, does it not? Yes - I mentioned it because the resulting error message (wrong component mask) is what was filling up the opensm log file. Jan 11 14:21:36 083844 [40583BB0] -> osm_mcmr_rcv_join_mgrp: ERR 1B11: method = SubnAdmSet, scope_state = 0x1, component mask = 0x0000000000010083, expected com p mask = 0x00000000000130c7, MGID: 0xffffffffffff0000 : 0x2000001400020404 from port 0x0002c9010ad258f1 I've applied a missing patch to my rdma-dev git tree that should avoid filling up the opensm log file. But the error in the opensm log file is a result of this work-around. - Sean From erezz at voltaire.com Thu Jan 11 17:20:30 2007 From: erezz at voltaire.com (Erez Zilber) Date: Fri, 12 Jan 2007 03:20:30 +0200 Subject: [openib-general] [query]maximum no. of connections per session in iser initiator References: <564007.88233.qm@web8322.mail.in.yahoo.com> Message-ID: <3857BB049D83424D9DB82753D37CEA550AEA2D@taurus.voltaire.com> > the max_conn(maximum no. of connections allowed per session ?) field in iscsi_transport structure is not initialised at all while registering with the iSCSI layer in the iSER initiator code. > Is that variable is initialised to any default value? 
(like iSCSI/TCP has max_conn in iscsi_transport initialised to 1) > > what are the maximum no. of connections allowed per session in iSER initiator? > Let me check that. I'm currently out of the office. This will take some time. > regards, > Mahesh > > keshetti mahesh wrote: >> I guess that you're talking about this piece of code: >> #define ISCSI_ISER_MAX_CONN 8 >> #define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ >> ISER_QP_MAX_REQ_DTOS) * \ >> ISCSI_ISER_MAX_CONN) >> It is related to the cq used by iSER. We use it in order to define the >> max len of the cq (the same cq is used for the sq & rq). It means that >> per HCA, 8 connections may be opened. > > Yes, I was talking about this part only. > Isn't this variable is equivalent to the maximum no. of connections allowed per the session? No. It is exactly what I said: we need to define the cq len. In order to do that, we need to define the max number of connections on a single HCA. I never mentioned the session. > what is constraint for chosing this value as 8? It is on our ToDo list. We need to define it better. > >>Where do you see that in iscsi_tcp? > > In the structure iscsi_transport there is a variable max_conn and it is set to 1 > (In open-iscsi intiator file name iscsi_tcp.c) whether this limit is imposed by iSCSI or by transport layer(TCP > or iSER) ? Please send this question to the open-iscsi list. 
From eitan at sw053.yok.mtl.com Thu Jan 11 21:33:24 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Fri, 12 Jan 2007 07:33:24 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-12:normal completion Message-ID: <200701120533.l0C5XOjg004173@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Thu_Jan_11_11:23:42_2007 d20b83 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From pasquale.davide at gmail.com Fri Jan 12 02:28:41 2007 From: pasquale.davide at gmail.com (Davide Pasquale) Date: Fri, 12 Jan 2007 11:28:41 +0100 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: <20070111122951.GH1759@mellanox.co.il> References: <20070111101822.GC1759@mellanox.co.il> <20070111122951.GH1759@mellanox.co.il> Message-ID: > > > No idea. > Try running a preprocessor on flint.cpp (add -E flag to g++ line) > find the relevant line in the output and see why does it try > to pull in bitvector. > > -- > MST Ok! I have successfullly compiled OFED.1.1 on my platform after a new installation! Now how can I use opensm and openib utilities ? Thanks. 
-------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Fri Jan 12 03:22:01 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 12 Jan 2007 13:22:01 +0200 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: References: Message-ID: <20070112112201.GB2802@mellanox.co.il> > Quoting Davide Pasquale : > Subject: Re: OFED 1.1 build issue > > > > > No idea. > Try running a preprocessor on flint.cpp (add -E flag to g++ line) > find the relevant line in the output and see why does it try > to pull in bitvector. > > -- > MST > > > Ok! > > > I have successfullly compiled OFED.1.1 on my platform after a new installation! > > Now how can I use opensm and openib utilities ? > Thanks. Try reading man opensm. -- MST From RAISCH at de.ibm.com Fri Jan 12 04:25:31 2007 From: RAISCH at de.ibm.com (Christoph Raisch) Date: Fri, 12 Jan 2007 13:25:31 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 2/5] ehca: ehca_uverbs.c: "proper" use of mmap In-Reply-To: Message-ID: Roland Dreier wrote on 11.01.2007 20:54:58: > > > int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) > > > { > > > > Can you split this monster routine into individual functions for > > each type of mmap please? With two helpers to get and verify the cq/qp > > shared by the individual sub-variants, that would also help to get rid > > of all those magic offsets. > > > > Actually, this routine directly comes from ib_device.mmap - Roland, > > can you shed some light on what's going on here? > > Each userspace-accessible IB device gets a single device node like > /dev/infiniband/uverbsX. Opening that gives userspace a "context". > One of the things userspace can do with that fd is mmap() on it -- > that was originally envisioned as a way to map a page of hardware > registers directly in to the userspace process. > > It seems ehca needs to allocate lots of different things in the kernel > via mmap(). 
What you're saying I guess is that ideally each of these > would be mmap() on a different fd rather than using different > offsets. It's a little awkward to open multiple device nodes to get > multiple fds, since there's not a good way to attach them all to the > same context. I guess we could create some hack to return more file > handles, but I think that cure is worse than the disease of using > magic offsets... > > Maybe longer term we need to look at a scheme like cell's spufs but > I'm still not confident we have the RDMA interface quite ready to > freeze at the system call level. > > - R. ...as Roland mentions, we're not completely free to change the filehandle usage, it's shared by ~5 drivers now. I'd say lets investigate the direction of an own filesystem unless there's no other clean solution. We can polish the current version a bit, but that won't change the "magic offsets". Roland, could you take this patchset into your tree? We hope it adresses the major security concern and vm_insert_page. We're preparing the next patch for the yield deadlock topic with this patchset as prereq. Gruss / Regards . . . Christoph Raisch From HNGUYEN at de.ibm.com Fri Jan 12 06:15:34 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Fri, 12 Jan 2007 15:15:34 +0100 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 coordination meeting next Monday at 9am PST In-Reply-To: Message-ID: Hello Tziporet and others from ofed board! I wish to know if and how ehca could transition from preview tech to production state for ofed 1.2, especially what do we need to accomplish in order to achieve that. If you could point me to a written procedure for that, it would be great. Thanks! 
Nam From hnguyen at linux.vnet.ibm.com Fri Jan 12 07:23:13 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Fri, 12 Jan 2007 16:23:13 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() In-Reply-To: References: <200701112008.37236.hnguyen@linux.vnet.ibm.com> <20070111194054.GA11770@localdomain> Message-ID: <200701121623.13687.hnguyen@linux.vnet.ibm.com> Hi Roland! > > > spin_lock_irqsave(&ehca_cq_idr_lock, flags); > > > while (my_cq->nr_callbacks) > > > yield(); > > > Isn't that code outright buggy? Calling into the scheduler with a > > spinlock held and local interrupts disabled... > > Yes, absolutely -- if nr_callbacks is ever nonzero then this will > obviously crash instantly. As Christoph R. mentioned in another thread, I'm sending you a patch to fix this bug. Thanks to all for this hint! The purpose of the while loop is to wait until all completion entries have been processed by a running completion handler. Only then does the function continue with destroying the completion queue. Thus, we now unlock and re-lock around yield(), i.e. yield() is now called from normal process context without the lock held. Hope that this pattern is ok. In addition to the yield issue, this patch also fixes an improper use of spin_unlock() in ehca_irq.c. 
Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_cq.c | 5 ++++- ehca_irq.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-11 19:54:06.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-12 15:27:50.000000000 +0100 @@ -330,8 +330,11 @@ int ehca_destroy_cq(struct ib_cq *cq) } spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_callbacks) + while (my_cq->nr_callbacks) { + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); yield(); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + } idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-11 19:53:33.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-12 15:27:50.000000000 +0100 @@ -440,7 +440,9 @@ void ehca_tasklet_eq(unsigned long data) cq = idr_find(&ehca_cq_idr, token); if (cq == NULL) { - spin_unlock(&ehca_cq_idr_lock); + spin_unlock_irqrestore( + &ehca_cq_idr_lock, + flags); break; } From hnguyen at linux.vnet.ibm.com Fri Jan 12 07:36:15 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Fri, 12 Jan 2007 16:36:15 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() In-Reply-To: <20070111192056.GB24623@infradead.org> References: <200701112008.37236.hnguyen@linux.vnet.ibm.com> <20070111192056.GB24623@infradead.org> Message-ID: <200701121636.15989.hnguyen@linux.vnet.ibm.com> Hi, > > + if (my_cq->ownpid != cur_pid) { > > + ehca_err(device, "Invalid caller pid=%x ownpid=%x " > > + "cq_num=%x", > > + cur_pid, my_cq->ownpid, my_cq->cq_number); > > + return -EINVAL; > > + } > > (for 
other reviewers: this is not new code, just moved around) > > Owner tracking by pid is really dangerous. File descriptors can be > passed around by unix sockets, a single process can have files open > more than once, etc.. > > It seems ehca wants to prevent threads other than the creating one > from performing most operations. Can you explain the reason for this? you point to the right spot... This has a historic reason as we have needed to support fork(), system("date") etc for kernel 2.6.9, hence those vma flags manipulation and this pid checking as proactive protection/restriction. For newer kernel, I guess >=2.6.12, this checking were not necessary, but we would feel better after we had tested user space stuff more thoroughly without this piece of code. Since this is not new code, can we pls handle this later? Regards Nam From swise at opengridcomputing.com Fri Jan 12 08:20:36 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 12 Jan 2007 10:20:36 -0600 Subject: [openib-general] [PATCH v3 1/3] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070112162034.27341.33435.stgit@dell3.ogc.int> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> Message-ID: <20070112162036.27341.86812.stgit@dell3.ogc.int> - qp_num -> qp ptr patch for cxgb3. Signed-off-by: Steve Wise --- kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch | 23 +++++++++++++++++++++++ 1 files changed, 23 insertions(+), 0 deletions(-) diff --git a/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch new file mode 100644 index 0000000..0763f70 --- /dev/null +++ b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch @@ -0,0 +1,23 @@ +Update T3 driver: qp_num no longer in ib_wc. 
+ +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/iwch_cq.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c +index ff09509..122f7b4 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c +@@ -80,7 +80,7 @@ int iwch_poll_cq_one(struct iwch_dev *rh + ret = 1; + + wc->wr_id = cookie; +- wc->qp_num = qhp->wq.qpid; ++ wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " From swise at opengridcomputing.com Fri Jan 12 08:20:34 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 12 Jan 2007 10:20:34 -0600 Subject: [openib-general] [PATCH v3 0/3] ofed_1_2 - Chelsio T3 RDMA Support Message-ID: <20070112162034.27341.33435.stgit@dell3.ogc.int> Michael/Vlad, Here is version 3 of the patch series for adding the Chelsio kernel drivers. Changes since version 2: - made genalloc.[ch] a backport addon as per our discussions. At this point, I've got it working for 2.6.20. - use #defines to avoid patching cxgb3 files for the genalloc backport. In addition to this patch series, the Chelsio driver code needs to be pulled from git://staging.openfabrics.org/~swise/cxgb3.git for-ofed_1_2 I've tested this on 2.6.20-rc4. If you're happy with the changes, I ask that you pull these patches in and pull the T3 drivers into vlad's ofed_1_2 git tree. That will give us a base package for Chelsio's drivers. Then I'll post additional patches for the backports to the supported distros as I get them implemented and tested. How's that sound? If you would rather a patch for the chelsio drivers, lemme know and I'll email it to you directly. 
Signed-off-by: Steve Wise From swise at opengridcomputing.com Fri Jan 12 08:20:38 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 12 Jan 2007 10:20:38 -0600 Subject: [openib-general] [PATCH v3 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <20070112162034.27341.33435.stgit@dell3.ogc.int> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> Message-ID: <20070112162038.27341.81922.stgit@dell3.ogc.int> - added cxgb3 and iw_cxgb3 config stuff - visit and build driver/net/cxgb3 to get the cxgb3 driver Signed-off-by: Steve Wise --- ofed_scripts/Makefile | 9 ++++++-- ofed_scripts/configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile index d63b1d2..8942385 100644 --- a/ofed_scripts/Makefile +++ b/ofed_scripts/Makefile @@ -46,8 +46,10 @@ kernel: @echo "Kernel sources: $(KSRC)" env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ -I$(CWD)/drivers/infiniband/ulp/ipoib \ - -I$(CWD)/drivers/infiniband/debug" \ - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ + -I$(CWD)/drivers/infiniband/debug \ + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ + -I$(CWD)/drivers/net/cxgb3 " \ + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ @@ -74,6 +76,9 @@ kernel: CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ 
LINUXINCLUDE=' \ $(BACKPORT_INCLUDES) \ -I$(CWD)/include \ diff --git a/ofed_scripts/configure b/ofed_scripts/configure index a0557e2..253427c 100755 --- a/ofed_scripts/configure +++ b/ofed_scripts/configure @@ -126,6 +126,12 @@ Usage: `basename $0` [options] --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] --without-vnic_stats-mod [yes] + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] + --without-cxgb3-mod [yes] + + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] + --without-cxgb3_debug-mod [yes] + --help - print out options @@ -207,7 +213,10 @@ get_backport_dir() 2.6.19*) echo 2.6.19 ;; - 2.6.2[0-9]*) + 2.6.20*) + echo 2.6.20 + ;; + 2.6.2[1-9]*) echo ;; *) @@ -607,6 +616,20 @@ main() --without-vnic_stats-mod) CONFIG_INFINIBAND_VNIC_STATS= ;; + --with-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3="m" + CONFIG_CHELSIO_T3="m" + ;; + --without-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3= + CONFIG_CHELSIO_T3= + ;; + --with-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG="y" + ;; + --without-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG= + ;; --with-modprobe|--without-modprobe) ;; -h | --help) @@ -679,6 +702,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} @@ -689,6 +714,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} # Check for minimal supported kernel version if ! 
check_kerver ${KVERSION} ${MIN_KVERSION}; then @@ -742,6 +768,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} @@ -752,6 +780,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} EOFCONFIG echo "Created ${CONFIG}:" @@ -887,6 +916,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" else DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" fi +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" +else + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" +fi +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" +else + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" +fi +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" +else + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" +fi cat >> ${AUTOCONF_H} << EOFAUTOCONF #undef CONFIG_INFINIBAND #undef CONFIG_INFINIBAND_IPOIB @@ -908,6 +952,9 @@ #undef CONFIG_INFINIBAND_MADEYE #undef CONFIG_INFINIBAND_VNIC #undef CONFIG_INFINIBAND_VNIC_DEBUG #undef CONFIG_INFINIBAND_VNIC_STATS +#undef CONFIG_INFINIBAND_CXGB3 +#undef CONFIG_INFINIBAND_CXGB3_DEBUG +#undef CONFIG_CHELSIO_T3 #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY @@ -927,6 +974,8 @@ #undef 
CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_ADDR_TRANS} ${DEFINE_INFINIBAND_MTHCA} ${DEFINE_INFINIBAND_VNIC} +${DEFINE_INFINIBAND_CXGB3} +${DEFINE_CHELSIO_T3} ${DEFINE_INFINIBAND_IPOIB_DEBUG} ${DEFINE_INFINIBAND_ISER} @@ -937,6 +986,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_RDS_DEBUG} ${DEFINE_INFINIBAND_VNIC_DEBUG} ${DEFINE_INFINIBAND_VNIC_STATS} +${DEFINE_INFINIBAND_CXGB3_DEBUG} ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} From swise at opengridcomputing.com Fri Jan 12 08:20:40 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 12 Jan 2007 10:20:40 -0600 Subject: [openib-general] [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. In-Reply-To: <20070112162034.27341.33435.stgit@dell3.ogc.int> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> Message-ID: <20070112162040.27341.78123.stgit@dell3.ogc.int> This is needed even on kernels that might have genalloc because it may not be configured into the running kernel. Right now the only subsystem that turns on the generic allocator config option is IA64. So chances are good that 2.6.20 kernels will _not_ have this compiled in. So we just backport it to all supported kernels. To avoid collisions with kernels that _do_ have this compiled in, change the names of the exported symbols in the backport. Changes to genalloc.c: - Change exported symbol names by prepending ib_ EG: gen_pool_alloc() becomes ib_gen_pool_alloc() Changes to genalloc.h: - Change exported symbol names by prepending ib_ - added #defines for original exported symbol names to new names. Thus code calling the allocator doesn't need to change. 
EG: #define gen_pool_alloc ib_gen_pool_alloc Signed-off-by: Steve Wise --- .../backport/2.6.20/include/linux/genalloc.h | 42 +++++ .../backport/2.6.20/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.20/linux_genalloc_to_2.6.20.patch | 18 ++ 3 files changed, 258 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.20/include/linux/genalloc.h b/kernel_addons/backport/2.6.20/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.20/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.20/include/src/genalloc.c b/kernel_addons/backport/2.6.20/include/src/genalloc.c new file mode 100644 index 0000000..5457abb --- /dev/null +++ b/kernel_addons/backport/2.6.20/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *ib_gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(ib_gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int ib_gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(ib_gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void ib_gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(ib_gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long ib_gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(ib_gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void ib_gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(ib_gen_pool_free); diff --git a/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch new file mode 100644 index 0000000..7b7bba6 --- /dev/null +++ b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch @@ -0,0 +1,18 @@ +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 163d991..2cd239f 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,6 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++obj-$(CONFIG_INFINIBAND) += genalloc.o ++ib_core-y += genalloc.o +diff --git a/drivers/infiniband/core/stream.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ 
b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" From halr at voltaire.com Fri Jan 12 08:50:54 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 11:50:54 -0500 Subject: [openib-general] [PATCH] OpenSM/osm_sa_path_record.c: Initial support for off subnet PathRecords Message-ID: <1168620641.31913.10045.camel@hal.voltaire.com> OpenSM/osm_sa_path_record.c: Initial support for off subnet PathRecords Off subnet PathRecords for both the unicast and multicast DGID cases are supported. HopLimit is set to maximum (0xFF). In the case of a unicast DGID request, the LID of the "first" router found on the subnet is used for the DLID. I think this is sufficient to get started. Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index 0c5d4a9..c43ad31 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -66,6 +66,10 @@ #include #include #include +#ifdef ROUTER_EXP +#include +#include +#endif #define OSM_PR_RCV_POOL_MIN_SIZE 64 #define OSM_PR_RCV_POOL_GROW_SIZE 64 @@ -93,6 +97,11 @@ typedef struct osm_sa_pr_mcmr_search_ osm_pr_rcv_t *p_rcv; } osm_sa_pr_mcmr_search_ctxt_t; +static const ib_gid_t zero_gid = { { 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }, }; + /********************************************************************** **********************************************************************/ void @@ -712,6 +721,7 @@ __osm_pr_rcv_build_pr( IN osm_pr_rcv_t* const p_rcv, IN const osm_port_t* const p_src_port, IN const osm_port_t* const p_dest_port, + IN const ib_gid_t* const p_dgid, IN const uint16_t src_lid_ho, IN const uint16_t dest_lid_ho, IN const uint8_t preference, @@ -720,14 +730,33 @@ __osm_pr_rcv_build_pr( { const osm_physp_t* p_src_physp; const osm_physp_t* p_dest_physp; + boolean_t is_nonzero_gid = 0; OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_build_pr ); p_src_physp = 
osm_port_get_default_phys_ptr( p_src_port ); +#ifndef ROUTER_EXP p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix( p_dest_physp ); p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid( p_dest_physp ); +#else + if ( p_dgid) + { + if ( memcmp( p_dgid, &zero_gid, sizeof(*p_dgid) ) ) + is_nonzero_gid = 1; + } + + if ( is_nonzero_gid ) + p_pr->dgid = *p_dgid; + else + { + p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); + + p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix( p_dest_physp ); + p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid( p_dest_physp ); + } +#endif p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix( p_src_physp ); p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid( p_src_physp ); @@ -736,6 +765,11 @@ __osm_pr_rcv_build_pr( p_pr->slid = cl_hton16( src_lid_ho ); p_pr->hop_flow_raw &= cl_hton32(1<<31); +#ifdef ROUTER_EXP + /* Only set HopLimit if going through a router */ + if ( is_nonzero_gid ) + p_pr->hop_flow_raw |= cl_hton32(IB_HOPLIMIT_MAX); +#endif p_pr->pkey = p_parms->pkey; p_pr->sl = cl_hton16(p_parms->sl); @@ -766,6 +800,7 @@ __osm_pr_rcv_get_lid_pair_path( IN const ib_path_rec_t* const p_pr, IN const osm_port_t* const p_src_port, IN const osm_port_t* const p_dest_port, + IN const ib_gid_t* const p_dgid, IN const uint16_t src_lid_ho, IN const uint16_t dest_lid_ho, IN const ib_net64_t comp_mask, @@ -832,7 +867,7 @@ __osm_pr_rcv_get_lid_pair_path( } } - __osm_pr_rcv_build_pr( p_rcv, p_src_port, p_dest_port, + __osm_pr_rcv_build_pr( p_rcv, p_src_port, p_dest_port, p_dgid, src_lid_ho, dest_lid_ho, preference, &path_parms, &p_pr_item->path_rec ); @@ -850,6 +885,7 @@ __osm_pr_rcv_get_port_pair_paths( IN const osm_port_t* const p_req_port, IN const osm_port_t* const p_src_port, IN const osm_port_t* const p_dest_port, + IN const ib_gid_t* const p_dgid, IN const ib_net64_t comp_mask, IN cl_qlist_t* const p_list ) { @@ -1016,6 +1052,7 @@ 
__osm_pr_rcv_get_port_pair_paths( p_pr_item = __osm_pr_rcv_get_lid_pair_path( p_rcv, p_pr, p_src_port, p_dest_port, + p_dgid, src_lid_ho, dest_lid_ho, comp_mask, preference ); @@ -1083,6 +1120,7 @@ __osm_pr_rcv_get_port_pair_paths( p_pr_item = __osm_pr_rcv_get_lid_pair_path( p_rcv, p_pr, p_src_port, p_dest_port, + p_dgid, src_lid_ho, dest_lid_ho, comp_mask, preference ); @@ -1105,13 +1143,19 @@ __osm_pr_rcv_get_end_points( IN osm_pr_rcv_t* const p_rcv, IN const osm_madw_t* const p_madw, OUT const osm_port_t** const pp_src_port, - OUT const osm_port_t** const pp_dest_port ) + OUT const osm_port_t** const pp_dest_port, + OUT ib_gid_t* const p_dgid ) { const ib_path_rec_t* p_pr; const ib_sa_mad_t* p_sa_mad; ib_net64_t comp_mask; + ib_net64_t dest_guid; ib_api_status_t status; ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; +#ifdef ROUTER_EXP + osm_router_t* p_rtr; + osm_port_t* p_rtr_port; +#endif OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_get_end_points ); @@ -1198,31 +1242,52 @@ __osm_pr_rcv_get_end_points( } } + if ( p_dgid ) + memset( p_dgid, 0, sizeof(*p_dgid)); + if( comp_mask & IB_PR_COMPMASK_DGID ) { + dest_guid = p_pr->dgid.unicast.interface_id; if ( ! ib_gid_is_link_local( &p_pr->dgid ) ) { if ( ! ib_gid_is_multicast( &p_pr->dgid ) && ib_gid_get_subnet_prefix( &p_pr->dgid ) != p_rcv->p_subn->opt.subnet_prefix ) { + osm_log( p_rcv->p_log, OSM_LOG_VERBOSE, + "__osm_pr_rcv_get_end_points: " + "Non local DGID subnet prefix 0x%016" PRIx64 "\n", + cl_ntoh64( p_pr->dgid.unicast.prefix ) ); +#ifndef ROUTER_EXP /* This 'error' is the client's fault (bad gid) so don't enter it as an error in our own log. Return an error response to the client. 
*/ - osm_log( p_rcv->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_get_end_points: " - "Non local DGID subnet prefix 0x%016" PRIx64 "\n", - cl_ntoh64( p_pr->dgid.unicast.prefix ) ); - sa_status = IB_SA_MAD_STATUS_INVALID_GID; goto Exit; +#else + /* Just use "first" router (if it exists) for now */ + p_rtr = (osm_router_t*)cl_qmap_head( &p_rcv->p_subn->rtr_guid_tbl ); + if ( p_rtr == (osm_router_t*)cl_qmap_end( &p_rcv->p_subn->rtr_guid_tbl ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_end_points: ERR 1F22: " + "Off subnet DGID but no routers found\n" ); + sa_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } + + p_rtr_port = osm_router_get_port_ptr( p_rtr ); + dest_guid = osm_port_get_guid( p_rtr_port ); + if ( p_dgid ) + *p_dgid = p_pr->dgid; +#endif } } *pp_dest_port = (osm_port_t*)cl_qmap_get( &p_rcv->p_subn->port_guid_tbl, - p_pr->dgid.unicast.interface_id ); + dest_guid ); if( *pp_dest_port == (osm_port_t*)cl_qmap_end( &p_rcv->p_subn->port_guid_tbl ) ) @@ -1235,7 +1300,7 @@ __osm_pr_rcv_get_end_points( osm_log( p_rcv->p_log, OSM_LOG_VERBOSE, "__osm_pr_rcv_get_end_points: " "No dest port with GUID 0x%016" PRIx64 "\n", - cl_ntoh64( p_pr->dgid.unicast.interface_id) ); + cl_ntoh64( dest_guid ) ); sa_status = IB_SA_MAD_STATUS_INVALID_GID; goto Exit; @@ -1279,6 +1344,7 @@ __osm_pr_rcv_process_world( IN osm_pr_rcv_t* const p_rcv, IN const osm_madw_t* const p_madw, IN const osm_port_t* const requester_port, + IN const ib_gid_t* const p_dgid, IN const ib_net64_t comp_mask, IN cl_qlist_t* const p_list ) { @@ -1305,7 +1371,7 @@ __osm_pr_rcv_process_world( while( p_src_port != (osm_port_t*)cl_qmap_end( p_tbl ) ) { __osm_pr_rcv_get_port_pair_paths( p_rcv, p_madw, requester_port, p_src_port, - p_dest_port, comp_mask, p_list ); + p_dest_port, p_dgid, comp_mask, p_list ); p_src_port = (osm_port_t*)cl_qmap_next( &p_src_port->map_item ); } @@ -1325,6 +1391,7 @@ __osm_pr_rcv_process_half( IN const osm_port_t* const requester_port, IN const osm_port_t* const 
p_src_port, IN const osm_port_t* const p_dest_port, + IN const ib_gid_t* const p_dgid, IN const ib_net64_t comp_mask, IN cl_qlist_t* const p_list ) { @@ -1349,7 +1416,7 @@ __osm_pr_rcv_process_half( while( p_port != (osm_port_t*)cl_qmap_end( p_tbl ) ) { __osm_pr_rcv_get_port_pair_paths( p_rcv, p_madw, requester_port, - p_src_port, p_port, + p_src_port, p_port, p_dgid, comp_mask, p_list ); p_port = (osm_port_t*)cl_qmap_next( &p_port->map_item ); } @@ -1363,7 +1430,7 @@ __osm_pr_rcv_process_half( while( p_port != (osm_port_t*)cl_qmap_end( p_tbl ) ) { __osm_pr_rcv_get_port_pair_paths( p_rcv, p_madw, requester_port, - p_port, p_dest_port, + p_port, p_dest_port, p_dgid, comp_mask, p_list ); p_port = (osm_port_t*)cl_qmap_next( &p_port->map_item ); } @@ -1381,13 +1448,14 @@ __osm_pr_rcv_process_pair( IN const osm_port_t* const requester_port, IN const osm_port_t* const p_src_port, IN const osm_port_t* const p_dest_port, + IN const ib_gid_t* const p_dgid, IN const ib_net64_t comp_mask, IN cl_qlist_t* const p_list ) { OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_process_pair ); __osm_pr_rcv_get_port_pair_paths( p_rcv, p_madw, requester_port, p_src_port, - p_dest_port, comp_mask, p_list ); + p_dest_port, p_dgid, comp_mask, p_list ); OSM_LOG_EXIT( p_rcv->p_log ); } @@ -1831,6 +1899,7 @@ osm_pr_rcv_process( const osm_port_t* p_src_port; const osm_port_t* p_dest_port; cl_qlist_t pr_list; + ib_gid_t dgid; ib_net16_t sa_status; osm_port_t* requester_port; int ret; @@ -1894,7 +1963,8 @@ osm_pr_rcv_process( "Unicast destination requested\n" ); sa_status = __osm_pr_rcv_get_end_points( p_rcv, p_madw, - &p_src_port, &p_dest_port ); + &p_src_port, &p_dest_port, + &dgid ); if( sa_status == IB_SA_MAD_STATUS_SUCCESS ) { @@ -1906,25 +1976,25 @@ osm_pr_rcv_process( { if( p_dest_port ) __osm_pr_rcv_process_pair( p_rcv, p_madw, requester_port, - p_src_port, p_dest_port, + p_src_port, p_dest_port, &dgid, p_sa_mad->comp_mask, &pr_list ); else __osm_pr_rcv_process_half( p_rcv, p_madw, 
requester_port, - p_src_port, NULL, + p_src_port, NULL, &dgid, p_sa_mad->comp_mask, &pr_list ); } else { if( p_dest_port ) __osm_pr_rcv_process_half( p_rcv, p_madw, requester_port, - NULL, p_dest_port, + NULL, p_dest_port, &dgid, p_sa_mad->comp_mask, &pr_list ); else /* Katie, bar the door! */ __osm_pr_rcv_process_world( p_rcv, p_madw, requester_port, - p_sa_mad->comp_mask, &pr_list ); + &dgid, p_sa_mad->comp_mask, &pr_list ); } } goto Unlock; @@ -1980,8 +2050,19 @@ osm_pr_rcv_process( ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, &sl, &flow_label, &hop_limit ); p_pr_item->path_rec.sl = cl_hton16( sl ); +#ifndef ROUTER_EXP p_pr_item->path_rec.hop_flow_raw = (uint32_t)(hop_limit) | (flow_label << 8); +#else + /* HopLimit is not yet set in non link local MC groups */ + /* If it were, this would not be needed */ + if ( ib_mgid_get_scope( &p_mgrp->mcmember_rec.mgid ) == MC_SCOPE_LINK_LOCAL ) + p_pr_item->path_rec.hop_flow_raw = (uint32_t)(hop_limit) | + (flow_label << 8); + else + p_pr_item->path_rec.hop_flow_raw = cl_hton32(IB_HOPLIMIT_MAX) | + (flow_label << 8); +#endif cl_qlist_insert_tail( &pr_list, (cl_list_item_t*)&p_pr_item->pool_item ); From sweitzen at cisco.com Fri Jan 12 08:54:57 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Fri, 12 Jan 2007 08:54:57 -0800 Subject: [openib-general] Reminder: OFED 1.2 coordination meeting next Monday at 9am PST In-Reply-To: <45A62A60.7000503@dev.mellanox.co.il> Message-ID: I'd like to explore adding MVAPICH2 to OFED 1.2, perhaps Dr Panda's team can help get the source RPM integrated with OFED 1.2. 
Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Tziporet Koren > Sent: Thursday, January 11, 2007 4:15 AM > To: EWG > Cc: OPENIB > Subject: [openib-general] Reminder: OFED 1.2 coordination > meeting next Monday at 9am PST > > Hi All, > After a long holidays break we are going to have our next OFED 1.2 > coordination meeting on Monday Jan-15 at 9am PST (Jeff sent > bridge info) > > The only agenda item I have is reviewing components' > readiness for the > end of month code freeze. > If you have other items for the agenda please let me know > > Thanks, > Tziporet > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From halr at voltaire.com Fri Jan 12 09:11:10 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 12:11:10 -0500 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <45A3F1B9.2020500@ichips.intel.com> References: <45A3F1B9.2020500@ichips.intel.com> Message-ID: <1168621866.31913.11183.camel@hal.voltaire.com> On Tue, 2007-01-09 at 14:49, Sean Hefty wrote: > Tang, Changqing wrote: > > Where do you put these new user functions ? > > That is part of what I'd like input on. My thought was to add them to the > libibumad library, but how exactly is not clear yet. Wouldn't it be more straightforward to have it be a separate (libibsa) library ? libibumad is primarily send and receive user MADs (so it is much lower level). -- Hal > > Also when is it available ? > > I would like to have something by early February or sooner. 
> > - Sean > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From halr at voltaire.com Fri Jan 12 09:11:23 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 12:11:23 -0500 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <45A68416.1080707@ichips.intel.com> References: <000001c7346b$e18d5730$81c8180a@amr.corp.intel.com> <45A68416.1080707@ichips.intel.com> Message-ID: <1168621872.31913.11185.camel@hal.voltaire.com> On Thu, 2007-01-11 at 13:38, Sean Hefty wrote: > Sean Hefty wrote: > > Adding this functionality to the existing ib_umad module would add an extra > > dependency of ib_umad on the ib_sa module. Multicast join / leave operations > > could be done by adding additional IOCTLs, by embedding the request as a > > send_mad call, or by modifying the ib_umad send interface. > > Given that the ibibumad interface is intended to send and receive MADs, I would > rather not abuse the interface by changing the behavior of umad_send/umad_recv. > These calls map directly to ib_umad write and read. I tend to agree with this. > Would we be okay with extending the IOCTL interface to allow multicast joins, > notice registration, and event reporting? Or would it be acceptable to change > the ib_umad read/write interface to add a command? What do you have in mind here ? > > As an alternative, a new kernel userspace SA module could be created to > > explicitly interface with the kernel ib_sa. IMO, this is the best way to go. -- Hal > Or do people preferred this approach over changing the ib_umad interface? > > I'm looking for something that will be acceptable to merge upstream. 
> > - Sean From halr at voltaire.com Fri Jan 12 09:23:12 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 12:23:12 -0500 Subject: [openib-general] ipoib ipv6 multicast joins, was: multicast code/merge status In-Reply-To: <45A6D245.1020106@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> <45A6D245.1020106@ichips.intel.com> Message-ID: <1168622584.31913.11877.camel@hal.voltaire.com> On Thu, 2007-01-11 at 19:11, Sean Hefty wrote: > Hal Rosenstock wrote: > >>(*) there are some more issues here which need to be addressed, see > >>for example the "Some SMs don't support send-only yet" weird comment > >>at ipoib_mcast_sendonly_join() > > > > > > It's more likely an SA issue but I'm only guessing... It may also be > > historical... > > Based on observation, it looks like ipoib joins a couple of IPv6 multicast > groups with send only membership. Yes. > However it changes the join_state from 4 to 1 > (send-only to full member). Yes, that is the workaround Roland had put in (likely for a non compliant SM which didn't support send only joins). > This results in the SA trying to create the > multicast group, only the required MCMemberRecord components have not been set. Right, the group either needs to be previously precreated or a receiver "started" first which would create the group. > I'm not sure if this indicates a serious problem, but I'm guessing not. I don't believe it's a serious problem (at least now). In any case, it is no worse than it was before your change for this (it is not a problem of your making...). > The join request simply fails and returns an error back to ipoib. 
(Which would have > happened for a send-only join if the group hadn't already been created.) Right. -- Hal > - Sean From mshefty at ichips.intel.com Fri Jan 12 09:29:45 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 12 Jan 2007 09:29:45 -0800 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <1168621872.31913.11185.camel@hal.voltaire.com> References: <000001c7346b$e18d5730$81c8180a@amr.corp.intel.com> <45A68416.1080707@ichips.intel.com> <1168621872.31913.11185.camel@hal.voltaire.com> Message-ID: <45A7C589.4040706@ichips.intel.com> >>Would we be okay with extending the IOCTL interface to allow multicast joins, >>notice registration, and event reporting? Or would it be acceptable to change >>the ib_umad read/write interface to add a command? > > > What do you have in mind here ? I was thinking of one of two possibilities here. Currently there are IOCTL calls to register/unregister with the MAD layer. Additional IOCTL calls could be added to join/leave multicast groups and register/unregister for SA events. Multicast and SA events would need to be reported through another IOCTL of some sort. The alternative basically rewrites the ib_umad interface to allow read and write to carry some sort of command, rather than mapping them directly to sending and receiving a MAD. This is how most of the RDMA kernel to user interfaces are written. For example, let read return an event type (MAD received, multicast event, etc.), along with the event data (the MAD, etc.). >>>As an alternative, a new kernel userspace SA module could be created to >>>explicitly interface with the kernel ib_sa. > > IMO, this is the best way to go. This was my original approach a couple of months back, but wasn't accepted as mergeable upstream because it increased the size of the user to kernel interface. If we can agree that this approach is usable, we can discuss more specific implementation details. 
- Sean From halr at voltaire.com Fri Jan 12 10:14:03 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 13:14:03 -0500 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> Message-ID: <1168625642.31913.14420.camel@hal.voltaire.com> On Thu, 2006-12-14 at 19:20, Sean Hefty wrote: One minor comment/question below: > diff --git a/drivers/infiniband/core/notice.c b/drivers/infiniband/core/notice.c > new file mode 100644 > index 0000000..038878d > --- /dev/null > +++ b/drivers/infiniband/core/notice.c [snip...] > +static int send_unreg(struct inform_group *group) > +{ > + struct inform_port *port = group->port; > + struct ib_sa_inform inform; > + int ret; > + > + memset(&inform, 0, sizeof inform); > + inform.lid_range_begin = cpu_to_be16(0xFFFF); > + inform.is_generic = 1; > + inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL); > + inform.trap.generic.trap_num = cpu_to_be16(group->trap_number); > + inform.trap.generic.qpn = IB_QP1; Is the QPN constrained to be QP1 here ? -- Hal [snip...] 
From halr at voltaire.com Fri Jan 12 10:24:07 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 13:24:07 -0500 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> Message-ID: <1168626222.31913.14903.camel@hal.voltaire.com> On Thu, 2006-12-14 at 19:20, Sean Hefty wrote: Another comment/question: > diff --git a/drivers/infiniband/core/notice.c b/drivers/infiniband/core/notice.c > new file mode 100644 > index 0000000..038878d > --- /dev/null > +++ b/drivers/infiniband/core/notice.c > +static void inform_event_handler(struct ib_event_handler *handler, > + struct ib_event *event) > +{ > + struct inform_device *dev; > + > + dev = ib_get_client_data(event->device, &inform_client); > + if (!dev) > + return; > + > + switch (event->event) { > + case IB_EVENT_PORT_ERR: > + case IB_EVENT_LID_CHANGE: > + case IB_EVENT_SM_CHANGE: > + case IB_EVENT_CLIENT_REREGISTER: > + inform_groups_lost(&dev->port[event->element.port_num - > + dev->start_port]); > + break; > + default: > + break; > + } > +} Are groups lost on LID change ? (Not sure what port error means). Also, guess it is safest to assume lost on SM change although that might not be the case. One could query via InformInfoRecords. 
-- Hal From halr at voltaire.com Fri Jan 12 10:31:08 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 13:31:08 -0500 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> Message-ID: <1168626666.31913.15318.camel@hal.voltaire.com> On Thu, 2006-12-14 at 19:20, Sean Hefty wrote: A few more comments and questions below: > diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c > index ea78687..88c228c 100644 > --- a/drivers/infiniband/core/sa_query.c > +++ b/drivers/infiniband/core/sa_query.c > +enum { > + IB_SA_SM_TRAP_GID_IN_SERVICE = 64, > + IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65, > + IB_SA_SM_TRAP_CREATE_MC_GROUP = 66, > + IB_SA_SM_TRAP_DELETE_MC_GROUP = 67, > + IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128, > + IB_SA_SM_TRAP_LINK_INTEGRITY = 129, > + IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130, > + IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131, Why not 144 and 145 too ? > + IB_SA_SM_TRAP_BAD_M_KEY = 256, > + IB_SA_SM_TRAP_BAD_P_KEY = 257, > + IB_SA_SM_TRAP_BAD_Q_KEY = 258, What about 259 ? 
> + IB_SA_SM_TRAP_ALL = 0xFFFF > +}; > + > +#define IB_SA_INFORM_GID IB_SA_COMP_MASK( 0) > +#define IB_SA_INFORM_LID_RANGE_BEGIN IB_SA_COMP_MASK( 1) > +#define IB_SA_INFORM_LID_RANGE_END IB_SA_COMP_MASK( 2) > +/* reserved: 3 */ > +#define IB_SA_INFORM_IS_GENERIC IB_SA_COMP_MASK( 4) > +#define IB_SA_INFORM_SUBCRIBE IB_SA_COMP_MASK( 5) > +#define IB_SA_INFORM_TYPE IB_SA_COMP_MASK( 6) > + > +#define IB_SA_INFORM_TRAP_NUMBER IB_SA_COMP_MASK( 7) > +#define IB_SA_INFORM_DEVICE_ID IB_SA_COMP_MASK( 7) > +#define IB_SA_INFORM_QPN IB_SA_COMP_MASK( 8) > +/* reserved: 9 */ > +#define IB_SA_INFORM_RESP_TIME IB_SA_COMP_MASK(10) > +/* reserved: 11 */ > +#define IB_SA_INFORM_PRODUCER_TYPE IB_SA_COMP_MASK(12) > +#define IB_SA_INFORM_VENDOR_ID IB_SA_COMP_MASK(12) Component mask is meaningless for InformInfo so these shouldn't be needed. -- Hal From halr at voltaire.com Fri Jan 12 10:33:55 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 13:33:55 -0500 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <45A7C589.4040706@ichips.intel.com> References: <000001c7346b$e18d5730$81c8180a@amr.corp.intel.com> <45A68416.1080707@ichips.intel.com> <1168621872.31913.11185.camel@hal.voltaire.com> <45A7C589.4040706@ichips.intel.com> Message-ID: <1168626822.31913.15457.camel@hal.voltaire.com> On Fri, 2007-01-12 at 12:29, Sean Hefty wrote: > >>Would we be okay with extending the IOCTL interface to allow multicast joins, > >>notice registration, and event reporting? Or would it be acceptable to change > >>the ib_umad read/write interface to add a command? > > > > > > What do you have in mind here ? > > I was thinking of one of two possibilities here. Currently there are IOCTL > calls to register/unregister with the MAD layer. Additional IOCTL calls could > be added to join/leave multicast groups and register/unregister for SA events. > Multicast and SA events would need to be reported through another IOCTL of some > sort. 
> > The alternative basically rewrites the ib_umad interface to allow read and write > to carry some sort of command, rather than mapping them directly to sending and > receiving a MAD. This is how most of the RDMA kernel to user interfaces are > written. For example, let read return an event type (MAD received, multicast > event, etc.), along with the event data (the MAD, etc.). Do we really want to go down this approach ? > >>>As an alternative, a new kernel userspace SA module could be created to > >>>explicitly interface with the kernel ib_sa. > > > > IMO, this is the best way to go. > > This was my original approach a couple of months back, but wasn't accepted as > mer gable upstream because it increased the size of the user to kernel > interface. Can you point me at this ? I must have missed it. -- Hal > If we can agree that this approach is usable, we can discuss more > specific implementation details. > > - Sean From halr at voltaire.com Fri Jan 12 10:35:18 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 13:35:18 -0500 Subject: [openib-general] multicast.c question Message-ID: <1168626916.31913.15595.camel@hal.voltaire.com> Hi Sean, There were hard coded values for pkey, qkey, and join state in multicast.c.If so, where should these ultimately come from ? Also, where do the other parameters (components) that are necessary to create a group come from ? One option would be to obtain all of them from the appropriate (partition based) IPoIB broadcast group. -- Hal From mshefty at ichips.intel.com Fri Jan 12 11:26:26 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 12 Jan 2007 11:26:26 -0800 Subject: [openib-general] multicast.c question In-Reply-To: <1168626916.31913.15595.camel@hal.voltaire.com> References: <1168626916.31913.15595.camel@hal.voltaire.com> Message-ID: <45A7E0E2.4060206@ichips.intel.com> > There were hard coded values for pkey, qkey, and join state in > multicast.c.If so, where should these ultimately come from ? 
A user specifies the values for the MCMemberRecord when they issue the join request, so the user controls all values. There is a call (ib_sa_get_mcmember_rec) that can be used to lookup an existing record, or obtain some default values for an MCMemberRecord if the user doesn't care what they are. I believe that the default values are the hard-coded values that you're referring to. The rdma_cm calls ib_sa_get_mcmember_rec() to lookup the broadcast group used by ipoib. It uses the returned MCMemberRecord as a template for creating new groups. The hard-coded values are only there for users that want to create a group, and don't care anything about its values. I hope this makes sense. - Sean From mshefty at ichips.intel.com Fri Jan 12 11:32:02 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 12 Jan 2007 11:32:02 -0800 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <1168625642.31913.14420.camel@hal.voltaire.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> <1168625642.31913.14420.camel@hal.voltaire.com> Message-ID: <45A7E232.2020303@ichips.intel.com> >>+static int send_unreg(struct inform_group *group) >>+{ >>+ struct inform_port *port = group->port; >>+ struct ib_sa_inform inform; >>+ int ret; >>+ >>+ memset(&inform, 0, sizeof inform); >>+ inform.lid_range_begin = cpu_to_be16(0xFFFF); >>+ inform.is_generic = 1; >>+ inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL); >>+ inform.trap.generic.trap_num = cpu_to_be16(group->trap_number); >>+ inform.trap.generic.qpn = IB_QP1; > > > Is the QPN constrained to be QP1 here ? Currently, yes. Will reports ever be sent to a different QPN? 
- Sean From halr at voltaire.com Fri Jan 12 11:33:00 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 14:33:00 -0500 Subject: [openib-general] multicast.c question In-Reply-To: <45A7E0E2.4060206@ichips.intel.com> References: <1168626916.31913.15595.camel@hal.voltaire.com> <45A7E0E2.4060206@ichips.intel.com> Message-ID: <1168630376.31913.18602.camel@hal.voltaire.com> On Fri, 2007-01-12 at 14:26, Sean Hefty wrote: > > There were hard coded values for pkey, qkey, and join state in > > multicast.c. If so, where should these ultimately come from ? > > A user specifies the values for the MCMemberRecord when they issue the join > request, so the user controls all values. There is a call > (ib_sa_get_mcmember_rec) that can be used to lookup an existing record, or > obtain some default values for an MCMemberRecord if the user doesn't care what > they are. I believe that the default values are the hard-coded values that > you're referring to. > > The rdma_cm calls ib_sa_get_mcmember_rec() to lookup the broadcast group used by > ipoib. It uses the returned MCMemberRecord as a template for creating new > groups. The hard-coded values are only there for users that want to create a > group, and don't care anything about its values. > > I hope this makes sense. Makes sense with one minor comment: The default pkey which is used for a default when the user doesn't care might not be a valid pkey for that port.
-- Hal > - Sean From mshefty at ichips.intel.com Fri Jan 12 11:38:32 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 12 Jan 2007 11:38:32 -0800 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <1168626666.31913.15318.camel@hal.voltaire.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> <1168626666.31913.15318.camel@hal.voltaire.com> Message-ID: <45A7E3B8.3090901@ichips.intel.com> Hal Rosenstock wrote: >>+enum { >>+ IB_SA_SM_TRAP_GID_IN_SERVICE = 64, >>+ IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65, >>+ IB_SA_SM_TRAP_CREATE_MC_GROUP = 66, >>+ IB_SA_SM_TRAP_DELETE_MC_GROUP = 67, >>+ IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128, >>+ IB_SA_SM_TRAP_LINK_INTEGRITY = 129, >>+ IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130, >>+ IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131, > > > Why not 144 and 145 too ? I was just trying to setup the framework first. Adding new traps isn't overly difficult, but I would need to define appropriate data details as well. > >>+ IB_SA_SM_TRAP_BAD_M_KEY = 256, >>+ IB_SA_SM_TRAP_BAD_P_KEY = 257, >>+ IB_SA_SM_TRAP_BAD_Q_KEY = 258, > > > What about 259 ? Same as above. > Component mask is meaningless for InformInfo so these shouldn't be > needed. That's a good point. These should be removed. (I had defined them before I realized component mask was ignored for InformInfo.) 
- Sean From halr at voltaire.com Fri Jan 12 11:40:02 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 14:40:02 -0500 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <45A7E232.2020303@ichips.intel.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> <1168625642.31913.14420.camel@hal.voltaire.com> <45A7E232.2020303@ichips.intel.com> Message-ID: <1168630792.31913.18936.camel@hal.voltaire.com> On Fri, 2007-01-12 at 14:32, Sean Hefty wrote: > >>+static int send_unreg(struct inform_group *group) > >>+{ > >>+ struct inform_port *port = group->port; > >>+ struct ib_sa_inform inform; > >>+ int ret; > >>+ > >>+ memset(&inform, 0, sizeof inform); > >>+ inform.lid_range_begin = cpu_to_be16(0xFFFF); > >>+ inform.is_generic = 1; > >>+ inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL); > >>+ inform.trap.generic.trap_num = cpu_to_be16(group->trap_number); > >>+ inform.trap.generic.qpn = IB_QP1; > > > > > > Is the QPN constrained to be QP1 here ? > > Currently, yes. > > Will reports ever be sent to a different QPN? I think this depends on what QPN the subscription request came from. In the current OpenIB case (in the kernel), this is currently QP1. 
-- Hal > - Sean From halr at voltaire.com Fri Jan 12 12:02:02 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 12 Jan 2007 15:02:02 -0500 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <45A7E3B8.3090901@ichips.intel.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> <1168626666.31913.15318.camel@hal.voltaire.com> <45A7E3B8.3090901@ichips.intel.com> Message-ID: <1168632118.31913.20078.camel@hal.voltaire.com> On Fri, 2007-01-12 at 14:38, Sean Hefty wrote: > Hal Rosenstock wrote: > >>+enum { > >>+ IB_SA_SM_TRAP_GID_IN_SERVICE = 64, > >>+ IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65, > >>+ IB_SA_SM_TRAP_CREATE_MC_GROUP = 66, > >>+ IB_SA_SM_TRAP_DELETE_MC_GROUP = 67, > >>+ IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128, > >>+ IB_SA_SM_TRAP_LINK_INTEGRITY = 129, > >>+ IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130, > >>+ IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131, > > > > > > Why not 144 and 145 too ? > > I was just trying to setup the framework first. Adding new traps isn't overly > difficult, but I would need to define appropriate data details as well. > > > > >>+ IB_SA_SM_TRAP_BAD_M_KEY = 256, > >>+ IB_SA_SM_TRAP_BAD_P_KEY = 257, > >>+ IB_SA_SM_TRAP_BAD_Q_KEY = 258, > > > > > > What about 259 ? > > Same as above. Aren't those all in the spec ? > > Component mask is meaningless for InformInfo so these shouldn't be > > needed. > > That's a good point. These should be removed. (I had defined them before I > realized component mask was ignored for InformInfo.) Yes, thanks. 
-- Hal > - Sean From mshefty at ichips.intel.com Fri Jan 12 12:16:43 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 12 Jan 2007 12:16:43 -0800 Subject: [openib-general] [RFC] userspace IB SA support In-Reply-To: <1168626822.31913.15457.camel@hal.voltaire.com> References: <000001c7346b$e18d5730$81c8180a@amr.corp.intel.com> <45A68416.1080707@ichips.intel.com> <1168621872.31913.11185.camel@hal.voltaire.com> <45A7C589.4040706@ichips.intel.com> <1168626822.31913.15457.camel@hal.voltaire.com> Message-ID: <45A7ECAB.509@ichips.intel.com> >>This was my original approach a couple of months back, but wasn't accepted as >>mergeable upstream because it increased the size of the user to kernel >>interface. > > > Can you point me at this ? I must have missed it. To clarify, I didn't request that the code be merged upstream. I only queried about the approach, and merging that into svn. My query was likely done off list. Part of the feedback was to ensure that the design was discussed on the list to get more input, which is what this is doing. See the following threads / related messages for more details: http://openib.org/pipermail/openib-general/2006-August/025271.html http://openib.org/pipermail/openib-general/2006-August/025434.html - Sean From mshefty at ichips.intel.com Fri Jan 12 12:24:35 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 12 Jan 2007 12:24:35 -0800 Subject: [openib-general] [RFC] [PATCH 1/1] ib_sa: add InformInfo registration for Notice reports In-Reply-To: <1168632118.31913.20078.camel@hal.voltaire.com> References: <000601c71fde$ce561fe0$8698070a@amr.corp.intel.com> <1168626666.31913.15318.camel@hal.voltaire.com> <45A7E3B8.3090901@ichips.intel.com> <1168632118.31913.20078.camel@hal.voltaire.com> Message-ID: <45A7EE83.6090109@ichips.intel.com> >>>What about 259 ? >> >>Same as above. > > > Aren't those all in the spec ? Yes - I just didn't want to define everything until we had a basic framework that was agreed upon.
We can add support for other traps through separate patches. For notices, I did not 'unpack' the data detail section, since that involves decoding the notice itself, and creates a mess of ib_field definitions. I choose instead to leave this up to the consumer. So, I defined traps that I thought were most useful, plus enough to show how additional traps could be defined. >>>Component mask is meaningless for InformInfo so these shouldn't be >>>needed. >> >>That's a good point. These should be removed. (I had defined them before I >>realized component mask was ignored for InformInfo.) > > > Yes, thanks. I've removed these definitions from my git tree. - Sean From panda at cse.ohio-state.edu Fri Jan 12 12:32:15 2007 From: panda at cse.ohio-state.edu (Dhabaleswar Panda) Date: Fri, 12 Jan 2007 15:32:15 -0500 (EST) Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: from "Scott Weitzenkamp (sweitzen)" at Jan 12, 2007 08:54:57 AM Message-ID: <200701122032.l0CKWFdr027939@xi.cse.ohio-state.edu> > I'd like to explore adding MVAPICH2 to OFED 1.2, perhaps Dr Panda's team > can help get the source RPM integrated with OFED 1.2. If the OFED community wants this, we will be happy to extend help. Thanks, DK > Scott Weitzenkamp > SQA and Release Manager > Server Virtualization Business Unit > Cisco Systems > > > > -----Original Message----- > > From: openib-general-bounces at openib.org > > [mailto:openib-general-bounces at openib.org] On Behalf Of Tziporet Koren > > Sent: Thursday, January 11, 2007 4:15 AM > > To: EWG > > Cc: OPENIB > > Subject: [openib-general] Reminder: OFED 1.2 coordination > > meeting next Monday at 9am PST > > > > Hi All, > > After a long holidays break we are going to have our next OFED 1.2 > > coordination meeting on Monday Jan-15 at 9am PST (Jeff sent > > bridge info) > > > > The only agenda item I have is reviewing components' > > readiness for the > > end of month code freeze. 
> > If you have other items for the agenda please let me know > > > > Thanks, > > Tziporet > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > _______________________________________________ > openfabrics-ewg mailing list > openfabrics-ewg at openib.org > http://openib.org/mailman/listinfo/openfabrics-ewg > From parks at lanl.gov Fri Jan 12 13:55:48 2007 From: parks at lanl.gov (Parks Fields) Date: Fri, 12 Jan 2007 14:55:48 -0700 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <200701122032.l0CKWFdr027939@xi.cse.ohio-state.edu> References: <200701122032.l0CKWFdr027939@xi.cse.ohio-state.edu> Message-ID: <7.0.1.0.2.20070112145536.0281db50@lanl.gov> At 01:32 PM 1/12/2007, Dhabaleswar Panda wrote: > > I'd like to explore adding MVAPICH2 to OFED 1.2, perhaps Dr Panda's team > > can help get the source RPM integrated with OFED 1.2. > >If the OFED community wants this, we will be happy to extend help. I want it. ***** Correspondence ***** This email contains no programmatic content that requires independent ADC review From spoole at ornl.gov Fri Jan 12 14:00:48 2007 From: spoole at ornl.gov (Stephen Poole) Date: Fri, 12 Jan 2007 17:00:48 -0500 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <200701122032.l0CKWFdr027939@xi.cse.ohio-state.edu> Message-ID: I would find it very useful. Steve... 
Steve Poole Chief Scientist / Director of Special Projects Computer Science and Mathematics Division Chief Systems Architect Leadership Computing Facility Oak Ridge National Laboratory 865.574.9008 "Wisdom is not a product of schooling, but of the lifelong attempt to acquire it" Albert Einstein -- > From: Dhabaleswar Panda > Date: Fri, 12 Jan 2007 15:32:15 -0500 (EST) > To: > Cc: , > Subject: Re: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 > >> I'd like to explore adding MVAPICH2 to OFED 1.2, perhaps Dr Panda's team >> can help get the source RPM integrated with OFED 1.2. > > If the OFED community wants this, we will be happy to extend help. > > Thanks, > > DK > >> Scott Weitzenkamp >> SQA and Release Manager >> Server Virtualization Business Unit >> Cisco Systems >> >> >>> -----Original Message----- >>> From: openib-general-bounces at openib.org >>> [mailto:openib-general-bounces at openib.org] On Behalf Of Tziporet Koren >>> Sent: Thursday, January 11, 2007 4:15 AM >>> To: EWG >>> Cc: OPENIB >>> Subject: [openib-general] Reminder: OFED 1.2 coordination >>> meeting next Monday at 9am PST >>> >>> Hi All, >>> After a long holidays break we are going to have our next OFED 1.2 >>> coordination meeting on Monday Jan-15 at 9am PST (Jeff sent >>> bridge info) >>> >>> The only agenda item I have is reviewing components' >>> readiness for the >>> end of month code freeze. 
>>> If you have other items for the agenda please let me know >>> >>> Thanks, >>> Tziporet >>> >>> _______________________________________________ >>> openib-general mailing list >>> openib-general at openib.org >>> http://openib.org/mailman/listinfo/openib-general >>> >>> To unsubscribe, please visit >>> http://openib.org/mailman/listinfo/openib-general >>> >> >> _______________________________________________ >> openfabrics-ewg mailing list >> openfabrics-ewg at openib.org >> http://openib.org/mailman/listinfo/openfabrics-ewg >> > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mst at mellanox.co.il Sun Jan 14 10:45:17 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 14 Jan 2007 20:45:17 +0200 Subject: [openib-general] [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to2.6.20. In-Reply-To: <1168797602.4950.5.camel@linux-q667.site> References: <1168797602.4950.5.camel@linux-q667.site> Message-ID: <20070114184517.GG26427@mellanox.co.il> BTW, is openib-general working for you? Seems to be blocked for me. -- MST From swise at opengridcomputing.com Sun Jan 14 10:46:31 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Sun, 14 Jan 2007 12:46:31 -0600 Subject: [openib-general] [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to2.6.20. In-Reply-To: <20070114184517.GG26427@mellanox.co.il> References: <1168797602.4950.5.camel@linux-q667.site> <20070114184517.GG26427@mellanox.co.il> Message-ID: <1168800392.4950.8.camel@linux-q667.site> It is blocked for me too today. On Sun, 2007-01-14 at 20:45 +0200, Michael S. Tsirkin wrote: > BTW, is openib-general working for you? > Seems to be blocked for me. 
> From swise at opengridcomputing.com Sun Jan 14 10:00:02 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Sun, 14 Jan 2007 12:00:02 -0600 Subject: [openib-general] [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. In-Reply-To: <20070113211357.GB24038@mellanox.co.il> References: <20070112162040.27341.78123.stgit@dell3.ogc.int> <20070113211357.GB24038@mellanox.co.il> Message-ID: <1168797602.4950.5.camel@linux-q667.site> On Sat, 2007-01-13 at 23:13 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. > > > > > > This is needed even on kernels that might have genalloc because it > > may not be configured into the running kernel. Right now the only > > subsystem that turns on the generic allocator config option is IA64. > > So chances are good that 2.6.20 kernels will _not_ have this compiled in. > > So we just backport it to all supported kernels. > > > > To avoid collisions with kernels that _do_ have this compiled in, > > change the names of the exported symbols in the backport. > > > > Changes to genalloc.c: > > > > - Change exported symbol names by prepending ib_ > > EG: gen_pool_alloc() becomes ib_gen_pool_alloc() > > > > Changes to genalloc.h: > > > > - Change exported symbol names by prepending ib_ > > - added #defines for original exported symbol names to new names. Thus > > code calling the allocator doesn't need to change. 
> > > > EG: #define gen_pool_alloc ib_gen_pool alloc > > > > Signed-off-by: Steve Wise > > --- > > > > .../backport/2.6.20/include/linux/genalloc.h | 42 +++++ > > .../backport/2.6.20/include/src/genalloc.c | 198 +++++++++++++++++++++++ > > .../backport/2.6.20/linux_genalloc_to_2.6.20.patch | 18 ++ > > 3 files changed, 258 insertions(+), 0 deletions(-) > > > > diff --git a/kernel_addons/backport/2.6.20/include/linux/genalloc.h b/kernel_addons/backport/2.6.20/include/linux/genalloc.h > > new file mode 100644 > > index 0000000..3c23c68 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.20/include/linux/genalloc.h > > @@ -0,0 +1,42 @@ > > +/* > > + * Basic general purpose allocator for managing special purpose memory > > + * not managed by the regular kmalloc/kfree interface. > > + * Uses for this includes on-device special memory, uncached memory > > + * etc. > > + * > > + * This source code is licensed under the GNU General Public License, > > + * Version 2. See the file COPYING for more details. > > + */ > > + > > + > > +/* > > + * General purpose special memory pool descriptor. > > + */ > > +struct gen_pool { > > + rwlock_t lock; > > + struct list_head chunks; /* list of chunks in this pool */ > > + int min_alloc_order; /* minimum allocation order */ > > +}; > > + > > +/* > > + * General purpose special memory pool chunk descriptor. > > + */ > > +struct gen_pool_chunk { > > + spinlock_t lock; > > + struct list_head next_chunk; /* next chunk in pool */ > > + unsigned long start_addr; /* starting address of memory chunk */ > > + unsigned long end_addr; /* ending address of memory chunk */ > > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > > +}; > > + > > +extern struct gen_pool *ib_gen_pool_create(int, int); > > If you think about it, by virtue of the defines above, genalloc.c > can be included without change. Correct? > I think you're right. I'll fix this. Thanks, Steve. 
From swise at opengridcomputing.com Sun Jan 14 09:47:49 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Sun, 14 Jan 2007 11:47:49 -0600 Subject: [openib-general] libcxgb3 master does not compile (Was Fwd: Make failure log from sw075:x86_64:2.6.19-smp:) In-Reply-To: <20070113165346.GC24653@mellanox.co.il> References: <20070113165346.GC24653@mellanox.co.il> Message-ID: <1168796869.4950.1.camel@linux-q667.site> On Sat, 2007-01-13 at 18:53 +0200, Michael S. Tsirkin wrote: > Steve, could you fix the following please? > Oops. Sorry. I forgot this went into libibverbs already. Fix committed: commit 0d7105e87c5dcbb5903dfe351edf02b8d1c3cd77 Author: Steve Wise Date: Sun Jan 14 11:30:58 2007 -0600 Revert "Pass driver data through ibv_cmd_req_notify_cq()" Chelsio cxgb3 driver no longer needs to pass rptr value via ibv_cmd_req_notify_cq(). Signed-off-by: Steve Wise diff --git a/src/cq.c b/src/cq.c index b99bab1..88797f7 100644 --- a/src/cq.c +++ b/src/cq.c @@ -48,10 +48,9 @@ int iwch_arm_cq(struct ibv_cq *ibcq, int { int ret; struct iwch_cq *chp = to_iwch_cq(ibcq); - struct ibv_req_notify_cq cmd; pthread_spin_lock(&chp->lock); - ret = ibv_cmd_req_notify_cq(ibcq, solicited, &cmd, sizeof cmd); + ret = ibv_cmd_req_notify_cq(ibcq, solicited); pthread_spin_unlock(&chp->lock); return ret; From swise at opengridcomputing.com Sun Jan 14 09:49:50 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Sun, 14 Jan 2007 11:49:50 -0600 Subject: [openib-general] libcxgb3 compitlation failure In-Reply-To: <1168771883.5050.3.camel@vladsk-laptop> References: <1168771883.5050.3.camel@vladsk-laptop> Message-ID: <1168796990.4950.3.camel@linux-q667.site> I just committed the fix for this. Sorry for the breakage. Steve. On Sun, 2007-01-14 at 12:51 +0200, Vladimir Sokolovsky wrote: > Hi Steve, > The compilation of libcxgb3 fails. 
See log below: > > make -C src/userspace/libcxgb3 \ > AM_LDFLAGS="-L../libibverbs/src -libverbs" > make[1]: Entering directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' > make all-am > make[2]: Entering directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' > if /bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -I. -I. -I../libibverbs/include -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -M > F ".deps/cq.Tpo" -c -o cq.lo `test -f 'src/cq.c' || echo './'`src/cq.c; \ > then mv -f ".deps/cq.Tpo" ".deps/cq.Plo"; else rm -f ".deps/cq.Tpo"; exit 1; fi > mkdir .libs > gcc -DHAVE_CONFIG_H -I. -I. -I. -I../libibverbs/include -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -MF .deps/cq.Tpo -c src/cq.c -fPIC -DPIC -o .li > bs/cq.o > src/cq.c: In function 'iwch_arm_cq': > src/cq.c:54: error: too many arguments to function 'ibv_cmd_req_notify_cq' > make[2]: *** [cq.lo] Error 1 > make[2]: Leaving directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' > make[1]: *** [all] Error 2 > make[1]: Leaving directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' > make: *** [libcxgb3] Error 2 > > Regards, > Vladimir > From mst at mellanox.co.il Sun Jan 14 06:31:53 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 14 Jan 2007 16:31:53 +0200 Subject: [openib-general] kernel.org cross build Message-ID: <20070114143152.GE21492@mellanox.co.il> OK, I now have cross compilers for kernel for powerpc, ppc64, x86_64 and ia64. The cross compilers are under ~mst/cross and will be used for openfabrics kernel nightly builds. Please note this only includes gcc 4.1.1 and binutils 2.17 subset necessary to build kernel modules. This can't build userspace - IMO, userspace builds are too distribution specific anyway, so cross-building them is much less useful. To use, just source the kernel-cross.sh file, giving it the correct architecture: #. 
~mst/cross/kernel-cross.sh x86_64 Valid arches are: x86_64, ia64, ppc64, powerpc. If you want to reproduce the cross-compiler environment at your site, look at the script ~mst/cross/gcc-cross.sh which was used to create these compilers. -- MST From michael.arndt at informatik.tu-chemnitz.de Sun Jan 14 03:07:01 2007 From: michael.arndt at informatik.tu-chemnitz.de (Michael Arndt) Date: Sun, 14 Jan 2007 12:07:01 +0100 Subject: [openib-general] buiild.sh Message-ID: <000501c737cc$1e05ef70$21606d86@one7> Hi, I have some simple changes like printk(KERN_INFO "...") messages in the mad.c file to get more informations. But it seems to be that the build.sh script uses the openib-1.1.src.rpm to build the rpms. Is there a simple way to rebuild this src.rpm with the new source code and doing all the patches? I have read something about add_patch2ofed? How would that work? Thanks Micha From michael.arndt at informatik.tu-chemnitz.de Sun Jan 14 03:14:12 2007 From: michael.arndt at informatik.tu-chemnitz.de (Michael Arndt) Date: Sun, 14 Jan 2007 12:14:12 +0100 Subject: [openib-general] ioctl and send_agents References: <000901c732bc$17eeb9b0$21606d86@one7> <1168221630.4577.39665.camel@hal.voltaire.com> <001d01c73318$19033220$21606d86@one7> <1168432557.14647.135624.camel@hal.voltaire.com> Message-ID: <000a01c737cd$1ec07060$21606d86@one7> Hi, >> shouldn't there be a call like unlocked_ioctl or compat_ioctl like >> defined in this module? > > In user_mad.c, both unlocked/compat_ioctl are defined: > > static struct file_operations umad_fops = { > .owner = THIS_MODULE, > .read = ib_umad_read, > .write = ib_umad_write, > .poll = ib_umad_poll, > .unlocked_ioctl = ib_umad_ioctl, > .compat_ioctl = ib_umad_ioctl, > .open = ib_umad_open, > .release = ib_umad_close > }; Thanks for the help. I have overlooked a patch where unlocked_ioctl and compat_ioctl is replaced by ioctl in the struct above. 
Thanks Michael From vlad at mellanox.co.il Sun Jan 14 02:51:23 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 14 Jan 2007 12:51:23 +0200 Subject: [openib-general] libcxgb3 compitlation failure Message-ID: <1168771883.5050.3.camel@vladsk-laptop> Hi Steve, The compilation of libcxgb3 fails. See log below: make -C src/userspace/libcxgb3 \ AM_LDFLAGS="-L../libibverbs/src -libverbs" make[1]: Entering directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' make all-am make[2]: Entering directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' if /bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -I. -I. -I../libibverbs/include -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -M F ".deps/cq.Tpo" -c -o cq.lo `test -f 'src/cq.c' || echo './'`src/cq.c; \ then mv -f ".deps/cq.Tpo" ".deps/cq.Plo"; else rm -f ".deps/cq.Tpo"; exit 1; fi mkdir .libs gcc -DHAVE_CONFIG_H -I. -I. -I. -I../libibverbs/include -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -MF .deps/cq.Tpo -c src/cq.c -fPIC -DPIC -o .li bs/cq.o src/cq.c: In function 'iwch_arm_cq': src/cq.c:54: error: too many arguments to function 'ibv_cmd_req_notify_cq' make[2]: *** [cq.lo] Error 1 make[2]: Leaving directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' make[1]: *** [all] Error 2 make[1]: Leaving directory `/home/vlad/tmp/ofa_1_2_user-20070114-0200/src/userspace/libcxgb3' make: *** [libcxgb3] Error 2 Regards, Vladimir From ogerlitz at voltaire.com Sat Jan 13 23:44:41 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 14 Jan 2007 09:44:41 +0200 Subject: [openib-general] ipoib ipv6 multicast joins, was: multicast code/merge status In-Reply-To: <45A6DB7F.3060800@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> 
<15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> <45A6D245.1020106@ichips.intel.com> <20070112001956.GC17999@mellanox.co.il> <45A6DB7F.3060800@ichips.intel.com> Message-ID: <45A9DF69.40700@voltaire.com> Sean Hefty wrote: >> So, this looks like a work-around for some broken SM, does it not? > > Yes - I mentioned it because the resulting error message (wrong > component mask) is what was filling up the opensm log file. > > Jan 11 14:21:36 083844 [40583BB0] -> osm_mcmr_rcv_join_mgrp: ERR 1B11: > method = > SubnAdmSet, scope_state = 0x1, component mask = 0x0000000000010083, > expected com > p mask = 0x00000000000130c7, MGID: 0xffffffffffff0000 : > 0x2000001400020404 from > port 0x0002c9010ad258f1 > > I've applied a missing patch to my rdma-dev git tree that should avoid > filling up the opensm log file. But the error in the opensm log file is > a result of this work-around. Sean, Can you explain how this relates to your multicast changes? the IPoIB send-only-full-member-join hack was there before your patch and stayed there after your patch... and how come a change in the multicast code can cause the error steam to be finite... have you moved the retry mechanism from the ib_sa consumer to the ib_sa mcast engine? Or. From tziporet at dev.mellanox.co.il Sat Jan 13 21:18:29 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Sun, 14 Jan 2007 07:18:29 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <200701140438.l0E4cbIk021137@xi.cse.ohio-state.edu> References: <200701140438.l0E4cbIk021137@xi.cse.ohio-state.edu> Message-ID: <45A9BD25.40903@dev.mellanox.co.il> Dhabaleswar Panda wrote: > > We will be happy to prepare the SRPM. > > Thanks, > > DK > > Can you send me the name of the person that is going to do it, so Vlad can work with him/her on OFED integration. Note that we will need the package this week so we will be able to update OFED install scripts. 
Tziporet From panda at cse.ohio-state.edu Sat Jan 13 20:38:37 2007 From: panda at cse.ohio-state.edu (Dhabaleswar Panda) Date: Sat, 13 Jan 2007 23:38:37 -0500 (EST) Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: from "Scott Weitzenkamp (sweitzen)" at Jan 13, 2007 02:13:50 PM Message-ID: <200701140438.l0E4cbIk021137@xi.cse.ohio-state.edu> > > Dhabaleswar Panda wrote: > > >> I'd like to explore adding MVAPICH2 to OFED 1.2, perhaps > > Dr Panda's team > > >> can help get the source RPM integrated with OFED 1.2. > > >> > > > > > > If the OFED community wants this, we will be happy to extend help. > > > > > > Thanks, > > > > > > DK > > > > > For Open MPI we have Jeff from Cisco. > > For MVAPICH we have Pasha from Mellanox. > > > > Who from the community is going to be the maintainer in OFED > > and prepare > > the SRPM? > > I would suggest Dr Panda's team prepare the SRPM. I can commit Cisco to > test MVAPICH2 for OFED 1.2 equally with MVAPICH, Open MPI, HP MPI, and > Intel MPI. We will be happy to prepare the SRPM. Thanks, DK From dotanb at dev.mellanox.co.il Sat Jan 13 23:16:13 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Sun, 14 Jan 2007 09:16:13 +0200 Subject: [openib-general] [PATCH] [libibverbs] Adding acks to all of the CQ events in the pingpong examples In-Reply-To: References: <1168533575.1220.0.camel@mtls05.yok.mtl.com> Message-ID: <45A9D8BD.9070701@dev.mellanox.co.il> Roland Dreier wrote: > OK, this is correct -- but since the examples don't destroy the CQ, is > there any point in acking the events? > > - R. > (sorry if there is any duplication, i got an email warning) I think that the answer is yes, those tests are being used as a code reference for new users, and this code should give a good example to them. 
thanks Dotan From tziporet at dev.mellanox.co.il Sat Jan 13 13:44:42 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Sat, 13 Jan 2007 23:44:42 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <200701122032.l0CKWFdr027939@xi.cse.ohio-state.edu> References: <200701122032.l0CKWFdr027939@xi.cse.ohio-state.edu> Message-ID: <45A952CA.4000206@dev.mellanox.co.il> Dhabaleswar Panda wrote: >> I'd like to explore adding MVAPICH2 to OFED 1.2, perhaps Dr Panda's team >> can help get the source RPM integrated with OFED 1.2. >> > > If the OFED community wants this, we will be happy to extend help. > > Thanks, > > DK > For Open MPI we have Jeff from Cisco. For MVAPICH we have Pasha from Mellanox. Who from the community is going to be the maintainer in OFED and prepare the SRPM? Tziporet From mst at mellanox.co.il Sat Jan 13 13:13:57 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sat, 13 Jan 2007 23:13:57 +0200 Subject: [openib-general] [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. In-Reply-To: <20070112162040.27341.78123.stgit@dell3.ogc.int> References: <20070112162040.27341.78123.stgit@dell3.ogc.int> Message-ID: <20070113211357.GB24038@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. > > > This is needed even on kernels that might have genalloc because it > may not be configured into the running kernel. Right now the only > subsystem that turns on the generic allocator config option is IA64. > So chances are good that 2.6.20 kernels will _not_ have this compiled in. > So we just backport it to all supported kernels. > > To avoid collisions with kernels that _do_ have this compiled in, > change the names of the exported symbols in the backport. 
> > Changes to genalloc.c: > > - Change exported symbol names by prepending ib_ > EG: gen_pool_alloc() becomes ib_gen_pool_alloc() > > Changes to genalloc.h: > > - Change exported symbol names by prepending ib_ > - added #defines for original exported symbol names to new names. Thus > code calling the allocator doesn't need to change. > > EG: #define gen_pool_alloc ib_gen_pool alloc > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.20/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.20/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.20/linux_genalloc_to_2.6.20.patch | 18 ++ > 3 files changed, 258 insertions(+), 0 deletions(-) > > diff --git a/kernel_addons/backport/2.6.20/include/linux/genalloc.h b/kernel_addons/backport/2.6.20/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.20/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); If you think about it, by virtue of the defines above, genalloc.c can be included without change. Correct? -- MST From sweitzen at cisco.com Sat Jan 13 14:13:50 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Sat, 13 Jan 2007 14:13:50 -0800 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <45A952CA.4000206@dev.mellanox.co.il> Message-ID: > Dhabaleswar Panda wrote: > >> I'd like to explore adding MVAPICH2 to OFED 1.2, perhaps > Dr Panda's team > >> can help get the source RPM integrated with OFED 1.2. > >> > > > > If the OFED community wants this, we will be happy to extend help. > > > > Thanks, > > > > DK > > > For Open MPI we have Jeff from Cisco. > For MVAPICH we have Pasha from Mellanox. > > Who from the community is going to be the maintainer in OFED > and prepare > the SRPM? I would suggest Dr Panda's team prepare the SRPM. I can commit Cisco to test MVAPICH2 for OFED 1.2 equally with MVAPICH, Open MPI, HP MPI, and Intel MPI. Scott From mst at mellanox.co.il Sat Jan 13 11:01:00 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sat, 13 Jan 2007 21:01:00 +0200 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: References: Message-ID: <20070113190100.GB13017@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH] 2.6.20 ib_cm: limit cm message timeouts > > This all looks rather fishy: > > > +/* > > + * Limit CM msg timeouts to something reasonable. > > + * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min. > > + */ > > +#define IB_CM_MAX_TIMEOUT 21 > > OK... 
(although 8 seconds seems a little short -- it seems a somewhat > longer timeout could be legitimate on a very busy fabric across a WAN > or something like that) > > but then... > > > + timeout = min(IB_CM_MAX_TIMEOUT, > > + cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + > > + cm_convert_to_ms(cm_id_priv->av.packet_life_time)); > > should the IB_CM_MAX_TIMEOUT be inside a cm_convert_to_ms() too? > and similarly... > > > - cm_id_priv->timeout_ms = param->timeout_ms; > > + cm_id_priv->timeout_ms = min(IB_CM_MAX_TIMEOUT, param->timeout_ms); > > is timeout_ms misnamed, or did we just limit all timeouts to 21 msecs? > > ...and other places in the patch seem to have similar problems. > > Also, I would like to see warning messages like > > ib_cm: Possibly bogus timeout of xx (yyyyyy msecs) in REP from GID zzzz > > printed in the kernel log so people realize they have broken SRP > targets or whatever. Sean, do you plan to post a fixed patch? We really need this in 2.6.20 I think. -- MST From mst at mellanox.co.il Sat Jan 13 08:53:46 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sat, 13 Jan 2007 18:53:46 +0200 Subject: [openib-general] libcxgb3 master does not compile (Was Fwd: Make failure log from sw075:x86_64:2.6.19-smp:) Message-ID: <20070113165346.GC24653@mellanox.co.il> Steve, could you fix the following please? ----- Forwarded message from Vladimir ----- Subject: Make failure log from sw075:x86_64:2.6.19-smp: Date: Sat, 13 Jan 2007 17:09:58 +0200 make[2]: Entering directory `/tmp/gen2_devel_user-20070113-1700_check/src/userspace/libcxg b3' if /bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -I. -I. -I../libibv erbs/include -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -MF ".deps/cq.Tpo" -c -o cq.l o `test -f 'src/cq.c' || echo './'`src/cq.c; \ then mv -f ".deps/cq.Tpo" ".deps/cq.Plo"; else rm -f ".deps/cq.Tpo"; exit 1; fi mkdir .libs gcc -DHAVE_CONFIG_H -I. -I. -I. 
-I../libibverbs/include -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -MF .deps/cq.Tpo -c src/cq.c -fPIC -DPIC -o .libs/cq.o src/cq.c: In function 'iwch_arm_cq': src/cq.c:54: error: too many arguments to function 'ibv_cmd_req_notify_cq' make[2]: *** [cq.lo] Error 1 make[2]: Leaving directory `/tmp/gen2_devel_user-20070113-1700_check/src/userspace/libcxgb 3' make[1]: *** [all] Error 2 make[1]: Leaving directory `/tmp/gen2_devel_user-20070113-1700_check/src/userspace/libcxgb 3' make: *** [libcxgb3] Error 2 -- MST From mst at mellanox.co.il Sun Jan 14 10:49:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 14 Jan 2007 20:49:48 +0200 Subject: [openib-general] [PATCH v3 3/3] ofed_1_2 Provide generic allocator backportto2.6.20. In-Reply-To: <1168800392.4950.8.camel@linux-q667.site> References: <1168800392.4950.8.camel@linux-q667.site> Message-ID: <20070114184948.GH26427@mellanox.co.il> I wonder whom to let know about it, and how. Quoting Steve WIse : Subject: Re: [PATCH v3 3/3] ofed_1_2 Provide generic allocator backportto2.6.20. It is blocked for me too today. On Sun, 2007-01-14 at 20:45 +0200, Michael S. Tsirkin wrote: > BTW, is openib-general working for you? > Seems to be blocked for me. > -- MST From umaxx at oleco.net Sat Jan 13 05:16:40 2007 From: umaxx at oleco.net (Joerg Zinke) Date: Sat, 13 Jan 2007 14:16:40 +0100 Subject: [openib-general] libibcm get events Message-ID: <20070113141640.43778722@marvin.local> Hi, is there a non-blocking alternative for ib_cm_get_event() available? Any hints on how to check for events without blocking and waiting for the next one? 
Regards, Joerg From dotanb at dev.mellanox.co.il Sat Jan 13 02:41:23 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Sat, 13 Jan 2007 12:41:23 +0200 (IST) Subject: [openib-general] [PATCH] [libibverbs] Adding acks to all of the CQ events in the pingpong examples In-Reply-To: References: <1168533575.1220.0.camel@mtls05.yok.mtl.com> Message-ID: <1101.85.65.224.88.1168684883.squirrel@dev.mellanox.co.il> > OK, this is correct -- but since the examples don't destroy the CQ, is > there any point in acking the events? > > - R. > I think that the answer is yes, those tests are being used as a code reference for new users, and this code should give a good example to them. thanks Dotan From eitan at sw053.yok.mtl.com Fri Jan 12 21:20:38 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sat, 13 Jan 2007 07:20:38 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-13:normal completion Message-ID: <200701130520.l0D5KcEo000815@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Fri_Jan_12_08:29:50_2007 f40e97 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 
12-ary-2-tree.topo Failures: From eitan at sw053.yok.mtl.com Sat Jan 13 21:29:01 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sun, 14 Jan 2007 07:29:01 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-14:normal completion Message-ID: <200701140529.l0E5T1TH008091@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Sat_Jan_13_09:43:14_2007 1f8015 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From ogerlitz at voltaire.com Sun Jan 14 00:34:01 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 14 Jan 2007 10:34:01 +0200 Subject: [openib-general] [PATCH] [libibverbs] Adding acks to all of the CQ events in the pingpong examples In-Reply-To: References: <1168533575.1220.0.camel@mtls05.yok.mtl.com> Message-ID: <45A9EAF9.1020605@voltaire.com> Roland Dreier wrote: > OK, this is correct -- but since the examples don't destroy the CQ, is > there any point in acking the events? Yes, people use these examples when learning how to write code for IB, lets educate them well ... (ie the destroy cq should be added later) Or. 
From mlleinin at hpcn.ca.sandia.gov Fri Jan 12 16:01:43 2007 From: mlleinin at hpcn.ca.sandia.gov (Matt Leininger) Date: Fri, 12 Jan 2007 16:01:43 -0800 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: References: Message-ID: <1168646503.15888.207.camel@localhost> Sandia and LLNL would also find MVAPICH2 useful. Thanks, - Matt On Fri, 2007-01-12 at 17:00 -0500, Stephen Poole wrote: > I would find it very useful. > > Steve... > > Steve Poole > > Chief Scientist / Director of Special Projects > Computer Science and Mathematics Division > > Chief Systems Architect > Leadership Computing Facility > > Oak Ridge National Laboratory > 865.574.9008 > "Wisdom is not a product of schooling, but of the lifelong attempt to > acquire it" Albert Einstein > From mlleinin at hpcn.ca.sandia.gov Sun Jan 14 15:29:29 2007 From: mlleinin at hpcn.ca.sandia.gov (Matt Leininger) Date: Sun, 14 Jan 2007 15:29:29 -0800 Subject: [openib-general] [PATCH v3 3/3] ofed_1_2 Provide generic allocator backport to2.6.20. In-Reply-To: <1168800392.4950.8.camel@linux-q667.site> References: <1168797602.4950.5.camel@linux-q667.site> <20070114184517.GG26427@mellanox.co.il> <1168800392.4950.8.camel@linux-q667.site> Message-ID: <1168817369.15888.239.camel@localhost> On Sun, 2007-01-14 at 12:46 -0600, Steve WIse wrote: > It is blocked for me too today. The hard disk on the Sandia server filled up. I did a bit of house cleaning. Michael Lee will have to do some more, or else add another hard drive to the system. Thanks, - Matt > > > > On Sun, 2007-01-14 at 20:45 +0200, Michael S. Tsirkin wrote: > > BTW, is openib-general working for you? > > Seems to be blocked for me. 
> > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Sun Jan 14 17:54:53 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 14 Jan 2007 17:54:53 -0800 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts References: <20070113190100.GB13017@mellanox.co.il> Message-ID: > We really need this in 2.6.20 I think. Why is this urgent? This is just a workaround for one target's buggy firmware, right? And I would hope there is firmware available that fixes this without any workarounds on the other side ... or is that not the case? - R. From panda at cse.ohio-state.edu Sun Jan 14 18:46:00 2007 From: panda at cse.ohio-state.edu (Dhabaleswar Panda) Date: Sun, 14 Jan 2007 21:46:00 -0500 (EST) Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <45A9BD25.40903@dev.mellanox.co.il> from "Tziporet Koren" at Jan 14, 2007 07:18:29 AM Message-ID: <200701150246.l0F2k0dW025885@xi.cse.ohio-state.edu> > > Dhabaleswar Panda wrote: > > > > We will be happy to prepare the SRPM. > > > > Thanks, > > > > DK > > > > > Can you send me the name of the person that is going to do it, so Vlad > can work with him/her on OFED integration. Shaun Roland from my group (cc'ed in this e-mail) will be in charge of this. Vlad and Shaun can communicate. > Note that we will need the package this week so we will be able to > update OFED install scripts. Sure. We have the stable release of MVAPICH2 0.9.8 (done before SC '06). We will take this version and create the SRPM for OFED 1.2.
DK From rdreier at cisco.com Sun Jan 14 19:19:53 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 14 Jan 2007 19:19:53 -0800 Subject: [openib-general] [PATCH] [libibverbs] Adding acks to all of the CQ events in the pingpong examples References: <1168533575.1220.0.camel@mtls05.yok.mtl.com> <1101.85.65.224.88.1168684883.squirrel@dev.mellanox.co.il> Message-ID: > > OK, this is correct -- but since the examples don't destroy the CQ, is > > there any point in acking the events? > I think that the answer is yes, those tests are being used as a code > reference for new users, and this code should give a good example to them. I guess my point was really that if people are using the code as a reference, and you want to make it a better reference, then a patch should actually make the code more correct. And if the code never destroys the CQ, then acking CQ events is actually worse than useless, since it is just pointless overhead. I would accept a patch that actually destroys all the IB objects, in which case acking CQ events is necessary. But I think I would prefer a patch that calls ibv_ack_cq_events() only once by keeping a count of the # of events to ack, since that style is likely to perform better. - R. From mst at mellanox.co.il Sun Jan 14 21:09:30 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 15 Jan 2007 07:09:30 +0200 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: References: <20070113190100.GB13017@mellanox.co.il> Message-ID: <20070115050930.GK26427@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH] 2.6.20 ib_cm: limit cm message timeouts > > > We really need this in 2.6.20 I think. > > Why is this urgent? This is just a workaround for one target's buggy > firmware, right? And I would hope there is firmware available that > fixes this without any workaruonds on the other side ... or is that > not the case? It seems not. Ishai will know. Ishai? 
-- MST From eitan at sw053.yok.mtl.com Sun Jan 14 21:17:34 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Mon, 15 Jan 2007 07:17:34 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-15:normal completion Message-ID: <200701150517.l0F5HYYB014461@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Sat_Jan_13_09:43:14_2007 1f8015 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=409 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo 9 OsmStress IS3-128.topo Failures: 1 OsmStress IS3-128.topo From devesh28 at gmail.com Sun Jan 14 21:35:27 2007 From: devesh28 at gmail.com (Devesh Sharma) Date: Mon, 15 Jan 2007 11:05:27 +0530 Subject: [openib-general] WC Error code question In-Reply-To: <45A6419E.5000701@dev.mellanox.co.il> References: <2cfcf21e0701110514t3a8b5954s760f26a0ab0f7321@mail.gmail.com> <45A6419E.5000701@dev.mellanox.co.il> Message-ID: <309a667c0701142135o69788be9g1e002498d4303e93@mail.gmail.com> On which side you are getting this error? If its at initiator side then its bad lkey if its on other side then you have bad rkey. 
On 1/11/07, Dotan Barak wrote: > Steven Wooding wrote: > > Hi, > > > > I'm getting an IBV_WC_LOC_ACCESS_ERR when getting a work completion > > item related to an RDMA with ImmData transfer. > > > > What does this error actually mean? > > > > Thanks, > > > > Steve. > in which side do you get this completion? > My guess is that you are trying to send an RDMA Write with immediate > which has bad rkey > (rkey which doesn't match the remote address), and you get this status > at the responder side. > > > If you still have this problem, i will need some more info .. > > Dotan > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From dotanb at dev.mellanox.co.il Mon Jan 15 00:46:19 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 15 Jan 2007 10:46:19 +0200 Subject: [openib-general] libibcm get events In-Reply-To: <20070113141640.43778722@marvin.local> References: <20070113141640.43778722@marvin.local> Message-ID: <45AB3F5B.8030408@dev.mellanox.co.il> Joerg Zinke wrote: > Hi, > > is there a non-blocking alternative for ib_cm_get_event() available? Any > hints on how to check for events without blocking and waiting for the > next one? 
> > Regards, > > Joerg > Here is a response that Or Gerlitz sent a few weeks ago on getting completion events in non blocking mode, I think that this can be useful in your case too: "sure, yes, for both questions: the actual object to deliver cq event is struct ibv_comp_channel whose only field is a file descriptor with which you can implement the requirements, use poll/select to set a timeout, or make it non blocking such that you can int rc; struct ibv_comp_channel my_ch; struct pollfd my_pollfd; flags = fcntl(my_ch.fd, F_GETFL) rc = fcntl(my_ch.fd, F_SETFL, flags | O_NONBLOCK) will make ibv_get_event to return immediately if there is no event to consume at the channel my_pollfd.fd = my_ch.fd; my_pollfd.events = POLLIN; my_pollfd.revents = 0; rc = poll(&my_pollfd, 1, ms_timeout); will allow you to either get a timeout if ms milliseconds have elapsed or there is an event waiting for you to consume " Dotan From dotanb at dev.mellanox.co.il Mon Jan 15 04:08:40 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 15 Jan 2007 14:08:40 +0200 Subject: [openib-general] [PATCH] [libibverbs] Adding acks to all of the CQ events in the pingpong examples In-Reply-To: References: <1168533575.1220.0.camel@mtls05.yok.mtl.com> <1101.85.65.224.88.1168684883.squirrel@dev.mellanox.co.il> Message-ID: <45AB6EC8.2030409@dev.mellanox.co.il> Roland Dreier wrote: > I would accept a patch that actually destroys all the IB objects, in > which case acking CQ events is necessary. But I think I would prefer > a patch that calls ibv_ack_cq_events() only once by keeping a count of > the # of events to ack, since that style is likely to perform better. > > - R. > If this is the issue then I will create a patch which destroys all of the resources as well. I will send it tomorrow.
thanks Dotan From swise at opengridcomputing.com Mon Jan 15 05:56:35 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 07:56:35 -0600 Subject: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMA Support Message-ID: <20070115135635.31378.9641.stgit@dell3.ogc.int> Michael/Vlad, Here is version 4. Changes since version 3: - No need to alter the genalloc.c file. - The patch to add genalloc.o to ib_core.ko was incorrect. It was adding genalloc.o to ib_core.ko, but also building a genalloc.ko. In addition to this patch series, the Chelsio driver code needs to be pulled from git://staging.openfabrics.org/~swise/cxgb3.git for-ofed_1_2 I've tested this on 2.6.20-rc4. If you're happy with the changes, I ask that you pull these patches in and pull the T3 drivers into vlad's ofed_1_2 git tree. That will give us a base package for Chelsio's drivers. Then I'll post additional patches for the backports to the supported distros as I get them implemented and tested. How's that sound? If you would rather a patch for the chelsio drivers, lemme know and I'll email it to you directly. Signed-off-by: Steve Wise From swise at opengridcomputing.com Mon Jan 15 05:56:37 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 07:56:37 -0600 Subject: [openib-general] [PATCH v4 1/3] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070115135635.31378.9641.stgit@dell3.ogc.int> References: <20070115135635.31378.9641.stgit@dell3.ogc.int> Message-ID: <20070115135637.31378.97300.stgit@dell3.ogc.int> - qp_num -> qp ptr patch for cxgb3. 
Signed-off-by: Steve Wise --- kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch | 23 +++++++++++++++++++++++ 1 files changed, 23 insertions(+), 0 deletions(-) diff --git a/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch new file mode 100644 index 0000000..0763f70 --- /dev/null +++ b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch @@ -0,0 +1,23 @@ +Update T3 driver: qp_num no longer in ib_wc. + +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/iwch_cq.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c +index ff09509..122f7b4 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c +@@ -80,7 +80,7 @@ int iwch_poll_cq_one(struct iwch_dev *rh + ret = 1; + + wc->wr_id = cookie; +- wc->qp_num = qhp->wq.qpid; ++ wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " From swise at opengridcomputing.com Mon Jan 15 05:56:39 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 07:56:39 -0600 Subject: [openib-general] [PATCH v4 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. 
In-Reply-To: <20070115135635.31378.9641.stgit@dell3.ogc.int> References: <20070115135635.31378.9641.stgit@dell3.ogc.int> Message-ID: <20070115135639.31378.25338.stgit@dell3.ogc.int> - added cxgb3 and iw_cxgb3 config stuff - visit and build driver/net/cxgb3 to get the cxgb3 driver Signed-off-by: Steve Wise --- ofed_scripts/Makefile | 9 ++++++-- ofed_scripts/configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile index d63b1d2..8942385 100644 --- a/ofed_scripts/Makefile +++ b/ofed_scripts/Makefile @@ -46,8 +46,10 @@ kernel: @echo "Kernel sources: $(KSRC)" env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ -I$(CWD)/drivers/infiniband/ulp/ipoib \ - -I$(CWD)/drivers/infiniband/debug" \ - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ + -I$(CWD)/drivers/infiniband/debug \ + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ + -I$(CWD)/drivers/net/cxgb3 " \ + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ @@ -74,6 +76,9 @@ kernel: CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ LINUXINCLUDE=' \ $(BACKPORT_INCLUDES) \ -I$(CWD)/include \ diff --git a/ofed_scripts/configure b/ofed_scripts/configure index a0557e2..253427c 100755 --- a/ofed_scripts/configure +++ b/ofed_scripts/configure @@ -126,6 +126,12 @@ Usage: `basename $0` [options] --with-vnic_stats-mod 
make CONFIG_INFINIBAND_VNIC_STATS=y [no] --without-vnic_stats-mod [yes] + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] + --without-cxgb3-mod [yes] + + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] + --without-cxgb3_debug-mod [yes] + --help - print out options @@ -207,7 +213,10 @@ get_backport_dir() 2.6.19*) echo 2.6.19 ;; - 2.6.2[0-9]*) + 2.6.20*) + echo 2.6.20 + ;; + 2.6.2[1-9]*) echo ;; *) @@ -607,6 +616,20 @@ main() --without-vnic_stats-mod) CONFIG_INFINIBAND_VNIC_STATS= ;; + --with-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3="m" + CONFIG_CHELSIO_T3="m" + ;; + --without-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3= + CONFIG_CHELSIO_T3= + ;; + --with-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG="y" + ;; + --without-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG= + ;; --with-modprobe|--without-modprobe) ;; -h | --help) @@ -679,6 +702,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} @@ -689,6 +714,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} # Check for minimal supported kernel version if ! 
check_kerver ${KVERSION} ${MIN_KVERSION}; then @@ -742,6 +768,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} @@ -752,6 +780,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} EOFCONFIG echo "Created ${CONFIG}:" @@ -887,6 +916,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" else DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" fi +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" +else + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" +fi +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" +else + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" +fi +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" +else + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" +fi cat >> ${AUTOCONF_H} << EOFAUTOCONF #undef CONFIG_INFINIBAND #undef CONFIG_INFINIBAND_IPOIB @@ -908,6 +952,9 @@ #undef CONFIG_INFINIBAND_MADEYE #undef CONFIG_INFINIBAND_VNIC #undef CONFIG_INFINIBAND_VNIC_DEBUG #undef CONFIG_INFINIBAND_VNIC_STATS +#undef CONFIG_INFINIBAND_CXGB3 +#undef CONFIG_INFINIBAND_CXGB3_DEBUG +#undef CONFIG_CHELSIO_T3 #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY @@ -927,6 +974,8 @@ #undef 
CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_ADDR_TRANS} ${DEFINE_INFINIBAND_MTHCA} ${DEFINE_INFINIBAND_VNIC} +${DEFINE_INFINIBAND_CXGB3} +${DEFINE_CHELSIO_T3} ${DEFINE_INFINIBAND_IPOIB_DEBUG} ${DEFINE_INFINIBAND_ISER} @@ -937,6 +986,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_RDS_DEBUG} ${DEFINE_INFINIBAND_VNIC_DEBUG} ${DEFINE_INFINIBAND_VNIC_STATS} +${DEFINE_INFINIBAND_CXGB3_DEBUG} ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} From swise at opengridcomputing.com Mon Jan 15 05:56:42 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 07:56:42 -0600 Subject: [openib-general] [PATCH v4 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. In-Reply-To: <20070115135635.31378.9641.stgit@dell3.ogc.int> References: <20070115135635.31378.9641.stgit@dell3.ogc.int> Message-ID: <20070115135642.31378.79497.stgit@dell3.ogc.int> This is needed even on kernels that might have genalloc because it may not be configured into the running kernel. Right now the only subsystem that turns on the generic allocator config option is IA64. So chances are good that 2.6.20 kernels will _not_ have this compiled in. So we just backport it to all supported kernels. To avoid collisions with kernels that _do_ have this compiled in, change the names of the exported symbols in the backport. Changes to genalloc.h: - Change exported symbol names by prepending ib_ - added #defines for original exported symbol names to new names. Thus code calling the allocator doesn't need to change. 
EG: #define gen_pool_alloc ib_gen_pool_alloc Signed-off-by: Steve Wise --- .../backport/2.6.20/include/linux/genalloc.h | 42 +++++ .../backport/2.6.20/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.20/linux_genalloc_to_2.6.20.patch | 17 ++ 3 files changed, 257 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.20/include/linux/genalloc.h b/kernel_addons/backport/2.6.20/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.20/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor.
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.20/include/src/genalloc.c b/kernel_addons/backport/2.6.20/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.20/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/module.h> +#include <linux/genalloc.h> + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch new file mode 100644 index 0000000..93fee2b --- /dev/null +++ b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch @@ -0,0 +1,17 @@ +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 163d991..2cd239f 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include 
"src/genalloc.c" From halr at voltaire.com Mon Jan 15 06:19:28 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 15 Jan 2007 09:19:28 -0500 Subject: [openib-general] [PATCH] Diags/saquery: Add ability to query InformInfoRecords Message-ID: <1168870767.18398.90564.camel@hal.voltaire.com> Diags/saquery: Add ability to query InformInfoRecords Signed-off-by: Hal Rosenstock diff --git a/diags/man/saquery.8 b/diags/man/saquery.8 index 0389bd1..9af3a9f 100644 --- a/diags/man/saquery.8 +++ b/diags/man/saquery.8 @@ -1,11 +1,11 @@ -.TH SAQUERY 8 "December 7, 2006" "OpenIB" "OpenIB Diagnostics" +.TH SAQUERY 8 "January 15, 2007" "OpenIB" "OpenIB Diagnostics" .SH NAME saquery \- query InfiniBand subnet administration attributes .SH SYNOPSIS .B saquery -[\-h] [\-d] [\-P] [\-N] [\-D] [\-S] [\-L] [\-l] [\-G] [\-C] [\-s] [\-g] [\-m] [--src-to-dst ] [] +[\-h] [\-d] [\-P] [\-N] [\-D] [\-S] [\-I] [\-L] [\-l] [\-G] [\-C] [\-s] [\-g] [\-m] [--src-to-dst ] [] .SH DESCRIPTION .PP @@ -27,6 +27,9 @@ get NodeDescriptions of CAs only \fB\-S\fR get ServiceRecord info .TP +\fB\-I\fR +get InformInfoRecord (subscription) info +.TP \fB\-L\fR return the Lids of the name specified .TP diff --git a/diags/src/saquery.c b/diags/src/saquery.c index 7495f7d..6ce52ad 100644 --- a/diags/src/saquery.c +++ b/diags/src/saquery.c @@ -414,24 +414,101 @@ print_service_record(ib_service_record_t p_sr->service_data8[10], p_sr->service_data8[11], p_sr->service_data8[12], p_sr->service_data8[13], p_sr->service_data8[14], p_sr->service_data8[15], - cl_ntoh16(p_sr->service_data16[0]), - cl_ntoh16(p_sr->service_data16[1]), - cl_ntoh16(p_sr->service_data16[2]), - cl_ntoh16(p_sr->service_data16[3]), - cl_ntoh16(p_sr->service_data16[4]), - cl_ntoh16(p_sr->service_data16[5]), - cl_ntoh16(p_sr->service_data16[6]), - cl_ntoh16(p_sr->service_data16[7]), - cl_ntoh32(p_sr->service_data32[0]), - cl_ntoh32(p_sr->service_data32[1]), - cl_ntoh32(p_sr->service_data32[2]), - cl_ntoh32(p_sr->service_data32[3]), - 
cl_ntoh64(p_sr->service_data64[0]), - cl_ntoh64(p_sr->service_data64[1]) + cl_ntoh16( p_sr->service_data16[0] ), + cl_ntoh16( p_sr->service_data16[1] ), + cl_ntoh16( p_sr->service_data16[2] ), + cl_ntoh16( p_sr->service_data16[3] ), + cl_ntoh16( p_sr->service_data16[4] ), + cl_ntoh16( p_sr->service_data16[5] ), + cl_ntoh16( p_sr->service_data16[6] ), + cl_ntoh16( p_sr->service_data16[7] ), + cl_ntoh32( p_sr->service_data32[0] ), + cl_ntoh32( p_sr->service_data32[1] ), + cl_ntoh32( p_sr->service_data32[2] ), + cl_ntoh32( p_sr->service_data32[3] ), + cl_ntoh64( p_sr->service_data64[0] ), + cl_ntoh64( p_sr->service_data64[1] ) ); } static void +print_inform_info_record(ib_inform_info_record_t *p_iir) +{ + uint32_t qpn; + uint8_t resp_time_val; + + ib_inform_info_get_qpn_resp_time(p_iir->inform_info.g_or_v.generic.qpn_resp_time_val, &qpn, &resp_time_val); + + if (p_iir->inform_info.is_generic) { + printf("InformInfoRecord dump:\n" + "\t\tRID\n" + "\t\tSubscriberGID...........0x%016" PRIx64 " : " + "0x%016" PRIx64 "\n" + "\t\tSubscriberEnum..........0x%X\n" + "\t\tInformInfo dump:\n" + "\t\tgid.....................0x%016" PRIx64 " : 0x%016" PRIx64 "\n" + "\t\tlid_range_begin.........0x%X\n" + "\t\tlid_range_end...........0x%X\n" + "\t\tis_generic..............0x%X\n" + "\t\tsubscribe...............0x%X\n" + "\t\ttrap_type...............0x%X\n" + "\t\ttrap_num................%u\n" + "\t\tqpn.....................0x%06X\n" + "\t\tresp_time_val...........0x%X\n" + "\t\tnode_type...............0x%06X\n" + "", + cl_ntoh64( p_iir->subscriber_gid.unicast.prefix ), + cl_ntoh64( p_iir->subscriber_gid.unicast.interface_id ), + cl_ntoh16( p_iir->subscriber_enum ), + cl_ntoh64( p_iir->inform_info.gid.unicast.prefix ), + cl_ntoh64( p_iir->inform_info.gid.unicast.interface_id ), + cl_ntoh16( p_iir->inform_info.lid_range_begin ), + cl_ntoh16( p_iir->inform_info.lid_range_end ), + p_iir->inform_info.is_generic, + p_iir->inform_info.subscribe, + cl_ntoh16( p_iir->inform_info.trap_type ), 
+ cl_ntoh16( p_iir->inform_info.g_or_v.generic.trap_num ), + cl_ntoh32( qpn ), + resp_time_val, + cl_ntoh32(ib_inform_info_get_node_type( &p_iir->inform_info )) + ); + } else { + printf("InformInfoRecord dump:\n" + "\t\tRID\n" + "\t\tSubscriberGID...........0x%016" PRIx64 " : " + "0x%016" PRIx64 "\n" + "\t\tSubscriberEnum..........0x%X\n" + "\t\tInformInfo dump:\n" + "\t\tgid.....................0x%016" PRIx64 " : 0x%016" PRIx64 "\n" + "\t\tlid_range_begin.........0x%X\n" + "\t\tlid_range_end...........0x%X\n" + "\t\tis_generic..............0x%X\n" + "\t\tsubscribe...............0x%X\n" + "\t\ttrap_type...............0x%X\n" + "\t\tdev_id..................0x%X\n" + "\t\tqpn.....................0x%06X\n" + "\t\tresp_time_val...........0x%X\n" + "\t\tvendor_id...............0x%06X\n" + "", + cl_ntoh64( p_iir->subscriber_gid.unicast.prefix ), + cl_ntoh64( p_iir->subscriber_gid.unicast.interface_id ), + cl_ntoh16( p_iir->subscriber_enum ), + cl_ntoh64( p_iir->inform_info.gid.unicast.prefix ), + cl_ntoh64( p_iir->inform_info.gid.unicast.interface_id ), + cl_ntoh16( p_iir->inform_info.lid_range_begin ), + cl_ntoh16( p_iir->inform_info.lid_range_end ), + p_iir->inform_info.is_generic, + p_iir->inform_info.subscribe, + cl_ntoh16( p_iir->inform_info.trap_type ), + cl_ntoh16( p_iir->inform_info.g_or_v.vend.dev_id ), + cl_ntoh32( qpn ), + resp_time_val, + cl_ntoh32(ib_inform_info_get_node_type( &p_iir->inform_info )) + ); + } +} + +static void return_mad(void) { /* @@ -763,6 +840,26 @@ print_service_records(osm_bind_handle_t return (status); } +static ib_api_status_t +print_inform_info_records(osm_bind_handle_t bind_handle) +{ + int i = 0; + ib_inform_info_record_t *inform_info_record = NULL; + ib_net16_t attr_offset = ib_get_attr_offset(sizeof(*inform_info_record)); + ib_api_status_t status; + + status = get_all_records(bind_handle, IB_MAD_ATTR_INFORM_INFO_RECORD, attr_offset, 0); + if (status != IB_SUCCESS) + return (status); + + for (i = 0; i < result.result_cnt; i++) { + 
inform_info_record = osmv_get_query_inform_info_rec(result.p_result_madw, i); + print_inform_info_record(inform_info_record); + } + return_mad(); + return (status); +} + static osm_bind_handle_t get_bind_handle(void) { @@ -847,13 +944,14 @@ clean_up(void) static void usage(void) { - fprintf(stderr, "Usage: %s [-h -d -P -N -D -S -L -l -G -C -s -g -m --src-to-dst ] []\n", argv0); + fprintf(stderr, "Usage: %s [-h -d -P -N -D -S -I -L -l -G -C -s -g -m --src-to-dst ] []\n", argv0); fprintf(stderr, " Queries node records by default\n"); fprintf(stderr, " -d enable debugging\n"); fprintf(stderr, " -P get PathRecord info\n"); fprintf(stderr, " -N get NodeRecord info\n"); fprintf(stderr, " -D get NodeDescriptions of CAs only\n"); fprintf(stderr, " -S get ServiceRecord info\n"); + fprintf(stderr, " -I get InformInfoRecord (subscription) info\n"); fprintf(stderr, " -L return the Lids of the name specified\n"); fprintf(stderr, " -l return the unique Lid of the name specified\n"); fprintf(stderr, " -G return the Guids of the name specified\n"); @@ -877,7 +975,7 @@ main(int argc, char **argv) ib_net16_t dst_lid; ib_api_status_t status; - static char const str_opts[] = "PNDLlGCSsgmdh"; + static char const str_opts[] = "PNDLlGCSIsgmdh"; static const struct option long_opts [] = { {"P", 0, 0, 'P'}, {"N", 0, 0, 'N'}, @@ -891,6 +989,7 @@ main(int argc, char **argv) {"d", 0, 0, 'd'}, {"C", 0, 0, 'C'}, {"S", 0, 0, 'S'}, + {"I", 0, 0, 'I'}, {"help", 0, 0, 'h'}, {"src-to-dst", 1, 0, 1}, { } @@ -929,6 +1028,9 @@ main(int argc, char **argv) case 'S': query_type = IB_MAD_ATTR_SERVICE_RECORD; break; + case 'I': + query_type = IB_MAD_ATTR_INFORM_INFO_RECORD; + break; case 'N': query_type = IB_MAD_ATTR_NODE_RECORD; break; @@ -997,6 +1099,9 @@ main(int argc, char **argv) case IB_MAD_ATTR_SERVICE_RECORD: status = print_service_records(bind_handle); break; + case IB_MAD_ATTR_INFORM_INFO_RECORD: + status = print_inform_info_records(bind_handle); + break; default: fprintf(stderr, "Unknown query 
type %d\n", query_type); status = IB_UNKNOWN_ERROR; From jlentini at netapp.com Mon Jan 15 07:24:55 2007 From: jlentini at netapp.com (James Lentini) Date: Mon, 15 Jan 2007 10:24:55 -0500 (EST) Subject: [openib-general] NFS(v4)-RDMA In-Reply-To: <1168460361.5086.75.camel@julia.et.endace.com> References: <1168392546.5086.64.camel@julia.et.endace.com> <1168460361.5086.75.camel@julia.et.endace.com> Message-ID: I see the problem. The mount command's NFSv4 code needs to be updated to use the new default NFS-RDMA port. This patch should fix the problem. --- nfsrdmamount/nfsmount.c 2006-11-29 14:19:41.000000000 -0500 +++ nfsrdmamount/nfsmount.c 2007-01-15 10:19:07.406514000 -0500 @@ -1277,7 +1277,7 @@ int nfs4mount(const char *spec, const ch else strcpy(nh, inet_ntoa(server_addr.sin_addr)); - server_addr.sin_port = htons(2049); + server_addr.sin_port = htons(2050); printf("Doing nfs/rdma mount to %s, mount protocol to %s\n", mh, nh); #endif On Thu, 11 Jan 2007, vishal wrote: > Hi, > > NFSv4 does work over TCP..Any suggestions ? > > Thanks > > Vishla > > On Wed, 2007-01-10 at 12:07 -0500, James Lentini wrote: > > On Wed, 10 Jan 2007, vishal wrote: > > > > > Hi, > > > > > > I am getting an invalid argument error when I try to mount using > > > nfs(v4)-rdma. The mounts with nfs(v3)-rdma work fine. Following are the > > > details:- > > > > > > 1. Command Issued on the client side: - > > > ./nfsrdmamount -o rdma=10.0.0.2 -t nfs4 10.0.0.2:/ /data > > > > > > 2. /etc/exports on the client side > > > /data 10.0.0.1(rw,fsid=0,insecure,no_subtree_check,async) > > > > > > 3. Error recieved:- > > > nfsmount: Invalid argument > > > > > > 4. From strace:- > > > > > > mount("10.0.0.2:/", "/data", "nfs4", 0, "\1") = -1 EINVAL (Invalid > > > argument) > > > > Do NFSv4 mounts work over TCP? > > > > If not, make sure you have v4 enabled on the client and server. 
> From tziporet at dev.mellanox.co.il Mon Jan 15 07:25:38 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 15 Jan 2007 17:25:38 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <200701150246.l0F2k0dW025885@xi.cse.ohio-state.edu> References: <200701150246.l0F2k0dW025885@xi.cse.ohio-state.edu> Message-ID: <45AB9CF2.8060508@dev.mellanox.co.il> Dhabaleswar Panda wrote: > > Shaun Roland from my group (cc'ed in this e-mail) will be in charge of > this. Vlad and Shaun can communicate. > > Hi Shaun, Please open an account in the OFA server so you will be able to have a git tree to place your SRPM Contact Johann George to get a user account on openfabrics server. For a git tree. See https://wiki.openfabrics.org/tiki-index.php?page=HOWTO+add+userspace+package+to+ofa_user for more info. Tziporet From vlad at mellanox.co.il Mon Jan 15 07:33:44 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Mon, 15 Jan 2007 17:33:44 +0200 Subject: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMA Support In-Reply-To: <20070115135635.31378.9641.stgit@dell3.ogc.int> References: <20070115135635.31378.9641.stgit@dell3.ogc.int> Message-ID: <1168875224.7021.32.camel@vladsk-laptop> Hi Steve, We need to pull 2.6.20-rc4 to ofed_1_2 first and then will pull your tree. This will be done tomorrow, I hope. Regards, Vladimir On Mon, 2007-01-15 at 07:56 -0600, Steve Wise wrote: > Michael/Vlad, > > Here is version 4. > > Changes since version 3: > > - No need to alter the genalloc.c file. > > - The patch to add genalloc.o to ib_core.ko was incorrect. It was adding > genalloc.o to ib_core.ko, but also building a genalloc.ko. > > > In addition to this patch series, the Chelsio driver code needs to be > pulled from > > git://staging.openfabrics.org/~swise/cxgb3.git for-ofed_1_2 > > I've tested this on 2.6.20-rc4. 
If you're happy with the changes, I > ask that you pull these patches in and pull the T3 drivers into vlad's > ofed_1_2 git tree. That will give us a base package for Chelsio's > drivers. Then I'll post additional patches for the backports to the > supported distros as I get them implemented and tested. > > How's that sound? If you would rather a patch for the chelsio drivers, > lemme know and I'll email it to you directly. > > Signed-off-by: Steve Wise > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From dotanb at dev.mellanox.co.il Mon Jan 15 07:43:37 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 15 Jan 2007 17:43:37 +0200 Subject: [openib-general] does the libibverbs support static linkage? In-Reply-To: <45A1EC8B.2050106@dev.mellanox.co.il> References: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> <45A0B02E.1020105@dev.mellanox.co.il> <45A1EC8B.2050106@dev.mellanox.co.il> Message-ID: <45ABA129.9080000@dev.mellanox.co.il> Hi Roland. Dotan Barak wrote: > Roland Dreier wrote: > >> > This is version 1.1-pre1. >> >> OK, I haven't pushed the changes to fix static linking in 1.1-pre >> because I still need to finish fixing all the drivers for the ABI changes. >> >> Did you push the static library link support in the last fixes/changes that you made? thanks Dotan From jlentini at netapp.com Mon Jan 15 07:50:45 2007 From: jlentini at netapp.com (James Lentini) Date: Mon, 15 Jan 2007 10:50:45 -0500 (EST) Subject: [openib-general] Request for assistance from DWG In-Reply-To: <20070112001719.GB17999@mellanox.co.il> References: <20070112001719.GB17999@mellanox.co.il> Message-ID: Was the lack of a dual license on the ipath file and cxgb3 file just an oversight? On Fri, 12 Jan 2007, Michael S. Tsirkin wrote: > > What do people think ? 
> OFED sources tarball can mix in GPL code without making > all of OFED GPL-only under the "mere aggregation" clause in GPLv2: > "In addition, mere aggregation of another work not based on the Program > with the Program (or with a work based on the Program) on a volume of > a storage or distribution medium does not bring the other work under > the scope of this License." > > > and/or can we just isolate the GPL only code from GPL/BSD code so > > that people will know what is GPL/BSD and what is truely GPL only. > > That's why files have a license in the header, no? > > Quoting Woodruff, Robert J : > Subject: RE: Request for assistance from DWG > > I guess the question is, should we be taking code that is GPL only from > others and including it in our code base ? I also understand that it > might be pretty difficult, if not impossible to code some backport > without using some of the GPL only code from a newer kernel and > porting it back to an older kernel. > > What do people think ? > > If we have to have some GPL only files for backport and such > can we do that within openfabrics ? Probably a question for Bill Boas, > and the rest of the promoters. > > Bill ?? > > > and/or can we just isolate the GPL only code from GPL/BSD code so > that people will know what is GPL/BSD and what is truely GPL only. > > -----Original Message----- > From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] > Sent: Thursday, January 11, 2007 2:56 PM > To: Woodruff, Robert J > Cc: Chet Mehta; Steve Wise; Betsy Zeller; Tziporet Koren; > vlad at mellanox.co.il; Bryan O'Sullivan; openib; > openfabrics-lwg at openfabrics.org; openfabrics-ewg at openib.org > Subject: Re: Request for assistance from DWG > > The BSD+GPL can only apply to code developed by us. > Backport headers, list.h and iproute2 are not our code and can not be > "corrected". 
> > > Quoting Woodruff, Robert J : > Subject: Request for assistance from DWG > > I have added the appropriate maintainers to this thread > and sending it to the open fabrics email lists. > > Tziporet for OFED (mutex-backport.h file used by OFED) > Bryan for ipath > Vladimir for ipoib tools > Steve for cxgb3 driver > > All please review the list of GPL only files that are (or were in your > components before > we moved to git) and please correct the code so that meets the BSD+GPL > requirements > that we all agreed to when we joined openfabrics, if you have not done > so already. > > Thanks > woody > > > > ________________________________ > > From: Chet Mehta [mailto:chetm at us.ibm.com] > Sent: Thursday, January 11, 2007 2:17 PM > To: Woodruff, Robert J > Cc: openfabrics-lwg at openfabrics.org > Subject: RE: Request for assistance from DWG > > > > Bob, > > Thanks for the reply. Attached below is a list of files that we believe > have only a GPL license (the list was generated on 12/5/06). As I > mentioned in my last note, the search wasn't exhaustive so there may be > others. During the time we also noticed that there were some > utility-type files (e.g. files used for extraction, make or build) that > were GPL only. I'm assuming the DWG will decide if these should be dual > licensed or be placed on the exception list. Lastly I suspect there are > files that only have a BSD license - i.e. no GPL. While personally > that's less worrisome, going strictly by the Bylaws I would suspect > those should be corrected also. > > Thanks for your help to get this corrected! 
> > ################################################################ > #mutex-backport.h (Cisco Systems) > ################################################################ > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include > /linux/mutex-backport.h > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include > /linux/mutex-backport.h > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include > /linux/.svn/text-base/mutex-backport.h.svn-base > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include > /linux/.svn/text-base/mutex-backport.h.svn-base > ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include > /linux/mutex-backport.h > ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include > /linux/.svn/text-base/mutex-backport.h.svn-base > ./gen2/trunk/src/linux-kernel/infiniband/include/linux/mutex-backport.h > ./gen2/trunk/src/linux-kernel/infiniband/include/linux/.svn/text-base/mu > tex-backport.h.svn-base > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i > nclude/linux/mutex-backport.h > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i > nclude/linux/.svn/text-base/mutex-backport.h.svn-base > ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/mutex-ba > ckport.h > ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/.svn/tex > t-base/mutex-backport.h.svn-base > ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ > mutex-backport.h > ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ > .svn/text-base/mutex-backport.h.svn-base > ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ > mutex-backport.h > ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ > .svn/text-base/mutex-backport.h.svn-base > ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ > mutex-backport.h > 
./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ > .svn/text-base/mutex-backport.h.svn-base > ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ > mutex-backport.h > ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ > .svn/text-base/mutex-backport.h.svn-base > ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ > mutex-backport.h > ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ > .svn/text-base/mutex-backport.h.svn-base > > ################################################################ > #ipath_eth.c (PathScale) > ################################################################ > ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/ipath_et > h.c > ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/.svn/tex > t-base/ipath_eth.c.svn-base > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > w/ipath/ipath_eth.c > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > w/ipath/.svn/text-base/ipath_eth.c.svn-base > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > w/ipath/ipath_ether/ipath_eth.c > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > w/ipath/ipath_ether/.svn/text-base/ipath_eth.c.svn-base > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_eth.c > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/.svn/text-bas > e/ipath_eth.c.svn-base > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/i > path_eth.c > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/. 
> svn/text-base/ipath_eth.c.svn-base > > ################################################################ > #lnstat.c Linux network stats > # Copyright (C) 2004 by Harald Welte > # Development of this code was funded by Astaro AG, > http://www.astaro.com/ > # Based on original concept and ideas from predecessor rtstat.c: > # Copyright 2001 by Robert Olsson > # Uppsala University, Sweden > ################################################################ > ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/lnstat.c > ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/.svn/text-base/lnsta > t.c.svn-base > > ################################################################ > #list.h ( kazutomo at mcs.anl.gov) > ################################################################ > ./gen2/trunk/src/userspace/ipoibtools/list.h > ./gen2/trunk/src/userspace/ipoibtools/.svn/text-base/list.h.svn-base > > ################################################################ > #cxgb3_main.c (Chelsio Communications) > ################################################################ > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/cx > gb3_main.c > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/.s > vn/text-base/cxgb3_main.c.svn-base > ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/cxgb3_main.c > ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/.svn/text-base/cxgb3_ma > in.c.svn-base > > :Chet. 
> > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > -- > MST > > > -- > MST > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From swise at opengridcomputing.com Mon Jan 15 07:59:39 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 09:59:39 -0600 Subject: [openib-general] Request for assistance from DWG In-Reply-To: References: <20070112001719.GB17999@mellanox.co.il> Message-ID: <1168876779.10308.5.camel@linux-q667.site> The ipath stuff listed below is from the old svn iwarp branch and can be ignored. The cxgb3 code is an oversight. I've added the correct MODULE_LICENSE() macro to the ULLD going into OFED 1.2. What else do I need to do? On Mon, 2007-01-15 at 10:50 -0500, James Lentini wrote: > Was the lack of a dual license on the ipath file and cxgb3 file just > an oversight? > > On Fri, 12 Jan 2007, Michael S. Tsirkin wrote: > > > > What do people think ? > > OFED sources tarball can mix in GPL code without making > > all of OFED GPL-only under the "mere aggregation" clause in GPLv2: > > "In addition, mere aggregation of another work not based on the Program > > with the Program (or with a work based on the Program) on a volume of > > a storage or distribution medium does not bring the other work under > > the scope of this License." > > > > > and/or can we just isolate the GPL only code from GPL/BSD code so > > > that people will know what is GPL/BSD and what is truely GPL only. > > > > That's why files have a license in the header, no? 
> > > > Quoting Woodruff, Robert J : > > Subject: RE: Request for assistance from DWG > > > > I guess the question is, should we be taking code that is GPL only from > > others and including it in our code base ? I also understand that it > > might be pretty difficult, if not impossible to code some backport > > without using some of the GPL only code from a newer kernel and > > porting it back to an older kernel. > > > > What do people think ? > > > > If we have to have some GPL only files for backport and such > > can we do that within openfabrics ? Probably a question for Bill Boas, > > and the rest of the promoters. > > > > Bill ?? > > > > > > and/or can we just isolate the GPL only code from GPL/BSD code so > > that people will know what is GPL/BSD and what is truely GPL only. > > > > -----Original Message----- > > From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] > > Sent: Thursday, January 11, 2007 2:56 PM > > To: Woodruff, Robert J > > Cc: Chet Mehta; Steve Wise; Betsy Zeller; Tziporet Koren; > > vlad at mellanox.co.il; Bryan O'Sullivan; openib; > > openfabrics-lwg at openfabrics.org; openfabrics-ewg at openib.org > > Subject: Re: Request for assistance from DWG > > > > The BSD+GPL can only apply to code developed by us. > > Backport headers, list.h and iproute2 are not our code and can not be > > "corrected". > > > > > > Quoting Woodruff, Robert J : > > Subject: Request for assistance from DWG > > > > I have added the appropriate maintainers to this thread > > and sending it to the open fabrics email lists. > > > > Tziporet for OFED (mutex-backport.h file used by OFED) > > Bryan for ipath > > Vladimir for ipoib tools > > Steve for cxgb3 driver > > > > All please review the list of GPL only files that are (or were in your > > components before > > we moved to git) and please correct the code so that meets the BSD+GPL > > requirements > > that we all agreed to when we joined openfabrics, if you have not done > > so already. 
> > > > Thanks > > woody > > > > > > > > ________________________________ > > > > From: Chet Mehta [mailto:chetm at us.ibm.com] > > Sent: Thursday, January 11, 2007 2:17 PM > > To: Woodruff, Robert J > > Cc: openfabrics-lwg at openfabrics.org > > Subject: RE: Request for assistance from DWG > > > > > > > > Bob, > > > > Thanks for the reply. Attached below is a list of files that we believe > > have only a GPL license (the list was generated on 12/5/06). As I > > mentioned in my last note, the search wasn't exhaustive so there may be > > others. During the time we also noticed that there were some > > utility-type files (e.g. files used for extraction, make or build) that > > were GPL only. I'm assuming the DWG will decide if these should be dual > > licensed or be placed on the exception list. Lastly I suspect there are > > files that only have a BSD license - i.e. no GPL. While personally > > that's less worrisome, going strictly by the Bylaws I would suspect > > those should be corrected also. > > > > Thanks for your help to get this corrected! 
> > > > ################################################################ > > #mutex-backport.h (Cisco Systems) > > ################################################################ > > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include > > /linux/mutex-backport.h > > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include > > /linux/mutex-backport.h > > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc4/linux-kernel/infiniband/include > > /linux/.svn/text-base/mutex-backport.h.svn-base > > ./gen2/tags/openib-1.0-rc4/ofed/tags/rc3/linux-kernel/infiniband/include > > /linux/.svn/text-base/mutex-backport.h.svn-base > > ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include > > /linux/mutex-backport.h > > ./gen2/tags/openib-1.0-rc2/ibed/tags/rc3/linux-kernel/infiniband/include > > /linux/.svn/text-base/mutex-backport.h.svn-base > > ./gen2/trunk/src/linux-kernel/infiniband/include/linux/mutex-backport.h > > ./gen2/trunk/src/linux-kernel/infiniband/include/linux/.svn/text-base/mu > > tex-backport.h.svn-base > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i > > nclude/linux/mutex-backport.h > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/i > > nclude/linux/.svn/text-base/mutex-backport.h.svn-base > > ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/mutex-ba > > ckport.h > > ./gen2/branches/iwarp/src/linux-kernel/infiniband/include/linux/.svn/tex > > t-base/mutex-backport.h.svn-base > > ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ > > mutex-backport.h > > ./gen2/branches/1.0/ofed/tags/rc4/linux-kernel/infiniband/include/linux/ > > .svn/text-base/mutex-backport.h.svn-base > > ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ > > mutex-backport.h > > ./gen2/branches/1.0/ofed/tags/rc3/linux-kernel/infiniband/include/linux/ > > .svn/text-base/mutex-backport.h.svn-base > > 
./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ > > mutex-backport.h > > ./gen2/branches/1.0/ofed/tags/1.0/linux-kernel/infiniband/include/linux/ > > .svn/text-base/mutex-backport.h.svn-base > > ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ > > mutex-backport.h > > ./gen2/branches/1.0/ofed/tags/rc6/linux-kernel/infiniband/include/linux/ > > .svn/text-base/mutex-backport.h.svn-base > > ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ > > mutex-backport.h > > ./gen2/branches/1.0/ofed/tags/rc5/linux-kernel/infiniband/include/linux/ > > .svn/text-base/mutex-backport.h.svn-base > > > > ################################################################ > > #ipath_eth.c (PathScale) > > ################################################################ > > ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/ipath_et > > h.c > > ./gen2/tags/openib-1.0-rc1/src/linux-kernel/infiniband/hw/ipath/.svn/tex > > t-base/ipath_eth.c.svn-base > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > > w/ipath/ipath_eth.c > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > > w/ipath/.svn/text-base/ipath_eth.c.svn-base > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > > w/ipath/ipath_ether/ipath_eth.c > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/infiniband/h > > w/ipath/ipath_ether/.svn/text-base/ipath_eth.c.svn-base > > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_eth.c > > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/.svn/text-bas > > e/ipath_eth.c.svn-base > > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/i > > path_eth.c > > ./gen2/branches/iwarp/src/linux-kernel/infiniband/hw/ipath/ipath_ether/. 
> > svn/text-base/ipath_eth.c.svn-base > > > > ################################################################ > > #lnstat.c Linux network stats > > # Copyright (C) 2004 by Harald Welte > > # Development of this code was funded by Astaro AG, > > http://www.astaro.com/ > > # Based on original concept and ideas from predecessor rtstat.c: > > # Copyright 2001 by Robert Olsson > > # Uppsala University, Sweden > > ################################################################ > > ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/lnstat.c > > ./gen2/trunk/src/userspace/ipoibtools/iproute2/misc/.svn/text-base/lnsta > > t.c.svn-base > > > > ################################################################ > > #list.h ( kazutomo at mcs.anl.gov) > > ################################################################ > > ./gen2/trunk/src/userspace/ipoibtools/list.h > > ./gen2/trunk/src/userspace/ipoibtools/.svn/text-base/list.h.svn-base > > > > ################################################################ > > #cxgb3_main.c (Chelsio Communications) > > ################################################################ > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/cx > > gb3_main.c > > ./gen2/branches/iwarp/tags/iwarp-2.6.17-stable/linux-kernel/net/cxgb3/.s > > vn/text-base/cxgb3_main.c.svn-base > > ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/cxgb3_main.c > > ./gen2/branches/iwarp/src/linux-kernel/net/cxgb3/.svn/text-base/cxgb3_ma > > in.c.svn-base > > > > :Chet. 
> > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > -- > > MST > > > > > > -- > > MST > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mshefty at ichips.intel.com Mon Jan 15 08:27:13 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 15 Jan 2007 08:27:13 -0800 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: <20070113190100.GB13017@mellanox.co.il> References: <20070113190100.GB13017@mellanox.co.il> Message-ID: <45ABAB61.8010408@ichips.intel.com> > Sean, do you plan to post a fixed patch? > We really need this in 2.6.20 I think. I do plan on fixing this, but didn't think it was urgent, given that the problem has gone this long without a fix. Wouldn't this only cause a problem if the target crashes between sending the MRA and a REP? 
- Sean From mshefty at ichips.intel.com Mon Jan 15 08:32:57 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 15 Jan 2007 08:32:57 -0800 Subject: [openib-general] ipoib ipv6 multicast joins, was: multicast code/merge status In-Reply-To: <45A9DF69.40700@voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> <45A6D245.1020106@ichips.intel.com> <20070112001956.GC17999@mellanox.co.il> <45A6DB7F.3060800@ichips.intel.com> <45A9DF69.40700@voltaire.com> Message-ID: <45ABACB9.9030901@ichips.intel.com> > Can you explain how this relates to your multicast changes? the IPoIB > send-only-full-member-join hack was there before your patch and stayed > there after your patch... and how come a change in the multicast code > can cause the error stream to be finite... have you moved the retry > mechanism from the ib_sa consumer to the ib_sa mcast engine? There was a bug in the ib_sa multicast engine handling failed joins, which had it retry forever. (Basically, the response was not being matched with the request. So the response was discarded, and the request was retried.) I had fixed this in svn, but lost the patch moving over to git. - Sean From rdreier at cisco.com Mon Jan 15 09:09:21 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 15 Jan 2007 09:09:21 -0800 Subject: [openib-general] does the libibverbs support static linkage? 
In-Reply-To: <45ABA129.9080000@dev.mellanox.co.il> (Dotan Barak's message of "Mon, 15 Jan 2007 17:43:37 +0200") References: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> <45A0B02E.1020105@dev.mellanox.co.il> <45A1EC8B.2050106@dev.mellanox.co.il> <45ABA129.9080000@dev.mellanox.co.il> Message-ID: > Did you push the static library link support in the last fixes/changes > that you made? Yes, it should be working better now. From jlentini at netapp.com Mon Jan 15 09:33:42 2007 From: jlentini at netapp.com (James Lentini) Date: Mon, 15 Jan 2007 12:33:42 -0500 (EST) Subject: [openib-general] Request for assistance from DWG In-Reply-To: <1168876779.10308.5.camel@linux-q667.site> References: <20070112001719.GB17999@mellanox.co.il> <1168876779.10308.5.camel@linux-q667.site> Message-ID: On Mon, 15 Jan 2007, Steve WIse wrote: > The ipath stuff listed below is from the old svn iwarp branch and can be > ignored. > > The cxgb3 code is an oversight. > > I've added the correct MODULE_LICENSE() macro to the ULLD going into > OFED 1.2. What else do I need to do? Do you have the standard dual license text at the top of net/cxgb3/cxgb3_main.c? I don't see the net/cxgb3 directory in your git trees. Where are the sources for it? From swise at opengridcomputing.com Mon Jan 15 09:42:20 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 11:42:20 -0600 Subject: [openib-general] Request for assistance from DWG In-Reply-To: References: <20070112001719.GB17999@mellanox.co.il> <1168876779.10308.5.camel@linux-q667.site> Message-ID: <1168882940.10308.21.camel@linux-q667.site> On Mon, 2007-01-15 at 12:33 -0500, James Lentini wrote: > > On Mon, 15 Jan 2007, Steve WIse wrote: > > > The ipath stuff listed below is from the old svn iwarp branch and can be > > ignored. > > > > The cxgb3 code is an oversight. > > > > I've added the correct MODULE_LICENSE() macro to the ULLD going into > > OFED 1.2. What else do I need to do? 
> > Do you have the standard dual license text at the top of > net/cxgb3/cxgb3_main.c? > > I don't see the net/cxgb3 directory in your git trees. Where are the > sources for it? the driver is in drivers/net/cxgb3 The top comments say 'see the license file included in the release'. Is that sufficient? From jlentini at netapp.com Mon Jan 15 09:47:54 2007 From: jlentini at netapp.com (James Lentini) Date: Mon, 15 Jan 2007 12:47:54 -0500 (EST) Subject: [openib-general] Request for assistance from DWG In-Reply-To: <1168882940.10308.21.camel@linux-q667.site> References: <20070112001719.GB17999@mellanox.co.il> <1168876779.10308.5.camel@linux-q667.site> <1168882940.10308.21.camel@linux-q667.site> Message-ID: On Mon, 15 Jan 2007, Steve WIse wrote: > On Mon, 2007-01-15 at 12:33 -0500, James Lentini wrote: > > > > On Mon, 15 Jan 2007, Steve WIse wrote: > > > > > The ipath stuff listed below is from the old svn iwarp branch and can be > > > ignored. > > > > > > The cxgb3 code is an oversight. > > > > > > I've added the correct MODULE_LICENSE() macro to the ULLD going into > > > OFED 1.2. What else do I need to do? > > > > Do you have the standard dual license text at the top of > > net/cxgb3/cxgb3_main.c? > > > > I don't see the net/cxgb3 directory in your git trees. Where are the > > sources for it? > > > the driver is in drivers/net/cxgb3 > > > The top comments say 'see the license file included in the release'. Is > that sufficient? I'm not a lawyer, but referencing an external license file is common practice. Where is the LICENSE file? I don't see it in the drivers/net/cxgb3 directory. 
From swise at opengridcomputing.com Mon Jan 15 10:13:50 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 12:13:50 -0600 Subject: [openib-general] Request for assistance from DWG In-Reply-To: References: <20070112001719.GB17999@mellanox.co.il> <1168876779.10308.5.camel@linux-q667.site> <1168882940.10308.21.camel@linux-q667.site> Message-ID: <1168884830.10308.24.camel@linux-q667.site> I would assume the ofed pkg would have the license file. On Mon, 2007-01-15 at 12:47 -0500, James Lentini wrote: > > On Mon, 15 Jan 2007, Steve WIse wrote: > > > On Mon, 2007-01-15 at 12:33 -0500, James Lentini wrote: > > > > > > On Mon, 15 Jan 2007, Steve WIse wrote: > > > > > > > The ipath stuff listed below is from the old svn iwarp branch and can be > > > > ignored. > > > > > > > > The cxgb3 code is an oversight. > > > > > > > > I've added the correct MODULE_LICENSE() macro to the ULLD going into > > > > OFED 1.2. What else do I need to do? > > > > > > Do you have the standard dual license text at the top of > > > net/cxgb3/cxgb3_main.c? > > > > > > I don't see the net/cxgb3 directory in your git trees. Where are the > > > sources for it? > > > > > > the driver is in drivers/net/cxgb3 > > > > > > The top comments say 'see the license file included in the release'. Is > > that sufficient? > > I'm not a lawyer, but referencing an external license file is common > practice. > > Where is the LICENSE file? I don't see it in the drivers/net/cxgb3 > directory. 
From steve.apo at googlemail.com Mon Jan 15 10:47:43 2007 From: steve.apo at googlemail.com (Steven Wooding) Date: Mon, 15 Jan 2007 18:47:43 +0000 Subject: [openib-general] WC Error code question In-Reply-To: <309a667c0701142135o69788be9g1e002498d4303e93@mail.gmail.com> References: <2cfcf21e0701110514t3a8b5954s760f26a0ab0f7321@mail.gmail.com> <45A6419E.5000701@dev.mellanox.co.il> <309a667c0701142135o69788be9g1e002498d4303e93@mail.gmail.com> Message-ID: <2cfcf21e0701151047o4df2379esc806df1b4b20877d@mail.gmail.com> Sorry guys. Fixed the problem. I gave it an incorrect pointer for the MR. Thanks for replying. Steve. On 15/01/07, Devesh Sharma wrote: > > On which side you are getting this error? > > If its at initiator side then its bad lkey if its on other side then > you have bad rkey. > > On 1/11/07, Dotan Barak wrote: > > Steven Wooding wrote: > > > Hi, > > > > > > I'm getting an IBV_WC_LOC_ACCESS_ERR when getting a work completion > > > item related to an RDMA with ImmData transfer. > > > > > > What does this error actually mean? > > > > > > Thanks, > > > > > > Steve. > > in which side do you get this completion? > > My guess is that you are trying to send an RDMA Write with immediate > > which has bad rkey > > (rkey which doesn't match the remote address), and you get this status > > at the responder side. > > > > > > If you still have this problem, i will need some more info .. > > > > Dotan > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Mon Jan 15 10:57:51 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 15 Jan 2007 20:57:51 +0200 Subject: [openib-general] Request for assistance from DWG In-Reply-To: <1168884830.10308.24.camel@linux-q667.site> References: <20070112001719.GB17999@mellanox.co.il> <1168876779.10308.5.camel@linux-q667.site> <1168882940.10308.21.camel@linux-q667.site> <1168884830.10308.24.camel@linux-q667.site> Message-ID: <20070115185751.GC22940@mellanox.co.il> > On Mon, 2007-01-15 at 12:47 -0500, James Lentini wrote: > > > > On Mon, 15 Jan 2007, Steve WIse wrote: > > > > > On Mon, 2007-01-15 at 12:33 -0500, James Lentini wrote: > > > > > > > > On Mon, 15 Jan 2007, Steve WIse wrote: > > > > > > > > > The ipath stuff listed below is from the old svn iwarp branch and can be > > > > > ignored. > > > > > > > > > > The cxgb3 code is an oversight. > > > > > > > > > > I've added the correct MODULE_LICENSE() macro to the ULLD going into > > > > > OFED 1.2. What else do I need to do? > > > > > > > > Do you have the standard dual license text at the top of > > > > net/cxgb3/cxgb3_main.c? > > > > > > > > I don't see the net/cxgb3 directory in your git trees. Where are the > > > > sources for it? > > > > > > > > > the driver is in drivers/net/cxgb3 > > > > > > > > > The top comments say 'see the license file included in the release'. Is > > > that sufficient? > > > > I'm not a lawyer, but referencing an external license file is common > > practice. > > > > Where is the LICENSE file? I don't see it in the drivers/net/cxgb3 > > directory. > > Quoting Steve WIse : > Subject: Re: Request for assistance from DWG > > I would assume the ofed pkg would have the license file. But there's still the question of what license does chelsio, as a copyright holder, release it under. I am not a lawyer, but license terms *should* be clear to nonlawyers, too. Steve, please just stick the actual licensing terms in each file, there's no real reason I can see to save this bit of space. 
-- MST From mst at mellanox.co.il Mon Jan 15 11:00:11 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 15 Jan 2007 21:00:11 +0200 Subject: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMA Support In-Reply-To: <1168875224.7021.32.camel@vladsk-laptop> References: <20070115135635.31378.9641.stgit@dell3.ogc.int> <1168875224.7021.32.camel@vladsk-laptop> Message-ID: <20070115190011.GD22940@mellanox.co.il> But, can the license in files be fixed first pls? Quoting Vladimir Sokolovsky : Subject: Re: [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMA Support Hi Steve, We need to pull 2.6.20-rc4 to ofed_1_2 first and then will pull your tree. This will be done tomorrow, I hope. Regards, Vladimir On Mon, 2007-01-15 at 07:56 -0600, Steve Wise wrote: > Michael/Vlad, > > Here is version 4. > > Changes since version 3: > > - No need to alter the genalloc.c file. > > - The patch to add genalloc.o to ib_core.ko was incorrect. It was adding > genalloc.o to ib_core.ko, but also building a genalloc.ko. > > > In addition to this patch series, the Chelsio driver code needs to be > pulled from > > git://staging.openfabrics.org/~swise/cxgb3.git for-ofed_1_2 > > I've tested this on 2.6.20-rc4. If you're happy with the changes, I > ask that you pull these patches in and pull the T3 drivers into vlad's > ofed_1_2 git tree. That will give us a base package for Chelsio's > drivers. Then I'll post additional patches for the backports to the > supported distros as I get them implemented and tested. > > How's that sound? If you would rather a patch for the chelsio drivers, > lemme know and I'll email it to you directly. 
> > Signed-off-by: Steve Wise > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From swise at opengridcomputing.com Mon Jan 15 11:01:58 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 13:01:58 -0600 Subject: [openib-general] Request for assistance from DWG In-Reply-To: <20070115185751.GC22940@mellanox.co.il> References: <20070112001719.GB17999@mellanox.co.il> <1168876779.10308.5.camel@linux-q667.site> <1168882940.10308.21.camel@linux-q667.site> <1168884830.10308.24.camel@linux-q667.site> <20070115185751.GC22940@mellanox.co.il> Message-ID: <1168887718.10308.28.camel@linux-q667.site> > > Quoting Steve WIse : > > Subject: Re: Request for assistance from DWG > > > > I would assume the ofed pkg would have the license file. > > > But there's still the question of what license does chelsio, as a copyright holder, > release it under. I am not a lawyer, but license terms *should* be clear to > nonlawyers, too. > > Steve, please just stick the actual licensing terms in each file, there's no real > reason I can see to save this bit of space. > alright. Steve. From mst at mellanox.co.il Mon Jan 15 11:06:01 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 15 Jan 2007 21:06:01 +0200 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: <45ABAB61.8010408@ichips.intel.com> References: <20070113190100.GB13017@mellanox.co.il> <45ABAB61.8010408@ichips.intel.com> Message-ID: <20070115190601.GE22940@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: [PATCH] 2.6.20 ib_cm: limit cm message timeouts > > > Sean, do you plan to post a fixed patch? > > We really need this in 2.6.20 I think. > > I do plan on fixing this, but didn't think it was urgent, given that the problem > has gone this long without a fix. OFED 1.1 included a workaround, which solved the problem for some users. I think it's the high availability setups that suffer from it the most. Others see a failure anyway. > Wouldn't this only cause a problem if the target crashes between sending the MRA > and a REP? Or in case of e.g. port failure. Which happens quite a lot when one does high-availability testing, but it can happen for everyone. -- MST From boris at mellanox.com Mon Jan 15 11:24:23 2007 From: boris at mellanox.com (Boris Shpolyansky) Date: Mon, 15 Jan 2007 11:24:23 -0800 Subject: [openib-general] buiild.sh Message-ID: <1E3DCD1C63492545881FACB6063A57C16E4317@mtiexch01.mti.com> Hi Michael, Hope these instructions would help you: 1. Open OFED-1.1 tar ball tar xvfz OFED-1.1.tgz 2. Apply build.patch (fixing build.sh script - attached) cd OFED-1.1 patch -p0 < build.patch 3. cd OFED-1.1/SOURCES 4. Open openib-1.1.tgz tar xvfz openib-1.1.tgz 5. Put your own patch file under appropriate dir, like: openib-1.1/kernel_patches/fixes/ 6. Go back to OFED-1.1 dir cd .. 7. Remove original source RPM: rm SRPMS/openib-1.1-0.src.rpm 8. Rebuild your binary RPM(s) using build.sh. It should build new source RPM first. (you can use "-c" option for unattended build if you have current ofed.conf file up to date or do an interactive build) ./build.sh [-c ofed.conf] 9.
Re-install the RPMs using install.sh Regards, Boris Shpolyansky Application Engineer Mellanox Technologies Inc. 2900 Stender Way Santa Clara, CA 95054 Tel.: (408) 916 0014 Fax: (408) 970 3403 Cell: (408) 834 9365 www.mellanox.com -----Original Message----- From: openib-general-bounces at openib.org [mailto:openib-general-bounces at openib.org] On Behalf Of Michael Arndt Sent: Sunday, January 14, 2007 3:07 AM To: OpenIB Mailinglist Subject: [openib-general] buiild.sh Hi, I have some simple changes like printk(KERN_INFO "...") messages in the mad.c file to get more informations. But it seems to be that the build.sh script uses the openib-1.1.src.rpm to build the rpms. Is there a simple way to rebuild this src.rpm with the new source code and doing all the patches? I have read something about add_patch2ofed? How would that work? Thanks Micha _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- A non-text attachment was scrubbed... 
Name: build.patch Type: application/octet-stream Size: 694 bytes Desc: build.patch URL: From halr at voltaire.com Mon Jan 15 11:30:08 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 15 Jan 2007 14:30:08 -0500 Subject: [openib-general] [PATCH] OpenSM console: Add run-time switch for console [off|local|socket] Message-ID: <1168889401.32185.16214.camel@hal.voltaire.com> OpenSM console: Add run-time switch for console [off|local|socket] Signed-off-by: Ira Weiny Signed-off-by: Hal Rosenstock diff --git a/osm/include/opensm/osm_subnet.h b/osm/include/opensm/osm_subnet.h index c9b04eb..c256621 100644 --- a/osm/include/opensm/osm_subnet.h +++ b/osm/include/opensm/osm_subnet.h @@ -265,7 +265,7 @@ typedef struct _osm_subn_opt boolean_t no_partition_enforcement; boolean_t no_qos; boolean_t accum_log_file; - boolean_t console; + char * console; uint16_t console_port; cl_map_t port_prof_ignore_guids; boolean_t port_profile_switch_nodes; diff --git a/osm/man/opensm.8 b/osm/man/opensm.8 index e388f6d..15c1411 100644 --- a/osm/man/opensm.8 +++ b/osm/man/opensm.8 @@ -128,11 +128,13 @@ SMPs. Without -maxsmps, OpenSM defaults to a maximum of 4 outstanding SMPs. .TP -\fB\-console\fR -This option brings up the OpenSM console. +\fB\-console [off|local|socket]\fR +This option brings up the OpenSM console (default off). +Note that the socket option will only be available if OpenSM +--enable-console-socket. .TP \fB\-console-port\fR -Specify an alternate telnet port for the console (default 10000). +Specify an alternate telnet port for the socket console (default 10000). Note that this option only appears if OpenSM was built with --enable-console-socket. 
.TP diff --git a/osm/opensm/main.c b/osm/opensm/main.c index 8dcbfd4..ff517eb 100644 --- a/osm/opensm/main.c +++ b/osm/opensm/main.c @@ -228,8 +228,12 @@ show_usage(void) " SMPs.\n" " Without -maxsmps, OpenSM defaults to a maximum of\n" " 4 outstanding SMPs.\n\n" ); - printf( "-console\n" - " This option brings up the OpenSM console.\n\n" ); +#ifdef ENABLE_OSM_CONSOLE_SOCKET + printf( "-console [off|local|socket]\n" +#else + printf( "-console [off|local]\n" +#endif + " This option activates the OpenSM console. (default off)\n\n"); #ifdef ENABLE_OSM_CONSOLE_SOCKET printf( "-console-port \n" " Specify an alternate telnet port for the console (default %d).\n\n", @@ -581,7 +585,7 @@ main( { "no_part_enforce",0,NULL, 'N'}, { "qos", 0, NULL, 'Q'}, { "maxsmps", 1, NULL, 'n'}, - { "console", 0, NULL, 'q'}, + { "console", 1, NULL, 'q'}, { "V", 0, NULL, 'V'}, { "help", 0, NULL, 'h'}, { "once", 0, NULL, 'o'}, @@ -696,8 +700,17 @@ main( /* * OpenSM interactive console */ - opt.console = TRUE; - printf(" Enabling OpenSM interactive console\n"); + if (strcmp(optarg, "off") == 0) { + opt.console = "off"; + } else if (strcmp(optarg, "local") == 0) { + opt.console = "local"; +#ifdef ENABLE_OSM_CONSOLE_SOCKET + } else if (strcmp(optarg, "socket") == 0) { + opt.console = "socket"; +#endif + } else { + printf("-console %s option not understood\n", optarg); + } break; #ifdef ENABLE_OSM_CONSOLE_SOCKET @@ -964,7 +977,7 @@ main( Sit here forever */ while( !osm_exit_flag ) { - if (opt.console) + if (strcmp(opt.console, "off") != 0) osm_console(&osm); else cl_thread_suspend( 10000 ); diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c index 7b6925a..9610e21 100644 --- a/osm/opensm/osm_console.c +++ b/osm/opensm/osm_console.c @@ -351,7 +351,7 @@ void osm_console_init(osm_subn_opt_t *op { p_osm->console.socket = -1; /* set up the file descriptors for the console */ - if (opt->console) { + if (strcmp(opt->console, "local") == 0) { p_osm->console.in = stdin; p_osm->console.out = 
stdout; p_osm->console.in_fd = fileno(stdin); @@ -359,7 +359,7 @@ void osm_console_init(osm_subn_opt_t *op osm_console_prompt(p_osm->console.out); #ifdef ENABLE_OSM_CONSOLE_SOCKET - } else { + } else if (strcmp(opt->console, "socket") == 0) { struct sockaddr_in sin; int optval = 1; @@ -393,7 +393,6 @@ void osm_console_init(osm_subn_opt_t *op p_osm->console.out = NULL; p_osm->console.in_fd = -1; p_osm->console.out_fd = -1; - opt->console = 1; osm_log(&(p_osm->log), OSM_LOG_INFO, "osm_console_init: Console listening on port %d\n", opt->console_port); #endif diff --git a/osm/opensm/osm_subnet.c b/osm/opensm/osm_subnet.c index e075091..ed1bdbb 100644 --- a/osm/opensm/osm_subnet.c +++ b/osm/opensm/osm_subnet.c @@ -445,7 +445,7 @@ osm_subn_set_default_opt( p_opt->m_key_lease_period = 0; p_opt->sweep_interval = OSM_DEFAULT_SWEEP_INTERVAL_SECS; p_opt->max_wire_smps = OSM_DEFAULT_SMP_MAX_ON_WIRE; - p_opt->console = FALSE; + p_opt->console = "off"; p_opt->console_port = OSM_DEFAULT_CONSOLE_PORT; p_opt->transaction_timeout = OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; /* by default we will consider waiting for 50x transaction timeout normal */ @@ -850,6 +850,10 @@ osm_subn_parse_conf_file( "max_wire_smps", p_key, p_val, &p_opts->max_wire_smps); + __osm_subn_opts_unpack_charp( + "console", + p_key, p_val, &p_opts->console); + __osm_subn_opts_unpack_uint16( "console_port", p_key, p_val, &p_opts->console_port); @@ -1268,7 +1272,9 @@ osm_subn_write_conf_file( "disable_multicast %s\n\n" "# If TRUE opensm will exit on fatal initialization issues\n" "exit_on_fatal %s\n\n" - "# Telnet port for console (default is 10000)\n" + "# console [off|local|socket]\n" + "console %s\n\n" + "# Telnet port for console (default %d)\n" "console_port %d\n\n", p_opts->log_flags, p_opts->force_log_flush ? "TRUE" : "FALSE", @@ -1280,7 +1286,8 @@ osm_subn_write_conf_file( p_opts->no_multicast_option ? "TRUE" : "FALSE", p_opts->disable_multicast ? "TRUE" : "FALSE", p_opts->exit_on_fatal ? 
"TRUE" : "FALSE", - p_opts->console_port + p_opts->console, + OSM_DEFAULT_CONSOLE_PORT, p_opts->console_port ); fprintf( From swise at opengridcomputing.com Mon Jan 15 11:39:37 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 13:39:37 -0600 Subject: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMA Support In-Reply-To: <20070115190011.GD22940@mellanox.co.il> References: <20070115135635.31378.9641.stgit@dell3.ogc.int> <1168875224.7021.32.camel@vladsk-laptop> <20070115190011.GD22940@mellanox.co.il> Message-ID: <1168889977.10308.31.camel@linux-q667.site> On Mon, 2007-01-15 at 21:00 +0200, Michael S. Tsirkin wrote: > But, can the license in files be fixed first pls? > Done. commit 0ce8edf32779b0915b6cf4b2ddbd1c5ef004e4b4 Author: Steve Wise Date: Mon Jan 15 13:26:23 2007 -0600 Add dual bsd/gpl license text to cxgb3 files This is needed for the OFED 1.2 distribution. Signed-off-by: Steve Wise diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 8902007..63048ca 100755 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -1,12 +1,33 @@ /* - * This file is part of the Chelsio T3 Ethernet driver for Linux. + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. 
+ * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ /* This file should not be included directly. Include common.h instead. */ diff --git a/drivers/net/cxgb3/ael1002.c b/drivers/net/cxgb3/ael1002.c index 93a90d8..e9f3244 100755 --- a/drivers/net/cxgb3/ael1002.c +++ b/drivers/net/cxgb3/ael1002.c @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver. + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the LICENSE file included in this - * release for licensing terms and conditions. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #include "common.h" #include "regs.h" diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h index 60a979b..4b37036 100755 --- a/drivers/net/cxgb3/common.h +++ b/drivers/net/cxgb3/common.h @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver. + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #ifndef __CHELSIO_COMMON_H #define __CHELSIO_COMMON_H diff --git a/drivers/net/cxgb3/cxgb3_ctl_defs.h b/drivers/net/cxgb3/cxgb3_ctl_defs.h index 0fdc365..b8724a3 100755 --- a/drivers/net/cxgb3/cxgb3_ctl_defs.h +++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h @@ -1,12 +1,34 @@ /* - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. 
* - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #ifndef _CXGB3_OFFLOAD_CTL_DEFS_H #define _CXGB3_OFFLOAD_CTL_DEFS_H diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h index 1ee77b2..b046491 100755 --- a/drivers/net/cxgb3/cxgb3_ioctl.h +++ b/drivers/net/cxgb3/cxgb3_ioctl.h @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver for Linux. + * Copyright (c) 2003-2006 Chelsio, Inc. 
All rights reserved. * - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/ - #ifndef __CHIOCTL_H__ #define __CHIOCTL_H__ diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 7e7ee7a..57f7e79 100755 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver for Linux. + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #include #include #include @@ -75,7 +95,7 @@ static const struct pci_device_id cxgb3_ MODULE_DESCRIPTION(DRV_DESC); MODULE_AUTHOR("Chelsio Communications"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, cxgb3_pci_tbl); diff --git a/drivers/net/cxgb3/firmware_exports.h b/drivers/net/cxgb3/firmware_exports.h index 1ceda0f..f0fb528 100755 --- a/drivers/net/cxgb3/firmware_exports.h +++ b/drivers/net/cxgb3/firmware_exports.h @@ -1,27 +1,33 @@ -/* - * ---------------------------------------------------------------------------- - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<<<<< - * ---------------------------------------------------------------------------- - * Copyright 2004 (C) Chelsio Communications, Inc. (Chelsio) +/* + * Copyright (c) 2004-2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * Chelsio Communications, Inc. owns the sole copyright to this software. - * You may not make a copy, you may not derive works herefrom, and you may - * not distribute this work to others. Other restrictions of rights may apply - * as well. This is unpublished, confidential information. All rights reserved. 
- * This software contains confidential information and trade secrets of Chelsio - * Communications, Inc. Use, disclosure, or reproduction is prohibited without - * the prior express written permission of Chelsio Communications, Inc. - * ---------------------------------------------------------------------------- - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Warranty <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - * ---------------------------------------------------------------------------- - * CHELSIO MAKES NO WARRANTY OF ANY KIND WITH REGARD TO THE USE OF THIS - * SOFTWARE, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * ---------------------------------------------------------------------------- + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * This is the firmware_exports.h header file, firmware interface defines. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * - * Written January 2005 by felix marti (felix at chelsio.com) + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/ #ifndef _FIRMWARE_EXPORTS_H_ #define _FIRMWARE_EXPORTS_H_ diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c index 9997138..6a3e5a2 100755 --- a/drivers/net/cxgb3/l2t.c +++ b/drivers/net/cxgb3/l2t.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/net/cxgb3/l2t.h b/drivers/net/cxgb3/l2t.h index 51a9c1f..343dea9 100755 --- a/drivers/net/cxgb3/l2t.h +++ b/drivers/net/cxgb3/l2t.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/net/cxgb3/mc5.c b/drivers/net/cxgb3/mc5.c index 44fa9ea..6d8b02d 100755 --- a/drivers/net/cxgb3/mc5.c +++ b/drivers/net/cxgb3/mc5.c @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver. + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. 
+ * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #include "common.h" #include "regs.h" diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 56bb19c..e3676e1 100755 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver. - * - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #include #include #include diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h index 6650b04..734dc7e 100755 --- a/drivers/net/cxgb3/t3_cpl.h +++ b/drivers/net/cxgb3/t3_cpl.h @@ -1,16 +1,34 @@ /* - * Definitions of the CPL 5 commands and status codes. + * Copyright (c) 2004-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2004-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * Written by Dimitris Michailidis (dm at chelsio.com) + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #ifndef T3_CPL_H #define T3_CPL_H diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c index 35a7fab..14ea6b9 100755 --- a/drivers/net/cxgb3/t3_hw.c +++ b/drivers/net/cxgb3/t3_hw.c @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver. - * - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #include "common.h" #include "regs.h" #include "sge_defs.h" diff --git a/drivers/net/cxgb3/t3cdev.h b/drivers/net/cxgb3/t3cdev.h index 359584e..3ea4c47 100755 --- a/drivers/net/cxgb3/t3cdev.h +++ b/drivers/net/cxgb3/t3cdev.h @@ -1,5 +1,6 @@ /* - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. 
+ * Copyright (C) 2006 Chelsio Communications. All rights reserved. + * Copyright (C) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h index 1413ea3..bafc7d8 100755 --- a/drivers/net/cxgb3/version.h +++ b/drivers/net/cxgb3/version.h @@ -1,19 +1,34 @@ -/***************************************************************************** - * * - * File: * - * version.h * - * * - * Description: * - * Chelsio driver version defines. * - * * - * Copyright (c) 2003 - 2006 Chelsio Communications, Inc. * - * All rights reserved. * - * * - * Maintainers: maintainers at chelsio.com * - * * - * http://www.chelsio.com * - * * - ****************************************************************************/ +/* + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ /* $Date: 2006/10/31 18:57:51 $ $RCSfile: version.h,v $ $Revision: 1.3 $ */ #ifndef __CHELSIO_VERSION_H #define __CHELSIO_VERSION_H diff --git a/drivers/net/cxgb3/vsc8211.c b/drivers/net/cxgb3/vsc8211.c index 6a0a815..7d101ae 100755 --- a/drivers/net/cxgb3/vsc8211.c +++ b/drivers/net/cxgb3/vsc8211.c @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver. + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #include "common.h" /* VSC8211 PHY specific registers. */ diff --git a/drivers/net/cxgb3/xgmac.c b/drivers/net/cxgb3/xgmac.c index 0f209c7..bcb7427 100755 --- a/drivers/net/cxgb3/xgmac.c +++ b/drivers/net/cxgb3/xgmac.c @@ -1,14 +1,34 @@ /* - * This file is part of the Chelsio T3 Ethernet driver. + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. * - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this - * release for licensing terms and conditions. 
+ * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #include "common.h" #include "regs.h" From tziporet at dev.mellanox.co.il Mon Jan 15 11:58:08 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 15 Jan 2007 21:58:08 +0200 Subject: [openib-general] [openfabrics-ewg] Request for assistance from DWG In-Reply-To: <1168887718.10308.28.camel@linux-q667.site> References: <20070112001719.GB17999@mellanox.co.il> <1168876779.10308.5.camel@linux-q667.site> <1168882940.10308.21.camel@linux-q667.site> <1168884830.10308.24.camel@linux-q667.site> <20070115185751.GC22940@mellanox.co.il> <1168887718.10308.28.camel@linux-q667.site> Message-ID: <45ABDCD0.6060301@dev.mellanox.co.il> Steve WIse wrote: >>> I would assume the ofed pkg would have the license file. >>> > Hi Steve, I think each file should have the legal section at the beginning like all other OFA components. 
Tziporet From swise at opengridcomputing.com Mon Jan 15 12:11:12 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 14:11:12 -0600 Subject: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMA Support In-Reply-To: <1168889977.10308.31.camel@linux-q667.site> References: <20070115135635.31378.9641.stgit@dell3.ogc.int> <1168875224.7021.32.camel@vladsk-laptop> <20070115190011.GD22940@mellanox.co.il> <1168889977.10308.31.camel@linux-q667.site> Message-ID: <1168891873.10308.49.camel@linux-q667.site> Er, hold on. It's 2007. I need to redo this. Don't pull yet until I reply again. Sorry. Steve. On Mon, 2007-01-15 at 13:39 -0600, Steve WIse wrote: > On Mon, 2007-01-15 at 21:00 +0200, Michael S. Tsirkin wrote: > > But, can the license in files be fixed first pls? > > > > Done. > > > > > commit 0ce8edf32779b0915b6cf4b2ddbd1c5ef004e4b4 > Author: Steve Wise > Date: Mon Jan 15 13:26:23 2007 -0600 > > Add dual bsd/gpl license text to cxgb3 files > > This is needed for the OFED 1.2 distribution. > > Signed-off-by: Steve Wise > > diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h > index 8902007..63048ca 100755 > --- a/drivers/net/cxgb3/adapter.h > +++ b/drivers/net/cxgb3/adapter.h > @@ -1,12 +1,33 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > > /* This file should not be included directly. Include common.h instead. */ > diff --git a/drivers/net/cxgb3/ael1002.c b/drivers/net/cxgb3/ael1002.c > index 93a90d8..e9f3244 100755 > --- a/drivers/net/cxgb3/ael1002.c > +++ b/drivers/net/cxgb3/ael1002.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. 
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > #include "regs.h" > > diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h > index 60a979b..4b37036 100755 > --- a/drivers/net/cxgb3/common.h > +++ b/drivers/net/cxgb3/common.h > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. 
> * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. 
> */ > - > #ifndef __CHELSIO_COMMON_H > #define __CHELSIO_COMMON_H > > diff --git a/drivers/net/cxgb3/cxgb3_ctl_defs.h b/drivers/net/cxgb3/cxgb3_ctl_defs.h > index 0fdc365..b8724a3 100755 > --- a/drivers/net/cxgb3/cxgb3_ctl_defs.h > +++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h > @@ -1,12 +1,34 @@ > /* > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #ifndef _CXGB3_OFFLOAD_CTL_DEFS_H > #define _CXGB3_OFFLOAD_CTL_DEFS_H > > diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h > index 1ee77b2..b046491 100755 > --- a/drivers/net/cxgb3/cxgb3_ioctl.h > +++ b/drivers/net/cxgb3/cxgb3_ioctl.h > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #ifndef __CHIOCTL_H__ > #define __CHIOCTL_H__ > > diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > index 7e7ee7a..57f7e79 100755 > --- a/drivers/net/cxgb3/cxgb3_main.c > +++ b/drivers/net/cxgb3/cxgb3_main.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. 
> + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include > #include > #include > @@ -75,7 +95,7 @@ static const struct pci_device_id cxgb3_ > > MODULE_DESCRIPTION(DRV_DESC); > MODULE_AUTHOR("Chelsio Communications"); > -MODULE_LICENSE("GPL"); > +MODULE_LICENSE("Dual BSD/GPL"); > MODULE_VERSION(DRV_VERSION); > MODULE_DEVICE_TABLE(pci, cxgb3_pci_tbl); > > diff --git a/drivers/net/cxgb3/firmware_exports.h b/drivers/net/cxgb3/firmware_exports.h > index 1ceda0f..f0fb528 100755 > --- a/drivers/net/cxgb3/firmware_exports.h > +++ b/drivers/net/cxgb3/firmware_exports.h > @@ -1,27 +1,33 @@ > -/* > - * ---------------------------------------------------------------------------- > - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<<<<< > - * ---------------------------------------------------------------------------- > - * Copyright 2004 (C) Chelsio Communications, Inc. (Chelsio) > +/* > + * Copyright (c) 2004-2006 Chelsio, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. 
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > * > - * Chelsio Communications, Inc. owns the sole copyright to this software. > - * You may not make a copy, you may not derive works herefrom, and you may > - * not distribute this work to others. Other restrictions of rights may apply > - * as well. This is unpublished, confidential information. All rights reserved. > - * This software contains confidential information and trade secrets of Chelsio > - * Communications, Inc. Use, disclosure, or reproduction is prohibited without > - * the prior express written permission of Chelsio Communications, Inc. > - * ---------------------------------------------------------------------------- > - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Warranty <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< > - * ---------------------------------------------------------------------------- > - * CHELSIO MAKES NO WARRANTY OF ANY KIND WITH REGARD TO THE USE OF THIS > - * SOFTWARE, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO, THE > - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. > - * ---------------------------------------------------------------------------- > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > * > - * This is the firmware_exports.h header file, firmware interface defines. > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. 
> * > - * Written January 2005 by felix marti (felix at chelsio.com) > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > #ifndef _FIRMWARE_EXPORTS_H_ > #define _FIRMWARE_EXPORTS_H_ > diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c > index 9997138..6a3e5a2 100755 > --- a/drivers/net/cxgb3/l2t.c > +++ b/drivers/net/cxgb3/l2t.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. > * > * This software is available to you under a choice of one of two > diff --git a/drivers/net/cxgb3/l2t.h b/drivers/net/cxgb3/l2t.h > index 51a9c1f..343dea9 100755 > --- a/drivers/net/cxgb3/l2t.h > +++ b/drivers/net/cxgb3/l2t.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. > * > * This software is available to you under a choice of one of two > diff --git a/drivers/net/cxgb3/mc5.c b/drivers/net/cxgb3/mc5.c > index 44fa9ea..6d8b02d 100755 > --- a/drivers/net/cxgb3/mc5.c > +++ b/drivers/net/cxgb3/mc5.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. 
> + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > #include "regs.h" > > diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c > index 56bb19c..e3676e1 100755 > --- a/drivers/net/cxgb3/sge.c > +++ b/drivers/net/cxgb3/sge.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. 
> - * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > - * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. 
> */ > - > #include > #include > #include > diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h > index 6650b04..734dc7e 100755 > --- a/drivers/net/cxgb3/t3_cpl.h > +++ b/drivers/net/cxgb3/t3_cpl.h > @@ -1,16 +1,34 @@ > /* > - * Definitions of the CPL 5 commands and status codes. > + * Copyright (c) 2004-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2004-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * Written by Dimitris Michailidis (dm at chelsio.com) > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #ifndef T3_CPL_H > #define T3_CPL_H > > diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > index 35a7fab..14ea6b9 100755 > --- a/drivers/net/cxgb3/t3_hw.c > +++ b/drivers/net/cxgb3/t3_hw.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > - * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > - * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > #include "regs.h" > #include "sge_defs.h" > diff --git a/drivers/net/cxgb3/t3cdev.h b/drivers/net/cxgb3/t3cdev.h > index 359584e..3ea4c47 100755 > --- a/drivers/net/cxgb3/t3cdev.h > +++ b/drivers/net/cxgb3/t3cdev.h > @@ -1,5 +1,6 @@ > /* > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * Copyright (C) 2006 Chelsio Communications. All rights reserved. > + * Copyright (C) 2006 Open Grid Computing, Inc. All rights reserved. > * > * This software is available to you under a choice of one of two > * licenses. You may choose to be licensed under the terms of the GNU > diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h > index 1413ea3..bafc7d8 100755 > --- a/drivers/net/cxgb3/version.h > +++ b/drivers/net/cxgb3/version.h > @@ -1,19 +1,34 @@ > -/***************************************************************************** > - * * > - * File: * > - * version.h * > - * * > - * Description: * > - * Chelsio driver version defines. * > - * * > - * Copyright (c) 2003 - 2006 Chelsio Communications, Inc. * > - * All rights reserved. * > - * * > - * Maintainers: maintainers at chelsio.com * > - * * > - * http://www.chelsio.com * > - * * > - ****************************************************************************/ > +/* > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. 
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > /* $Date: 2006/10/31 18:57:51 $ $RCSfile: version.h,v $ $Revision: 1.3 $ */ > #ifndef __CHELSIO_VERSION_H > #define __CHELSIO_VERSION_H > diff --git a/drivers/net/cxgb3/vsc8211.c b/drivers/net/cxgb3/vsc8211.c > index 6a0a815..7d101ae 100755 > --- a/drivers/net/cxgb3/vsc8211.c > +++ b/drivers/net/cxgb3/vsc8211.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. 
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > > /* VSC8211 PHY specific registers. */ > diff --git a/drivers/net/cxgb3/xgmac.c b/drivers/net/cxgb3/xgmac.c > index 0f209c7..bcb7427 100755 > --- a/drivers/net/cxgb3/xgmac.c > +++ b/drivers/net/cxgb3/xgmac.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. 
> * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. 
> */ > - > #include "common.h" > #include "regs.h" > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mst at mellanox.co.il Mon Jan 15 12:16:03 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 15 Jan 2007 22:16:03 +0200 Subject: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMASupport In-Reply-To: <1168891873.10308.49.camel@linux-q667.site> References: <1168891873.10308.49.camel@linux-q667.site> Message-ID: <20070115201603.GG22940@mellanox.co.il> I wouldn't worry too much - copyright terms are virtually infinite nowadays. But no prob. Quoting r. Steve WIse : Subject: Re: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMASupport Er, hold on. It's 2007. I need to redo this. Don't pull yet until I reply again. Sorry. Steve. On Mon, 2007-01-15 at 13:39 -0600, Steve WIse wrote: > On Mon, 2007-01-15 at 21:00 +0200, Michael S. Tsirkin wrote: > > But, can the license in files be fixed first pls? > > > > Done. > > > > > commit 0ce8edf32779b0915b6cf4b2ddbd1c5ef004e4b4 > Author: Steve Wise > Date: Mon Jan 15 13:26:23 2007 -0600 > > Add dual bsd/gpl license text to cxgb3 files > > This is needed for the OFED 1.2 distribution. > > Signed-off-by: Steve Wise > > diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h > index 8902007..63048ca 100755 > --- a/drivers/net/cxgb3/adapter.h > +++ b/drivers/net/cxgb3/adapter.h > @@ -1,12 +1,33 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses.
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > > /* This file should not be included directly. Include common.h instead. */ > diff --git a/drivers/net/cxgb3/ael1002.c b/drivers/net/cxgb3/ael1002.c > index 93a90d8..e9f3244 100755 > --- a/drivers/net/cxgb3/ael1002.c > +++ b/drivers/net/cxgb3/ael1002.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. 
> * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. 
> */ > - > #include "common.h" > #include "regs.h" > > diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h > index 60a979b..4b37036 100755 > --- a/drivers/net/cxgb3/common.h > +++ b/drivers/net/cxgb3/common.h > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #ifndef __CHELSIO_COMMON_H > #define __CHELSIO_COMMON_H > > diff --git a/drivers/net/cxgb3/cxgb3_ctl_defs.h b/drivers/net/cxgb3/cxgb3_ctl_defs.h > index 0fdc365..b8724a3 100755 > --- a/drivers/net/cxgb3/cxgb3_ctl_defs.h > +++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h > @@ -1,12 +1,34 @@ > /* > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #ifndef _CXGB3_OFFLOAD_CTL_DEFS_H > #define _CXGB3_OFFLOAD_CTL_DEFS_H > > diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h > index 1ee77b2..b046491 100755 > --- a/drivers/net/cxgb3/cxgb3_ioctl.h > +++ b/drivers/net/cxgb3/cxgb3_ioctl.h > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. 
> + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #ifndef __CHIOCTL_H__ > #define __CHIOCTL_H__ > > diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > index 7e7ee7a..57f7e79 100755 > --- a/drivers/net/cxgb3/cxgb3_main.c > +++ b/drivers/net/cxgb3/cxgb3_main.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. 
> + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include > #include > #include > @@ -75,7 +95,7 @@ static const struct pci_device_id cxgb3_ > > MODULE_DESCRIPTION(DRV_DESC); > MODULE_AUTHOR("Chelsio Communications"); > -MODULE_LICENSE("GPL"); > +MODULE_LICENSE("Dual BSD/GPL"); > MODULE_VERSION(DRV_VERSION); > MODULE_DEVICE_TABLE(pci, cxgb3_pci_tbl); > > diff --git a/drivers/net/cxgb3/firmware_exports.h b/drivers/net/cxgb3/firmware_exports.h > index 1ceda0f..f0fb528 100755 > --- a/drivers/net/cxgb3/firmware_exports.h > +++ b/drivers/net/cxgb3/firmware_exports.h > @@ -1,27 +1,33 @@ > -/* > - * ---------------------------------------------------------------------------- > - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<<<<< > - * ---------------------------------------------------------------------------- > - * Copyright 2004 (C) Chelsio Communications, Inc. (Chelsio) > +/* > + * Copyright (c) 2004-2006 Chelsio, Inc. 
All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > * > - * Chelsio Communications, Inc. owns the sole copyright to this software. > - * You may not make a copy, you may not derive works herefrom, and you may > - * not distribute this work to others. Other restrictions of rights may apply > - * as well. This is unpublished, confidential information. All rights reserved. > - * This software contains confidential information and trade secrets of Chelsio > - * Communications, Inc. Use, disclosure, or reproduction is prohibited without > - * the prior express written permission of Chelsio Communications, Inc. > - * ---------------------------------------------------------------------------- > - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Warranty <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< > - * ---------------------------------------------------------------------------- > - * CHELSIO MAKES NO WARRANTY OF ANY KIND WITH REGARD TO THE USE OF THIS > - * SOFTWARE, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO, THE > - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. > - * ---------------------------------------------------------------------------- > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > * > - * This is the firmware_exports.h header file, firmware interface defines. 
> + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > * > - * Written January 2005 by felix marti (felix at chelsio.com) > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > #ifndef _FIRMWARE_EXPORTS_H_ > #define _FIRMWARE_EXPORTS_H_ > diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c > index 9997138..6a3e5a2 100755 > --- a/drivers/net/cxgb3/l2t.c > +++ b/drivers/net/cxgb3/l2t.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. > * > * This software is available to you under a choice of one of two > diff --git a/drivers/net/cxgb3/l2t.h b/drivers/net/cxgb3/l2t.h > index 51a9c1f..343dea9 100755 > --- a/drivers/net/cxgb3/l2t.h > +++ b/drivers/net/cxgb3/l2t.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. 
> * > * This software is available to you under a choice of one of two > diff --git a/drivers/net/cxgb3/mc5.c b/drivers/net/cxgb3/mc5.c > index 44fa9ea..6d8b02d 100755 > --- a/drivers/net/cxgb3/mc5.c > +++ b/drivers/net/cxgb3/mc5.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > #include "regs.h" > > diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c > index 56bb19c..e3676e1 100755 > --- a/drivers/net/cxgb3/sge.c > +++ b/drivers/net/cxgb3/sge.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > - * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > - * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include > #include > #include > diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h > index 6650b04..734dc7e 100755 > --- a/drivers/net/cxgb3/t3_cpl.h > +++ b/drivers/net/cxgb3/t3_cpl.h > @@ -1,16 +1,34 @@ > /* > - * Definitions of the CPL 5 commands and status codes. > + * Copyright (c) 2004-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2004-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * Written by Dimitris Michailidis (dm at chelsio.com) > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. 
> + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #ifndef T3_CPL_H > #define T3_CPL_H > > diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > index 35a7fab..14ea6b9 100755 > --- a/drivers/net/cxgb3/t3_hw.c > +++ b/drivers/net/cxgb3/t3_hw.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > - * > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > - * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. 
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > #include "regs.h" > #include "sge_defs.h" > diff --git a/drivers/net/cxgb3/t3cdev.h b/drivers/net/cxgb3/t3cdev.h > index 359584e..3ea4c47 100755 > --- a/drivers/net/cxgb3/t3cdev.h > +++ b/drivers/net/cxgb3/t3cdev.h > @@ -1,5 +1,6 @@ > /* > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > + * Copyright (C) 2006 Chelsio Communications. All rights reserved. > + * Copyright (C) 2006 Open Grid Computing, Inc. All rights reserved. > * > * This software is available to you under a choice of one of two > * licenses. 
You may choose to be licensed under the terms of the GNU > diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h > index 1413ea3..bafc7d8 100755 > --- a/drivers/net/cxgb3/version.h > +++ b/drivers/net/cxgb3/version.h > @@ -1,19 +1,34 @@ > -/***************************************************************************** > - * * > - * File: * > - * version.h * > - * * > - * Description: * > - * Chelsio driver version defines. * > - * * > - * Copyright (c) 2003 - 2006 Chelsio Communications, Inc. * > - * All rights reserved. * > - * * > - * Maintainers: maintainers at chelsio.com * > - * * > - * http://www.chelsio.com * > - * * > - ****************************************************************************/ > +/* > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > /* $Date: 2006/10/31 18:57:51 $ $RCSfile: version.h,v $ $Revision: 1.3 $ */ > #ifndef __CHELSIO_VERSION_H > #define __CHELSIO_VERSION_H > diff --git a/drivers/net/cxgb3/vsc8211.c b/drivers/net/cxgb3/vsc8211.c > index 6a0a815..7d101ae 100755 > --- a/drivers/net/cxgb3/vsc8211.c > +++ b/drivers/net/cxgb3/vsc8211.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > > /* VSC8211 PHY specific registers. */ > diff --git a/drivers/net/cxgb3/xgmac.c b/drivers/net/cxgb3/xgmac.c > index 0f209c7..bcb7427 100755 > --- a/drivers/net/cxgb3/xgmac.c > +++ b/drivers/net/cxgb3/xgmac.c > @@ -1,14 +1,34 @@ > /* > - * This file is part of the Chelsio T3 Ethernet driver. > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > * > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > * > - * This program is distributed in the hope that it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > - * release for licensing terms and conditions. > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. 
> + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > */ > - > #include "common.h" > #include "regs.h" > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > -- MST From swise at opengridcomputing.com Mon Jan 15 12:31:03 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 14:31:03 -0600 Subject: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMASupport In-Reply-To: <20070115201603.GG22940@mellanox.co.il> References: <1168891873.10308.49.camel@linux-q667.site> <20070115201603.GG22940@mellanox.co.il> Message-ID: <1168893063.10308.55.camel@linux-q667.site> I've pushed up these changes now. git://staging.openfabrics.org/~swise/cxgb3.git for-ofed_1_2 Steve. On Mon, 2007-01-15 at 22:16 +0200, Michael S. Tsirkin wrote: > I wouldn't worry too much - copyright terms are virtually > infinite nowadays. But no prob. > > > Quoting r. Steve WIse : > Subject: Re: [openib-general] [PATCH v4 0/3] ofed_1_2 - Chelsio T3 RDMASupport > > Er, hold on. It's 2007. I need to redo this. > > Don't pull yet until I reply again. > > Sorry. > > Steve.
> > > On Mon, 2007-01-15 at 13:39 -0600, Steve WIse wrote: > > On Mon, 2007-01-15 at 21:00 +0200, Michael S. Tsirkin wrote: > > > But, can the license in files be fixed first pls? > > > > > > > Done. > > > > > > > > > > commit 0ce8edf32779b0915b6cf4b2ddbd1c5ef004e4b4 > > Author: Steve Wise > > Date: Mon Jan 15 13:26:23 2007 -0600 > > > > Add dual bsd/gpl license text to cxgb3 files > > > > This is needed for the OFED 1.2 distribution. > > > > Signed-off-by: Steve Wise > > > > diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h > > index 8902007..63048ca 100755 > > --- a/drivers/net/cxgb3/adapter.h > > +++ b/drivers/net/cxgb3/adapter.h > > @@ -1,12 +1,33 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. 
> > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > > > /* This file should not be included directly. Include common.h instead. */ > > diff --git a/drivers/net/cxgb3/ael1002.c b/drivers/net/cxgb3/ael1002.c > > index 93a90d8..e9f3244 100755 > > --- a/drivers/net/cxgb3/ael1002.c > > +++ b/drivers/net/cxgb3/ael1002.c > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver. > > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. 
> > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #include "common.h" > > #include "regs.h" > > > > diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h > > index 60a979b..4b37036 100755 > > --- a/drivers/net/cxgb3/common.h > > +++ b/drivers/net/cxgb3/common.h > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver. > > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > > + * This software is available to you under a choice of one of two > > + * licenses. 
You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. 
> > */ > > - > > #ifndef __CHELSIO_COMMON_H > > #define __CHELSIO_COMMON_H > > > > diff --git a/drivers/net/cxgb3/cxgb3_ctl_defs.h b/drivers/net/cxgb3/cxgb3_ctl_defs.h > > index 0fdc365..b8724a3 100755 > > --- a/drivers/net/cxgb3/cxgb3_ctl_defs.h > > +++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h > > @@ -1,12 +1,34 @@ > > /* > > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > + * > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #ifndef _CXGB3_OFFLOAD_CTL_DEFS_H > > #define _CXGB3_OFFLOAD_CTL_DEFS_H > > > > diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h > > index 1ee77b2..b046491 100755 > > --- a/drivers/net/cxgb3/cxgb3_ioctl.h > > +++ b/drivers/net/cxgb3/cxgb3_ioctl.h > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. 
> > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #ifndef __CHIOCTL_H__ > > #define __CHIOCTL_H__ > > > > diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > > index 7e7ee7a..57f7e79 100755 > > --- a/drivers/net/cxgb3/cxgb3_main.c > > +++ b/drivers/net/cxgb3/cxgb3_main.c > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver for Linux. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. 
> > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. 
> > */ > > - > > #include > > #include > > #include > > @@ -75,7 +95,7 @@ static const struct pci_device_id cxgb3_ > > > > MODULE_DESCRIPTION(DRV_DESC); > > MODULE_AUTHOR("Chelsio Communications"); > > -MODULE_LICENSE("GPL"); > > +MODULE_LICENSE("Dual BSD/GPL"); > > MODULE_VERSION(DRV_VERSION); > > MODULE_DEVICE_TABLE(pci, cxgb3_pci_tbl); > > > > diff --git a/drivers/net/cxgb3/firmware_exports.h b/drivers/net/cxgb3/firmware_exports.h > > index 1ceda0f..f0fb528 100755 > > --- a/drivers/net/cxgb3/firmware_exports.h > > +++ b/drivers/net/cxgb3/firmware_exports.h > > @@ -1,27 +1,33 @@ > > -/* > > - * ---------------------------------------------------------------------------- > > - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<<<<< > > - * ---------------------------------------------------------------------------- > > - * Copyright 2004 (C) Chelsio Communications, Inc. (Chelsio) > > +/* > > + * Copyright (c) 2004-2006 Chelsio, Inc. All rights reserved. > > + * > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > + * > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > * > > - * Chelsio Communications, Inc. owns the sole copyright to this software. > > - * You may not make a copy, you may not derive works herefrom, and you may > > - * not distribute this work to others. Other restrictions of rights may apply > > - * as well. This is unpublished, confidential information. All rights reserved. > > - * This software contains confidential information and trade secrets of Chelsio > > - * Communications, Inc. 
Use, disclosure, or reproduction is prohibited without > > - * the prior express written permission of Chelsio Communications, Inc. > > - * ---------------------------------------------------------------------------- > > - * >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Warranty <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< > > - * ---------------------------------------------------------------------------- > > - * CHELSIO MAKES NO WARRANTY OF ANY KIND WITH REGARD TO THE USE OF THIS > > - * SOFTWARE, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO, THE > > - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. > > - * ---------------------------------------------------------------------------- > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > * > > - * This is the firmware_exports.h header file, firmware interface defines. > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > * > > - * Written January 2005 by felix marti (felix at chelsio.com) > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. 
> > */ > > #ifndef _FIRMWARE_EXPORTS_H_ > > #define _FIRMWARE_EXPORTS_H_ > > diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c > > index 9997138..6a3e5a2 100755 > > --- a/drivers/net/cxgb3/l2t.c > > +++ b/drivers/net/cxgb3/l2t.c > > @@ -1,5 +1,5 @@ > > /* > > - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. > > * > > * This software is available to you under a choice of one of two > > diff --git a/drivers/net/cxgb3/l2t.h b/drivers/net/cxgb3/l2t.h > > index 51a9c1f..343dea9 100755 > > --- a/drivers/net/cxgb3/l2t.h > > +++ b/drivers/net/cxgb3/l2t.h > > @@ -1,5 +1,5 @@ > > /* > > - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. > > * > > * This software is available to you under a choice of one of two > > diff --git a/drivers/net/cxgb3/mc5.c b/drivers/net/cxgb3/mc5.c > > index 44fa9ea..6d8b02d 100755 > > --- a/drivers/net/cxgb3/mc5.c > > +++ b/drivers/net/cxgb3/mc5.c > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. 
See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #include "common.h" > > #include "regs.h" > > > > diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c > > index 56bb19c..e3676e1 100755 > > --- a/drivers/net/cxgb3/sge.c > > +++ b/drivers/net/cxgb3/sge.c > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver. > > - * > > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > > - * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. 
> > + * > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > + * > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #include > > #include > > #include > > diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h > > index 6650b04..734dc7e 100755 > > --- a/drivers/net/cxgb3/t3_cpl.h > > +++ b/drivers/net/cxgb3/t3_cpl.h > > @@ -1,16 +1,34 @@ > > /* > > - * Definitions of the CPL 5 commands and status codes. > > + * Copyright (c) 2004-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2004-2006 Chelsio Communications. All rights reserved. 
> > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * Written by Dimitris Michailidis (dm at chelsio.com) > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. 
> > */ > > - > > #ifndef T3_CPL_H > > #define T3_CPL_H > > > > diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > > index 35a7fab..14ea6b9 100755 > > --- a/drivers/net/cxgb3/t3_hw.c > > +++ b/drivers/net/cxgb3/t3_hw.c > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver. > > - * > > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > > - * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > + * > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > + * > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #include "common.h" > > #include "regs.h" > > #include "sge_defs.h" > > diff --git a/drivers/net/cxgb3/t3cdev.h b/drivers/net/cxgb3/t3cdev.h > > index 359584e..3ea4c47 100755 > > --- a/drivers/net/cxgb3/t3cdev.h > > +++ b/drivers/net/cxgb3/t3cdev.h > > @@ -1,5 +1,6 @@ > > /* > > - * Copyright (C) 2003-2006 Chelsio Communications. All rights reserved. > > + * Copyright (C) 2006 Chelsio Communications. All rights reserved. > > + * Copyright (C) 2006 Open Grid Computing, Inc. All rights reserved. > > * > > * This software is available to you under a choice of one of two > > * licenses. You may choose to be licensed under the terms of the GNU > > diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h > > index 1413ea3..bafc7d8 100755 > > --- a/drivers/net/cxgb3/version.h > > +++ b/drivers/net/cxgb3/version.h > > @@ -1,19 +1,34 @@ > > -/***************************************************************************** > > - * * > > - * File: * > > - * version.h * > > - * * > > - * Description: * > > - * Chelsio driver version defines. * > > - * * > > - * Copyright (c) 2003 - 2006 Chelsio Communications, Inc. * > > - * All rights reserved. * > > - * * > > - * Maintainers: maintainers at chelsio.com * > > - * * > > - * http://www.chelsio.com * > > - * * > > - ****************************************************************************/ > > +/* > > + * Copyright (c) 2003-2006 Chelsio, Inc. All rights reserved. > > + * > > + * This software is available to you under a choice of one of two > > + * licenses. 
You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > + * > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > + */ > > /* $Date: 2006/10/31 18:57:51 $ $RCSfile: version.h,v $ $Revision: 1.3 $ */ > > #ifndef __CHELSIO_VERSION_H > > #define __CHELSIO_VERSION_H > > diff --git a/drivers/net/cxgb3/vsc8211.c b/drivers/net/cxgb3/vsc8211.c > > index 6a0a815..7d101ae 100755 > > --- a/drivers/net/cxgb3/vsc8211.c > > +++ b/drivers/net/cxgb3/vsc8211.c > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver. > > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. 
> > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #include "common.h" > > > > /* VSC8211 PHY specific registers. 
*/ > > diff --git a/drivers/net/cxgb3/xgmac.c b/drivers/net/cxgb3/xgmac.c > > index 0f209c7..bcb7427 100755 > > --- a/drivers/net/cxgb3/xgmac.c > > +++ b/drivers/net/cxgb3/xgmac.c > > @@ -1,14 +1,34 @@ > > /* > > - * This file is part of the Chelsio T3 Ethernet driver. > > + * Copyright (c) 2005-2006 Chelsio, Inc. All rights reserved. > > * > > - * Copyright (C) 2005-2006 Chelsio Communications. All rights reserved. > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > * > > - * This program is distributed in the hope that it will be useful, but WITHOUT > > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > - * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this > > - * release for licensing terms and conditions. > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > */ > > - > > #include "common.h" > > #include "regs.h" > > > > > > > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > From alois.wiedenhofer at aon.at Mon Jan 15 13:05:50 2007 From: alois.wiedenhofer at aon.at (Alois) Date: Mon, 15 Jan 2007 22:05:50 +0100 (MET) Subject: [openib-general] is openSM running on Solaris 10_x86 Message-ID: <1168895150.45abecae933ee@webmail.aon.at> Hello, we have to run a small InfiniBand network with 2 Solaris machines back-to-back, without a switch, so we need a subnet manager. Has somebody already compiled openSM on Solaris 10_x86 ? regards Alois ------------------------------------------- Versendet durch aonWebmail (webmail.aon.at) From swise at opengridcomputing.com Mon Jan 15 13:19:17 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:17 -0600 Subject: [openib-general] [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 Message-ID: <20070115211917.10511.83814.stgit@dell3.ogc.int> Here is the series to back-port the cxgb3 and iw_cxgb3 drivers to SLES9SP3. Sorry for the large patch count, but each patch is very small and updates or backports a specific file or service needed by the chelsio drivers. It is missing one big part however: netevent notification support for neighbour and next hop changes. I'm pondering how to support this outside the kernel. I did port the netevent notifier service, but no notifications are done as of yet. So this is TBD.
But the drivers load ok on SLES9SP3 and I'll be testing soon. Michael, if you have time, please review the various additions/changes I've made and tell me what you think. Just trying to get some feedback as I go along with these backports... Thanks, Steve. From swise at opengridcomputing.com Mon Jan 15 13:19:19 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:19 -0600 Subject: [openib-general] [PATCH RFC 01/21] ofed_1_2 Generic allocator backport for SLEL9SP3. In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211919.10511.56547.stgit@dell3.ogc.int> Generic allocator backport for SLEL9SP3. Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/genalloc.h | 43 +++++ .../2.6.5_sles9_sp3/include/src/genalloc.c | 198 +++++++++++++++++++++++ 2 files changed, 241 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h new file mode 100644 index 0000000..f92c19b --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h @@ -0,0 +1,43 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free +#define BITS_PER_BYTE 8 diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); From swise at opengridcomputing.com Mon Jan 15 13:19:21 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:21 -0600 Subject: [openib-general] [PATCH RFC 02/21] ofed_1_2 Backport kfifo to SLES9SP3. In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211921.10511.92282.stgit@dell3.ogc.int> Backport kfifo to SLES9SP3. 
Signed-off-by: Steve Wise --- .../backport/2.6.5_sles9_sp3/include/linux/kfifo.h | 157 ++++++++++++++++++ .../backport/2.6.5_sles9_sp3/include/src/kfifo.c | 196 +++++++++++++++++++++++ 2 files changed, 353 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h new file mode 100644 index 0000000..48eccd8 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h @@ -0,0 +1,157 @@ +/* + * A simple kernel FIFO implementation. + * + * Copyright (C) 2004 Stelian Pop + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ +#ifndef _LINUX_KFIFO_H +#define _LINUX_KFIFO_H + +#ifdef __KERNEL__ + +#include +#include + +struct kfifo { + unsigned char *buffer; /* the buffer holding the data */ + unsigned int size; /* the size of the allocated buffer */ + unsigned int in; /* data is added at offset (in % size) */ + unsigned int out; /* data is extracted from off. 
(out % size) */ + spinlock_t *lock; /* protects concurrent modifications */ +}; + +extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock); +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, + spinlock_t *lock); +extern void kfifo_free(struct kfifo *fifo); +extern unsigned int __kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len); +extern unsigned int __kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len); + +/** + * __kfifo_reset - removes the entire FIFO contents, no locking version + * @fifo: the fifo to be emptied. + */ +static inline void __kfifo_reset(struct kfifo *fifo) +{ + fifo->in = fifo->out = 0; +} + +/** + * kfifo_reset - removes the entire FIFO contents + * @fifo: the fifo to be emptied. + */ +static inline void kfifo_reset(struct kfifo *fifo) +{ + unsigned long flags; + + spin_lock_irqsave(fifo->lock, flags); + + __kfifo_reset(fifo); + + spin_unlock_irqrestore(fifo->lock, flags); +} + +/** + * kfifo_put - puts some data into the FIFO + * @fifo: the fifo to be used. + * @buffer: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most 'len' bytes from the 'buffer' into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + */ +static inline unsigned int kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_put(fifo, buffer, len); + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +/** + * kfifo_get - gets some data from the FIFO + * @fifo: the fifo to be used. + * @buffer: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most 'len' bytes from the FIFO into the + * 'buffer' and returns the number of copied bytes. 
+ */ +static inline unsigned int kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_get(fifo, buffer, len); + + /* + * optimization: if the FIFO is empty, set the indices to 0 + * so we don't wrap the next time + */ + if (fifo->in == fifo->out) + fifo->in = fifo->out = 0; + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +/** + * __kfifo_len - returns the number of bytes available in the FIFO, no locking version + * @fifo: the fifo to be used. + */ +static inline unsigned int __kfifo_len(struct kfifo *fifo) +{ + return fifo->in - fifo->out; +} + +/** + * kfifo_len - returns the number of bytes available in the FIFO + * @fifo: the fifo to be used. + */ +static inline unsigned int kfifo_len(struct kfifo *fifo) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_len(fifo); + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +#else +#warning "don't include kernel headers in userspace" +#endif /* __KERNEL__ */ +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c new file mode 100644 index 0000000..5d1d907 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c @@ -0,0 +1,196 @@ +/* + * A simple kernel FIFO implementation. + * + * Copyright (C) 2004 Stelian Pop + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include +#include +#include +#include +#include + +/** + * kfifo_init - allocates a new FIFO using a preallocated buffer + * @buffer: the preallocated buffer to be used. + * @size: the size of the internal buffer, this has to be a power of 2. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the + * struct kfifo with kfree(). + */ +struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock) +{ + struct kfifo *fifo; + + /* size must be a power of 2 */ + BUG_ON(size & (size - 1)); + + fifo = kmalloc(sizeof(struct kfifo), gfp_mask); + if (!fifo) + return ERR_PTR(-ENOMEM); + + fifo->buffer = buffer; + fifo->size = size; + fifo->in = fifo->out = 0; + fifo->lock = lock; + + return fifo; +} +EXPORT_SYMBOL(kfifo_init); + +/** + * kfifo_alloc - allocates a new FIFO and its internal buffer + * @size: the size of the internal buffer to be allocated. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * The size will be rounded-up to a power of 2. + */ +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) +{ + unsigned char *buffer; + struct kfifo *ret; + + /* + * round up to the next power of 2, since our 'let the indices + * wrap' technique works only in this case.
+ */ + if (size & (size - 1)) { + BUG_ON(size > 0x80000000); + size = roundup_pow_of_two(size); + } + + buffer = kmalloc(size, gfp_mask); + if (!buffer) + return ERR_PTR(-ENOMEM); + + ret = kfifo_init(buffer, size, gfp_mask, lock); + + if (IS_ERR(ret)) + kfree(buffer); + + return ret; +} +EXPORT_SYMBOL(kfifo_alloc); + +/** + * kfifo_free - frees the FIFO + * @fifo: the fifo to be freed. + */ +void kfifo_free(struct kfifo *fifo) +{ + kfree(fifo->buffer); + kfree(fifo); +} +EXPORT_SYMBOL(kfifo_free); + +/** + * __kfifo_put - puts some data into the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most 'len' bytes from the 'buffer' into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int __kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->size - fifo->in + fifo->out); + + /* + * Ensure that we sample the fifo->out index -before- we + * start putting bytes into the kfifo. + */ + + smp_mb(); + + /* first put the data starting from fifo->in to buffer end */ + l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); + + /* then put the rest (if any) at the beginning of the buffer */ + memcpy(fifo->buffer, buffer + l, len - l); + + /* + * Ensure that we add the bytes to the kfifo -before- + * we update the fifo->in index. + */ + + smp_wmb(); + + fifo->in += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_put); + +/** + * __kfifo_get - gets some data from the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: where the data must be copied. + * @len: the size of the destination buffer. 
+ * + * This function copies at most 'len' bytes from the FIFO into the + * 'buffer' and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int __kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->in - fifo->out); + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. + */ + + smp_rmb(); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); + memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + + /* then get the rest (if any) from the beginning of the buffer */ + memcpy(buffer + l, fifo->buffer, len - l); + + /* + * Ensure that we remove the bytes from the kfifo -before- + * we update the fifo->out index. + */ + + smp_mb(); + + fifo->out += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_get); From swise at opengridcomputing.com Mon Jan 15 13:19:23 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:23 -0600 Subject: [openib-general] [PATCH RFC 03/21] ofed_1_2 Backport netevents to SUSE9SP3. In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211923.10511.16934.stgit@dell3.ogc.int> Backport netevents to SUSE9SP3. 
Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/net/netevent.h | 33 +++++++++++ .../2.6.5_sles9_sp3/include/src/netevent.c | 71 +++++++++++++++++++++++ 2 files changed, 104 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * unregister_netevent_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_netevent_notifier(). The notifier is unlinked from the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all netevent notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); From swise at opengridcomputing.com Mon Jan 15 13:19:26 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:26 -0600 Subject: [openib-general] [PATCH RFC 04/21] ofed_1_2 Patch to include linux addons.
In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211926.10511.89655.stgit@dell3.ogc.int> Patch to include linux addons. - change linux_stream* patch to a generic name - add genalloc, kfifo, and netevent Signed-off-by: Steve Wise --- .../linux_stream_idr_to_2_6_5-7_244.patch | 25 ------------- .../linux_stuff_to_2_6_5-7_244.patch | 46 +++++++++++++++++++++++ 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch deleted file mode 100644 index 74d8403..0000000 --- a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch +++ /dev/null @@ -1,25 +0,0 @@ -diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile -index 163d991..2cd239f 100644 ---- a/drivers/infiniband/core/Makefile -+++ b/drivers/infiniband/core/Makefile -@@ -26,3 +26,6 @@ ib_ucm-y := ucm.o - - ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ - uverbs_marshall.o -+ -+obj-$(CONFIG_INFINIBAND) += stream.o -+ib_core-y += stream.o ib_idr.o -diff --git a/drivers/infiniband/core/stream.c b/drivers/infiniband/core/stream.c -new file mode 100644 -index 0000000..96a48fe ---- /dev/null -+++ b/drivers/infiniband/core/stream.c -@@ -0,0 +1 @@ -+#include "src/stream.c" -diff --git a/drivers/infiniband/core/ib_idr.c b/drivers/infiniband/core/ib_idr.c -new file mode 100644 -index 0000000..58cf933 ---- /dev/null -+++ b/drivers/infiniband/core/ib_idr.c -@@ -0,0 +1 @@ -+#include "src/ib_idr.c" diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch new file mode 100644 index 0000000..6ca67a5 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch @@ -0,0 +1,46 @@ +diff 
--git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..51687a8 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,6 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++obj-$(CONFIG_INFINIBAND) += stream.o ++ib_core-y += stream.o ib_idr.o genalloc.o kfifo.o netevent.o +diff --git a/drivers/infiniband/core/stream.c b/drivers/infiniband/core/stream.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ b/drivers/infiniband/core/stream.c +@@ -0,0 +1 @@ ++#include "src/stream.c" +diff --git a/drivers/infiniband/core/ib_idr.c b/drivers/infiniband/core/ib_idr.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/ib_idr.c +@@ -0,0 +1 @@ ++#include "src/ib_idr.c" +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/kfifo.c b/drivers/infiniband/core/kfifo.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/kfifo.c +@@ -0,0 +1 @@ ++#include "src/kfifo.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" From swise at opengridcomputing.com Mon Jan 15 13:19:28 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:28 -0600 Subject: [openib-general] [PATCH RFC 05/21] ofed_1_2 Backport kmalloc_node() to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211928.10511.35997.stgit@dell3.ogc.int> Backport kmalloc_node() to sles9sp3 
Signed-off-by: Steve Wise --- .../backport/2.6.5_sles9_sp3/include/linux/slab.h | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h index 0540cc6..1464c56 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h @@ -12,6 +12,11 @@ static inline void *kzalloc(size_t size, return ret; } +static inline void *kmalloc_node(size_t size, gfp_t flags, int nid) +{ + return kmalloc(size, flags); +} + #endif #include_next From swise at opengridcomputing.com Mon Jan 15 13:19:32 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:32 -0600 Subject: [openib-general] [PATCH RFC 07/21] ofed_1_2 Add DEFINE_RWLOCK to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211932.10511.96441.stgit@dell3.ogc.int> Add DEFINE_RWLOCK to sles9sp3 Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/spinlock.h | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h index 4644d50..449e0eb 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h @@ -3,6 +3,7 @@ #define BACKPORT_LINUX_SPINLOCK_H #include_next #define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED +#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED #define spin_trylock_irqsave(lock, flags) \ ({ \ From swise at opengridcomputing.com Mon Jan 15 13:19:34 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:34 -0600 Subject: [openib-general] [PATCH RFC 08/21] ofed_1_2 Backport cancel_rearming_workqueue(). 
In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211934.10511.58896.stgit@dell3.ogc.int> Backport cancel_rearming_workqueue(). Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/workqueue.h | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h index 330f47f..f90d134 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h @@ -26,6 +26,13 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, + struct delayed_work *dwork) +{ + while (!cancel_delayed_work(&dwork->work)) + flush_workqueue(wq); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) From swise at opengridcomputing.com Mon Jan 15 13:19:30 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:30 -0600 Subject: [openib-general] [PATCH RFC 06/21] ofed_1_2 Include in neighbour.h In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211930.10511.96925.stgit@dell3.ogc.int> Include in neighbour.h Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/net/neighbour.h | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h new file mode 100644 index 0000000..c2451fb --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h @@ -0,0 +1,5 @@ +#ifndef BACKPORT_LINUX_NEIGHBOUR_TO_SLES9SP3 +#define BACKPORT_LINUX_NEIGHBOUR_TO_SLES9SP3 
+#include +#include_next +#endif From swise at opengridcomputing.com Mon Jan 15 13:19:53 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:53 -0600 Subject: [openib-general] [PATCH RFC 17/21] ofed_1_2 Backport infiniband/hw/cxgb3/iwch_provider.c to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211953.10511.56935.stgit@dell3.ogc.int> Backport infiniband/hw/cxgb3/iwch_provider.c to sles9sp3 Signed-off-by: Steve Wise --- .../iwch_provider_to_2_6_5-7_244.patch | 21 +++++++++++++++++++++ 1 files changed, 21 insertions(+), 0 deletions(-) diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/iwch_provider_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_provider_to_2_6_5-7_244.patch new file mode 100644 index 0000000..2375474 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_provider_to_2_6_5-7_244.patch @@ -0,0 +1,21 @@ +diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c +index 28be418..86fe3e3 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c +@@ -347,14 +347,14 @@ static int iwch_mmap(struct ib_ucontext + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + vma->vm_flags &= ~VM_MAYREAD; +- ret = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, ++ ret = io_remap_page_range(vma, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + len, vma->vm_page_prot); + } else { + + /* + * Map WQ or CQ contig dma memory... 
+ */ +- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, ++ ret = remap_page_range(vma, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + len, vma->vm_page_prot); + } + From swise at opengridcomputing.com Mon Jan 15 13:19:49 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:49 -0600 Subject: [openib-general] [PATCH RFC 15/21] ofed_1_2 Add dst_mtu() for iw_cxgb3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211949.10511.26742.stgit@dell3.ogc.int> Add dst_mtu() for iw_cxgb3 Signed-off-by: Steve Wise --- .../backport/2.6.5_sles9_sp3/include/net/dst.h | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h new file mode 100644 index 0000000..a399110 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h @@ -0,0 +1,13 @@ +#ifndef BACKPORT_DST_SLES9SP3 +#define BACKPORT_DST_SLES9SP3 +#include_next +static inline u32 dst_mtu(struct dst_entry *dst) +{ + u32 mtu = dst_metric(dst, RTAX_MTU); + /* + * Alexey put it here, so ask him about it :) + */ + barrier(); + return mtu; +} +#endif From swise at opengridcomputing.com Mon Jan 15 13:20:01 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:20:01 -0600 Subject: [openib-general] [PATCH RFC 21/21] ofed_1_2 Backport drivers/net/cxgb3/t3_hw.c to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115212001.10511.24713.stgit@dell3.ogc.int> Backport drivers/net/cxgb3/t3_hw.c to sles9sp3 Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch | 43 +++++++++++++++++++++++ 1 files changed, 43 insertions(+), 0 deletions(-) diff --git 
a/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch new file mode 100644 index 0000000..a667be0 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch @@ -0,0 +1,43 @@ +diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h +index 463ca32..b931fd2 100755 +--- a/drivers/net/cxgb3/adapter.h ++++ b/drivers/net/cxgb3/adapter.h +@@ -179,6 +179,7 @@ struct adapter { + struct list_head adapter_list; + void __iomem *regs; + struct pci_dev *pdev; ++ u32 saved_pci_state[16]; + unsigned long registered_device_map; + unsigned long open_device_map; + unsigned long flags; +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c +index 14ea6b9..f13f581 100755 +--- a/drivers/net/cxgb3/t3_hw.c ++++ b/drivers/net/cxgb3/t3_hw.c +@@ -3250,7 +3250,7 @@ int t3_reset_adapter(struct adapter *ada + uint16_t devid = 0; + + if (is_pcie(adapter)) +- pci_save_state(adapter->pdev); ++ pci_save_state(adapter->pdev, adapter->saved_pci_state); + t3_write_reg(adapter, A_PL_RST, F_CRSTWRM | F_CRSTWRMMODE); + + /* +@@ -3268,7 +3268,7 @@ int t3_reset_adapter(struct adapter *ada + return -1; + + if (is_pcie(adapter)) +- pci_restore_state(adapter->pdev); ++ pci_restore_state(adapter->pdev, adapter->saved_pci_state); + return 0; + } + +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada + + memcpy(adapter->port[i]->dev_addr, hw_addr, + ETH_ALEN); +- memcpy(adapter->port[i]->perm_addr, hw_addr, +- ETH_ALEN); + init_link_config(&p->link_config, p->port_type->caps); + p->phy.ops->power_down(&p->phy, 1); + if (!(p->port_type->caps & SUPPORTED_IRQ)) From swise at opengridcomputing.com Mon Jan 15 13:19:57 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:57 -0600 Subject: [openib-general] [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE to drivers/net/cxgb3/Makefile In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> 
References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211957.10511.15769.stgit@dell3.ogc.int> Add LINUXINCLUDE to drivers/net/cxgb3/Makefile Signed-off-by: Steve Wise --- .../cxgb3_makefile_to_2_6_5-7_244.patch | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + From swise at opengridcomputing.com Mon Jan 15 13:19:38 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:38 -0600 Subject: [openib-general] [PATCH RFC 10/21] ofed_1_2 Added additional defines needed by cxgb3 to pci.h In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211938.10511.67631.stgit@dell3.ogc.int> Added additional defines needed by cxgb3 to pci.h Signed-off-by: Steve Wise --- .../backport/2.6.5_sles9_sp3/include/linux/pci.h | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h index b43b19c..beb954b 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h @@ -6,6 +6,8 @@ #include_next #define PCI_EXP_DEVCTL 8 /* Device Control */ #define PCI_EXP_LNKCTL 16 /* Link Control */ #define 
PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ struct msix_entry { u16 vector; /* kernel uses to write allocated vector */ From swise at opengridcomputing.com Mon Jan 15 13:19:40 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:40 -0600 Subject: [openib-general] [PATCH RFC 11/21] ofed_1_2 Backport ethtool stuff to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211940.10511.61006.stgit@dell3.ogc.int> Backport ethtool stuff to sles9sp3 Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/ethtool.h | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h new file mode 100644 index 0000000..383d04f --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h @@ -0,0 +1,6 @@ +#ifndef BACKPORT_ETHTOOL_TO_SLES9SP3 +#define BACKPORT_ETHTOOL_TO_SLES9SP3 +#include_next +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) +#endif From swise at opengridcomputing.com Mon Jan 15 13:19:42 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:42 -0600 Subject: [openib-general] [PATCH RFC 12/21] ofed_1_2 Add functions/defines needed by cxgb3 to netdevice.h for sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211942.10511.33734.stgit@dell3.ogc.int> Add functions/defines needed by cxgb3 to netdevice.h for sles9sp3 Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/netdevice.h | 9 +++++++++ 1 files changed, 9 insertions(+), 0 deletions(-) diff --git 
a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h index 5641019..cf5da60 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#define NETDEV_TX_OK 0 /* driver took care of packet */ +#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ +#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ + #endif From swise at opengridcomputing.com Mon Jan 15 13:19:36 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:36 -0600 Subject: [openib-general] [PATCH RFC 09/21] ofed_1_2 Backport if_mii(). In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211936.10511.92133.stgit@dell3.ogc.int> Backport if_mii(). 
Signed-off-by: Steve Wise --- .../backport/2.6.5_sles9_sp3/include/linux/mii.h | 17 +++++++++++++++++ 1 files changed, 17 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h new file mode 100644 index 0000000..9351a64 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_MII_TO_SLES9SP3 +#define BACKPORT_LINUX_MII_TO_SLES9SP3 +#include_next + +#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ +#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ +#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymetric pause */ +#define MII_CTRL1000 0x09 /* 1000BASE-T control */ +#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ +#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */ + + +static inline struct mii_ioctl_data *if_mii(struct ifreq *rq) +{ + return (struct mii_ioctl_data *) &rq->ifr_ifru; +} +#endif From swise at opengridcomputing.com Mon Jan 15 13:19:44 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:44 -0600 Subject: [openib-general] [PATCH RFC 13/21] ofed_1_2 Added gso_size for cxgb3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211944.10511.93157.stgit@dell3.ogc.int> Added gso_size for cxgb3 Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/skbuff.h | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h index cc56236..8c80451 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h @@ -41,4 +41,7 @@ static inline int skb_can_coalesce(struc return 0; } +#define gso_size 
tso_size +#define CHECKSUM_COMPLETE CHECKSUM_HW + #endif From swise at opengridcomputing.com Mon Jan 15 13:19:47 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:47 -0600 Subject: [openib-general] [PATCH RFC 14/21] ofed_1_2 Add spin_trylock_irq() for cxgb3 support In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211946.10511.23168.stgit@dell3.ogc.int> Add spin_trylock_irq() for cxgb3 support Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/spinlock.h | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h index 449e0eb..00506f4 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h @@ -14,4 +14,11 @@ ({ \ #define spin_lock_nested(lock, subclass) spin_lock(lock) +#define spin_trylock_irq(lock) \ +({ \ + local_irq_disable(); \ + spin_trylock(lock) ? 
\ + 1 : ({ local_irq_enable(); 0; }); \ +}) + #endif From swise at opengridcomputing.com Mon Jan 15 13:19:51 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:51 -0600 Subject: [openib-general] [PATCH RFC 16/21] ofed_1_2 Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211951.10511.9529.stgit@dell3.ogc.int> Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch | 35 +++++++++++++++++++++++ 1 files changed, 35 insertions(+), 0 deletions(-) diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch new file mode 100644 index 0000000..af468f7 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch @@ -0,0 +1,35 @@ +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c +index 3237fc8..2a38953 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c +@@ -234,7 +234,7 @@ static void *alloc_ep(int size, gfp_t gf + epc = kmalloc(size, gfp); + if (epc) { + memset(epc, 0, size); +- kref_init(&epc->kref); ++ kref_init(&epc->kref, __free_ep); + spin_lock_init(&epc->lock); + init_waitqueue_head(&epc->waitq); + } +@@ -338,7 +338,7 @@ static struct rtable *find_route(struct + } + }; + +- if (ip_route_output_flow(&rt, &fl, NULL, 0)) ++ if (ip_route_output_key(&rt, &fl)) + return NULL; + return rt; + } +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h +index 893f9d0..e54e202 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h +@@ -57,7 +57,7 @@ #define MPA_FLAGS_MASK 0xE0 + #define put_ep(ep) { \ + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", 
__FUNCTION__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ +- kref_put(&((ep)->kref), __free_ep); \ ++ kref_put(&((ep)->kref)); \ + } + + #define get_ep(ep) { \ From swise at opengridcomputing.com Mon Jan 15 13:19:55 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:55 -0600 Subject: [openib-general] [PATCH RFC 18/21] ofed_1_2 Backport infiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211955.10511.58381.stgit@dell3.ogc.int> Backport infiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 Signed-off-by: Steve Wise --- .../cxio_resource_to_2_6_5-7_244.patch | 20 ++++++++++++++++++++ 1 files changed, 20 insertions(+), 0 deletions(-) diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxio_resource_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxio_resource_to_2_6_5-7_244.patch new file mode 100644 index 0000000..20df3ec --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxio_resource_to_2_6_5-7_244.patch @@ -0,0 +1,20 @@ +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +index d1d8722..4fb9890 100644 +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c +@@ -64,13 +64,13 @@ static int __cxio_init_resource_fifo(str + __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32)); + if (random) { + j = 0; +- random_bytes = random32(); ++ get_random_bytes(&random_bytes, 4); + for (i = 0; i < RANDOM_SIZE; i++) + rarray[i] = i + skip_low; + for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { + if (j >= RANDOM_SIZE) { + j = 0; +- random_bytes = random32(); ++ get_random_bytes(&random_bytes, 4); + } + idx = (random_bytes >> (j * 2)) & 0xF; + __kfifo_put(*fifo, From swise at opengridcomputing.com Mon Jan 15 13:19:59 2007 From: swise at 
opengridcomputing.com (Steve Wise) Date: Mon, 15 Jan 2007 15:19:59 -0600 Subject: [openib-general] [PATCH RFC 20/21] ofed_1_2 Backport drivers/net/cxgb3_main.c to sles9sp3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070115211959.10511.71149.stgit@dell3.ogc.int> Backport drivers/net/cxgb3_main.c to sles9sp3 Signed-off-by: Steve Wise --- .../cxgb3_main_to_2_6_5-7_244.patch | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_5-7_244.patch new file mode 100644 index 0000000..e6781f3 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_5-7_244.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c +index dfa035a..414ea84 100755 +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth + .get_wol = get_wol, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +- .get_perm_addr = ethtool_op_get_perm_addr + }; + + static int in_range(int val, int lo, int hi) From robert.j.woodruff at intel.com Mon Jan 15 13:29:41 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Mon, 15 Jan 2007 13:29:41 -0800 Subject: [openib-general] HCA and iWarp user space library names don't match what verbs is expecting Message-ID: I just pulled the latest userspace code from the maintainer's git trees and it looks like the latest verbs is looking for file names different than what is built for mthca, cxgb3, and amso. ib verbs is looking for file names of libmthca-rdmav2.so, libamso-rdmav2.so, and libcxgb3-rdmav2.so. The names build by the makefiles are mthca.so, amso.so, and cxgb3.so. 
Looks like the makefiles for these three need to be fixed so that it matches the names that the verbs are looking for ? woody From swise at opengridcomputing.com Mon Jan 15 13:42:02 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 15:42:02 -0600 Subject: [openib-general] HCA and iWarp user space library names don't match what verbs is expecting In-Reply-To: References: Message-ID: <1168897322.10308.69.camel@linux-q667.site> On Mon, 2007-01-15 at 13:29 -0800, Woodruff, Robert J wrote: > I just pulled the latest userspace code from the maintainer's git trees > and it looks > like the latest verbs is looking for file names different than what is > built for > mthca, cxgb3, and amso. > > ib verbs is looking for file names of libmthca-rdmav2.so, > libamso-rdmav2.so, > and libcxgb3-rdmav2.so. > The names build by the makefiles are mthca.so, amso.so, and cxgb3.so. > Looks like the makefiles for these three need to be fixed so that it > matches the names that the verbs are looking for ? > > woody Hmm. I committed the change to cxgb3 and amso to support this. And I know Roland committed libmthca because I tested all this. Did you: reconfig the libs? (./autogen.sh && ./configure) build/install them? (make && make install) Steve From robert.j.woodruff at intel.com Mon Jan 15 13:43:06 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Mon, 15 Jan 2007 13:43:06 -0800 Subject: [openib-general] HCA and iWarp user space library names don't match what verbs is expecting In-Reply-To: Message-ID: Woody wrote, >I just pulled the latest userspace code from the maintainer's git trees >and it looks >like the latest verbs is looking for file names different than what is Never mind. looks like I had a mismatch of old and new software. 
woody From rdreier at cisco.com Mon Jan 15 13:43:57 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 15 Jan 2007 13:43:57 -0800 Subject: [openib-general] HCA and iWarp user space library names don't match what verbs is expecting In-Reply-To: (Robert J. Woodruff's message of "Mon, 15 Jan 2007 13:29:41 -0800") References: Message-ID: > I just pulled the latest userspace code from the maintainer's git trees > and it looks > like the latest verbs is looking for file names different than what is > built for > mthca, cxgb3, and amso. Did you really pull the latest code? For example make sure your libcxgb3 tree has commit fb02c47a, which it seems Steve committed last Wednesday. In that tree, Makefile.am has: lib_LTLIBRARIES = src/libcxgb3.la so it should be impossible to build a plain cxgb3.so. libmthca and libamso are a little trickier since they test the libibverbs that they are being built against and do the right thing depending on libibverbs 1.0 vs. 1.1-pre. But eg libmthca with commit c03590b5 should definitely work against the latest libibverbs master branch. - R. From robert.j.woodruff at intel.com Mon Jan 15 13:45:04 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Mon, 15 Jan 2007 13:45:04 -0800 Subject: [openib-general] HCA and iWarp user space library names don'tmatch what verbs is expecting In-Reply-To: <1168897322.10308.69.camel@linux-q667.site> Message-ID: Steve wrote> >Hmm. I committed the change to cxgb3 and amso to support this. And I >know Roland committed libmthca because I tested all this. Never mind, pilot error on my part, had a mismatch of old and new S/W. 
woody From halr at voltaire.com Mon Jan 15 13:55:40 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 15 Jan 2007 16:55:40 -0500 Subject: [openib-general] is openSM running on Solaris 10_x86 In-Reply-To: <1168895150.45abecae933ee@webmail.aon.at> References: <1168895150.45abecae933ee@webmail.aon.at> Message-ID: <1168898139.32185.24779.camel@hal.voltaire.com> Hi Alois, On Mon, 2007-01-15 at 16:05, Alois wrote: > Hello, > > we have to run a small InfiniBand network with 2 Solaris machines back-to-back, without a switch, so we need a subnet manager. > > Has somebody already compiled openSM on Solaris 10_x86 ? I'm unaware of this support but maybe it does exist. OpenSM in Linux is pthreads based and already supports x86. I suspect it wouldn't be too hard to have an S10 port. It largely involves porting the vendor layer of OpenSM to the IB driver in Solaris (and whether (and how) the ability to send and receive on QPs 0 and 1 are exposed to user space). -- Hal > regards > Alois > > ------------------------------------------- > Versendet durch aonWebmail (webmail.aon.at) > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From robert.j.woodruff at intel.com Mon Jan 15 14:01:52 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Mon, 15 Jan 2007 14:01:52 -0800 Subject: [openib-general] perftest compile error Message-ID: I think the files in the userspace/perftest directory need to be updated to the latest verbs. I get this compile error when trying to compile them against the latest verbs. Not sure who maintains these. 
cc -Wall -g -D_GNU_SOURCE rdma_lat.c get_clock.c -libverbs -lrdmacm -o ib_rdma_lat rdma_lat.c: In function `pp_client_connect': rdma_lat.c:288: error: structure has no member named `private_data' rdma_lat.c:289: error: structure has no member named `private_data_len' rdma_lat.c:291: error: structure has no member named `private_data' rdma_lat.c:292: error: structure has no member named `private_data_len' rdma_lat.c:299: error: structure has no member named `private_data' rdma_lat.c: In function `pp_server_connect': rdma_lat.c:402: error: structure has no member named `private_data' rdma_lat.c:403: error: structure has no member named `private_data_len' rdma_lat.c:405: error: structure has no member named `private_data_len' rdma_lat.c:413: error: structure has no member named `private_data' make: *** [rdma_lat] Error 1 From robert.j.woodruff at intel.com Mon Jan 15 14:32:59 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Mon, 15 Jan 2007 14:32:59 -0800 Subject: [openib-general] amso compile error Message-ID: Looks like another piece of code that needs updating to the new verbs API. I am using the latest code from the git trees for userspace. config.status: creating libamso.spec config.status: creating config.h config.status: executing depfiles commands make all-am make[1]: Entering directory `/usr/src/redhat/BUILD/openib-usermode-git011507/libamso' if /bin/sh ./libtool --mode=compile --tag=CC gcc -DHAVE_CONFIG_H -I. -I. -I. -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -MF ".deps/cq.Tpo" -c -o cq.lo `test -f 'src/cq.c' || echo './'`src/cq.c; \ then mv -f ".deps/cq.Tpo" ".deps/cq.Plo"; else rm -f ".deps/cq.Tpo"; exit 1; fi mkdir .libs gcc -DHAVE_CONFIG_H -I. -I. -I. 
-g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -MF .deps/cq.Tpo -c src/cq.c -fPIC -DPIC -o .libs/cq.o src/cq.c: In function `amso_arm_cq': src/cq.c:57: error: too many arguments to function `ibv_cmd_req_notify_cq' make[1]: *** [cq.lo] Error 1 make[1]: Leaving directory `/usr/src/redhat/BUILD/openib-usermode-git011507/libamso' make: *** [all] Error 2 From swise at opengridcomputing.com Mon Jan 15 15:33:24 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 17:33:24 -0600 Subject: [openib-general] amso compile error In-Reply-To: References: Message-ID: <1168904004.10308.72.camel@linux-q667.site> Oops. I just pushed the fix for this. Thanks, Steve. On Mon, 2007-01-15 at 14:32 -0800, Woodruff, Robert J wrote: > Looks like another peice of code that needs updating to the new verbs > API. > > I am using the latest code from the git trees for userspace. > > config.status: creating libamso.spec > config.status: creating config.h > config.status: executing depfiles commands > make all-am > make[1]: Entering directory > `/usr/src/redhat/BUILD/openib-usermode-git011507/libamso' > if /bin/sh ./libtool --mode=compile --tag=CC gcc -DHAVE_CONFIG_H -I. -I. > -I. -g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo -MD -MP -MF > ".deps/cq.Tpo" -c -o cq.lo `test -f 'src/cq.c' || echo './'`src/cq.c; \ > then mv -f ".deps/cq.Tpo" ".deps/cq.Plo"; else rm -f ".deps/cq.Tpo"; > exit 1; fi > mkdir .libs > gcc -DHAVE_CONFIG_H -I. -I. -I. 
-g -Wall -D_GNU_SOURCE -g -O2 -MT cq.lo > -MD -MP -MF .deps/cq.Tpo -c src/cq.c -fPIC -DPIC -o .libs/cq.o > src/cq.c: In function `amso_arm_cq': > src/cq.c:57: error: too many arguments to function > `ibv_cmd_req_notify_cq' > make[1]: *** [cq.lo] Error 1 > make[1]: Leaving directory > `/usr/src/redhat/BUILD/openib-usermode-git011507/libamso' > make: *** [all] Error 2 > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From swise at opengridcomputing.com Mon Jan 15 16:15:55 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Mon, 15 Jan 2007 18:15:55 -0600 Subject: [openib-general] problem building user libs with build_ofa_user.sh Message-ID: <1168906555.15918.10.camel@linux-q667.site> Vlad, I'm trying to build the ofed 1.2 user package on sles9sp3 using ofabuild/build_ofa_user.sh and running into problems configuring libmthca and others. The config of libmthca fails first, with the config.log showing this error: configure:20907: gcc -o conftest -g -O2 -I../libibverbs/include -L. conftest.c -libverbs >&5 ./libibverbs.a: file not recognized: File truncated collect2: ld returned 1 exit status configure:20910: $? = 1 configure: program exited with status 1 configure: failed program was: I _think_ it is because build_ofa_user.sh creates an empty libibverbs.a file for some reason, and the sles9sp3 gcc isn't liking it... >From build_ofa_user.sh: echo "Running: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" echo > libibverbs.a if ! ( $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS="-I../libibverbs/include" LDFLAGS="-L." 
> ${conflog}.${dir} 2>&1 ) then echo "Failed to execute (in $(pwd)): $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" /bin/rm -f libibverbs.a exit 1 fi Steve. From sashak at voltaire.com Mon Jan 15 17:04:19 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 16 Jan 2007 03:04:19 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <45AB9CF2.8060508@dev.mellanox.co.il> References: <200701150246.l0F2k0dW025885@xi.cse.ohio-state.edu> <45AB9CF2.8060508@dev.mellanox.co.il> Message-ID: <20070116010419.GD16348@sashak.voltaire.com> On 17:25 Mon 15 Jan , Tziporet Koren wrote: > Dhabaleswar Panda wrote: > > > > Shaun Roland from my group (cc'ed in this e-mail) will be in charge of > > this. Vlad and Shaun can communicate. > > > > > Hi Shaun, > Please open an account in the OFA server so you will be able to have a > git tree to place your SRPM But why git tree is needed for SRPM? SRPM is binary file, no? Sasha From robert.j.woodruff at intel.com Mon Jan 15 17:00:54 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Mon, 15 Jan 2007 17:00:54 -0800 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <20070116010419.GD16348@sashak.voltaire.com> Message-ID: Sasha wrote, >On 17:25 Mon 15 Jan , Tziporet Koren wrote: > Dhabaleswar Panda wrote: > > > > Shaun Roland from my group (cc'ed in this e-mail) will be in charge of > > this. Vlad and Shaun can communicate. > > > > > Hi Shaun, > Please open an account in the OFA server so you will be able to have a > git tree to place your SRPM >But why git tree is needed for SRPM? SRPM is binary file, no? >Sasha Sasha wrote, I am not sure why a git tree is needed for RPMS, unless that is the only way to expose it from the server to the outside world. I there a way to allow people from the outside to access just a directory with the RPMS ? or tarballs for things ? 
woody _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vatsa at veritas.com Mon Jan 15 18:11:31 2007 From: vatsa at veritas.com (Sreevatsa Nagarajarao) Date: Mon, 15 Jan 2007 18:11:31 -0800 (PST) Subject: [openib-general] Stale CM callbacks In-Reply-To: <45A6751E.6080707@ichips.intel.com> References: <45A4583B.8050807@veritas.com> <45A528AF.2050105@ichips.intel.com> <45A5906F.4000107@veritas.com> <45A6751E.6080707@ichips.intel.com> Message-ID: Hi, > If you are seeing any issues with stale connections, please let me know. > It's possible that the cm is not handling things correctly. It seems that when the reset node comes back up and tries to set up a connection with a remote node, it may get a number of IB_CM_REP_ERROR, or IB_CM_REQ_ERROR events before establishing a successful connection. At the same time the remote node gets almost no errors. Is it because the remote node would have destroyed the qpairs and explicitly called ib_send_cm_dreq() earlier when it determines that a node has gone down ? Also, in a multi-node cluster, the time when a connect between the reset node and any other remote node succeeds can vary considerably because of the above errors. We have been experimenting with some of the parameters (max_cm_retries, retry_count) to ib_send_cm_req() and ib_send_cm_rep() but without success. This behaviour is preventing the cluster ports from forming within some stipulated time in our environment. We don't see these issues if we reboot a node instead of resetting it. Please let me know if you have any suggestions for us. 
Thanks, Sreevatsa > > - Sean From krkumar2 at in.ibm.com Mon Jan 15 19:52:15 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Tue, 16 Jan 2007 09:22:15 +0530 Subject: [openib-general] GIT fails to clone librdmacm and libamso Message-ID: Hi, When I run the following commands : git clone http://staging.openfabrics.org/pub/scn/~swise/libamso.git libamso git clone http://staging.openfabrics.org/pub/scn/~shefty/librdmacm.git I get error : "Cannot get remote repository information. Perhaps git-update-server-info needs to be run there?" It is working for libibverbs : git clone git://git.kernel.org/pub/scm/libs/infiniband/libibverbs.git What could the problem be ? The libamso and rdmacm are http:// links and not git, so I guess I am doing something wrong ? Thanks, - KK From halr at voltaire.com Mon Jan 15 20:42:14 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 15 Jan 2007 23:42:14 -0500 Subject: [openib-general] [PATCH][MINOR] OpenSM/osm_ucast_updn.c: Handle more malloc failures better Message-ID: <1168922532.32185.49587.camel@hal.voltaire.com> OpenSM/osm_ucast_updn.c: Handle more malloc failures better Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_ucast_updn.c b/osm/opensm/osm_ucast_updn.c index 4450b6b..59d5daa 100644 --- a/osm/opensm/osm_ucast_updn.c +++ b/osm/opensm/osm_ucast_updn.c @@ -242,6 +242,15 @@ __updn_bfs_by_node( /* Init the list pointers */ p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t)); + if (!p_nextList) + { + osm_log( p_log, OSM_LOG_ERROR, + "__updn_bfs_by_node: ERR AA14: " + "No memory for p_nextList\n" ); + OSM_LOG_EXIT( p_log ); + return 1; + } + cl_list_construct( p_nextList ); cl_list_init( p_nextList, 10 ); p_currList = p_nextList; @@ -354,6 +363,15 @@ __updn_bfs_by_node( cl_list_count(p_currList) ); /* Init the switch directed list */ p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t)); + if (!p_nextList) + { + osm_log( p_log, OSM_LOG_ERROR, + "__updn_bfs_by_node: ERR AA15: " + "No memory for p_nextList\n" ); + 
OSM_LOG_EXIT( p_log ); + return 1; + } + cl_list_construct( p_nextList ); cl_list_init( p_nextList, 10 ); /* Go over all current list items till it's empty */ @@ -705,6 +723,15 @@ updn_subn_rank( /* Init the list pointers */ p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t)); + if (!p_nextList) + { + osm_log( p_log, OSM_LOG_ERROR, + "updn_subn_rank: ERR AA15: " + "No memory for p_nextList\n" ); + OSM_LOG_EXIT( p_log ); + return 1; + } + cl_list_construct( p_nextList ); cl_list_init( p_nextList, 10 ); p_currList = p_nextList; @@ -759,6 +786,15 @@ updn_subn_rank( { rank++; p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t)); + if (!p_nextList) + { + osm_log( p_log, OSM_LOG_ERROR, + "updn_subn_rank: ERR AA16: " + "No memory for p_nextList\n" ); + OSM_LOG_EXIT( p_log ); + return 1; + } + cl_list_construct( p_nextList ); cl_list_init( p_nextList, 10 ); p_physp = (osm_physp_t*)cl_list_remove_head( p_currList ); @@ -1238,12 +1274,21 @@ __osm_updn_find_root_nodes_by_min_hop( if ( (numHopBarsOverThd1 == 1) && (numHopBarsOverThd2 == 1) ) { p_guid = malloc(sizeof(uint64_t)); - *p_guid = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); - osm_log( &p_osm->log, OSM_LOG_DEBUG, - "__osm_updn_find_root_nodes_by_min_hop: " - "Inserting GUID 0x%" PRIx64 " as root node\n", - *p_guid ); - cl_list_insert_tail(p_root_nodes_list, p_guid); + if (p_guid) + { + *p_guid = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); + osm_log( &p_osm->log, OSM_LOG_DEBUG, + "__osm_updn_find_root_nodes_by_min_hop: " + "Inserting GUID 0x%" PRIx64 " as root node\n", + *p_guid ); + cl_list_insert_tail(p_root_nodes_list, p_guid); + } + else + { + osm_log( &p_osm->log, OSM_LOG_ERROR, + "__osm_updn_find_root_nodes_by_min_hop: ERR AA13: " + "No memory for p_guid\n" ); + } } } From eitan at sw053.yok.mtl.com Mon Jan 15 21:15:03 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Tue, 16 Jan 2007 07:15:03 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-16:normal completion Message-ID: 
<200701160515.l0G5F3bb019795@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_15_09:19:24_2007 5b51a2 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From or.gerlitz at gmail.com Mon Jan 15 21:34:36 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Tue, 16 Jan 2007 07:34:36 +0200 Subject: [openib-general] ipoib ipv6 multicast joins, was: multicast code/merge status In-Reply-To: <45ABACB9.9030901@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <1168457495.18817.1746.camel@hal.voltaire.com> <45A6D245.1020106@ichips.intel.com> <20070112001956.GC17999@mellanox.co.il> <45A6DB7F.3060800@ichips.intel.com> <45A9DF69.40700@voltaire.com> <45ABACB9.9030901@ichips.intel.com> Message-ID: <15ddcffd0701152134q660aa6c6r7955fd9855eac01d@mail.gmail.com> On 1/15/07, Sean Hefty wrote: > > Can you explain how this relates to your multicast changes? 
the IPoIB > > send-only-full-member-join hack was there before your patch and stayed > > there after your patch... and how come a change in the multicast code > > can cause the error steam to be finite... have you moved the retry > > mechanism from the ib_sa consumer to the ib_sa mcast engine? > > There was a bug in the ib_sa multicast engine handling failed joins, which had > it retry forever. (Basically, the response was not being matched with the > request. So the response was discarded, and the request was retried.) I had > fixed this in svn, but lost the patch moving over to git. sure, got you. Or. From mst at mellanox.co.il Mon Jan 15 23:59:26 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 09:59:26 +0200 Subject: [openib-general] [PATCH RFC 13/21] ofed_1_2 Added gso_size for cxgb3 In-Reply-To: <20070115211944.10511.93157.stgit@dell3.ogc.int> References: <20070115211944.10511.93157.stgit@dell3.ogc.int> Message-ID: <20070116075925.GK22940@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH RFC 13/21] ofed_1_2 Added gso_size for cxgb3 > > > Added gso_size for cxgb3 > > Signed-off-by: Steve Wise > --- > > .../2.6.5_sles9_sp3/include/linux/skbuff.h | 3 +++ > 1 files changed, 3 insertions(+), 0 deletions(-) > > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h > index cc56236..8c80451 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h > @@ -41,4 +41,7 @@ static inline int skb_can_coalesce(struc > return 0; > } > > +#define gso_size tso_size > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > #endif We have defines (e.g. CHECKSUM_PARTIAL) at the top of this file, I think it'll be prettier to add this there. -- MST From mst at mellanox.co.il Tue Jan 16 00:02:01 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 16 Jan 2007 10:02:01 +0200 Subject: [openib-general] [PATCH RFC 17/21] ofed_1_2 Backport infiniband/hw/cxgb3/iwch_provider.c to sles9sp3 In-Reply-To: <20070115211953.10511.56935.stgit@dell3.ogc.int> References: <20070115211953.10511.56935.stgit@dell3.ogc.int> Message-ID: <20070116080201.GL22940@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH RFC 17/21] ofed_1_2 Backport infiniband/hw/cxgb3/iwch_provider.c to sles9sp3 > > > Backport infiniband/hw/cxgb3/iwch_provider.c to sles9sp3 > > Signed-off-by: Steve Wise > --- > > .../iwch_provider_to_2_6_5-7_244.patch | 21 +++++++++++++++++++++ > 1 files changed, 21 insertions(+), 0 deletions(-) > > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/iwch_provider_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_provider_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..2375474 > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_provider_to_2_6_5-7_244.patch > @@ -0,0 +1,21 @@ > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c > +index 28be418..86fe3e3 100644 > +--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c > ++++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c > +@@ -347,14 +347,14 @@ static int iwch_mmap(struct ib_ucontext > + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); > + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; > + vma->vm_flags &= ~VM_MAYREAD; > +- ret = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, > ++ ret = io_remap_page_range(vma, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, > + len, vma->vm_page_prot); > + } else { > + > + /* > + * Map WQ or CQ contig dma memory... > + */ > +- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, > ++ ret = remap_page_range(vma, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, > + len, vma->vm_page_prot); > + } > + Can we use define in kernel_addons to convert remap_pfn_range to remap_page_range? 
I know we do this for mthca too, but maybe this needs fixing there as well. -- MST From mst at mellanox.co.il Tue Jan 16 00:03:38 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 10:03:38 +0200 Subject: [openib-general] [PATCH RFC 18/21] ofed_1_2 Backport infiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 In-Reply-To: <20070115211955.10511.58381.stgit@dell3.ogc.int> References: <20070115211955.10511.58381.stgit@dell3.ogc.int> Message-ID: <20070116080338.GM22940@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH RFC 18/21] ofed_1_2 Backport infiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 > > > Backport infiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 > > Signed-off-by: Steve Wise > --- > > .../cxio_resource_to_2_6_5-7_244.patch | 20 ++++++++++++++++++++ > 1 files changed, 20 insertions(+), 0 deletions(-) > > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxio_resource_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxio_resource_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..20df3ec > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxio_resource_to_2_6_5-7_244.patch > @@ -0,0 +1,20 @@ > +diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c > +index d1d8722..4fb9890 100644 > +--- a/drivers/infiniband/hw/cxgb3/core/cxio_resource.c > ++++ b/drivers/infiniband/hw/cxgb3/core/cxio_resource.c > +@@ -64,13 +64,13 @@ static int __cxio_init_resource_fifo(str > + __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32)); > + if (random) { > + j = 0; > +- random_bytes = random32(); > ++ get_random_bytes(&random_bytes, 4); > + for (i = 0; i < RANDOM_SIZE; i++) > + rarray[i] = i + skip_low; > + for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { > + if (j >= RANDOM_SIZE) { > + j = 0; > +- random_bytes = random32(); > ++ get_random_bytes(&random_bytes, 4); > + } > + idx = (random_bytes >> (j * 2)) & 0xF; > + 
__kfifo_put(*fifo, Surely random32 can be a macro in kernel_addons? -- MST From mst at mellanox.co.il Tue Jan 16 00:06:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 10:06:48 +0200 Subject: [openib-general] [PATCH RFC 16/21] ofed_1_2 Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 In-Reply-To: <20070115211951.10511.9529.stgit@dell3.ogc.int> References: <20070115211951.10511.9529.stgit@dell3.ogc.int> Message-ID: <20070116080648.GN22940@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH RFC 16/21] ofed_1_2 Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 > > > Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 > > Signed-off-by: Steve Wise > --- > > .../2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch | 35 +++++++++++++++++++++++ > 1 files changed, 35 insertions(+), 0 deletions(-) > > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..af468f7 > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch > @@ -0,0 +1,35 @@ > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c > +index 3237fc8..2a38953 100644 > +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c > +@@ -234,7 +234,7 @@ static void *alloc_ep(int size, gfp_t gf > + epc = kmalloc(size, gfp); > + if (epc) { > + memset(epc, 0, size); > +- kref_init(&epc->kref); > ++ kref_init(&epc->kref, __free_ep); > + spin_lock_init(&epc->lock); > + init_waitqueue_head(&epc->waitq); > + } > +@@ -338,7 +338,7 @@ static struct rtable *find_route(struct > + } > + }; > + > +- if (ip_route_output_flow(&rt, &fl, NULL, 0)) > ++ if (ip_route_output_key(&rt, &fl)) > + return NULL; > + return rt; > + } > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h > +index 893f9d0..e54e202 100644 > +--- 
a/drivers/infiniband/hw/cxgb3/iwch_cm.h > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h > +@@ -57,7 +57,7 @@ #define MPA_FLAGS_MASK 0xE0 > + #define put_ep(ep) { \ > + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \ > + ep, atomic_read(&((ep)->kref.refcount))); \ > +- kref_put(&((ep)->kref), __free_ep); \ > ++ kref_put(&((ep)->kref)); \ > + } > + > + #define get_ep(ep) { \ It's not easy to do something abou t kref_put (we'd need to define our own struct for it, like we did for work_struct). But surely ip_route_output_key can just be a macro in kernel_addons? -- MST From mst at mellanox.co.il Tue Jan 16 00:08:20 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 10:08:20 +0200 Subject: [openib-general] [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE to drivers/net/cxgb3/Makefile In-Reply-To: <20070115211957.10511.15769.stgit@dell3.ogc.int> References: <20070115211957.10511.15769.stgit@dell3.ogc.int> Message-ID: <20070116080820.GO22940@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE to drivers/net/cxgb3/Makefile > > > Add LINUXINCLUDE to drivers/net/cxgb3/Makefile > > Signed-off-by: Steve Wise > --- > > .../cxgb3_makefile_to_2_6_5-7_244.patch | 12 ++++++++++++ > 1 files changed, 12 insertions(+), 0 deletions(-) > > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + I 
wonder why is it needed there but not in other makefiles. -- MST From mst at mellanox.co.il Tue Jan 16 00:21:50 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 10:21:50 +0200 Subject: [openib-general] [PATCH] 2.6.20 ib_cm: limit cm message timeouts In-Reply-To: <20070115050930.GK26427@mellanox.co.il> References: <20070113190100.GB13017@mellanox.co.il> <20070115050930.GK26427@mellanox.co.il> Message-ID: <20070116082150.GR22940@mellanox.co.il> > Quoting Michael S. Tsirkin : > Subject: Re: [PATCH] 2.6.20 ib_cm: limit cm message timeouts > > > Quoting Roland Dreier : > > Subject: Re: [PATCH] 2.6.20 ib_cm: limit cm message timeouts > > > > > We really need this in 2.6.20 I think. > > > > Why is this urgent? This is just a workaround for one target's buggy > > firmware, right? And I would hope there is firmware available that > > fixes this without any workaruonds on the other side ... or is that > > not the case? > > It seems not. Ishai will know. Ishai? OK, it seems there *is* a fixed FW available, our target wasn't updated. OK, let's try fixing this for 2.6.21 then. Sean, but I still would like to see a fixed patch to queue for OFED 1.2. -- MST From mst at mellanox.co.il Tue Jan 16 00:31:09 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 10:31:09 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: References: <20070116010419.GD16348@sashak.voltaire.com> Message-ID: <20070116083109.GS22940@mellanox.co.il> > Quoting Woodruff, Robert J : > Subject: Re: [openfabrics-ewg] Reminder: OFED 1.2 > > Sasha wrote, > >On 17:25 Mon 15 Jan , Tziporet Koren wrote: > > Dhabaleswar Panda wrote: > > > > > > Shaun Roland from my group (cc'ed in this e-mail) will be in charge of > > > this. Vlad and Shaun can communicate. > > > > > > > > Hi Shaun, > > Please open an account in the OFA server so you will be able to have a > > > git tree to place your SRPM > > >But why git tree is needed for SRPM? 
SRPM is binary file, no? > > >Sasha > > Sasha wrote, > I am not sure why a git tree is needed for RPMS, unless that is the only way to > expose it from the server to the outside world. I there a way to allow > people from the outside to access just a directory with the RPMS ? or > tarballs for things ? Using git for binary files does not make sense. If you want to host files on OFA server, just create a world-readable pub_html directory under $HOME and put the files there. They will be accessible as http://www.openfabrics.org/~/ . OTOH, do we really want to host a copy on OFA servers? I thought OSU already host it, we can just wget the SRPM from there. No? -- MST From mst at mellanox.co.il Tue Jan 16 00:42:57 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 10:42:57 +0200 Subject: [openib-general] [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 In-Reply-To: <20070115211917.10511.83814.stgit@dell3.ogc.int> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> Message-ID: <20070116084257.GT22940@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 > > > Here is the series to back-port the cxgb3 and iw_cxgb3 drivers to > SLES9SP3. Sorry for the large patch count, but each patch is very > small and updates or backports a specific file or service needed by the > chelsio drivers. > > It is missing one big part however: netevent notification support for > neighbour and next hop changes. I'm pondering how to support this > outside the kernel. I did port the netevent notifier service, but no > notifications are done as of yet. So this is TBD. But the drivers load > ok on SLES9SP3 and I'll be testing soon. > > Michael, if you have time, please review the various additions/changes > I've made and tell me what you think. Just trying to get some feedback > as I go along with these backports... BTW, Steve, I wouldn't start working on backports from SLES9. 
I'd start with 2.6.19 and go back to 2.6.11 over kernel.org versions, just making sure they build (we have this build environment on openfabrics.org, or it's easy to check all versions out from kernel.org git). In this way you get incrementally a host of data on what is and what isn't in which kernel version. Then adding support for a distro is just a matter of finding a closest kernel.org kernel and mostly removing stuff that distro backported from newer kernels. -- MST From mst at mellanox.co.il Tue Jan 16 00:59:50 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 10:59:50 +0200 Subject: [openib-general] GIT fails to clone librdmacm and libamso In-Reply-To: References: Message-ID: <20070116085950.GW22940@mellanox.co.il> All, please run git-update-server-info on your repos if you didn't already. Quoting Krishna Kumar2 : Subject: GIT fails to clone librdmacm and libamso Hi, When I run the following commands : git clone http://staging.openfabrics.org/pub/scn/~swise/libamso.git libamso git clone http://staging.openfabrics.org/pub/scn/~shefty/librdmacm.git I get error : "Cannot get remote repository information. Perhaps git-update-server-info needs to be run there?" It is working for libibverbs : git clone git://git.kernel.org/pub/scm/libs/infiniband/libibverbs.git What could the problem be ? The libamso and rdmacm are http:// links and not git, so I guess I am doing something wrong ? Thanks, - KK _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From mst at mellanox.co.il Tue Jan 16 01:04:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 16 Jan 2007 11:04:30 +0200 Subject: [openib-general] problem building user libs with build_ofa_user.sh In-Reply-To: <1168906555.15918.10.camel@linux-q667.site> References: <1168906555.15918.10.camel@linux-q667.site> Message-ID: <20070116090430.GX22940@mellanox.co.il> > Quoting Steve WIse : > Subject: problem building user libs with build_ofa_user.sh > > Vlad, > > I'm trying to build the ofed 1.2 user package on sles9sp3 using > ofabuild/build_ofa_user.sh and running into problems configuring > libmthca and others. The config of libmthca fails first, with the > config.log showing this error: > > configure:20907: gcc -o conftest -g -O2 -I../libibverbs/include -L. conftest.c -libverbs >&5 > ./libibverbs.a: file not recognized: File truncated > collect2: ld returned 1 exit status > configure:20910: $? = 1 > configure: program exited with status 1 > configure: failed program was: > > > I _think_ it is because build_ofa_user.sh creates an empty libibverbs.a > file for some reason, and the sles9sp3 gcc isn't liking it... We create it just so that configure passes. This is weird - works fine for us on same system. gcc should not even look at that file since all symbols are resolved. What does gcc --version report? > >From build_ofa_user.sh: > > echo "Running: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" > echo > libibverbs.a > if ! ( $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS="-I../libibverbs/include" LDFLAGS="-L." > ${conflog}.${dir} 2>&1 ) > then > echo "Failed to execute (in $(pwd)): $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" > /bin/rm -f libibverbs.a > exit 1 > fi > > > Steve. Try replacing 'echo > libibverbs.a' with 'ar qc liby.a' - does this help? -- MST From mst at mellanox.co.il Tue Jan 16 01:10:49 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 16 Jan 2007 11:10:49 +0200 Subject: [openib-general] problem building user libs with build_ofa_user.sh In-Reply-To: <20070116090430.GX22940@mellanox.co.il> References: <1168906555.15918.10.camel@linux-q667.site> <20070116090430.GX22940@mellanox.co.il> Message-ID: <20070116091049.GY22940@mellanox.co.il> > Try replacing 'echo > libibverbs.a' with 'ar qc liby.a' - does this help? Sorry, should have been ar qc libibverbs.a. -- MST From dotanb at dev.mellanox.co.il Tue Jan 16 01:51:33 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 11:51:33 +0200 Subject: [openib-general] [libibcm] [PATCH] Added checks to memory allocation failure when using asprintf Message-ID: <1168941094.12444.2.camel@mtls05.yok.mtl.com> Added checks to memory allocation failure when using asprintf. Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/libibcm/src/cm.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibcm/src/cm.c 2007-01-15 17:02:21.000000000 +0200 +++ gen2_devel_user/src/userspace/libibcm/src/cm.c 2007-01-16 10:12:28.000000000 +0200 @@ -150,8 +150,9 @@ struct ib_cm_device* ib_cm_open_device(s dev->device_context = device_context; - asprintf(&dev_path, "/dev/infiniband/ucm%s", - device_context->device->dev_name + sizeof("uverbs") - 1); + if (asprintf(&dev_path, "/dev/infiniband/ucm%s", + device_context->device->dev_name + sizeof("uverbs") - 1) < 0) + goto err2; dev->fd = open(dev_path, O_RDWR); if (dev->fd < 0) { @@ -164,6 +165,7 @@ struct ib_cm_device* ib_cm_open_device(s err: free(dev_path); +err2: free(dev); return NULL; } From dotanb at dev.mellanox.co.il Tue Jan 16 01:54:19 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 11:54:19 +0200 Subject: [openib-general] [perftest] [PATCH] Added checks to memory allocation failure when using asprintf Message-ID: <1168941259.12444.4.camel@mtls05.yok.mtl.com> Added checks to memory 
allocation failure when using asprintf. Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/perftest/rdma_bw.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/rdma_bw.c 2007-01-15 19:11:32.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/rdma_bw.c 2007-01-16 10:17:07.000000000 +0200 @@ -134,7 +134,9 @@ static struct pingpong_context *pp_clien struct pingpong_context *ctx = NULL; struct rdma_conn_param conn_param; - asprintf(&service, "%d", data->port); + if (asprintf(&service, "%d", data->port) < 0) + goto err4; + n = getaddrinfo(data->servername, service, &hints, &res); if (n < 0) { @@ -324,7 +326,9 @@ static struct pingpong_context *pp_serve struct rdma_cm_id *child_cm_id; struct rdma_conn_param conn_param; - asprintf(&service, "%d", data->port); + if (asprintf(&service, "%d", data->port)) + goto err5; + if ( (n = getaddrinfo(NULL, service, &hints, &res)) < 0 ) { fprintf(stderr, "%d:%s: %s for port %d\n", pid, __func__, gai_strerror(n), data->port); Index: gen2_devel_user/src/userspace/perftest/rdma_lat.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/rdma_lat.c 2007-01-15 19:11:32.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/rdma_lat.c 2007-01-16 10:17:36.000000000 +0200 @@ -208,7 +208,9 @@ static struct pingpong_context *pp_clien struct pingpong_context *ctx = NULL; struct rdma_conn_param conn_param; - asprintf(&service, "%d", data->port); + if (asprintf(&service, "%d", data->port) < 0) + goto err4; + n = getaddrinfo(data->servername, service, &hints, &res); if (n < 0) { @@ -368,7 +370,9 @@ static struct pingpong_context *pp_serve struct rdma_cm_id *child_cm_id; struct rdma_conn_param conn_param; - asprintf(&service, "%d", data->port); + if (asprintf(&service, "%d", data->port) < 0) + goto err5; + if ( (n = getaddrinfo(NULL, service, &hints, &res)) < 0 ) { fprintf(stderr, "%d:%s: %s for 
port %d\n", pid, __func__, gai_strerror(n), data->port); Index: gen2_devel_user/src/userspace/perftest/read_bw.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/read_bw.c 2007-01-15 17:02:22.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/read_bw.c 2007-01-16 10:18:26.000000000 +0200 @@ -117,7 +117,9 @@ static int pp_client_connect(const char int n; int sockfd = -1; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -195,7 +197,9 @@ int pp_server_connect(int port) int sockfd = -1, connfd; int n; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/perftest/read_lat.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/read_lat.c 2007-01-15 17:02:22.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/read_lat.c 2007-01-16 10:19:03.000000000 +0200 @@ -189,7 +189,9 @@ static int pp_client_connect(const char int n; int sockfd = -1; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -237,7 +239,9 @@ static int pp_server_connect(int port) int sockfd = -1, connfd; int n; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/perftest/send_bw.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/send_bw.c 2007-01-15 17:02:22.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/send_bw.c 2007-01-16 10:19:33.000000000 +0200 @@ -127,7 +127,9 @@ static int pp_client_connect(const char int 
n; int sockfd = -1; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -205,7 +207,9 @@ int pp_server_connect(int port) int sockfd = -1, connfd; int n; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/perftest/send_lat.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/send_lat.c 2007-01-15 17:02:22.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/send_lat.c 2007-01-16 10:28:05.000000000 +0200 @@ -198,7 +198,9 @@ static int pp_client_connect(const char int n; int sockfd = -1; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -246,7 +248,9 @@ static int pp_server_connect(int port) int sockfd = -1, connfd; int n; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/perftest/write_bw.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/write_bw.c 2007-01-15 17:02:22.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/write_bw.c 2007-01-16 10:26:23.000000000 +0200 @@ -126,7 +126,9 @@ static int pp_client_connect(const char int n; int sockfd = -1; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -204,7 +206,9 @@ int pp_server_connect(int port) int sockfd = -1, connfd; int n; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: 
gen2_devel_user/src/userspace/perftest/write_bw_postlist.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/write_bw_postlist.c 2007-01-15 17:02:22.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/write_bw_postlist.c 2007-01-16 10:26:52.000000000 +0200 @@ -126,7 +126,9 @@ static int pp_client_connect(const char int n; int sockfd = -1; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -204,7 +206,9 @@ int pp_server_connect(int port) int sockfd = -1, connfd; int n; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/perftest/write_lat.c =================================================================== --- gen2_devel_user.orig/src/userspace/perftest/write_lat.c 2007-01-15 17:02:21.000000000 +0200 +++ gen2_devel_user/src/userspace/perftest/write_lat.c 2007-01-16 10:27:38.000000000 +0200 @@ -186,7 +186,9 @@ static int pp_client_connect(const char int n; int sockfd = -1; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -234,7 +236,9 @@ static int pp_server_connect(int port) int sockfd = -1, connfd; int n; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return -1; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { From dotanb at dev.mellanox.co.il Tue Jan 16 01:55:29 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 11:55:29 +0200 Subject: [openib-general] [libibverbs] [PATCH] Added checks to memory allocation failure when using asprintf Message-ID: <1168941329.12444.6.camel@mtls05.yok.mtl.com> Added checks to memory allocation failure when using asprintf. 
Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/libibverbs/src/device.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/src/device.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/src/device.c 2007-01-16 09:48:19.000000000 +0200 @@ -111,7 +111,8 @@ struct ibv_context *ibv_open_device(stru int cmd_fd; struct ibv_context *context; - asprintf(&devpath, "/dev/infiniband/%s", device->dev_name); + if (asprintf(&devpath, "/dev/infiniband/%s", device->dev_name) < 0) + return NULL; /* * We'll only be doing writes, but we need O_RDWR in case the Index: gen2_devel_user/src/userspace/libibverbs/src/sysfs.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/src/sysfs.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/src/sysfs.c 2007-01-16 09:48:44.000000000 +0200 @@ -82,7 +82,8 @@ int ibv_read_sysfs_file(const char *dir, int fd; int len; - asprintf(&path, "%s/%s", dir, file); + if (asprintf(&path, "%s/%s", dir, file) < 0) + return -1; fd = open(path, O_RDONLY); if (fd < 0) { From dotanb at dev.mellanox.co.il Tue Jan 16 01:56:53 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 11:56:53 +0200 Subject: [openib-general] [libibverbs] [PATCH] Fix some memory leaks in case of error flows during driver init Message-ID: <1168941413.12444.8.camel@mtls05.yok.mtl.com> Fix some memory leaks in case of error flows during driver init. 
Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/libibverbs/src/init.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/src/init.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/src/init.c 2007-01-15 18:16:07.000000000 +0200 @@ -312,11 +312,14 @@ static void read_config(void) if (stat(path, &buf)) { fprintf(stderr, PFX "Warning: couldn't stat config file '%s'.\n", path); + free(path); continue; } - if (!S_ISREG(buf.st_mode)) + if (!S_ISREG(buf.st_mode)) { + free(path); continue; + } read_config_file(path); free(path); From dotanb at dev.mellanox.co.il Tue Jan 16 01:58:37 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 11:58:37 +0200 Subject: [openib-general] [libibverbs/examples] pingpong tests cleanup (resource destroy + ack events + asprintf failure check) Message-ID: <1168941517.12444.11.camel@mtls05.yok.mtl.com> Added resource cleaning before end of test + ack to CQ events. Added checks to memory allocation failure when using asprintf. 
Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-16 09:56:52.000000000 +0200 @@ -139,7 +139,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -205,7 +207,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { @@ -322,7 +326,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -374,6 +378,46 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 
0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -622,6 +666,8 @@ int main(int argc, char *argv[]) return 1; } + ibv_ack_cq_events(ev_cq, 1); + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -706,5 +752,11 @@ int main(int argc, char *argv[]) iters, usec / 1000000., usec / iters); } + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } Index: gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-16 09:57:25.000000000 +0200 @@ -150,7 +150,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -229,7 +231,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { @@ -358,7 +362,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -424,6 +428,55 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx, int num_qp) +{ + int i; + + for (i = 0; i < num_qp; ++i) { + if (ibv_destroy_qp(ctx->qp[i])) { + fprintf(stderr, "Couldn't destroy QP[%d]\n", i); + return 1; + } + } + + if (ibv_destroy_srq(ctx->srq)) { + fprintf(stderr, "Couldn't 
destroy SRQ\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -710,6 +763,8 @@ int main(int argc, char *argv[]) return 1; } + ibv_ack_cq_events(ev_cq, 1); + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -801,5 +856,11 @@ int main(int argc, char *argv[]) iters, usec / 1000000., usec / iters); } + if (pp_close_ctx(ctx, num_qp)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } Index: gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-16 09:57:54.000000000 +0200 @@ -127,7 +127,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -193,7 +195,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, 
&res); if (n < 0) { @@ -310,7 +314,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -362,6 +366,46 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -610,6 +654,8 @@ int main(int argc, char *argv[]) return 1; } + ibv_ack_cq_events(ev_cq, 1); + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -694,5 +740,11 @@ int main(int argc, char *argv[]) iters, usec / 1000000., usec / iters); } + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } Index: gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-16 09:58:22.000000000 +0200 @@ -128,7 +128,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; 
struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -194,7 +196,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { @@ -311,7 +315,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -363,6 +367,51 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_destroy_ah(ctx->ah)) { + fprintf(stderr, "Couldn't destroy AH\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -608,6 +657,8 @@ int main(int argc, char *argv[]) return 1; } + ibv_ack_cq_events(ev_cq, 1); + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -692,5 +743,11 @@ int main(int argc, char *argv[]) iters, usec / 1000000., 
usec / iters); } + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } From dotanb at dev.mellanox.co.il Tue Jan 16 02:56:59 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 12:56:59 +0200 Subject: [openib-general] [srptools] [PATCH] style fix in asprintf result check Message-ID: <1168945019.12444.14.camel@mtls05.yok.mtl.com> Check that the result of asprintf is negative (as is done in all the other asprintf checks). Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/srptools/srp_daemon/srp_daemon.c =================================================================== --- gen2_devel_user.orig/src/userspace/srptools/srp_daemon/srp_daemon.c 2007-01-15 17:02:21.000000000 +0200 +++ gen2_devel_user/src/userspace/srptools/srp_daemon/srp_daemon.c 2007-01-16 10:02:50.000000000 +0200 @@ -1209,7 +1209,7 @@ static int umad_resources_create(struct ret = asprintf(&umad_res->port_sysfs_path, "%s/class/infiniband/%s/ports/%d", sysfs_path, config->dev_name, config->port_num); - if (ret == -1) { + if (ret < 0) { umad_res->port_sysfs_path = NULL; return -ENOMEM; } From dotanb at dev.mellanox.co.il Tue Jan 16 02:58:48 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 12:58:48 +0200 Subject: [openib-general] [srptools] [PATCH] Added checks to memory allocation failure when using asprintf Message-ID: <1168945128.12444.17.camel@mtls05.yok.mtl.com> Added checks to memory allocation failure when using asprintf.
Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/srptools/src/srp-dm.c =================================================================== --- gen2_devel_user.orig/src/userspace/srptools/src/srp-dm.c 2007-01-15 17:02:21.000000000 +0200 +++ gen2_devel_user/src/userspace/srptools/src/srp-dm.c 2007-01-16 10:07:47.000000000 +0200 @@ -128,7 +128,8 @@ static int read_file(const char *dir, co int fd; int len; - asprintf(&path, "%s/%s", dir, file); + if (asprintf(&path, "%s/%s", dir, file) < 0) + return -1; fd = open(path, O_RDONLY); if (fd < 0) @@ -185,8 +186,11 @@ static int setup_port_sysfs_path(void) { return -1; } - asprintf(&port_sysfs_path, "%s/class/infiniband/%s/ports/%s", - sysfs_path, ibdev, ibport); + if (asprintf(&port_sysfs_path, "%s/class/infiniband/%s/ports/%s", + sysfs_path, ibdev, ibport) < 0) { + fprintf(stderr, "Couldn't allocate memory\n"); + return -1; + } return 0; } From karthiks at cdac.in Tue Jan 16 03:50:40 2007 From: karthiks at cdac.in (Karthik Sarangan) Date: Tue, 16 Jan 2007 16:50:40 +0500 Subject: [openib-general] compiling gen2 kdapl (jlentini) Message-ID: <1168948240.4011.22.camel@karthiks-lnx> Target: Compiling open-iser-target (Voltaire) and kdapl target for gen2 openib I have downloaded kdapl for openib-gen2 from https://svn.openfabrics.org/svn/openib/gen2/users/jlentini as well as the open-iser-target (voltaire) How do I compile the open-iser-target with kdapl for RHEL 4 U3 on Intel Xeon 64-bit? Thanking you, Karthik S. From ogerlitz at voltaire.com Tue Jan 16 03:36:26 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 13:36:26 +0200 (IST) Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch Message-ID: Hi Michael, I have just realized that a) this patch was not pushed upstream and b) the --same-- instance of it is kept on all backports of both OFED 1.1 & 1.2 staging It also does not have a changelog comment and Signed-Off-By signature... Can you shed some light on what's going on here?
thanks, Or. # pwd /home/ogerlitz/OFED-1.1/SOURCES/openib-1.1/kernel_patches # find . -name \*ipoib\* | grep 8111 | xargs ls -l -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.11_FC4/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.11/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.12/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.13/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.13_suse10_0_u/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.14/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.15/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.16/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.16_sles10/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.9/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.9_U3/ipoib_8111_to_2_6_16.patch -rw-r--r-- 1 1078 101 2616 Oct 19 16:21 ./backport/2.6.9_U4/ipoib_8111_to_2_6_16.patch Index: openib_branch1.0/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- openib_branch1.0.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ openib_branch1.0/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -82,6 +82,9 @@ static const u8 ipv4_bcast_addr[] = { struct workqueue_struct *ipoib_workqueue; +static DEFINE_SPINLOCK(ipoib_all_neigh_list_lock); +static LIST_HEAD(ipoib_all_neigh_list); + static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); @@ -751,6 +754,17 @@ static void ipoib_neigh_destructor(struc unsigned long flags; struct ipoib_ah *ah = NULL; + struct ipoib_neigh *tn, *nn = NULL; + spin_lock(&ipoib_all_neigh_list_lock); + list_for_each_entry(tn, &ipoib_all_neigh_list, all_neigh_list) + if 
(tn->neighbour == n) { + nn = tn; + break; + } + spin_unlock(&ipoib_all_neigh_list_lock); + if (!nn) + return; + ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n", be32_to_cpup((__be32 *) n->ha), @@ -783,19 +797,33 @@ struct ipoib_neigh *ipoib_neigh_alloc(st neigh->neighbour = neighbour; *to_ipoib_neigh(neighbour) = neigh; + spin_lock(&ipoib_all_neigh_list_lock); + list_add_tail(&neigh->all_neigh_list, &ipoib_all_neigh_list); + neigh->neighbour->ops->destructor = ipoib_neigh_destructor; + spin_unlock(&ipoib_all_neigh_list_lock); + return neigh; } void ipoib_neigh_free(struct ipoib_neigh *neigh) { + struct ipoib_neigh *nn; + spin_lock(&ipoib_all_neigh_list_lock); + list_del(&neigh->all_neigh_list); + list_for_each_entry(nn, &ipoib_all_neigh_list, all_neigh_list) + if (nn->neighbour->ops == neigh->neighbour->ops) + goto found; + + neigh->neighbour->ops->destructor = NULL; +found: + spin_unlock(&ipoib_all_neigh_list_lock); + *to_ipoib_neigh(neigh->neighbour) = NULL; kfree(neigh); } static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) { - parms->neigh_destructor = ipoib_neigh_destructor; - return 0; } Index: openib_branch1.0/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- openib_branch1.0.orig/drivers/infiniband/ulp/ipoib/ipoib.h +++ openib_branch1.0/drivers/infiniband/ulp/ipoib/ipoib.h @@ -47,6 +47,8 @@ #include #include +#include + #include #include @@ -217,6 +219,7 @@ struct ipoib_neigh { struct neighbour *neighbour; + struct list_head all_neigh_list; struct list_head list; }; From mst at mellanox.co.il Tue Jan 16 03:40:04 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 16 Jan 2007 13:40:04 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: References: Message-ID: <20070116114004.GZ22940@mellanox.co.il> > Quoting Or Gerlitz : > Subject: OFED ipoib_8111_to_2_6_16.patch > > Hi Michael, > > I have just realized that > > a) this patch was not pushed upstream > > and > > b) the --same-- instance of it is kept on all backports of both OFED 1.1 & 1.2 staging > > It also does not have a changelog comment and Signed-Off-By signature... > > Can you shed some light on what's going on here? > > thanks, > > Or. Isn't it obvious from the name? It's a backport for kernels <= 2.6.16. -- MST From dotanb at dev.mellanox.co.il Tue Jan 16 03:54:18 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 16 Jan 2007 13:54:18 +0200 Subject: [openib-general] does the libibverbs support static linkage? In-Reply-To: References: <1606.85.65.223.184.1167729070.squirrel@dev.mellanox.co.il> <45A0B02E.1020105@dev.mellanox.co.il> <45A1EC8B.2050106@dev.mellanox.co.il> <45ABA129.9080000@dev.mellanox.co.il> Message-ID: <45ACBCEA.6080808@dev.mellanox.co.il> Roland Dreier wrote: > > Did you push the static library link support in the last fixes/changes > > that you made? > > Yes, it should be working better now. > I don't know what i tried to do several days ago (or on which driver version), but now it works without any problem. I'm adding this test case to the regression, to make sure that it won't be broken in the future. thanks Dotan From ogerlitz at voltaire.com Tue Jan 16 04:14:29 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 14:14:29 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <20070116114004.GZ22940@mellanox.co.il> References: <20070116114004.GZ22940@mellanox.co.il> Message-ID: <45ACC1A5.7070603@voltaire.com> Michael S. Tsirkin wrote: >> Quoting Or Gerlitz : >> Subject: OFED ipoib_8111_to_2_6_16.patch > Isn't it obvious from the name? 
sure, thanks for the clarification. > It's a backport for kernels <= 2.6.16. Can you please send (and add to OFED 1.2) a changelog comment explaining the problem and how it is solved in 2.6.17 and above ?! We are looking on some code around ipoib_neigh_destructor() and friends and the changelog would really be of help to us. Thanks, Or. From mst at mellanox.co.il Tue Jan 16 04:27:15 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 14:27:15 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <45ACC1A5.7070603@voltaire.com> References: <20070116114004.GZ22940@mellanox.co.il> <45ACC1A5.7070603@voltaire.com> Message-ID: <20070116122715.GA22940@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: OFED ipoib_8111_to_2_6_16.patch > > Michael S. Tsirkin wrote: > >> Quoting Or Gerlitz : > >> Subject: OFED ipoib_8111_to_2_6_16.patch > > > Isn't it obvious from the name? > > sure, thanks for the clarification. > > > It's a backport for kernels <= 2.6.16. > > Can you please send (and add to OFED 1.2) a changelog comment explaining > the problem and how it is solved in 2.6.17 and above ?! > > We are looking on some code around ipoib_neigh_destructor() and friends > and the changelog would really be of help to us. Try this git log -Sneigh_destructor -- include/net/neighbour.h -- MST From ogerlitz at voltaire.com Tue Jan 16 05:28:22 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 15:28:22 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <20070116122715.GA22940@mellanox.co.il> References: <20070116114004.GZ22940@mellanox.co.il> <45ACC1A5.7070603@voltaire.com> <20070116122715.GA22940@mellanox.co.il> Message-ID: <45ACD2F6.8010705@voltaire.com> Michael S. Tsirkin wrote: >>> It's a backport for kernels <= 2.6.16. >> Can you please send (and add to OFED 1.2) a changelog comment explaining >> the problem and how it is solved in 2.6.17 and above ?! 
>> We are looking on some code around ipoib_neigh_destructor() and friends >> and the changelog would really be of help to us. > Try this > git log -Sneigh_destructor -- include/net/neighbour.h produced nothing on my net-2.6.20 git however browsing the git log i see this patch, is this the one you refer to? also, having that at (my) hand does not remove the need that you will set a changelog/signature for the OFED ipoib related backport patch. > commit c5ecd62c25400a3c6856e009f84257d5bd03f03b > Author: Michael S. Tsirkin > Date: Mon Mar 20 22:25:41 2006 -0800 > > [NET]: Move destructor from neigh->ops to neigh_params > > struct neigh_ops currently has a destructor field, which no in-kernel > drivers outside of infiniband use. The infiniband/ulp/ipoib in-tree > driver stashes some info in the neighbour structure (the results of > the second-stage lookup from ARP results to real link-level path), and > it uses neigh->ops->destructor to get a callback so it can clean up > this extra info when a neighbour is freed. We've run into problems > with this: since the destructor is in an ops field that is shared > between neighbours that may belong to different net devices, there's > no way to set/clear it safely. > > The following patch moves this field to neigh_parms where it can be > safely set, together with its twin neigh_setup. Two additional > patches in the patch series update ipoib to use this new interface. > > Signed-off-by: Michael S. Tsirkin > Signed-off-by: Roland Dreier > Signed-off-by: David S. Miller > From jlentini at netapp.com Tue Jan 16 05:38:18 2007 From: jlentini at netapp.com (James Lentini) Date: Tue, 16 Jan 2007 08:38:18 -0500 (EST) Subject: [openib-general] compiling gen2 kdapl (jlentini) In-Reply-To: <1168948240.4011.22.camel@karthiks-lnx> References: <1168948240.4011.22.camel@karthiks-lnx> Message-ID: That iSER target code is just a reference. It requires an unrelease iSCSI module and as a result is not usable. 
On Tue, 16 Jan 2007, Karthik Sarangan wrote: > Target: Compiling open-iser-target (Voltaire) and kdapl target for gen2 > openib > > I have downloaded kdapl for openib-gen2 from > https://svn.openfabrics.org/svn/openib/gen2/users/jlentini > as well as the open-iser-target (voltaire) > > > How do I compile the open-iser-target with kdapl for RHEL 4 U3 on Intel > Xeon 64 bi? > > Thanking you, > > Karthik S. > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mst at mellanox.co.il Tue Jan 16 05:35:07 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 15:35:07 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <45ACD2F6.8010705@voltaire.com> References: <45ACD2F6.8010705@voltaire.com> Message-ID: <20070116133507.GC22940@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: OFED ipoib_8111_to_2_6_16.patch > > Michael S. Tsirkin wrote: > >>> It's a backport for kernels <= 2.6.16. > > >> Can you please send (and add to OFED 1.2) a changelog comment explaining > >> the problem and how it is solved in 2.6.17 and above ?! > > >> We are looking on some code around ipoib_neigh_destructor() and friends > >> and the changelog would really be of help to us. > > > Try this > > git log -Sneigh_destructor -- include/net/neighbour.h > > produced nothing on my net-2.6.20 git however browsing the git log i see > this patch, is this the one you refer to? Yes. > also, having that at (my) hand does not remove the need that you will > set a changelog/signature for the OFED ipoib related backport patch. Feel free to add that. -- MST From mst at mellanox.co.il Tue Jan 16 05:41:19 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 16 Jan 2007 15:41:19 +0200 Subject: [openib-general] [PATCH incremental] IB/ipoib_cm: interoperability fix Message-ID: <20070116134119.GD22940@mellanox.co.il> We don't support creating UC connections at the moment, so we should fall back to UD if remote side only supports UC, but not RC. Signed-off-by: Michael S. Tsirkin --- Roland, This is an incremental patch, please apply in -mm on top of the last one. Alternatively, I can repost the whole patch. Let me know. diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bb35d05..8082d50 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -396,7 +396,8 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev); #define IPOIB_FLAGS_RC 0x80 #define IPOIB_FLAGS_UC 0x40 -#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC | IPOIB_FLAGS_UC)) +/* We don't support UC connections at the moment */ +#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) static inline int ipoib_cm_admin_enabled(struct net_device *dev) { -- MST From swise at opengridcomputing.com Tue Jan 16 06:08:58 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:08:58 -0600 Subject: [openib-general] [PATCH RFC 18/21] ofed_1_2 Backport infiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 In-Reply-To: <20070116080338.GM22940@mellanox.co.il> References: <20070115211955.10511.58381.stgit@dell3.ogc.int> <20070116080338.GM22940@mellanox.co.il> Message-ID: <1168956538.17859.1.camel@linux-q667.site> > Surely random32 can be a macro in kernel_addons? > I started out trying to back-port random32(). But it was too hairy and not worth the effort IMO. I don't know how make random32() use get_random_bytes() in a macro and return the value. Show me how. 
From swise at opengridcomputing.com Tue Jan 16 06:11:34 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:11:34 -0600 Subject: [openib-general] [PATCH RFC 16/21] ofed_1_2 Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 In-Reply-To: <20070116080648.GN22940@mellanox.co.il> References: <20070115211951.10511.9529.stgit@dell3.ogc.int> <20070116080648.GN22940@mellanox.co.il> Message-ID: <1168956694.17859.6.camel@linux-q667.site> On Tue, 2007-01-16 at 10:06 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: [PATCH RFC 16/21] ofed_1_2 Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 > > > > > > Backport infinband/hw/cxgb3/iwch_cm.[ch] to sles9sp3 > > > > Signed-off-by: Steve Wise > > --- > > > > .../2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch | 35 +++++++++++++++++++++++ > > 1 files changed, 35 insertions(+), 0 deletions(-) > > > > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch > > new file mode 100644 > > index 0000000..af468f7 > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch > > @@ -0,0 +1,35 @@ > > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c > > +index 3237fc8..2a38953 100644 > > +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c > > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c > > +@@ -234,7 +234,7 @@ static void *alloc_ep(int size, gfp_t gf > > + epc = kmalloc(size, gfp); > > + if (epc) { > > + memset(epc, 0, size); > > +- kref_init(&epc->kref); > > ++ kref_init(&epc->kref, __free_ep); > > + spin_lock_init(&epc->lock); > > + init_waitqueue_head(&epc->waitq); > > + } > > +@@ -338,7 +338,7 @@ static struct rtable *find_route(struct > > + } > > + }; > > + > > +- if (ip_route_output_flow(&rt, &fl, NULL, 0)) > > ++ if (ip_route_output_key(&rt, &fl)) > > + return NULL; > > + return rt; > > + } > > +diff --git 
a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h > > +index 893f9d0..e54e202 100644 > > +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h > > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h > > +@@ -57,7 +57,7 @@ #define MPA_FLAGS_MASK 0xE0 > > + #define put_ep(ep) { \ > > + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \ > > + ep, atomic_read(&((ep)->kref.refcount))); \ > > +- kref_put(&((ep)->kref), __free_ep); \ > > ++ kref_put(&((ep)->kref)); \ > > + } > > + > > + #define get_ep(ep) { \ > > It's not easy to do something about kref_put (we'd need to define our own struct > for it, like we did for work_struct). > But surely ip_route_output_key can just be a macro in kernel_addons? > It could, but I was worried that if we did that and someone used it expecting the sock and flags parameters to mean something, then they'd get hosed. And since the kref stuff was already being patched, I just went ahead and used ip_route_output_key directly. But I'll go ahead and add an ip_route_output_flow() backport. From swise at opengridcomputing.com Tue Jan 16 06:14:53 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:14:53 -0600 Subject: [openib-general] [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE to drivers/net/cxgb3/Makefile In-Reply-To: <20070116080820.GO22940@mellanox.co.il> References: <20070115211957.10511.15769.stgit@dell3.ogc.int> <20070116080820.GO22940@mellanox.co.il> Message-ID: <1168956893.17859.11.camel@linux-q667.site> On Tue, 2007-01-16 at 10:08 +0200, Michael S. 
Tsirkin wrote: > > Quoting Steve Wise : > > Subject: [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE to drivers/net/cxgb3/Makefile > > > > > > Add LINUXINCLUDE to drivers/net/cxgb3/Makefile > > > > Signed-off-by: Steve Wise > > --- > > > > .../cxgb3_makefile_to_2_6_5-7_244.patch | 12 ++++++++++++ > > 1 files changed, 12 insertions(+), 0 deletions(-) > > > > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch > > new file mode 100644 > > index 0000000..ad7e7f4 > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch > > @@ -0,0 +1,12 @@ > > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > > +index 3434679..bb008b6 100755 > > +--- a/drivers/net/cxgb3/Makefile > > ++++ b/drivers/net/cxgb3/Makefile > > +@@ -1,6 +1,7 @@ > > + # > > + # Chelsio T3 driver > > + # > > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > > + > > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > > + > > I wonder why is it needed there but not in other makefiles. > This one took me a while to figure out!!! For the life of me, I couldn't get drivers/net/cxbg3 to get passed in the search path for the addons. Anyway, drivers/infiniband/Makefile get patched so all the infiniband subdirs have it defined. drivers/net doesn't. See kernel_patches/.../top_8109_to_2_6_5-7_244.patch From swise at opengridcomputing.com Tue Jan 16 06:16:12 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:16:12 -0600 Subject: [openib-general] [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 In-Reply-To: <20070116084257.GT22940@mellanox.co.il> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> <20070116084257.GT22940@mellanox.co.il> Message-ID: <1168956973.17859.13.camel@linux-q667.site> On Tue, 2007-01-16 at 10:42 +0200, Michael S. 
Tsirkin wrote: > > Quoting Steve Wise : > > Subject: [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 > > > > > > Here is the series to back-port the cxgb3 and iw_cxgb3 drivers to > > SLES9SP3. Sorry for the large patch count, but each patch is very > > small and updates or backports a specific file or service needed by the > > chelsio drivers. > > > > It is missing one big part however: netevent notification support for > > neighbour and next hop changes. I'm pondering how to support this > > outside the kernel. I did port the netevent notifier service, but no > > notifications are done as of yet. So this is TBD. But the drivers load > > ok on SLES9SP3 and I'll be testing soon. > > > > Michael, if you have time, please review the various additions/changes > > I've made and tell me what you think. Just trying to get some feedback > > as I go along with these backports... > > BTW, Steve, I wouldn't start working on backports from SLES9. > I'd start with 2.6.19 and go back to 2.6.11 over kernel.org versions, > just making sure they build (we have this build environment on openfabrics.org, > or it's easy to check all versions out from kernel.org git). > In this way you get incrementally a host of data on what is and what isn't in > which kernel version. > > Then adding support for a distro is just a matter of finding a closest > kernel.org kernel and mostly removing stuff that distro backported from newer kernels. > As I jumped into sles9sp3 I realized that's what was done previously. On the other hand, jumping to sles9sp3 probably hit all the backports needed. 
:-) From swise at opengridcomputing.com Tue Jan 16 06:19:34 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:19:34 -0600 Subject: [openib-general] problem building user libs with build_ofa_user.sh In-Reply-To: <20070116090430.GX22940@mellanox.co.il> References: <1168906555.15918.10.camel@linux-q667.site> <20070116090430.GX22940@mellanox.co.il> Message-ID: <1168957174.17859.17.camel@linux-q667.site> On Tue, 2007-01-16 at 11:04 +0200, Michael S. Tsirkin wrote: > > Quoting Steve WIse : > > Subject: problem building user libs with build_ofa_user.sh > > > > Vlad, > > > > I'm trying to build the ofed 1.2 user package on sles9sp3 using > > ofabuild/build_ofa_user.sh and running into problems configuring > > libmthca and others. The config of libmthca fails first, with the > > config.log showing this error: > > > > configure:20907: gcc -o conftest -g -O2 -I../libibverbs/include -L. conftest.c -libverbs >&5 > > ./libibverbs.a: file not recognized: File truncated > > collect2: ld returned 1 exit status > > configure:20910: $? = 1 > > configure: program exited with status 1 > > configure: failed program was: > > > > > > I _think_ it is because build_ofa_user.sh creates an empty libibverbs.a > > file for some reason, and the sles9sp3 gcc isn't liking it... > > We create it just so that configure passes. > This is weird - works fine for us on same system. > gcc should not even look at that file since all symbols are resolved. > What does gcc --version report? > vic11:~ # gcc --version gcc (GCC) 3.3.3 (SuSE Linux) > > >From build_ofa_user.sh: > > > > echo "Running: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" > > echo > libibverbs.a > > if ! ( $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS="-I../libibverbs/include" LDFLAGS="-L." 
> ${conflog}.${dir} 2>&1 ) > > then > > echo "Failed to execute (in $(pwd)): $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" > > /bin/rm -f libibverbs.a > > exit 1 > > fi > > > > > > Steve. > > > Try replacing 'echo > libibverbs.a' with 'ar qc liby.a' - does this help? > From mst at mellanox.co.il Tue Jan 16 06:20:45 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 16:20:45 +0200 Subject: [openib-general] [PATCH RFC 18/21] ofed_1_2 Backportinfiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 In-Reply-To: <1168956538.17859.1.camel@linux-q667.site> References: <1168956538.17859.1.camel@linux-q667.site> Message-ID: <20070116142045.GE22940@mellanox.co.il> > Quoting Steve WIse : > Subject: Re: [PATCH RFC 18/21] ofed_1_2 Backportinfiniband/hw/cxgb3/core/cxio_resource.c to sles9sp3 > > > > Surely random32 can be a macro in kernel_addons? > > > > I started out trying to back-port random32(). But it was too hairy and > not worth the effort IMO. I don't know how make random32() use > get_random_bytes() in a macro and return the value. Show me how. Use a gcc extension: #define random32() ({ int random32_internal_variable; \ get_random_bytes(&random32_internal_variable, sizeof(random32_internal_variable));\ random32_internal_variable; }) or just make it an inline function -- MST From mst at mellanox.co.il Tue Jan 16 06:22:16 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 16:22:16 +0200 Subject: [openib-general] [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE todrivers/net/cxgb3/Makefile In-Reply-To: <1168956893.17859.11.camel@linux-q667.site> References: <1168956893.17859.11.camel@linux-q667.site> Message-ID: <20070116142216.GF22940@mellanox.co.il> > Quoting Steve WIse : > Subject: Re: [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE todrivers/net/cxgb3/Makefile > > On Tue, 2007-01-16 at 10:08 +0200, Michael S. 
Tsirkin wrote: > > > Quoting Steve Wise : > > > Subject: [PATCH RFC 19/21] ofed_1_2 Add LINUXINCLUDE to drivers/net/cxgb3/Makefile > > > > > > > > > Add LINUXINCLUDE to drivers/net/cxgb3/Makefile > > > > > > Signed-off-by: Steve Wise > > > --- > > > > > > .../cxgb3_makefile_to_2_6_5-7_244.patch | 12 ++++++++++++ > > > 1 files changed, 12 insertions(+), 0 deletions(-) > > > > > > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch > > > new file mode 100644 > > > index 0000000..ad7e7f4 > > > --- /dev/null > > > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_5-7_244.patch > > > @@ -0,0 +1,12 @@ > > > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > > > +index 3434679..bb008b6 100755 > > > +--- a/drivers/net/cxgb3/Makefile > > > ++++ b/drivers/net/cxgb3/Makefile > > > +@@ -1,6 +1,7 @@ > > > + # > > > + # Chelsio T3 driver > > > + # > > > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > > > + > > > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > > > + > > > > I wonder why is it needed there but not in other makefiles. > > > > This one took me a while to figure out!!! For the life of me, I > couldn't get drivers/net/cxbg3 to get passed in the search path for the > addons. > > Anyway, drivers/infiniband/Makefile get patched so all the infiniband > subdirs have it defined. drivers/net doesn't. See > > kernel_patches/.../top_8109_to_2_6_5-7_244.patch I see. Makes sense. -- MST From swise at opengridcomputing.com Tue Jan 16 06:31:15 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:31:15 -0600 Subject: [openib-general] GIT fails to clone librdmacm and libamso In-Reply-To: <20070116085950.GW22940@mellanox.co.il> References: <20070116085950.GW22940@mellanox.co.il> Message-ID: <1168957875.17859.19.camel@linux-q667.site> The URLs are incorrect. That's scm, not scn. 
On Tue, 2007-01-16 at 10:59 +0200, Michael S. Tsirkin wrote: > All, please run git-update-server-info on your repos if you didn't already. > > Quoting Krishna Kumar2 : > Subject: GIT fails to clone librdmacm and libamso > > Hi, > > When I run the following commands : > > git clone http://staging.openfabrics.org/pub/scn/~swise/libamso.git libamso > git clone http://staging.openfabrics.org/pub/scn/~shefty/librdmacm.git > > I get error : > > "Cannot get remote repository information. > Perhaps git-update-server-info needs to be run there?" > > It is working for libibverbs : > git clone git://git.kernel.org/pub/scm/libs/infiniband/libibverbs.git > > What could the problem be ? The libamso and rdmacm are http:// links and not git, > so I guess I am doing something wrong ? > > Thanks, > > - KK > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From swise at opengridcomputing.com Tue Jan 16 06:39:10 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:39:10 -0600 Subject: [openib-general] problem building user libs with build_ofa_user.sh In-Reply-To: <20070116091049.GY22940@mellanox.co.il> References: <1168906555.15918.10.camel@linux-q667.site> <20070116090430.GX22940@mellanox.co.il> <20070116091049.GY22940@mellanox.co.il> Message-ID: <1168958350.17859.21.camel@linux-q667.site> On Tue, 2007-01-16 at 11:10 +0200, Michael S. Tsirkin wrote: > > > Try replacing 'echo > libibverbs.a' with 'ar qc liby.a' - does this help? > > Sorry, should have been ar qc libibverbs.a. > That solved configuring the provider libs. I'm also failing in tvflash: configure:3507: gcc -c -g -O2 -I../libibverbs/include conftest.c >&5 conftest.c:60:21: pci/pci.h: No such file or directory configure:3513: $? 
= 1 From mst at mellanox.co.il Tue Jan 16 06:45:07 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 16 Jan 2007 16:45:07 +0200 Subject: [openib-general] problem building user libs with build_ofa_user.sh In-Reply-To: <1168958350.17859.21.camel@linux-q667.site> References: <1168906555.15918.10.camel@linux-q667.site> <20070116090430.GX22940@mellanox.co.il> <20070116091049.GY22940@mellanox.co.il> <1168958350.17859.21.camel@linux-q667.site> Message-ID: <20070116144507.GH22940@mellanox.co.il> > Quoting Steve WIse : > Subject: Re: problem building user libs with build_ofa_user.sh > > On Tue, 2007-01-16 at 11:10 +0200, Michael S. Tsirkin wrote: > > > > > Try replacing 'echo > libibverbs.a' with 'ar qc liby.a' - does this help? > > > > Sorry, should have been ar qc libibverbs.a. > > > > That solved configuring the provider libs. OK, we'll do it this way. > I'm also failing in tvflash: > > > configure:3507: gcc -c -g -O2 -I../libibverbs/include conftest.c >&5 > conftest.c:60:21: pci/pci.h: No such file or directory > configure:3513: $? = 1 library missing? -- MST From ogerlitz at voltaire.com Tue Jan 16 06:46:18 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 16:46:18 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <20070116133507.GC22940@mellanox.co.il> References: <45ACD2F6.8010705@voltaire.com> <20070116133507.GC22940@mellanox.co.il> Message-ID: <45ACE53A.9010605@voltaire.com> Michael S. Tsirkin wrote: >>> git log -Sneigh_destructor -- include/net/neighbour.h >> produced nothing on my net-2.6.20 git however browsing the git log i see >> this patch, is this the one you refer to? > Yes. thanks >> also, having that at (my) hand does not remove the need that you will >> set a changelog/signature for the OFED ipoib related backport patch. > Feel free to add that. 
Unless I am missing something, we want all OFED kernel patches to meet **basic** kernel working conventions, specifically that for each patch there is a change log and an owner. Or. From swise at opengridcomputing.com Tue Jan 16 06:52:22 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 08:52:22 -0600 Subject: [openib-general] problem building user libs with build_ofa_user.sh In-Reply-To: <20070116144507.GH22940@mellanox.co.il> References: <1168906555.15918.10.camel@linux-q667.site> <20070116090430.GX22940@mellanox.co.il> <20070116091049.GY22940@mellanox.co.il> <1168958350.17859.21.camel@linux-q667.site> <20070116144507.GH22940@mellanox.co.il> Message-ID: <1168959142.17859.24.camel@linux-q667.site> On Tue, 2007-01-16 at 16:45 +0200, Michael S. Tsirkin wrote: > > Quoting Steve WIse : > > Subject: Re: problem building user libs with build_ofa_user.sh > > > > On Tue, 2007-01-16 at 11:10 +0200, Michael S. Tsirkin wrote: > > > > > > > Try replacing 'echo > libibverbs.a' with 'ar qc liby.a' - does this help? > > > > > > Sorry, should have been ar qc libibverbs.a. > > > > > > > That solved configuring the provider libs. > > OK, we'll do it this way. > > > I'm also failing in tvflash: > > > > > > configure:3507: gcc -c -g -O2 -I../libibverbs/include conftest.c >&5 > > conftest.c:60:21: pci/pci.h: No such file or directory > > configure:3513: $? = 1 > > library missing? > yes. pci-utils-devel. It builds now. Thanks. BTW: These types of failures don't cause the build script to stop since they are separate background processes. Dunno if that's what you intended... Steve. 
From ogerlitz at voltaire.com Tue Jan 16 07:18:43 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 17:18:43 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45A5373C.2060306@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> Message-ID: <45ACECD3.90106@voltaire.com> Sean Hefty wrote: >> OK, got you at last (sorry but i have somehow ignored the call to >> ib_addr_get_mgid() at the rdmacm code). So to achieve interop with >> IPoIB all we need to do is remove the rdmacm signature bit and not to >> over-write the rdmacm qkey on the the qkey of the ipoib ipv4 broadcast >> group, are you ok with that? > I believe this would achieve interop with ipoib. However, overwriting > the qkey may break any existing UD communication that the user may > have. I just need to think about this more, and see what we can come up > with. 
Hi Sean, Based on our communication so far, the elements which are missing are ++ on the rdmacm kernel code: (drivers/infiniband/core/cma.c) +1 remove the rdmacm signature byte from the mgid +2 get the qkey used by the ipv4 broadcast group and use it mimic IPoIB qkey flow: +3 on rdma_create_qp do modify qp with some def qkey (eg zero) +4 on the join completion path before attaching a qp to the associated mgid, do modify qp with this mrec qkey (=ipv4 broadcast one) ++ on the rdmacm user space code: (librdmacm/src/cma.c) +3 on rdma_create_qp do modify qp with some def qkey (eg zero) +4 on the join completion path before attaching a qp to the associated mgid, do modify qp with this mrec qkey (=ipv4 broadcast one) With the time frame for 2.6.21 and OFED 1.2 becoming short, can you update of the multicast patch series status? We really want it in for this time frame, please let me know if you prefer to get patches that implement the above (eg as reference) or do it yourself... thanks, Or. From ishai at dev.mellanox.co.il Tue Jan 16 07:20:25 2007 From: ishai at dev.mellanox.co.il (ishai) Date: Tue, 16 Jan 2007 17:20:25 +0200 Subject: [openib-general] [PATCH] IB/SRP: check memory allocation Message-ID: <45ACED39.8020303@dev.mellanox.co.il> This patch checks if the kmalloc in match_strdup was successful. 
Signed-off-by: Ishai Rabinovitz --- Index: gen2_devel_kernel/drivers/infiniband/ulp/srp/ib_srp.c =================================================================== --- gen2_devel_kernel.orig/drivers/infiniband/ulp/srp/ib_srp.c 2007-01-16 16:12:02.000000000 +0200 +++ gen2_devel_kernel/drivers/infiniband/ulp/srp/ib_srp.c 2007-01-16 16:48:24.000000000 +0200 @@ -1627,18 +1627,30 @@ static int srp_parse_options(const char switch (token) { case SRP_OPT_ID_EXT: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_IOC_GUID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_DGID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } if (strlen(p) != 32) { printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); kfree(p); @@ -1662,6 +1674,10 @@ static int srp_parse_options(const char case SRP_OPT_SERVICE_ID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; @@ -1699,6 +1715,10 @@ static int srp_parse_options(const char case SRP_OPT_INITIATOR_EXT: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; From ishai at dev.mellanox.co.il Tue Jan 16 07:26:22 2007 From: ishai at dev.mellanox.co.il (ishai) Date: Tue, 16 Jan 2007 17:26:22 +0200 Subject: [openib-general] [PATCH] IB/SRP add need_reset Message-ID: <45ACEE9E.108@dev.mellanox.co.il> When there is a call to send_tsk_mgmt it posts a send and waits for 5 seconds to get a response. When the QP is in an error state it is obvious that there will be no response so it is quite useless to wait. This timeout causes SRP to wait a long time to reconnect. 
(Each abort and each reset_device call send_tsk_mgmt that waits for the timeout). The following patch solves this problem by identifying the failure and returning an immediate error code. Signed-off-by: Ishai Rabinovitz --- Hi Roland, This is an old patch. We thought at first that the timeout is because there is a bug in low level driver. After rechecking it, I found that the bug is internal to SRP. Index: last_stable/drivers/infiniband/ulp/srp/ib_srp.c =================================================================== --- last_stable.orig/drivers/infiniband/ulp/srp/ib_srp.c 2006-09-25 13:51:47.000000000 +0300 +++ last_stable/drivers/infiniband/ulp/srp/ib_srp.c 2006-09-25 15:40:04.000000000 +0300 @@ -543,6 +543,7 @@ static int srp_reconnect_target(struct s target->tx_head = 0; target->tx_tail = 0; + target->need_reset = 0; ret = srp_connect_target(target); if (ret) goto err; @@ -858,6 +859,7 @@ static void srp_completion(struct ib_cq printk(KERN_ERR PFX "failed %s status %d\n", wc.wr_id & SRP_OP_RECV ? 
"receive" : "send", wc.status); + target->need_reset = 1; break; } @@ -1313,6 +1315,8 @@ static int srp_abort(struct scsi_cmnd *s printk(KERN_ERR "SRP abort called\n"); + if (target->need_reset) + return FAILED; if (srp_find_req(target, scmnd, &req)) return FAILED; if (srp_send_tsk_mgmt(target, req, SRP_TSK_ABORT_TASK)) @@ -1341,6 +1345,8 @@ static int srp_reset_device(struct scsi_ printk(KERN_ERR "SRP reset_device called\n"); + if (target->need_reset) + return FAILED; if (srp_find_req(target, scmnd, &req)) return FAILED; if (srp_send_tsk_mgmt(target, req, SRP_TSK_LUN_RESET)) @@ -1750,6 +1756,7 @@ static ssize_t srp_create_target(struct goto err_free; } + target->need_reset = 0; ret = srp_connect_target(target); if (ret) { printk(KERN_ERR PFX "Connection failed\n"); Index: last_stable/drivers/infiniband/ulp/srp/ib_srp.h =================================================================== --- last_stable.orig/drivers/infiniband/ulp/srp/ib_srp.h 2006-09-25 13:51:47.000000000 +0300 +++ last_stable/drivers/infiniband/ulp/srp/ib_srp.h 2006-09-25 14:00:36.000000000 +0300 @@ -158,6 +158,7 @@ struct srp_target_port { struct completion done; int status; enum srp_target_state state; + int need_reset; }; struct srp_iu { From ogerlitz at voltaire.com Tue Jan 16 07:33:26 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 17:33:26 +0200 Subject: [openib-general] some IB multicast sendonly thoughts Message-ID: <45ACF046.7090605@voltaire.com> > 15.2.5.17.1 GROUP MEMBERSHIP > An endport must specify the type of multicast subscription or deletion that > it wants. The MCMemberRecord:JoinState component indicates the > membership qualities a port wishes to add (in joining or creating a group) > or remove (in leaving a group). The meanings of the MCMember- > Record:JoinState bits are: > • FullMember: Group messages are routed both to and from the port. 
> The port is considered a member for purposes of group creation and > deletion, i.e.: if no member ports with FullMember=1 remain, the > group may be deleted; otherwise it may not. > • NonMember: Group messages are routed both to and from the port. > The port is not considered a member for purposes of group creation/ > deletion. > • SendOnlyNonMember: Group messages are only routed from the > port; none are routed to the port. The port is not considered a member > for purposes of group creation/deletion. .... > MCMemberRecord:JoinState.FullMember bit must be set to 1 in the SubnAdmSet() > request that creates a multicast group. ... OK, assuming my setup consists of: +1 IB node doing only multicast TX on a group +2 an IB/Ethernet gateway +3 Eth node doing only multicast RX on the equiv mac (forget manytoone) The gateway design is to register for SA MGID IN/OUT traps and when it gets MGID IN it joins the mgroup as **NonMember** etc Now, since the TX node joins as SendOnlyNonMember the SA would never create this group --> the TX node would never get MLID etc to create AH, etc etc ---> this setup is broken. any thoughts and/or ideas would be welcome Or. From eitan at mellanox.co.il Tue Jan 16 07:37:58 2007 From: eitan at mellanox.co.il (Eitan Zahavi) Date: Tue, 16 Jan 2007 17:37:58 +0200 Subject: [openib-general] some IB multicast sendonly thoughts In-Reply-To: <45ACF046.7090605@voltaire.com> References: <45ACF046.7090605@voltaire.com> Message-ID: <45ACF156.7090706@mellanox.co.il> Or Gerlitz wrote: >> 15.2.5.17.1 GROUP MEMBERSHIP >> > > >> An endport must specify the type of multicast subscription or deletion that >> it wants. The MCMemberRecord:JoinState component indicates the >> membership qualities a port wishes to add (in joining or creating a group) >> or remove (in leaving a group). The meanings of the MCMember- >> Record:JoinState bits are: >> > > >> • FullMember: Group messages are routed both to and from the port. 
>> The port is considered a member for purposes of group creation and >> deletion, i.e.: if no member ports with FullMember=1 remain, the >> group may be deleted; otherwise it may not. >> > > >> • NonMember: Group messages are routed both to and from the port. >> The port is not considered a member for purposes of group creation/ >> deletion. >> > > >> • SendOnlyNonMember: Group messages are only routed from the >> port; none are routed to the port. The port is not considered a member >> for purposes of group creation/deletion. >> > > .... > > >> MCMemberRecord:JoinState.FullMember bit must be set to 1 in the SubnAdmSet() >> request that creates a multicast group. >> > > ... > > OK, assuming my setup consists of: > > +1 IB node doing only multicast TX on a group > > +2 an IB/Ethernet gateway > > 3+ Eth node doing only multicast RX on the equiv mac (forget manytoone) > > The gateway design is to register for SA MGID IN/OUT traps and when it > gets MGID IN it joins the the mgroup as ***NonMember** etc > GW needs to listen on IGMP on the Eth port... > Now, since the TX node joins as SendOnlyNonMember the SA would never > create this group --> the TX node would never get MLID etc to create AH, > etc etc > > ---> this setup is broken. > > any thoughts and/or ideas would be welcome > > Or. 
> > > > > > > > > > > > > > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From tziporet at mellanox.co.il Tue Jan 16 07:41:12 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 16 Jan 2007 17:41:12 +0200 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze Message-ID: <45ACF218.5020400@mellanox.co.il> The meeting summary is also available on the Wiki: https://wiki.openfabrics.org/tiki-index.php?page=Teleconf+01-15-2007 *Abbreviated minutes / summary* * OFED 1.2 components readiness for end of January code freeze: In general most packages will be ready on time. Some features will be added after the Alpha but these will be add-ons that are not part of core/drivers/libraries and cannot harm stability of the package. 
* OFED 1.2 plan was updated on the Wiki: https://openib.org/tiki/tiki-index.php?page=OFED+1.2+release+plan+and+features * Decided to add MVAPICH2 package - OSU are the owners * Daily build update on the OFA server * Bonding module will be added to OFED 1.2 to support HA on older kernels * Toward the release each user level owner will open a branch (with the name ofed_1_2) to be used for the release sources *Detailed Minutes* Build & integration process: * There is a daily build of all components on OFA server * User space - from all user space git trees * kernel.org starting from 2.6.12 till 2.6.19 (supporting kernel 2.6.19 and 20 is required, the rest of kernel versions are optional) * Kernel cross compilation: X86_64, ia64, ppc, ppc64 (Vlad will send a mail) * Build mails will be sent every day starting this week * HOWTO was updated in WIKI home page: o HOWTO Build OFA user package o HOWTO Build OFA kernel package o HOWTO add userspace package to ofa_user General changes to the package: * Chelsio driver - on work - to be integrated this week * Re-base to kernel 2.6.20 to be done in few days * Sa cache - in ofed_1_2 today - Intel please test * Multicast - we wait for Voltaire and Sean to close all technical details - should be ready by the end of the week * SA async event support and InformInfo? and Notice reporting - will be ready only first week of Feb * Bonding module - Voltaire are working to prepare back-port of the bonding module that will support HA for IPoIB in older kernels. The module should be ready by the end of this week * low level drivers - all going to be ready on time, except the new ehca interrupt handler Libraries: * libibverbs 1.1: (from Roland) o Fork support - done o Minor ABI extensions for new low-level drivers - done o Better low-level driver handling, including multiple drivers linked in statically - done o Dropped from OFED 1.2: + Memory windows + Reregister memory region (incl. extend without unregister...) 
+ libibverbs 1.0 to be included as well * Management: All the code is ready. Hal will prepare a release version for OFED 1.2. * CM/CMA: Sean will prepare a release version. Number will probably be 1.1 not to collide with existing libraries version in Redhat * uDAPL: Woody will ask Arlin to create a release for uDAPL library Kernel ULPs: * IPoIB: o Connected Mode - code ready and under testing now o NAPI - should test with CM - Mellanox. Current ehca interrupt handler does not work well with NAPI. IBM is working to change their interrupt handler to overcome this. Not clear if this will be ready on time. * SDP: o Netstat - done - under testing o Scalability - done o small messages BW - on work * iSER: Most changes are ready and integrated to ofed 1_2 git tree. * SRP: MPP integration for HA - we wait for new MPP driver from Engenio * RDS: Tziporet to approach Oracle to understand if the license issue is going to be resolved by the end of the month. * VNIC - Integrated into ofed_1_2 git tree - Qlogic should test it Management: * OpenSM: o QoS - on work; first version will be ready at end of month o FAT tree routing - done o Taurus routing - done o SA InformInfo? improvements and InformInfoRecord? support - done o SA database dump/restore - ? o Modular routing for multicast - ? o IB router enablement - done * Diagnostics: o Monitoring QoS - ? o Partitions analysis - ? o GUI tool - done o perfquery support for extended port counters - done o ibportstate enhancements to change link speed of a port - ? *MPI* * MVAPICH 0.99 - Beta should be ready this or next week. * Open MPI 1.2 - Code in convergence. For the Alpha we will use a pre-release version but it will become stable for the Beta * MVAPICH2 0.9.8 - OSU (Shaun Roland) will be the owner in OFED and will provide the source RPM, based on the 0.9.8 release that was already done * Each MPI owner will open a git tree on the OFA server and locate the SRPM there. 
-------------- next part -------------- An HTML attachment was scrubbed... URL: From ogerlitz at voltaire.com Tue Jan 16 07:46:57 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 17:46:57 +0200 Subject: [openib-general] some IB multicast sendonly thoughts In-Reply-To: <45ACF156.7090706@mellanox.co.il> References: <45ACF046.7090605@voltaire.com> <45ACF156.7090706@mellanox.co.il> Message-ID: <45ACF371.20009@voltaire.com> Eitan Zahavi wrote: > Or Gerlitz wrote: >> OK, assuming my setup consists of: >> +1 IB node doing only multicast TX on a group >> +2 an IB/Ethernet gateway >> 3+ Eth node doing only multicast RX on the equiv mac (forget manytoone) >> The gateway design is to register for SA MGID IN/OUT traps and when it >> gets MGID IN it joins the the mgroup as ***NonMember** etc > GW needs to listen on IGMP on the Eth port... this was fast... thanks for jumping on it. So you are saying that the GW **has** to listen on IGMP at the Eth side and **has** to do IB SA join in the only way that forces the SA to create the group --> FullMember ? If indeed, this is kind of bad, I find the approach of the GW being "transparent" to the SA in the sense that it does not cause mgroup create/destroy nor mgroup ref count inc/dec much more robust, so you are saying its not feasible with the IB spec. Or. From halr at voltaire.com Tue Jan 16 07:47:54 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 16 Jan 2007 10:47:54 -0500 Subject: [openib-general] some IB multicast sendonly thoughts In-Reply-To: <45ACF046.7090605@voltaire.com> References: <45ACF046.7090605@voltaire.com> Message-ID: <1168962465.32185.90620.camel@hal.voltaire.com> On Tue, 2007-01-16 at 10:33, Or Gerlitz wrote: > > 15.2.5.17.1 GROUP MEMBERSHIP > > > An endport must specify the type of multicast subscription or deletion that > > it wants. 
The MCMemberRecord:JoinState component indicates the > > membership qualities a port wishes to add (in joining or creating a group) > > or remove (in leaving a group). The meanings of the MCMember- > > Record:JoinState bits are: > > > • FullMember: Group messages are routed both to and from the port. > > The port is considered a member for purposes of group creation and > > deletion, i.e.: if no member ports with FullMember=1 remain, the > > group may be deleted; otherwise it may not. > > > • NonMember: Group messages are routed both to and from the port. > > The port is not considered a member for purposes of group creation/ > > deletion. > > > • SendOnlyNonMember: Group messages are only routed from the > > port; none are routed to the port. The port is not considered a member > > for purposes of group creation/deletion. > > .... > > > MCMemberRecord:JoinState.FullMember bit must be set to 1 in the SubnAdmSet() > > request that creates a multicast group. > > ... > > OK, assuming my setup consists of: > > +1 IB node doing only multicast TX on a group > > +2 an IB/Ethernet gateway > > 3+ Eth node doing only multicast RX on the equiv mac (forget manytoone) > > The gateway design is to register for SA MGID IN/OUT traps and when it > gets MGID IN it joins the the mgroup as ***NonMember** etc > > Now, since the TX node joins as SendOnlyNonMember the SA would never > create this group --> the TX node would never get MLID etc to create AH, > etc etc > > ---> this setup is broken. > > any thoughts and/or ideas would be welcome I don't think it matters whether it is a gateway scenario as you describe or just a pure IB network. The only thing that matters is the sequencing of the transmitter and receiver; e.g. whether a receiver creates the group prior to the send only transmitter. In the case of the gateway, when the receiver starts on the ethernet side, the gateway would need to know this via some protocol (e.g. IGMP) and then join appropriately on the IB side. 
-- Hal > Or. > > > > > > > > > > > > > > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From eitan at mellanox.co.il Tue Jan 16 07:53:50 2007 From: eitan at mellanox.co.il (Eitan Zahavi) Date: Tue, 16 Jan 2007 17:53:50 +0200 Subject: [openib-general] some IB multicast sendonly thoughts In-Reply-To: <45ACF371.20009@voltaire.com> References: <45ACF046.7090605@voltaire.com> <45ACF156.7090706@mellanox.co.il> <45ACF371.20009@voltaire.com> Message-ID: <45ACF50E.8030707@mellanox.co.il> Or Gerlitz wrote: > Eitan Zahavi wrote: > >> Or Gerlitz wrote: >> >>> OK, assuming my setup consists of: >>> +1 IB node doing only multicast TX on a group >>> +2 an IB/Ethernet gateway >>> 3+ Eth node doing only multicast RX on the equiv mac (forget manytoone) >>> > > >>> The gateway design is to register for SA MGID IN/OUT traps and when it >>> gets MGID IN it joins the the mgroup as ***NonMember** etc >>> > > >> GW needs to listen on IGMP on the Eth port... >> > > this was fast... thanks for jumping on it. > > So you are saying that the GW **has** to listen on IGMP at the Eth side > and **has** to do IB SA join in the only way that forces the SA to > create the group --> FullMember ? > Yes > If indeed, this is kind of bad, I find it very reasonable > I find the approach of the GW being > "transparent" to the SA in the sense that it does not cause mgroup > create/destroy nor mgroup ref count inc/dec much more robust, so you are > saying its not feasible with the IB spec. > > Or. 
> > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From ogerlitz at voltaire.com Tue Jan 16 07:58:26 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 17:58:26 +0200 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <45ACF218.5020400@mellanox.co.il> References: <45ACF218.5020400@mellanox.co.il> Message-ID: <45ACF622.4060603@voltaire.com> Tziporet Koren wrote: Hi Tziporet, thanks for the details info, below are few comments: > *Abbreviated minutes / summary* > * Bonding module will be added to OFED 1.2 to support HA on older > kernels The bonding package would support: fresh (2.6.20) and some older upstream kernels along with SLES10 and RH4 Ux (x=3 for sure) > General changes to the package: > * Multicast - we wait for Voltaire and Sean to close all technical > details - should be ready by the end of the week I have just sent Sean over the list a clarification email, if needed we would be able to help doing the missing patches and i guess in a combined effort this would be ready for the end of --next-- week > Management: > * OpenSM: > o QoS - on work; first version will be ready at end of month what about the host side QoS code? i did not see an newer RFC nor patch other then the RFC that was sent many months ago. Or. 
From ogerlitz at voltaire.com Tue Jan 16 08:08:51 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 18:08:51 +0200 Subject: [openib-general] some IB multicast sendonly thoughts In-Reply-To: <45ACF50E.8030707@mellanox.co.il> References: <45ACF046.7090605@voltaire.com> <45ACF156.7090706@mellanox.co.il> <45ACF371.20009@voltaire.com> <45ACF50E.8030707@mellanox.co.il> Message-ID: <45ACF893.9060504@voltaire.com> Eitan Zahavi wrote: >> So you are saying that the GW **has** to listen on IGMP at the Eth >> side and **has** to do IB SA join in the only way that forces the SA >> to create the group --> FullMember ? > Yes >> If indeed, this is kind of bad, > I find it very reasonable OK, let me think about it for a while, thanks for the quick response. Or. From vlad at mellanox.co.il Tue Jan 16 08:19:46 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 16 Jan 2007 18:19:46 +0200 Subject: [openib-general] [PATCH v3 0/3] ofed_1_2 - Chelsio T3 RDMA Support In-Reply-To: <20070112162034.27341.33435.stgit@dell3.ogc.int> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> Message-ID: <1168964386.5133.23.camel@vladsk-laptop> Chelsio driver code pulled to ofed_1_2. Regards, Vladimir On Fri, 2007-01-12 at 10:20 -0600, Steve Wise wrote: > Michael/Vlad, > > Here is version 3 of the patch series for adding the Chelsio kernel > drivers. > > Changes since version 2: > > - made genalloc.[ch] a backport addon as per our discussions. At this > point, I've got it working for 2.6.20. > > - use #defines to avoid patching cxgb3 files for the genalloc backport. > > > In addition to this patch series, the Chelsio driver code needs to be > pulled from > > git://staging.openfabrics.org/~swise/cxgb3.git for-ofed_1_2 > > I've tested this on 2.6.20-rc4. If you're happy with the changes, I > ask that you pull these patches in and pull the T3 drivers into vlad's > ofed_1_2 git tree. That will give us a base package for Chelsio's > drivers. 
Then I'll post additional patches for the backports to the > supported distros as I get them implemented and tested. > > How's that sound? If you would rather a patch for the chelsio drivers, > lemme know and I'll email it to you directly. > > Signed-off-by: Steve Wise > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From mshefty at ichips.intel.com Tue Jan 16 08:35:28 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 16 Jan 2007 08:35:28 -0800 Subject: [openib-general] multicast code/merge status In-Reply-To: <45ACECD3.90106@voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> Message-ID: <45ACFED0.9050000@ichips.intel.com> > mimic IPoIB qkey flow: > > +3 on rdma_create_qp do modify qp with some def qkey (eg zero) > +4 on the join completion path before attaching a qp to the associated > mgid, do modify qp with this mrec qkey (=ipv4 broadcast one) The rdma cm allows UD QP communication, which requires a valid qkey before or without joining a multicast group. I'd like to find a way to continue to support this. > +3 on rdma_create_qp do modify qp with some def qkey (eg zero) > +4 on the join completion path before attaching a qp to the associated > mgid, do modify qp with this mrec qkey (=ipv4 broadcast one) Isn't the ipoib qkey a privileged qkey? 
- Sean From vlad at mellanox.co.il Tue Jan 16 08:50:26 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 16 Jan 2007 18:50:26 +0200 Subject: [openib-general] [PATCH v3 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <20070112162038.27341.81922.stgit@dell3.ogc.int> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> <20070112162038.27341.81922.stgit@dell3.ogc.int> Message-ID: <1168966226.9562.5.camel@vladsk-laptop> Hi Steve, I got the following failure on 2.6.17 and 2.6.18 kernels: /tmp/ofa_kernel-20070116-1806_linux-2.6.18.6_check/drivers/net/cxgb3/Module.symvers: No such file or directory make[2]: *** [__modpost] Error 1 make[1]: *** [modules] Error 2 make[1]: Leaving directory `//kernel.org/x86_64/linux-2.6.18.6' make: *** [kernel] Error 2 I think that the problem is in SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3". Try to make it a different target. See iscsi as an example. Regards, Vladimir On Fri, 2007-01-12 at 10:20 -0600, Steve Wise wrote: > - added cxgb3 and iw_cxgb3 config stuff > - visit and build driver/net/cxgb3 to get the cxgb3 driver > > Signed-off-by: Steve Wise > --- > > ofed_scripts/Makefile | 9 ++++++-- > ofed_scripts/configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 58 insertions(+), 3 deletions(-) > > diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile > index d63b1d2..8942385 100644 > --- a/ofed_scripts/Makefile > +++ b/ofed_scripts/Makefile > @@ -46,8 +46,10 @@ kernel: > @echo "Kernel sources: $(KSRC)" > env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ > -I$(CWD)/drivers/infiniband/ulp/ipoib \ > - -I$(CWD)/drivers/infiniband/debug" \ > - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ > + -I$(CWD)/drivers/infiniband/debug \ > + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ > + -I$(CWD)/drivers/net/cxgb3 " \ > + $(MAKE) -C 
$(KSRC) SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ > EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ > CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ > CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ > @@ -74,6 +76,9 @@ kernel: > CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ > CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ > CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ > + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ > + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ > + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ > LINUXINCLUDE=' \ > $(BACKPORT_INCLUDES) \ > -I$(CWD)/include \ > diff --git a/ofed_scripts/configure b/ofed_scripts/configure > index a0557e2..253427c 100755 > --- a/ofed_scripts/configure > +++ b/ofed_scripts/configure > @@ -126,6 +126,12 @@ Usage: `basename $0` [options] > --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] > --without-vnic_stats-mod [yes] > > + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] > + --without-cxgb3-mod [yes] > + > + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] > + --without-cxgb3_debug-mod [yes] > + > --help - print out options > > > @@ -207,7 +213,10 @@ get_backport_dir() > 2.6.19*) > echo 2.6.19 > ;; > - 2.6.2[0-9]*) > + 2.6.20*) > + echo 2.6.20 > + ;; > + 2.6.2[1-9]*) > echo > ;; > *) > @@ -607,6 +616,20 @@ main() > --without-vnic_stats-mod) > CONFIG_INFINIBAND_VNIC_STATS= > ;; > + --with-cxgb3-mod) > + CONFIG_INFINIBAND_CXGB3="m" > + CONFIG_CHELSIO_T3="m" > + ;; > + --without-cxgb3-mod) > + CONFIG_INFINIBAND_CXGB3= > + CONFIG_CHELSIO_T3= > + ;; > + --with-cxgb3_debug-mod) > + CONFIG_INFINIBAND_CXGB3_DEBUG="y" > + ;; > + --without-cxgb3_debug-mod) > + CONFIG_INFINIBAND_CXGB3_DEBUG= > + ;; > --with-modprobe|--without-modprobe) > ;; > -h | --help) > @@ -679,6 +702,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} 
> CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} > @@ -689,6 +714,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} > > # Check for minimal supported kernel version > if ! check_kerver ${KVERSION} ${MIN_KVERSION}; then > @@ -742,6 +768,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} > CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} > @@ -752,6 +780,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} > > EOFCONFIG > echo "Created ${CONFIG}:" > @@ -887,6 +916,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" > else > DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" > fi > +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then > + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" > +else > + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" > +fi > +if [ 
"X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then > + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" > +else > + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" > +fi > +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then > + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" > +else > + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" > +fi > cat >> ${AUTOCONF_H} << EOFAUTOCONF > #undef CONFIG_INFINIBAND > #undef CONFIG_INFINIBAND_IPOIB > @@ -908,6 +952,9 @@ #undef CONFIG_INFINIBAND_MADEYE > #undef CONFIG_INFINIBAND_VNIC > #undef CONFIG_INFINIBAND_VNIC_DEBUG > #undef CONFIG_INFINIBAND_VNIC_STATS > +#undef CONFIG_INFINIBAND_CXGB3 > +#undef CONFIG_INFINIBAND_CXGB3_DEBUG > +#undef CONFIG_CHELSIO_T3 > > #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA > #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY > @@ -927,6 +974,8 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > ${DEFINE_INFINIBAND_ADDR_TRANS} > ${DEFINE_INFINIBAND_MTHCA} > ${DEFINE_INFINIBAND_VNIC} > +${DEFINE_INFINIBAND_CXGB3} > +${DEFINE_CHELSIO_T3} > > ${DEFINE_INFINIBAND_IPOIB_DEBUG} > ${DEFINE_INFINIBAND_ISER} > @@ -937,6 +986,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > ${DEFINE_INFINIBAND_RDS_DEBUG} > ${DEFINE_INFINIBAND_VNIC_DEBUG} > ${DEFINE_INFINIBAND_VNIC_STATS} > +${DEFINE_INFINIBAND_CXGB3_DEBUG} > > ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} > ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Tue Jan 16 08:55:43 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 10:55:43 -0600 Subject: [openib-general] building dapl for ofed Message-ID: <1168966544.19404.2.camel@linux-q667.site> I'm having problems building dapl for ofed 1.2. I'm using the dapl rdma_ucm branch and still getting compile problems. 
What librdmacm branch should I be using? Thanks, Steve. From swise at opengridcomputing.com Tue Jan 16 08:58:12 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 10:58:12 -0600 Subject: [openib-general] [PATCH v3 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <1168966226.9562.5.camel@vladsk-laptop> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> <20070112162038.27341.81922.stgit@dell3.ogc.int> <1168966226.9562.5.camel@vladsk-laptop> Message-ID: <1168966692.19404.5.camel@linux-q667.site> I haven't backported cxgb3 to anything yet. The patches I posted for the initial import of cxgb3 will only compile on 2.6.20, as I said in the patch overview. I'm right now trying to get them to build from 2.6.19 back to 2.6.11 as michael suggested... On Tue, 2007-01-16 at 18:50 +0200, Vladimir Sokolovsky wrote: > Hi Steve, > I got the following failure on 2.6.17 and 2.6.18 kernels: > > /tmp/ofa_kernel-20070116-1806_linux-2.6.18.6_check/drivers/net/cxgb3/Module.symvers: No such file or directory > make[2]: *** [__modpost] Error 1 > make[1]: *** [modules] Error 2 > make[1]: Leaving directory `//kernel.org/x86_64/linux-2.6.18.6' > make: *** [kernel] Error 2 > > I think that the problem is in SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3". > Try to make it a different target. See iscsi as an example. 
> > Regards, > Vladimir > > On Fri, 2007-01-12 at 10:20 -0600, Steve Wise wrote: > > - added cxgb3 and iw_cxgb3 config stuff > > - visit and build driver/net/cxgb3 to get the cxgb3 driver > > > > Signed-off-by: Steve Wise > > --- > > > > ofed_scripts/Makefile | 9 ++++++-- > > ofed_scripts/configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++- > > 2 files changed, 58 insertions(+), 3 deletions(-) > > > > diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile > > index d63b1d2..8942385 100644 > > --- a/ofed_scripts/Makefile > > +++ b/ofed_scripts/Makefile > > @@ -46,8 +46,10 @@ kernel: > > @echo "Kernel sources: $(KSRC)" > > env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ > > -I$(CWD)/drivers/infiniband/ulp/ipoib \ > > - -I$(CWD)/drivers/infiniband/debug" \ > > - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ > > + -I$(CWD)/drivers/infiniband/debug \ > > + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ > > + -I$(CWD)/drivers/net/cxgb3 " \ > > + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ > > EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ > > CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ > > CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ > > @@ -74,6 +76,9 @@ kernel: > > CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ > > CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ > > CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ > > + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ > > + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ > > + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ > > LINUXINCLUDE=' \ > > $(BACKPORT_INCLUDES) \ > > -I$(CWD)/include \ > > diff --git a/ofed_scripts/configure b/ofed_scripts/configure > > index a0557e2..253427c 100755 > > --- a/ofed_scripts/configure > > +++ b/ofed_scripts/configure > > @@ -126,6 
+126,12 @@ Usage: `basename $0` [options] > > --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] > > --without-vnic_stats-mod [yes] > > > > + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] > > + --without-cxgb3-mod [yes] > > + > > + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] > > + --without-cxgb3_debug-mod [yes] > > + > > --help - print out options > > > > > > @@ -207,7 +213,10 @@ get_backport_dir() > > 2.6.19*) > > echo 2.6.19 > > ;; > > - 2.6.2[0-9]*) > > + 2.6.20*) > > + echo 2.6.20 > > + ;; > > + 2.6.2[1-9]*) > > echo > > ;; > > *) > > @@ -607,6 +616,20 @@ main() > > --without-vnic_stats-mod) > > CONFIG_INFINIBAND_VNIC_STATS= > > ;; > > + --with-cxgb3-mod) > > + CONFIG_INFINIBAND_CXGB3="m" > > + CONFIG_CHELSIO_T3="m" > > + ;; > > + --without-cxgb3-mod) > > + CONFIG_INFINIBAND_CXGB3= > > + CONFIG_CHELSIO_T3= > > + ;; > > + --with-cxgb3_debug-mod) > > + CONFIG_INFINIBAND_CXGB3_DEBUG="y" > > + ;; > > + --without-cxgb3_debug-mod) > > + CONFIG_INFINIBAND_CXGB3_DEBUG= > > + ;; > > --with-modprobe|--without-modprobe) > > ;; > > -h | --help) > > @@ -679,6 +702,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > > CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} > > CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} > > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} > > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} > > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} > > > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} > > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} > > @@ -689,6 +714,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} > > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} > > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} > > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} > > > > # Check for minimal supported 
kernel version > > if ! check_kerver ${KVERSION} ${MIN_KVERSION}; then > > @@ -742,6 +768,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > > CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} > > CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} > > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} > > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} > > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} > > > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} > > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} > > @@ -752,6 +780,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} > > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} > > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} > > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} > > > > EOFCONFIG > > echo "Created ${CONFIG}:" > > @@ -887,6 +916,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" > > else > > DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" > > fi > > +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then > > + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" > > +else > > + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" > > +fi > > +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then > > + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" > > +else > > + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" > > +fi > > +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then > > + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" > > +else > > + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" > > +fi > > cat >> ${AUTOCONF_H} << EOFAUTOCONF > > #undef CONFIG_INFINIBAND > > #undef CONFIG_INFINIBAND_IPOIB > > @@ -908,6 +952,9 @@ #undef CONFIG_INFINIBAND_MADEYE > > #undef CONFIG_INFINIBAND_VNIC > > #undef CONFIG_INFINIBAND_VNIC_DEBUG > > #undef CONFIG_INFINIBAND_VNIC_STATS > > +#undef 
CONFIG_INFINIBAND_CXGB3 > > +#undef CONFIG_INFINIBAND_CXGB3_DEBUG > > +#undef CONFIG_CHELSIO_T3 > > > > #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA > > #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY > > @@ -927,6 +974,8 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > > ${DEFINE_INFINIBAND_ADDR_TRANS} > > ${DEFINE_INFINIBAND_MTHCA} > > ${DEFINE_INFINIBAND_VNIC} > > +${DEFINE_INFINIBAND_CXGB3} > > +${DEFINE_CHELSIO_T3} > > > > ${DEFINE_INFINIBAND_IPOIB_DEBUG} > > ${DEFINE_INFINIBAND_ISER} > > @@ -937,6 +986,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > > ${DEFINE_INFINIBAND_RDS_DEBUG} > > ${DEFINE_INFINIBAND_VNIC_DEBUG} > > ${DEFINE_INFINIBAND_VNIC_STATS} > > +${DEFINE_INFINIBAND_CXGB3_DEBUG} > > > > ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} > > ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Tue Jan 16 09:01:22 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 16 Jan 2007 19:01:22 +0200 Subject: [openib-general] [PATCH v3 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <1168966692.19404.5.camel@linux-q667.site> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> <20070112162038.27341.81922.stgit@dell3.ogc.int> <1168966226.9562.5.camel@vladsk-laptop> <1168966692.19404.5.camel@linux-q667.site> Message-ID: <1168966882.9562.7.camel@vladsk-laptop> It breaks compilation without selecting cxgb3... Regards, Vladimir On Tue, 2007-01-16 at 10:58 -0600, Steve WIse wrote: > I haven't backported cxgb3 to anything yet. > > The patches I posted for the initial import of cxgb3 will only compile > on 2.6.20, as I said in the patch overview. > > I'm right now trying to get them to build from 2.6.19 back to 2.6.11 as > michael suggested... 
> > > > On Tue, 2007-01-16 at 18:50 +0200, Vladimir Sokolovsky wrote: > > Hi Steve, > > I got the following failure on 2.6.17 and 2.6.18 kernels: > > > > /tmp/ofa_kernel-20070116-1806_linux-2.6.18.6_check/drivers/net/cxgb3/Module.symvers: No such file or directory > > make[2]: *** [__modpost] Error 1 > > make[1]: *** [modules] Error 2 > > make[1]: Leaving directory `//kernel.org/x86_64/linux-2.6.18.6' > > make: *** [kernel] Error 2 > > > > I think that the problem is in SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3". > > Try to make it a different target. See iscsi as an example. > > > > Regards, > > Vladimir > > > > On Fri, 2007-01-12 at 10:20 -0600, Steve Wise wrote: > > > - added cxgb3 and iw_cxgb3 config stuff > > > - visit and build driver/net/cxgb3 to get the cxgb3 driver > > > > > > Signed-off-by: Steve Wise > > > --- > > > > > > ofed_scripts/Makefile | 9 ++++++-- > > > ofed_scripts/configure | 52 +++++++++++++++++++++++++++++++++++++++++++++++- > > > 2 files changed, 58 insertions(+), 3 deletions(-) > > > > > > diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile > > > index d63b1d2..8942385 100644 > > > --- a/ofed_scripts/Makefile > > > +++ b/ofed_scripts/Makefile > > > @@ -46,8 +46,10 @@ kernel: > > > @echo "Kernel sources: $(KSRC)" > > > env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ > > > -I$(CWD)/drivers/infiniband/ulp/ipoib \ > > > - -I$(CWD)/drivers/infiniband/debug" \ > > > - $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ > > > + -I$(CWD)/drivers/infiniband/debug \ > > > + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ > > > + -I$(CWD)/drivers/net/cxgb3 " \ > > > + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ > > > EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ > > > CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ > > > 
CONFIG_INFINIBAND_IPOIB=$(CONFIG_INFINIBAND_IPOIB) \ > > > @@ -74,6 +76,9 @@ kernel: > > > CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ > > > CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ > > > CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ > > > + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ > > > + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ > > > + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ > > > LINUXINCLUDE=' \ > > > $(BACKPORT_INCLUDES) \ > > > -I$(CWD)/include \ > > > diff --git a/ofed_scripts/configure b/ofed_scripts/configure > > > index a0557e2..253427c 100755 > > > --- a/ofed_scripts/configure > > > +++ b/ofed_scripts/configure > > > @@ -126,6 +126,12 @@ Usage: `basename $0` [options] > > > --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] > > > --without-vnic_stats-mod [yes] > > > > > > + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] > > > + --without-cxgb3-mod [yes] > > > + > > > + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] > > > + --without-cxgb3_debug-mod [yes] > > > + > > > --help - print out options > > > > > > > > > @@ -207,7 +213,10 @@ get_backport_dir() > > > 2.6.19*) > > > echo 2.6.19 > > > ;; > > > - 2.6.2[0-9]*) > > > + 2.6.20*) > > > + echo 2.6.20 > > > + ;; > > > + 2.6.2[1-9]*) > > > echo > > > ;; > > > *) > > > @@ -607,6 +616,20 @@ main() > > > --without-vnic_stats-mod) > > > CONFIG_INFINIBAND_VNIC_STATS= > > > ;; > > > + --with-cxgb3-mod) > > > + CONFIG_INFINIBAND_CXGB3="m" > > > + CONFIG_CHELSIO_T3="m" > > > + ;; > > > + --without-cxgb3-mod) > > > + CONFIG_INFINIBAND_CXGB3= > > > + CONFIG_CHELSIO_T3= > > > + ;; > > > + --with-cxgb3_debug-mod) > > > + CONFIG_INFINIBAND_CXGB3_DEBUG="y" > > > + ;; > > > + --without-cxgb3_debug-mod) > > > + CONFIG_INFINIBAND_CXGB3_DEBUG= > > > + ;; > > > --with-modprobe|--without-modprobe) > > > ;; > > > -h | --help) > > > @@ -679,6 +702,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > > > 
CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} > > > CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} > > > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} > > > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} > > > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} > > > > > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} > > > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} > > > @@ -689,6 +714,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > > > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} > > > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} > > > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} > > > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} > > > > > > # Check for minimal supported kernel version > > > if ! check_kerver ${KVERSION} ${MIN_KVERSION}; then > > > @@ -742,6 +768,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > > > CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} > > > CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} > > > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} > > > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} > > > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} > > > > > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} > > > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} > > > @@ -752,6 +780,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > > > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} > > > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} > > > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} > > > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} > > > > > > EOFCONFIG > > > echo "Created ${CONFIG}:" > > > @@ -887,6 +916,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" > > > else > > > DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" > > > fi 
> > > +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then > > > + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" > > > +else > > > + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" > > > +fi > > > +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then > > > + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" > > > +else > > > + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" > > > +fi > > > +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then > > > + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" > > > +else > > > + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" > > > +fi > > > cat >> ${AUTOCONF_H} << EOFAUTOCONF > > > #undef CONFIG_INFINIBAND > > > #undef CONFIG_INFINIBAND_IPOIB > > > @@ -908,6 +952,9 @@ #undef CONFIG_INFINIBAND_MADEYE > > > #undef CONFIG_INFINIBAND_VNIC > > > #undef CONFIG_INFINIBAND_VNIC_DEBUG > > > #undef CONFIG_INFINIBAND_VNIC_STATS > > > +#undef CONFIG_INFINIBAND_CXGB3 > > > +#undef CONFIG_INFINIBAND_CXGB3_DEBUG > > > +#undef CONFIG_CHELSIO_T3 > > > > > > #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA > > > #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY > > > @@ -927,6 +974,8 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > > > ${DEFINE_INFINIBAND_ADDR_TRANS} > > > ${DEFINE_INFINIBAND_MTHCA} > > > ${DEFINE_INFINIBAND_VNIC} > > > +${DEFINE_INFINIBAND_CXGB3} > > > +${DEFINE_CHELSIO_T3} > > > > > > ${DEFINE_INFINIBAND_IPOIB_DEBUG} > > > ${DEFINE_INFINIBAND_ISER} > > > @@ -937,6 +986,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > > > ${DEFINE_INFINIBAND_RDS_DEBUG} > > > ${DEFINE_INFINIBAND_VNIC_DEBUG} > > > ${DEFINE_INFINIBAND_VNIC_STATS} > > > +${DEFINE_INFINIBAND_CXGB3_DEBUG} > > > > > > ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} > > > ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} > > > > > > _______________________________________________ > > > openib-general mailing list > > > openib-general at openib.org > > > http://openib.org/mailman/listinfo/openib-general > > > > > > To unsubscribe, please visit 
http://openib.org/mailman/listinfo/openib-general > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Tue Jan 16 09:02:43 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 11:02:43 -0600 Subject: [openib-general] [PATCH v3 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <1168966226.9562.5.camel@vladsk-laptop> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> <20070112162038.27341.81922.stgit@dell3.ogc.int> <1168966226.9562.5.camel@vladsk-laptop> Message-ID: <1168966963.19404.9.camel@linux-q667.site> On Tue, 2007-01-16 at 18:50 +0200, Vladimir Sokolovsky wrote: > Hi Steve, > I got the following failure on 2.6.17 and 2.6.18 kernels: > > /tmp/ofa_kernel-20070116-1806_linux-2.6.18.6_check/drivers/net/cxgb3/Module.symvers: No such file or directory > make[2]: *** [__modpost] Error 1 > make[1]: *** [modules] Error 2 > make[1]: Leaving directory `//kernel.org/x86_64/linux-2.6.18.6' > make: *** [kernel] Error 2 > > I think that the problem is in SUBDIRS="$(CWD)/drivers/infiniband $(CWD)/drivers/net/cxgb3". > Try to make it a different target. See iscsi as an example. > By the way, this works ok building on sles9sp3 so I don't think it's the SUBDIRS line. From swise at opengridcomputing.com Tue Jan 16 09:04:18 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 11:04:18 -0600 Subject: [openib-general] [PATCH v3 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. 
In-Reply-To: <1168966882.9562.7.camel@vladsk-laptop> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> <20070112162038.27341.81922.stgit@dell3.ogc.int> <1168966226.9562.5.camel@vladsk-laptop> <1168966692.19404.5.camel@linux-q667.site> <1168966882.9562.7.camel@vladsk-laptop> Message-ID: <1168967058.19404.12.camel@linux-q667.site> On Tue, 2007-01-16 at 19:01 +0200, Vladimir Sokolovsky wrote: > It breaks compilation without selecting cxgb3... > Oh. I see. Alright...lemme rework this... Thanks, Steve. From ogerlitz at voltaire.com Tue Jan 16 09:06:47 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 19:06:47 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45ACFED0.9050000@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> Message-ID: <45AD0627.7050305@voltaire.com> Sean Hefty wrote: >> mimic IPoIB qkey flow: >> +3 on rdma_create_qp do modify qp with some def qkey (eg zero) >> +4 on the join completion path before attaching a qp to the associated >> mgid, do modify qp with this mrec qkey (=ipv4 broadcast one) > The rdma cm allows UD QP communication, which requires a valid qkey > before or without joining a multicast group. I'd like to find a way to > continue to support this. 
sure, it can use the rdmacm qkey (0x1234567 etc) when it creates the QP and later --if-- the user joins a multicast group modify the qp state with the group qkey and report it in the cma event such that the consumer of the rdmacm would set this into his IB UD TX WR >> +3 on rdma_create_qp do modify qp with some def qkey (eg zero) >> +4 on the join completion path before attaching a qp to the associated >> mgid, do modify qp with this mrec qkey (=ipv4 broadcast one) > Isn't the ipoib qkey a privileged qkey? looking in ipoib code you can see the following code in ipoib_mcast_join_task > if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { > ipoib_mcast_join(dev, priv->broadcast, 0); > return; > } so ipoib_mcast_join is called with create=0 for the broadcast group and this makes it provide a component mask of > comp_mask = > IB_SA_MCMEMBER_REC_MGID | > IB_SA_MCMEMBER_REC_PORT_GID | > IB_SA_MCMEMBER_REC_PKEY | > IB_SA_MCMEMBER_REC_JOIN_STATE; that is the SA sets the QKEY, RATE, MTU, SL etc etc for the broadcast group and later other any joins done by ipoib uses the priv->broadcast->mcmember fields So the broadcast qkey is basically what the SA has set when it created the group. During my talking here i got a pointer to section 10 in the IPoIB RFC (4391) mentioning something like "some 3rd party --has-- to create the broadcast group": > 10. Sending and Receiving IP Multicast Packets > A node joining an IP multicast group must first construct an MGID > according to the rule described in section 4 above. Once the correct > MGID is calculated, the node must call the SA of the outbound link to > attempt a "FullMember" join of the IB multicast group corresponding > to the MGID. If the IB multicast group does not already exist, one > must be created first with the IPoIB link MTU. The MGID MUST use the > same P_Key, Q_Key, SL, MTU, and HopLimit as those used in the > broadcast-GID. 
The rest of attributes SHOULD follow the values used > in the broadcast-GID as well. Bottom line, Looking in the IB SPEC and IPoIB RFC i did not see mentioning of privileged QKEY. Or. From ogerlitz at voltaire.com Tue Jan 16 09:31:21 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 16 Jan 2007 19:31:21 +0200 Subject: [openib-general] some IB multicast sendonly thoughts In-Reply-To: <45ACF50E.8030707@mellanox.co.il> References: <45ACF046.7090605@voltaire.com> <45ACF156.7090706@mellanox.co.il> <45ACF371.20009@voltaire.com> <45ACF50E.8030707@mellanox.co.il> Message-ID: <45AD0BE9.1030101@voltaire.com> Eitan Zahavi wrote: > Or Gerlitz wrote: >> Eitan Zahavi wrote: >> So you are saying that the GW **has** to listen on IGMP at the Eth >> side and **has** to do IB SA join in the only way that forces the SA >> to create the group --> FullMember ? > Yes >> If indeed, this is kind of bad, > I find it very reasonable OK, going fwd with this approach, the GW got IGMP --> so it did FULL MEMBER join and the group is created, what is going on when the Eth multicast node stopped doing RX is there a "leave" IGMP which the GW can trap and act? Or. 
From mshefty at ichips.intel.com Tue Jan 16 09:37:53 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 16 Jan 2007 09:37:53 -0800 Subject: [openib-general] multicast code/merge status In-Reply-To: <45AD0627.7050305@voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> Message-ID: <45AD0D71.9040705@ichips.intel.com> > sure, it can use the rdmacm qkey (0x1234567 etc) when it creates the QP > and later --if-- the user joins a multicast group modify the qp state > with the group qkey and report it in the cma event such that the > consumer of the rdmacm would set this into his IB UD TX WR Changing the qkey would break its existing UD communication. > Bottom line, Looking in the IB SPEC and IPoIB RFC i did not see > mentioning of privileged QKEY. From RFC 4391 (ipoib RFC), 4.1: 2. Q_Key It is RECOMMENDED that a controlled Q_Key be used with the high-order bit set. This is to prevent non-privileged software from fabricating and sending out bogus IP datagrams. I don't know what qkey is actually assigned, however. I have some path forward related tasks that I would like to complete before starting on this. I hope to finish that before the end of this week. I don't want to rush on the multicast support and miss something. For the rdma cm, we may need to let the user set some options when joining a multicast group. Maybe something like: join type (send-only or send-receive), group type (ipoib or rdma defined), etc. I do plan on requesting that the core multicast changes to ib_sa and ib_ipoib be pulled into 2.6.21. 
- Sean From arlin.r.davis at intel.com Tue Jan 16 09:43:13 2007 From: arlin.r.davis at intel.com (Arlin Davis) Date: Tue, 16 Jan 2007 09:43:13 -0800 Subject: [openib-general] building dapl for ofed In-Reply-To: <1168966544.19404.2.camel@linux-q667.site> Message-ID: <000001c73995$d2b02ce0$16d6180a@amr.corp.intel.com> >-----Original Message----- >From: Steve WIse [mailto:swise at opengridcomputing.com] >Sent: Tuesday, January 16, 2007 8:56 AM >To: Davis, Arlin R; Hefty, Sean >Cc: openib-general >Subject: building dapl for ofed > >I'm having problems building dapl for ofed 1.2. > >I'm using the dapl rdma_ucm branch and still getting compile problems. >What librdmacm branch should I be using? Did you use the rdma_ucm branch for both dapl and librdmacm? > >Thanks, > >Steve. From halr at voltaire.com Tue Jan 16 09:42:40 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 16 Jan 2007 12:42:40 -0500 Subject: [openib-general] some IB multicast sendonly thoughts In-Reply-To: <45AD0BE9.1030101@voltaire.com> References: <45ACF046.7090605@voltaire.com> <45ACF156.7090706@mellanox.co.il> <45ACF371.20009@voltaire.com> <45ACF50E.8030707@mellanox.co.il> <45AD0BE9.1030101@voltaire.com> Message-ID: <1168969318.32185.97252.camel@hal.voltaire.com> On Tue, 2007-01-16 at 12:31, Or Gerlitz wrote: > Eitan Zahavi wrote: > > Or Gerlitz wrote: > >> Eitan Zahavi wrote: > > >> So you are saying that the GW **has** to listen on IGMP at the Eth > >> side and **has** to do IB SA join in the only way that forces the SA > >> to create the group --> FullMember ? > > > Yes > > >> If indeed, this is kind of bad, > > > I find it very reasonable > > OK, going fwd with this approach, the GW got IGMP --> so it did FULL > MEMBER join and the group is created, what is going on when the Eth > multicast node stopped doing RX is there a "leave" IGMP which the GW can > trap and act? Leave is supported in IGMPv2 and v3. v1 only had reports so the host(s) would need to be polled in that case. -- Hal > Or. 
> > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From swise at opengridcomputing.com Tue Jan 16 10:14:21 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 12:14:21 -0600 Subject: [openib-general] building dapl for ofed In-Reply-To: <000001c73995$d2b02ce0$16d6180a@amr.corp.intel.com> References: <000001c73995$d2b02ce0$16d6180a@amr.corp.intel.com> Message-ID: <1168971262.19404.18.camel@linux-q667.site> On Tue, 2007-01-16 at 09:43 -0800, Arlin Davis wrote: > > >-----Original Message----- > >From: Steve WIse [mailto:swise at opengridcomputing.com] > >Sent: Tuesday, January 16, 2007 8:56 AM > >To: Davis, Arlin R; Hefty, Sean > >Cc: openib-general > >Subject: building dapl for ofed > > > >I'm having problems building dapl for ofed 1.2. > > > >I'm using the dapl rdma_ucm branch and still getting compile problems. > >What librdmacm branch should I be using? > > Did you use the rdma_ucm branch for both dapl and librdmacm? There is no rdma_ucm branch in librdmacm.git. Which should be used: master rdma_ucm-abi3 multicast 1.1 1.0 From sean.hefty at intel.com Tue Jan 16 10:23:59 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 16 Jan 2007 10:23:59 -0800 Subject: [openib-general] building dapl for ofed In-Reply-To: <1168971262.19404.18.camel@linux-q667.site> Message-ID: <000701c7399b$7ef49220$c2ca180a@amr.corp.intel.com> >There is no rdma_ucm branch in librdmacm.git. 
Which should be used: > master > rdma_ucm-abi3 > multicast > 1.1 > 1.0 > rdma_ucm-abi3 From jsquyres at cisco.com Tue Jan 16 10:34:30 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 16 Jan 2007 13:34:30 -0500 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <20070116083109.GS22940@mellanox.co.il> References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> Message-ID: FWIW, having git's for the MPI implementations was asked for on the call yesterday (by Tziporet, IIRC?). The rationale, as I understood it, was threefold: 1. Putting the MPI release in git provides a level of OFED-specific history and version control. This was explicitly stated on the call yesterday. 2. MPI's have concrete "releases" to OFED just like all other ULP's, especially if there is any OFED-specific packaging involved in the MPI's release. This was not stated on the call, but it makes sense to me. 3. Putting everything in git makes it nicely uniform for OFED to be assembled. This was not stated on the call, and I'm sure it's not a requirement, but it is a little nice to be uniform when assembling OFED (my $0.02). 4. We used to put the MPI releases in SVN (tarball or SRPM) for prior OFED release processes, so putting them in a git seems to parallel that procedure. I don't really care one way or another; this was just my understanding of why it was requested. On Jan 16, 2007, at 3:31 AM, Michael S. Tsirkin wrote: >> Quoting Woodruff, Robert J : >> Subject: Re: [openfabrics-ewg] Reminder: OFED 1.2 >> >> Sasha wrote, >>> On 17:25 Mon 15 Jan , Tziporet Koren wrote: >>> Dhabaleswar Panda wrote: >>>> >>>> Shaun Roland from my group (cc'ed in this e-mail) will be in >>>> charge of >>>> this. Vlad and Shaun can communicate. >>>> >>>> >>> Hi Shaun, >>> Please open an account in the OFA server so you will be able to >>> have a >> >>> git tree to place your SRPM >> >>> But why git tree is needed for SRPM? SRPM is binary file, no? 
>> >>> Sasha >> >> Sasha wrote, >> I am not sure why a git tree is needed for RPMS, unless that is >> the only way to >> expose it from the server to the outside world. Is there a way to >> allow >> people from the outside to access just a directory with the RPMS ? or >> tarballs for things ? > > Using git for binary files does not make sense. > If you want to host files on OFA server, just create > a world-readable pub_html directory under $HOME and put the files > there. > They will be accessible as http://www.openfabrics.org/~/ > . > > OTOH, do we really want to host a copy on OFA servers? > I thought OSU already hosts it, we can just wget the SRPM from > there. No? > > -- > MST > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From jsquyres at cisco.com Tue Jan 16 10:51:51 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 16 Jan 2007 13:51:51 -0500 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> Message-ID: <740DDE2C-2C82-4818-9572-D820BF0792BD@cisco.com> On Jan 16, 2007, at 1:34 PM, Jeff Squyres wrote: > FWIW, having git's for the MPI implementations was asked for on the > call yesterday (by Tziporet, IIRC?). The rationale, as I understood > it, was threefold: Threefold, fourfold, tenfold... who's counting? 
:-) -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From rdreier at cisco.com Tue Jan 16 11:11:16 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 11:11:16 -0800 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <45ACF218.5020400@mellanox.co.il> (Tziporet Koren's message of "Tue, 16 Jan 2007 17:41:12 +0200") References: <45ACF218.5020400@mellanox.co.il> Message-ID: > * libibverbs 1.1: (from Roland) > o Fork support - done > o Minor ABI extensions for new low-level drivers - done > o Better low-level driver handling, including multiple drivers > linked in statically - done > o Dropped from OFED 1.2: > + Memory windows OK, I was planning on postponing them until libibverbs 1.2. > + Reregister memory region (incl. extend without > unregister...) I will include the basic API but no implementation for this so that we have a chance at including it in a future release of libibverbs 1.1 without breaking API or ABI. > + libibverbs 1.0 to be included as well I believe I have a way to make libibverbs 1.1 binary-compatible with applications (but not drivers) built against libibverbs 1.0. So it will probably not be necessary to include libibverbs 1.0. - R. From swise at opengridcomputing.com Tue Jan 16 11:51:56 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 16 Jan 2007 13:51:56 -0600 Subject: [openib-general] [PATCH v5 1/3] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. In-Reply-To: <20070116195154.2559.11592.stgit@dell3.ogc.int> References: <20070116195154.2559.11592.stgit@dell3.ogc.int> Message-ID: <20070116195156.2559.49740.stgit@dell3.ogc.int> Changes to kernel_patches/ for Chelsio T3 Support. - qp_num -> qp ptr patch for cxgb3. 
Signed-off-by: Steve Wise --- kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch | 23 +++++++++++++++++++++++ 1 files changed, 23 insertions(+), 0 deletions(-) diff --git a/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch new file mode 100644 index 0000000..0763f70 --- /dev/null +++ b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch @@ -0,0 +1,23 @@ +Update T3 driver: qp_num no longer in ib_wc. + +From: Steve Wise + +Signed-off-by: Steve Wise +--- + + drivers/infiniband/hw/cxgb3/iwch_cq.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c +index ff09509..122f7b4 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c +@@ -80,7 +80,7 @@ int iwch_poll_cq_one(struct iwch_dev *rh + ret = 1; + + wc->wr_id = cookie; +- wc->qp_num = qhp->wq.qpid; ++ wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " From swise at opengridcomputing.com Tue Jan 16 11:51:54 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 16 Jan 2007 13:51:54 -0600 Subject: [openib-general] [PATCH v5 0/3] ofed_1_2 - Chelsio T3 RDMA Support Message-ID: <20070116195154.2559.11592.stgit@dell3.ogc.int> Michael/Vlad, Here is version 5. Changes since version 4: - Redid cxgb3 support in ofed_scripts/Makefile ala the iscsi rules. Signed-off-by: Steve Wise From swise at opengridcomputing.com Tue Jan 16 11:51:58 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 16 Jan 2007 13:51:58 -0600 Subject: [openib-general] [PATCH v5 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <20070116195154.2559.11592.stgit@dell3.ogc.int> References: <20070116195154.2559.11592.stgit@dell3.ogc.int> Message-ID: <20070116195158.2559.52947.stgit@dell3.ogc.int> Changes to ofed scripts for Chelsio T3 Support. 
- added cxgb3 and iw_cxgb3 config stuff - visit and build driver/net/cxgb3 to get the cxgb3 driver Signed-off-by: Steve Wise --- ofed_scripts/Makefile | 73 +++++++++++++++++++++++++++++++++++++++++++++++- ofed_scripts/configure | 52 ++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile index 63dca81..276400b 100644 --- a/ofed_scripts/Makefile +++ b/ofed_scripts/Makefile @@ -33,6 +33,13 @@ uninstall: uninstall_iscsi clean_kernel: clean_kernel_iscsi endif +ifeq ($(CONFIG_CHELSIO_T3),m) +kernel: kernel_cxgb3 +install_kernel: install_kernel_cxgb3 +uninstall: uninstall_cxgb3 +clean_kernel: clean_kernel_cxgb3 +endif + ######################### # make kernel # ######################### @@ -46,7 +53,9 @@ kernel: @echo "Kernel sources: $(KSRC)" env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ -I$(CWD)/drivers/infiniband/ulp/ipoib \ - -I$(CWD)/drivers/infiniband/debug" \ + -I$(CWD)/drivers/infiniband/debug \ + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ + -I$(CWD)/drivers/net/cxgb3 " \ $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ @@ -75,6 +84,8 @@ kernel: CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ + CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ LINUXINCLUDE=' \ $(BACKPORT_INCLUDES) \ -I$(CWD)/include \ @@ -105,6 +116,23 @@ kernel_iscsi: cat $(CWD)/drivers/scsi/Module.symvers >> $(CWD)/drivers/infiniband/Module.symvers; \ fi; +kernel_cxgb3: + echo "Building drivers/net/cxgb3"; \ + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ + 
EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ + LINUXINCLUDE=' \ + $(BACKPORT_INCLUDES) \ + -I$(CWD)/include \ + -Iinclude \ + $$(if $$(KBUILD_SRC),-Iinclude2 -I$$(srctree)/include) \ + -include include/linux/autoconf.h \ + -include $(CWD)/include/linux/autoconf.h \ + ' \ + modules; \ + if [ -f $(CWD)/drivers/net/cxgb3/Module.symvers ]; then \ + cat $(CWD)/drivers/net/cxgb3/Module.symvers >> $(CWD)/drivers/infiniband/Module.symvers; \ + fi; ######################### # Install kernel # @@ -181,6 +209,33 @@ install_kernel_iscsi: $(DEPMOD) -r -ae $(KVERSION) +install_kernel_cxgb3: + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/net/cxgb3" \ + KERNELRELEASE=$(KVERSION) EXTRAVERSION=$(EXTRAVERSION) \ + INSTALL_MOD_DIR="drivers/net/cxgb3" INSTALL_MOD_PATH=$(CWD) \ + $(WITH_MAKE_PARAMS) modules_install; + + # Backup original cxgb3.ko kernel module under 'prefix'/backup directory + if [ -d $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3 ]; then \ + mkdir -p $(DESTDIR)/$(prefix)/backup; \ + if [ ! 
-f $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz ]; then \ + tar czfP $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3/cxgb3.ko; \ + fi; \ + /bin/rm -f $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3/cxgb3.ko; \ + fi; + + # Copy new cxgb3 kernel module to $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3 + if [ -d $(CWD)/lib/modules/$(KVERSION)/drivers/net/cxgb3 ]; then \ + mkdir -p $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3; \ + mv $(CWD)/lib/modules/$(KVERSION)/drivers/net/cxgb3/cxgb3.ko $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3; \ + fi; \ + if [ -d $(CWD)/lib/modules/$(KVERSION)/extra ]; then \ + mkdir -p $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3; \ + mv $(CWD)/lib/modules/$(KVERSION)/extra/cxgb3.ko $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3 ; \ + fi; + + $(DEPMOD) -r -ae $(KVERSION) + clean: clean_kernel clean_kernel: @@ -189,6 +244,9 @@ clean_kernel: clean_kernel_iscsi: $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/scsi" KERNELRELEASE=$(KVERSION) EXTRAVERSION=$(EXTRAVERSION) $(WITH_MAKE_PARAMS) clean +clean_kernel_cxgb3: + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) EXTRAVERSION=$(EXTRAVERSION) $(WITH_MAKE_PARAMS) clean + uninstall: if [ -f $(DESTDIR)/$(prefix)/backup/infiniband_ko-$(KVERSION).tgz ]; then \ rm -rf $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/infiniband; \ @@ -209,18 +267,31 @@ uninstall_iscsi: $(DEPMOD) -r -ae $(KVERSION) +uninstall_cxgb3: + if [ -f $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz ]; then \ + /bin/rm $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3/cxgb3.ko ; \ + tar xzfP $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz; \ + /bin/rm -f $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz; \ + fi; + + + $(DEPMOD) -r -ae $(KVERSION) + help: @echo @echo kernel: build kernel modules @echo kernel_iscsi: build iSCSI kernel modules + @echo kernel_cxgb3: build cxgb3 kernel module @echo all: build kernel 
modules @echo @echo install_kernel: install kernel modules under $(DESTDIR)/$(MODULES_DIR) @echo install_kernel_iscsi: install iSCSI kernel modules under $(DESTDIR)/$(MODULES_DIR) + @echo install_kernel_cxgb3: install cxgb3 kernel module under $(DESTDIR)/$(MODULES_DIR) @echo install: run install_kernel @echo @echo clean: delete kernel modules binaries @echo clean_kernel: delete kernel modules binaries @echo clean_kernel_iscsi: delete iscsi kernel modules binaries + @echo clean_kernel_cxgb3: delete cxgb3 kernel module binaries @echo uninstall: delete installed environment @echo diff --git a/ofed_scripts/configure b/ofed_scripts/configure index b99104d..e0b188f 100755 --- a/ofed_scripts/configure +++ b/ofed_scripts/configure @@ -129,6 +129,12 @@ Usage: `basename $0` [options] --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] --without-vnic_stats-mod [yes] + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] + --without-cxgb3-mod [yes] + + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] + --without-cxgb3_debug-mod [yes] + --help - print out options @@ -210,7 +216,10 @@ get_backport_dir() 2.6.19*) echo 2.6.19 ;; - 2.6.2[0-9]*) + 2.6.20*) + echo 2.6.20 + ;; + 2.6.2[1-9]*) echo ;; *) @@ -625,6 +634,20 @@ main() --without-vnic_stats-mod) CONFIG_INFINIBAND_VNIC_STATS= ;; + --with-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3="m" + CONFIG_CHELSIO_T3="m" + ;; + --without-cxgb3-mod) + CONFIG_INFINIBAND_CXGB3= + CONFIG_CHELSIO_T3= + ;; + --with-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG="y" + ;; + --without-cxgb3_debug-mod) + CONFIG_INFINIBAND_CXGB3_DEBUG= + ;; --with-modprobe|--without-modprobe) ;; -h | --help) @@ -698,6 +721,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} 
CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} @@ -708,6 +733,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} # Check for minimal supported kernel version if ! check_kerver ${KVERSION} ${MIN_KVERSION}; then @@ -762,6 +788,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} @@ -772,6 +800,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} EOFCONFIG echo "Created ${CONFIG}:" @@ -912,6 +941,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" else DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" fi +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" +else + DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" +fi +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" +else + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" +fi +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 
1" +else + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" +fi cat >> ${AUTOCONF_H} << EOFAUTOCONF #undef CONFIG_INFINIBAND #undef CONFIG_INFINIBAND_IPOIB @@ -934,6 +978,9 @@ #undef CONFIG_INFINIBAND_MADEYE #undef CONFIG_INFINIBAND_VNIC #undef CONFIG_INFINIBAND_VNIC_DEBUG #undef CONFIG_INFINIBAND_VNIC_STATS +#undef CONFIG_INFINIBAND_CXGB3 +#undef CONFIG_INFINIBAND_CXGB3_DEBUG +#undef CONFIG_CHELSIO_T3 #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY @@ -954,6 +1001,8 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_ADDR_TRANS} ${DEFINE_INFINIBAND_MTHCA} ${DEFINE_INFINIBAND_VNIC} +${DEFINE_INFINIBAND_CXGB3} +${DEFINE_CHELSIO_T3} ${DEFINE_INFINIBAND_IPOIB_DEBUG} ${DEFINE_INFINIBAND_ISER} @@ -964,6 +1013,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG ${DEFINE_INFINIBAND_RDS_DEBUG} ${DEFINE_INFINIBAND_VNIC_DEBUG} ${DEFINE_INFINIBAND_VNIC_STATS} +${DEFINE_INFINIBAND_CXGB3_DEBUG} ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} From swise at opengridcomputing.com Tue Jan 16 11:52:00 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 16 Jan 2007 13:52:00 -0600 Subject: [openib-general] [PATCH v5 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. In-Reply-To: <20070116195154.2559.11592.stgit@dell3.ogc.int> References: <20070116195154.2559.11592.stgit@dell3.ogc.int> Message-ID: <20070116195200.2559.17490.stgit@dell3.ogc.int> Provide generic allocator backport to 2.6.20. This is needed even on kernels that might have genalloc because it may not be configured into the running kernel. Right now the only subsystem that turns on the generic allocator config option is IA64. So chances are good that 2.6.20 kernels will _not_ have this compiled in. So we just backport it to all supported kernels. To avoid collisions with kernels that _do_ have this compiled in, change the names of the exported symbols in the backport. 
Changes to genalloc.h: - Change exported symbol names by prepending ib_ - added #defines for original exported symbol names to new names. Thus code calling the allocator doesn't need to change. EG: #define gen_pool_alloc ib_gen_pool_alloc Signed-off-by: Steve Wise --- .../backport/2.6.20/include/linux/genalloc.h | 42 +++++ .../backport/2.6.20/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.20/linux_genalloc_to_2.6.20.patch | 17 ++ 3 files changed, 257 insertions(+), 0 deletions(-) diff --git a/kernel_addons/backport/2.6.20/include/linux/genalloc.h b/kernel_addons/backport/2.6.20/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.20/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.20/include/src/genalloc.c b/kernel_addons/backport/2.6.20/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.20/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/module.h> +#include <linux/genalloc.h> + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch new file mode 100644 index 0000000..93fee2b --- /dev/null +++ b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch @@ -0,0 +1,17 @@ +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 163d991..2cd239f 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include 
"src/genalloc.c" From swise at opengridcomputing.com Tue Jan 16 11:53:38 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Tue, 16 Jan 2007 13:53:38 -0600 Subject: [openib-general] [PATCH v3 2/3] ofed_1_2 Changes to ofed scripts for Chelsio T3 Support. In-Reply-To: <1168966882.9562.7.camel@vladsk-laptop> References: <20070112162034.27341.33435.stgit@dell3.ogc.int> <20070112162038.27341.81922.stgit@dell3.ogc.int> <1168966226.9562.5.camel@vladsk-laptop> <1168966692.19404.5.camel@linux-q667.site> <1168966882.9562.7.camel@vladsk-laptop> Message-ID: <1168977218.19404.28.camel@linux-q667.site> On Tue, 2007-01-16 at 19:01 +0200, Vladimir Sokolovsky wrote: > It breaks compilation without selecting cxgb3... > > Regards, > Vladimir > I just reposted the series (version 5) with a new Makefile scheme similar to iscsi. Steve. From rdreier at cisco.com Tue Jan 16 12:24:05 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 12:24:05 -0800 Subject: [openib-general] [libibverbs] [PATCH] Added checks to memory allocation failure when using asprintf In-Reply-To: <1168941329.12444.6.camel@mtls05.yok.mtl.com> (Dotan Barak's message of "Tue, 16 Jan 2007 11:55:29 +0200") References: <1168941329.12444.6.camel@mtls05.yok.mtl.com> Message-ID: Thanks, applied to master and stable branches of libibverbs. From rdreier at cisco.com Tue Jan 16 12:27:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 12:27:58 -0800 Subject: [openib-general] [libibverbs] [PATCH] Fix some memory leaks in case of error flows during driver init In-Reply-To: <1168941413.12444.8.camel@mtls05.yok.mtl.com> (Dotan Barak's message of "Tue, 16 Jan 2007 11:56:53 +0200") References: <1168941413.12444.8.camel@mtls05.yok.mtl.com> Message-ID: Good catch, I pushed out a patch fixing this in a slightly different way. 
From rdreier at cisco.com Tue Jan 16 12:42:10 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 12:42:10 -0800 Subject: [openib-general] [libibverbs/examples] pingpong tests cleanup (resource destroy + ack events + asprintf failure check) In-Reply-To: <1168941517.12444.11.camel@mtls05.yok.mtl.com> (Dotan Barak's message of "Tue, 16 Jan 2007 11:58:37 +0200") References: <1168941517.12444.11.camel@mtls05.yok.mtl.com> Message-ID: Looks pretty good, but could you please: - split off the asprintf() checks into a separate patch ("one idea per patch"). - change the code so it keeps a count of cq events and then just does one ibv_ack_cq_events(cq, num_cq_events) at the end. If people are going to use this code as an example, then we should avoid expensive calls to ibv_ack_cq_events() (which has to do a pthread_mutex_lock()/unlock() and a pthread_cond_signal()) in the main data-moving flow. thanks... From rdreier at cisco.com Tue Jan 16 12:46:01 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 12:46:01 -0800 Subject: [openib-general] [PATCH incremental] IB/ipoib_cm: interoperability fix In-Reply-To: <20070116134119.GD22940@mellanox.co.il> (Michael S. Tsirkin's message of "Tue, 16 Jan 2007 15:41:19 +0200") References: <20070116134119.GD22940@mellanox.co.il> Message-ID: OK, I added this patch to the for-mm branch, and rolled it into the patch in the ipoib-cm branch. (I did it that way to avoid redoing the NAPI/CM merge). - R. From rdreier at cisco.com Tue Jan 16 12:49:03 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 12:49:03 -0800 Subject: [openib-general] [PATCH] IB/SRP: check memory allocation In-Reply-To: <45ACED39.8020303@dev.mellanox.co.il> ( ishai@dev.mellanox.co.il's message of "Tue, 16 Jan 2007 17:20:25 +0200") References: <45ACED39.8020303@dev.mellanox.co.il> Message-ID: Thanks, applied. 
From changquing.tang at hp.com Tue Jan 16 14:12:55 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 16 Jan 2007 22:12:55 -0000 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: Message-ID: <349DCDA352EACF42A0C49FA6DCEA8403172022@G3W0634.americas.hpqcorp.net> > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Roland Dreier > Sent: Tuesday, January 16, 2007 1:11 PM > To: Tziporet Koren > Cc: EWG; OPENIB > Subject: Re: [openib-general] Minutes for January 15, 2007 > teleconference about OFED 1.2 development progress toward code freeze > > > * libibverbs 1.1: (from Roland) > > o Fork support - done Roland: To what extent Fork is supported ? Is this a complete solution that any style fork() code should work, or do we still need some sync between parent and child so that child won't touch pinned page before call exec() ? Thank you. --CQ > > o Minor ABI extensions for new low-level drivers - done > > o Better low-level driver handling, including > multiple drivers > > linked in statically - done > > o Dropped from OFED 1.2: > > + Memory windows > > OK, I was planning on postponing them until libibverbs 1.2. > > > + Reregister memory region (incl. extend without > > unregister...) > > I will include the basic API but no implementation for this > so that we have a chance at including it in a future release > of libibverbs 1.1 without breaking API or ABI. > > > + libibverbs 1.0 to be included as well > > I believe I have a way to make libibverbs 1.1 > binary-compatible with applications (but not drivers) built > against libibverbs 1.0. So it will probably not be necessary > to include libibverbs 1.0. > > - R. 
> > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From rdreier at cisco.com Tue Jan 16 14:16:54 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 14:16:54 -0800 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <349DCDA352EACF42A0C49FA6DCEA8403172022@G3W0634.americas.hpqcorp.net> (Changqing Tang's message of "Tue, 16 Jan 2007 22:12:55 -0000") References: <349DCDA352EACF42A0C49FA6DCEA8403172022@G3W0634.americas.hpqcorp.net> Message-ID: > To what extent Fork is supported ? Is this a complete solution > that any style fork() code should work, or do we still need some sync > between parent and child so that child won't touch pinned page before > call exec() ? Just about anything should work. However child processes may not use any IB resources from the parent and may not touch any registered memory regions either. But no synchronization is required, and the child may continue to execute its copy of the parent process (exec is not required) The only thing that would break would be if the parent registers a memory region that only covers part of a page, and the child attempts to use something from the rest of that page. That will fail because registered memory does not exist in the child process, and this can only be done on page boundaries. - R. 
From changquing.tang at hp.com Tue Jan 16 14:17:40 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 16 Jan 2007 22:17:40 -0000 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: Message-ID: <349DCDA352EACF42A0C49FA6DCEA840317202F@G3W0634.americas.hpqcorp.net> > > + libibverbs 1.0 to be included as well > > I believe I have a way to make libibverbs 1.1 > binary-compatible with applications (but not drivers) built > against libibverbs 1.0. So it will probably not be necessary > to include libibverbs 1.0. We need this compatible feature very much so I hope you double confirm this. Can app build with 1.0 work on 1.1 ? (if we only use the basic feature). --CQ > > - R. > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From rdreier at cisco.com Tue Jan 16 14:54:09 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 16 Jan 2007 14:54:09 -0800 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <349DCDA352EACF42A0C49FA6DCEA840317202F@G3W0634.americas.hpqcorp.net> (Changqing Tang's message of "Tue, 16 Jan 2007 22:17:40 -0000") References: <349DCDA352EACF42A0C49FA6DCEA840317202F@G3W0634.americas.hpqcorp.net> Message-ID: > We need this compatible feature very much so I hope you double confirm > this. Can app build with 1.0 work on 1.1 ? (if we only use the basic > feature). It's not a matter of which features you use. But yes, my goal is to make apps compiled against libibverbs 1.0 still work with libibverbs 1.1. I'm not positive it is possible since I haven't fully implemented it, but that is what I am trying to achieve. 
From krkumar2 at in.ibm.com Tue Jan 16 19:28:07 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Wed, 17 Jan 2007 08:58:07 +0530 Subject: [openib-general] GIT fails to clone librdmacm and libamso In-Reply-To: <1168957875.17859.19.camel@linux-q667.site> Message-ID: Thanks Steve, it works now. Copy-n-paste was not working and I had wrongly typed the link. Thanks for the help. - KK Steve WIse wrote on 01/16/2007 08:01:15 PM: > The URLs are incorrect. That's scm, not scn. > > > On Tue, 2007-01-16 at 10:59 +0200, Michael S. Tsirkin wrote: > > All, please run git-update-server-info on your repos if you didn't already. > > > > Quoting Krishna Kumar2 : > > Subject: GIT fails to clone librdmacm and libamso > > > > Hi, > > > > When I run the following commands : > > > > git clone http://staging.openfabrics.org/pub/scn/~swise/libamso.git libamso > > git clone http://staging.openfabrics.org/pub/scn/~shefty/librdmacm.git > > > > I get error : > > > > "Cannot get remote repository information. > > Perhaps git-update-server-info needs to be run there?" > > > > It is working for libibverbs : > > git clone git://git.kernel.org/pub/scm/libs/infiniband/libibverbs.git > > > > What could the problem be ? The libamso and rdmacm are http:// > links and not git, > > so I guess I am doing something wrong ? > > > > Thanks, > > > > - KK > > > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib. 
> org/mailman/listinfo/openib-general > > > From eitan at sw053.yok.mtl.com Tue Jan 16 21:24:54 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Wed, 17 Jan 2007 07:24:54 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-17:normal completion Message-ID: <200701170524.l0H5OsNn026130@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_15_23:49:50_2007 6ff53d ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From mst at mellanox.co.il Tue Jan 16 21:59:55 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 07:59:55 +0200 Subject: [openib-general] building dapl for ofed In-Reply-To: <000701c7399b$7ef49220$c2ca180a@amr.corp.intel.com> References: <1168971262.19404.18.camel@linux-q667.site> <000701c7399b$7ef49220$c2ca180a@amr.corp.intel.com> Message-ID: <20070117055955.GB30926@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: building dapl for ofed > > >There is no rdma_ucm branch in librdmacm.git. 
Which should be used: > > master > > rdma_ucm-abi3 > > multicast > > 1.1 > > 1.0 > > > > rdma_ucm-abi3 OFED is packaging master, BTW. -- MST From tziporet at mellanox.co.il Tue Jan 16 23:41:21 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 17 Jan 2007 09:41:21 +0200 Subject: [openib-general] building dapl for ofed In-Reply-To: <20070117055955.GB30926@mellanox.co.il> References: <1168971262.19404.18.camel@linux-q667.site> <000701c7399b$7ef49220$c2ca180a@amr.corp.intel.com> <20070117055955.GB30926@mellanox.co.il> Message-ID: <45ADD321.6000201@mellanox.co.il> Michael S. Tsirkin wrote: >>> There is no rdma_ucm branch in librdmacm.git. Which should be used: >>> master >>> rdma_ucm-abi3 >>> multicast >>> 1.1 >>> 1.0 >>> >>> >> rdma_ucm-abi3 >> > > OFED is packaging master, BTW. > > Hi Sean, Can you reply which branch we should use for OFED 1_2? Thanks, Tziporet From mst at mellanox.co.il Tue Jan 16 23:43:41 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 09:43:41 +0200 Subject: [openib-general] building dapl for ofed In-Reply-To: <45ADD321.6000201@mellanox.co.il> References: <45ADD321.6000201@mellanox.co.il> Message-ID: <20070117074341.GK30926@mellanox.co.il> > Quoting Tziporet Koren : > Subject: Re: [openib-general] building dapl for ofed > > Michael S. Tsirkin wrote: > >>> There is no rdma_ucm branch in librdmacm.git. Which should be used: > >>> master > >>> rdma_ucm-abi3 > >>> multicast > >>> 1.1 > >>> 1.0 > >>> > >>> > >> rdma_ucm-abi3 > >> > > > > OFED is packaging master, BTW. > > > > > Hi Sean, > Can you reply which branch we should use for OFED 1_2? Incidentally, I think it was agreed that OFED will package a 1.0 release of librdmacm, correct? -- MST From vlad at mellanox.co.il Tue Jan 16 23:44:32 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 17 Jan 2007 09:44:32 +0200 Subject: [openib-general] [PATCH v5 1/3] ofed_1_2 Changes to kernel_patches/ for Chelsio T3 Support. 
In-Reply-To: <20070116195156.2559.49740.stgit@dell3.ogc.int> References: <20070116195154.2559.11592.stgit@dell3.ogc.int> <20070116195156.2559.49740.stgit@dell3.ogc.int> Message-ID: <1169019872.5258.2.camel@vladsk-laptop> Applied. Regards, Vladimir On Tue, 2007-01-16 at 13:51 -0600, Steve Wise wrote: > Changes to kernel_patches/ for Chelsio T3 Support. > > - qp_num -> qp ptr patch for cxgb3. > > Signed-off-by: Steve Wise > --- > > kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch | 23 +++++++++++++++++++++++ > 1 files changed, 23 insertions(+), 0 deletions(-) > > diff --git a/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch > new file mode 100644 > index 0000000..0763f70 > --- /dev/null > +++ b/kernel_patches/fixes/cxgb3_wc_qpn_to_qp.patch > @@ -0,0 +1,23 @@ > +Update T3 driver: qp_num no longer in ib_wc. > + > +From: Steve Wise > + > +Signed-off-by: Steve Wise > +--- > + > + drivers/infiniband/hw/cxgb3/iwch_cq.c | 2 +- > + 1 files changed, 1 insertions(+), 1 deletions(-) > + > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c > +index ff09509..122f7b4 100644 > +--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c > +@@ -80,7 +80,7 @@ int iwch_poll_cq_one(struct iwch_dev *rh > + ret = 1; > + > + wc->wr_id = cookie; > +- wc->qp_num = qhp->wq.qpid; > ++ wc->qp = &qhp->ibqp; > + wc->vendor_err = CQE_STATUS(cqe); > + > + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Tue Jan 16 23:44:43 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 17 Jan 2007 09:44:43 +0200 Subject: [openib-general] [PATCH v5 2/3] ofed_1_2 Changes to ofed scripts for Chelsio 
T3 Support. In-Reply-To: <20070116195158.2559.52947.stgit@dell3.ogc.int> References: <20070116195154.2559.11592.stgit@dell3.ogc.int> <20070116195158.2559.52947.stgit@dell3.ogc.int> Message-ID: <1169019883.5258.4.camel@vladsk-laptop> Applied. Regards, Vladimir On Tue, 2007-01-16 at 13:51 -0600, Steve Wise wrote: > Changes to ofed scripts for Chelsio T3 Support. > > - added cxgb3 and iw_cxgb3 config stuff > - visit and build driver/net/cxgb3 to get the cxgb3 driver > > Signed-off-by: Steve Wise > --- > > ofed_scripts/Makefile | 73 +++++++++++++++++++++++++++++++++++++++++++++++- > ofed_scripts/configure | 52 ++++++++++++++++++++++++++++++++++ > 2 files changed, 123 insertions(+), 2 deletions(-) > > diff --git a/ofed_scripts/Makefile b/ofed_scripts/Makefile > index 63dca81..276400b 100644 > --- a/ofed_scripts/Makefile > +++ b/ofed_scripts/Makefile > @@ -33,6 +33,13 @@ uninstall: uninstall_iscsi > clean_kernel: clean_kernel_iscsi > endif > > +ifeq ($(CONFIG_CHELSIO_T3),m) > +kernel: kernel_cxgb3 > +install_kernel: install_kernel_cxgb3 > +uninstall: uninstall_cxgb3 > +clean_kernel: clean_kernel_cxgb3 > +endif > + > ######################### > # make kernel # > ######################### > @@ -46,7 +53,9 @@ kernel: > @echo "Kernel sources: $(KSRC)" > env EXTRA_CFLAGS="$(OPENIB_KERNEL_EXTRA_CFLAGS) $(KERNEL_MEMTRACK_CFLAGS) -I$(CWD)/include -I$(CWD)/drivers/infiniband/include \ > -I$(CWD)/drivers/infiniband/ulp/ipoib \ > - -I$(CWD)/drivers/infiniband/debug" \ > + -I$(CWD)/drivers/infiniband/debug \ > + -I$(CWD)/drivers/infiniband/hw/cxgb3/core \ > + -I$(CWD)/drivers/net/cxgb3 " \ > $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/infiniband" KERNELRELEASE=$(KVERSION) \ > EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ > CONFIG_INFINIBAND=$(CONFIG_INFINIBAND) \ > @@ -75,6 +84,8 @@ kernel: > CONFIG_INFINIBAND_VNIC=$(CONFIG_INFINIBAND_VNIC) \ > CONFIG_INFINIBAND_VNIC_DEBUG=$(CONFIG_INFINIBAND_VNIC_DEBUG) \ > CONFIG_INFINIBAND_VNIC_STATS=$(CONFIG_INFINIBAND_VNIC_STATS) \ > + 
CONFIG_INFINIBAND_CXGB3=$(CONFIG_INFINIBAND_CXGB3) \ > + CONFIG_INFINIBAND_CXGB3_DEBUG=$(CONFIG_INFINIBAND_CXGB3_DEBUG) \ > LINUXINCLUDE=' \ > $(BACKPORT_INCLUDES) \ > -I$(CWD)/include \ > @@ -105,6 +116,23 @@ kernel_iscsi: > cat $(CWD)/drivers/scsi/Module.symvers >> $(CWD)/drivers/infiniband/Module.symvers; \ > fi; > > +kernel_cxgb3: > + echo "Building drivers/net/cxgb3"; \ > + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) \ > + EXTRAVERSION=$(EXTRAVERSION) V=1 $(WITH_MAKE_PARAMS) \ > + CONFIG_CHELSIO_T3=$(CONFIG_CHELSIO_T3) \ > + LINUXINCLUDE=' \ > + $(BACKPORT_INCLUDES) \ > + -I$(CWD)/include \ > + -Iinclude \ > + $$(if $$(KBUILD_SRC),-Iinclude2 -I$$(srctree)/include) \ > + -include include/linux/autoconf.h \ > + -include $(CWD)/include/linux/autoconf.h \ > + ' \ > + modules; \ > + if [ -f $(CWD)/drivers/net/cxgb3/Module.symvers ]; then \ > + cat $(CWD)/drivers/net/cxgb3/Module.symvers >> $(CWD)/drivers/infiniband/Module.symvers; \ > + fi; > > ######################### > # Install kernel # > @@ -181,6 +209,33 @@ install_kernel_iscsi: > > $(DEPMOD) -r -ae $(KVERSION) > > +install_kernel_cxgb3: > + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/net/cxgb3" \ > + KERNELRELEASE=$(KVERSION) EXTRAVERSION=$(EXTRAVERSION) \ > + INSTALL_MOD_DIR="drivers/net/cxgb3" INSTALL_MOD_PATH=$(CWD) \ > + $(WITH_MAKE_PARAMS) modules_install; > + > + # Backup original cxgb3.ko kernel module under 'prefix'/backup directory > + if [ -d $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3 ]; then \ > + mkdir -p $(DESTDIR)/$(prefix)/backup; \ > + if [ ! 
-f $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz ]; then \ > + tar czfP $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3/cxgb3.ko; \ > + fi; \ > + /bin/rm -f $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3/cxgb3.ko; \ > + fi; > + > + # Copy new cxgb3 kernel module to $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3 > + if [ -d $(CWD)/lib/modules/$(KVERSION)/drivers/net/cxgb3 ]; then \ > + mkdir -p $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3; \ > + mv $(CWD)/lib/modules/$(KVERSION)/drivers/net/cxgb3/cxgb3.ko $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3; \ > + fi; \ > + if [ -d $(CWD)/lib/modules/$(KVERSION)/extra ]; then \ > + mkdir -p $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3; \ > + mv $(CWD)/lib/modules/$(KVERSION)/extra/cxgb3.ko $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3 ; \ > + fi; > + > + $(DEPMOD) -r -ae $(KVERSION) > + > clean: clean_kernel > > clean_kernel: > @@ -189,6 +244,9 @@ clean_kernel: > clean_kernel_iscsi: > $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/scsi" KERNELRELEASE=$(KVERSION) EXTRAVERSION=$(EXTRAVERSION) $(WITH_MAKE_PARAMS) clean > > +clean_kernel_cxgb3: > + $(MAKE) -C $(KSRC) SUBDIRS="$(CWD)/drivers/net/cxgb3" KERNELRELEASE=$(KVERSION) EXTRAVERSION=$(EXTRAVERSION) $(WITH_MAKE_PARAMS) clean > + > uninstall: > if [ -f $(DESTDIR)/$(prefix)/backup/infiniband_ko-$(KVERSION).tgz ]; then \ > rm -rf $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/infiniband; \ > @@ -209,18 +267,31 @@ uninstall_iscsi: > > $(DEPMOD) -r -ae $(KVERSION) > > +uninstall_cxgb3: > + if [ -f $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz ]; then \ > + /bin/rm $(DESTDIR)/$(MODULES_DIR)/kernel/drivers/net/cxgb3/cxgb3.ko ; \ > + tar xzfP $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz; \ > + /bin/rm -f $(DESTDIR)/$(prefix)/backup/cxgb3_ko-$(KVERSION).tgz; \ > + fi; > + > + > + $(DEPMOD) -r -ae $(KVERSION) > + > help: > @echo > @echo kernel: build kernel modules > @echo kernel_iscsi: build 
iSCSI kernel modules > + @echo kernel_cxgb3: build cxgb3 kernel module > @echo all: build kernel modules > @echo > @echo install_kernel: install kernel modules under $(DESTDIR)/$(MODULES_DIR) > @echo install_kernel_iscsi: install iSCSI kernel modules under $(DESTDIR)/$(MODULES_DIR) > + @echo install_kernel_cxgb3: install cxgb3 kernel module under $(DESTDIR)/$(MODULES_DIR) > @echo install: run install_kernel > @echo > @echo clean: delete kernel modules binaries > @echo clean_kernel: delete kernel modules binaries > @echo clean_kernel_iscsi: delete iscsi kernel modules binaries > + @echo clean_kernel_cxgb3: delete cxgb3 kernel module binaries > @echo uninstall: delete installed environment > @echo > diff --git a/ofed_scripts/configure b/ofed_scripts/configure > index b99104d..e0b188f 100755 > --- a/ofed_scripts/configure > +++ b/ofed_scripts/configure > @@ -129,6 +129,12 @@ Usage: `basename $0` [options] > --with-vnic_stats-mod make CONFIG_INFINIBAND_VNIC_STATS=y [no] > --without-vnic_stats-mod [yes] > > + --with-cxgb3-mod make CONFIG_INFINIBAND_CXGB3=m [no] > + --without-cxgb3-mod [yes] > + > + --with-cxgb3_debug-mod make CONFIG_INFINIBAND_CXGB3_DEBUG=y [no] > + --without-cxgb3_debug-mod [yes] > + > --help - print out options > > > @@ -210,7 +216,10 @@ get_backport_dir() > 2.6.19*) > echo 2.6.19 > ;; > - 2.6.2[0-9]*) > + 2.6.20*) > + echo 2.6.20 > + ;; > + 2.6.2[1-9]*) > echo > ;; > *) > @@ -625,6 +634,20 @@ main() > --without-vnic_stats-mod) > CONFIG_INFINIBAND_VNIC_STATS= > ;; > + --with-cxgb3-mod) > + CONFIG_INFINIBAND_CXGB3="m" > + CONFIG_CHELSIO_T3="m" > + ;; > + --without-cxgb3-mod) > + CONFIG_INFINIBAND_CXGB3= > + CONFIG_CHELSIO_T3= > + ;; > + --with-cxgb3_debug-mod) > + CONFIG_INFINIBAND_CXGB3_DEBUG="y" > + ;; > + --without-cxgb3_debug-mod) > + CONFIG_INFINIBAND_CXGB3_DEBUG= > + ;; > --with-modprobe|--without-modprobe) > ;; > -h | --help) > @@ -698,6 +721,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > 
CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG:-''} > CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE:-''} > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC:-''} > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3:-''} > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3:-''} > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA:-''} > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY:-''} > @@ -708,6 +733,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG:-''} > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG:-''} > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS:-''} > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG:-''} > > # Check for minimal supported kernel version > if ! check_kerver ${KVERSION} ${MIN_KVERSION}; then > @@ -762,6 +788,8 @@ CONFIG_INFINIBAND_RDS=${CONFIG_INFINIBAN > CONFIG_INFINIBAND_RDS_DEBUG=${CONFIG_INFINIBAND_RDS_DEBUG} > CONFIG_INFINIBAND_MADEYE=${CONFIG_INFINIBAND_MADEYE} > CONFIG_INFINIBAND_VNIC=${CONFIG_INFINIBAND_VNIC} > +CONFIG_INFINIBAND_CXGB3=${CONFIG_INFINIBAND_CXGB3} > +CONFIG_CHELSIO_T3=${CONFIG_CHELSIO_T3} > > CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=${CONFIG_INFINIBAND_IPOIB_DEBUG_DATA} > CONFIG_INFINIBAND_SDP_SEND_ZCOPY=${CONFIG_INFINIBAND_SDP_SEND_ZCOPY} > @@ -772,6 +800,7 @@ CONFIG_INFINIBAND_IPATH=${CONFIG_INFINIB > CONFIG_INFINIBAND_MTHCA_DEBUG=${CONFIG_INFINIBAND_MTHCA_DEBUG} > CONFIG_INFINIBAND_VNIC_DEBUG=${CONFIG_INFINIBAND_VNIC_DEBUG} > CONFIG_INFINIBAND_VNIC_STATS=${CONFIG_INFINIBAND_VNIC_STATS} > +CONFIG_INFINIBAND_CXGB3_DEBUG=${CONFIG_INFINIBAND_CXGB3_DEBUG} > > EOFCONFIG > echo "Created ${CONFIG}:" > @@ -912,6 +941,21 @@ if [ "X${CONFIG_INFINIBAND_VNIC_STATS}" > else > DEFINE_INFINIBAND_VNIC_STATS="#undef CONFIG_INFINIBAND_VNIC_STATS" > fi > +if [ "X${CONFIG_INFINIBAND_CXGB3}" == "Xm" ]; then > + DEFINE_INFINIBAND_CXGB3="#define CONFIG_INFINIBAND_CXGB3 1" > +else > + 
DEFINE_INFINIBAND_CXGB3="#undef CONFIG_INFINIBAND_CXGB3" > +fi > +if [ "X${CONFIG_INFINIBAND_CXGB3_DEBUG}" == "Xy" ]; then > + DEFINE_INFINIBAND_CXGB3_DEBUG="#define CONFIG_INFINIBAND_CXGB3_DEBUG 1" > +else > + DEFINE_INFINIBAND_CXGB3_DEBUG="#undef CONFIG_INFINIBAND_CXGB3_DEBUG" > +fi > +if [ "X${CONFIG_CHELSIO_T3}" == "Xm" ]; then > + DEFINE_CHELSIO_T3="#define CONFIG_CHELSIO_T3 1" > +else > + DEFINE_CHELSIO_T3="#undef CONFIG_CHELSIO_T3" > +fi > cat >> ${AUTOCONF_H} << EOFAUTOCONF > #undef CONFIG_INFINIBAND > #undef CONFIG_INFINIBAND_IPOIB > @@ -934,6 +978,9 @@ #undef CONFIG_INFINIBAND_MADEYE > #undef CONFIG_INFINIBAND_VNIC > #undef CONFIG_INFINIBAND_VNIC_DEBUG > #undef CONFIG_INFINIBAND_VNIC_STATS > +#undef CONFIG_INFINIBAND_CXGB3 > +#undef CONFIG_INFINIBAND_CXGB3_DEBUG > +#undef CONFIG_CHELSIO_T3 > > #undef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA > #undef CONFIG_INFINIBAND_SDP_SEND_ZCOPY > @@ -954,6 +1001,8 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > ${DEFINE_INFINIBAND_ADDR_TRANS} > ${DEFINE_INFINIBAND_MTHCA} > ${DEFINE_INFINIBAND_VNIC} > +${DEFINE_INFINIBAND_CXGB3} > +${DEFINE_CHELSIO_T3} > > ${DEFINE_INFINIBAND_IPOIB_DEBUG} > ${DEFINE_INFINIBAND_ISER} > @@ -964,6 +1013,7 @@ #undef CONFIG_INFINIBAND_MTHCA_DEBUG > ${DEFINE_INFINIBAND_RDS_DEBUG} > ${DEFINE_INFINIBAND_VNIC_DEBUG} > ${DEFINE_INFINIBAND_VNIC_STATS} > +${DEFINE_INFINIBAND_CXGB3_DEBUG} > > ${DEFINE_INFINIBAND_IPOIB_DEBUG_DATA} > ${DEFINE_INFINIBAND_SDP_SEND_ZCOPY} > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Tue Jan 16 23:44:57 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 17 Jan 2007 09:44:57 +0200 Subject: [openib-general] [PATCH v5 3/3] ofed_1_2 Provide generic allocator backport to 2.6.20. 
In-Reply-To: <20070116195200.2559.17490.stgit@dell3.ogc.int> References: <20070116195154.2559.11592.stgit@dell3.ogc.int> <20070116195200.2559.17490.stgit@dell3.ogc.int> Message-ID: <1169019897.5258.6.camel@vladsk-laptop> Applied. Regards, Vladimir On Tue, 2007-01-16 at 13:52 -0600, Steve Wise wrote: > Provide generic allocator backport to 2.6.20. > > This is needed even on kernels that might have genalloc because it > may not be configured into the running kernel. Right now the only > subsystem that turns on the generic allocator config option is IA64. > So chances are good that 2.6.20 kernels will _not_ have this compiled in. > So we just backport it to all supported kernels. > > To avoid collisions with kernels that _do_ have this compiled in, > change the names of the exported symbols in the backport. > > Changes to genalloc.h: > > - Change exported symbol names by prepending ib_ > - added #defines for original exported symbol names to new names. Thus > code calling the allocator doesn't need to change. > > EG: #define gen_pool_alloc ib_gen_pool alloc > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.20/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.20/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.20/linux_genalloc_to_2.6.20.patch | 17 ++ > 3 files changed, 257 insertions(+), 0 deletions(-) > > diff --git a/kernel_addons/backport/2.6.20/include/linux/genalloc.h b/kernel_addons/backport/2.6.20/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.20/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. 
> + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. > + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.20/include/src/genalloc.c b/kernel_addons/backport/2.6.20/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.20/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. 
> + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch > new file mode 100644 > index 0000000..93fee2b > --- /dev/null > +++ b/kernel_patches/backport/2.6.20/linux_genalloc_to_2.6.20.patch > @@ -0,0 +1,17 @@ > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 163d991..2cd239f 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..96a48fe > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit 
http://openib.org/mailman/listinfo/openib-general From umaxx at oleco.net Tue Jan 16 23:49:43 2007 From: umaxx at oleco.net (Joerg Zinke) Date: Wed, 17 Jan 2007 08:49:43 +0100 Subject: [openib-general] libibcm get events In-Reply-To: <45AB3F5B.8030408@dev.mellanox.co.il> References: <20070113141640.43778722@marvin.local> <45AB3F5B.8030408@dev.mellanox.co.il> Message-ID: <20070117084943.62da8523@marvin.local> On Mon, 15 Jan 2007 10:46:19 +0200 Dotan Barak wrote: > Joerg Zinke wrote: > > Hi, > > > > is there a non-blocking alternative for ib_cm_get_event() > > available? Any hints on how to check for events without blocking > > and waiting for the next one? > > > Here is a response that Or Gerlitz sent a few weeks ago on getting > completion events in non-blocking mode, I think that this can be > useful in your case too: > > > "sure, yes, for both questions: the actual object to deliver cq event > is struct ibv_comp_channel whose only field is a file descriptor with > which you can implement the requirements, use poll/select to set a > timeout, or make it non-blocking such that you can > > int rc; > struct ibv_comp_channel my_ch; > struct pollfd my_pollfd; > > flags = fcntl(my_ch.fd, F_GETFL) > rc = fcntl(my_ch.fd, F_SETFL, flags | O_NONBLOCK) > > will make ibv_get_cq_event() return immediately if there is no event to > consume at the channel > > my_pollfd.fd = my_ch.fd; > my_pollfd.events = POLLIN; > my_pollfd.revents = 0; > > rc = poll(&my_pollfd, 1, ms_timeout); > > will allow you to either get a timeout if ms milliseconds have > elapsed or there is an event waiting for you to consume " > Many thanks for this hint, I will give it a try.
Joerg From tziporet at dev.mellanox.co.il Wed Jan 17 00:07:08 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 17 Jan 2007 10:07:08 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: References: <45ACF218.5020400@mellanox.co.il> Message-ID: <45ADD92C.7020401@dev.mellanox.co.il> Roland Dreier wrote: > I believe I have a way to make libibverbs 1.1 binary-compatible with > applications (but not drivers) built against libibverbs 1.0. So it > will probably not be necessary to include libibverbs 1.0. > > - R. > > This is very good. When will we have this change? (Since libibverbs is very central, I prefer having it sooner rather than later.) Thanks, Tziporet From tziporet at dev.mellanox.co.il Wed Jan 17 00:23:45 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 17 Jan 2007 10:23:45 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <45ACF622.4060603@voltaire.com> References: <45ACF218.5020400@mellanox.co.il> <45ACF622.4060603@voltaire.com> Message-ID: <45ADDD11.5040006@dev.mellanox.co.il> Or Gerlitz wrote: > Tziporet Koren wrote: > > > > The bonding package would support: fresh (2.6.20) and some older > upstream kernels along with SLES10 and RH4 Ux (x=3 for sure) > > OK - please send us all the info once it's ready >> General changes to the package: >> * Multicast - we wait for Voltaire and Sean to close all technical >> details - should be ready by the end of the week >> > > I have just sent Sean over the list a clarification email, if needed we > would be able to help doing the missing patches and i guess in a > combined effort this would be ready for the end of --next-- week > > Thanks - please work with MST & Vlad on integration > what about the host side QoS code?
I did not see a newer RFC or patch > other than the RFC that was sent many months ago. > > > We are going to update our low level driver (mthca) to support it. Besides, there should be a small change in CMA for this, and it's specified in the RFC. Tziporet From tziporet at dev.mellanox.co.il Wed Jan 17 00:05:36 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 17 Jan 2007 10:05:36 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> Message-ID: <45ADD8D0.8080301@dev.mellanox.co.il> Jeff Squyres wrote: > FWIW, having git's for the MPI implementations was asked for on the > call yesterday (by Tziporet, IIRC?). The rationale, as I understood > it, was fourfold: > > 1. Putting the MPI release in git provides a level of OFED-specific > history and version control. This was explicitly stated on the call > yesterday. > > 2. MPI's have concrete "releases" to OFED just like all other ULP's, > especially if there is any OFED-specific packaging involved in the > MPI's release. This was not stated on the call, but it makes sense > to me. > > 3. Putting everything in git makes it nicely uniform for OFED to be > assembled. This was not stated on the call, and I'm sure it's not a > requirement, but it is a little nice to be uniform when assembling > OFED (my $0.02). > > 4. We used to put the MPI releases in SVN (tarball or SRPM) for prior > OFED release processes, so putting them in a git seems to parallel > that procedure. > > I don't really care one way or another; this was just my > understanding of why it was requested. > > > Hi, Jeff is correct - I requested this for the reasons above.
Tziporet From dotanb at dev.mellanox.co.il Wed Jan 17 00:58:52 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 17 Jan 2007 10:58:52 +0200 Subject: [openib-general] [libibverbs/examples] pingpong tests cleanup (resource destroy + ack events + asprintf failure check) In-Reply-To: References: <1168941517.12444.11.camel@mtls05.yok.mtl.com> Message-ID: <45ADE54C.6000205@dev.mellanox.co.il> Roland Dreier wrote: > Looks pretty good, but could you please: > > - split off the asprintf() checks into a separate patch ("one idea > per patch"). > - change the code so it keeps a count of cq events and then just > does one ibv_ack_cq_events(cq, num_cq_events) at the end. If > people are going to use this code as an example, then we should > avoid expensive calls to ibv_ack_cq_events() (which has to do a > pthread_mutex_lock()/unlock() and a pthread_cond_signal()) in the > main data-moving flow. > > thanks... > - I wanted to give one patch with all of the fixes, but separate patches are fine by me (I will keep that in mind). - I will change the code according to this tip (and add it to the man pages of this verb). Later on I will send the 2 patches. Thanks, Dotan From dotanb at dev.mellanox.co.il Wed Jan 17 01:37:54 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 17 Jan 2007 11:37:54 +0200 Subject: [openib-general] [libibverbs/examples] [PATCH] Added checks to memory allocation failure when using asprintf in pingpong tests Message-ID: <1169026675.20182.1.camel@mtls05.yok.mtl.com> Added checks for memory allocation failure when using asprintf in pingpong tests.
Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-16 09:56:52.000000000 +0200 @@ -139,7 +139,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -205,7 +207,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-16 09:57:25.000000000 +0200 @@ -150,7 +150,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -229,7 +231,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c =================================================================== --- 
gen2_devel_user.orig/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-16 09:57:54.000000000 +0200 @@ -127,7 +127,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -193,7 +195,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { Index: gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-15 17:02:23.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-16 09:58:22.000000000 +0200 @@ -128,7 +128,9 @@ static struct pingpong_dest *pp_client_e int sockfd = -1; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { @@ -194,7 +196,9 @@ static struct pingpong_dest *pp_server_e int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; - asprintf(&service, "%d", port); + if (asprintf(&service, "%d", port) < 0) + return NULL; + n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { From dotanb at dev.mellanox.co.il Wed Jan 17 01:39:01 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 17 Jan 2007 11:39:01 +0200 Subject: [openib-general] [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events Message-ID: <1169026741.20182.3.camel@mtls05.yok.mtl.com> 
Added resource cleaning before end of pingpong tests + ack to CQ events. Signed-off-by: Dotan Barak --- Index: gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 10:58:02.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 11:09:59.000000000 +0200 @@ -322,7 +322,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -374,6 +374,46 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -451,6 +491,7 @@ int main(int argc, char *argv[]) int use_event = 0; int routs; int rcnt, scnt; + int num_of_events = 0; srand48(getpid() * time(NULL)); @@ -622,6 +663,8 @@ int main(int argc, char *argv[]) return 1; } + ++num_of_events; + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -706,5 +749,13 @@ int main(int argc, char 
*argv[]) iters, usec / 1000000., usec / iters); } + ibv_ack_cq_events(ctx->cq, num_of_events); + + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } Index: gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-17 10:58:02.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/srq_pingpong.c 2007-01-17 11:10:31.000000000 +0200 @@ -358,7 +358,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -424,6 +424,55 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx, int num_qp) +{ + int i; + + for (i = 0; i < num_qp; ++i) { + if (ibv_destroy_qp(ctx->qp[i])) { + fprintf(stderr, "Couldn't destroy QP[%d]\n", i); + return 1; + } + } + + if (ibv_destroy_srq(ctx->srq)) { + fprintf(stderr, "Couldn't destroy SRQ\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -517,6 +566,7 @@ int main(int argc, char *argv[]) int rcnt, scnt; int num_wc; int i; + int num_of_events = 0; 
srand48(getpid() * time(NULL)); @@ -710,6 +760,8 @@ int main(int argc, char *argv[]) return 1; } + ++num_of_events; + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -801,5 +853,13 @@ int main(int argc, char *argv[]) iters, usec / 1000000., usec / iters); } + ibv_ack_cq_events(ctx->cq, num_of_events); + + if (pp_close_ctx(ctx, num_qp)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } Index: gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-17 10:58:02.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/uc_pingpong.c 2007-01-17 11:10:11.000000000 +0200 @@ -310,7 +310,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -362,6 +362,46 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -439,6 +479,7 @@ int 
main(int argc, char *argv[]) int use_event = 0; int routs; int rcnt, scnt; + int num_of_events = 0; srand48(getpid() * time(NULL)); @@ -610,6 +651,8 @@ int main(int argc, char *argv[]) return 1; } + ++num_of_events; + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -694,5 +737,13 @@ int main(int argc, char *argv[]) iters, usec / 1000000., usec / iters); } + ibv_ack_cq_events(ctx->cq, num_of_events); + + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } Index: gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c =================================================================== --- gen2_devel_user.orig/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-17 10:58:02.000000000 +0200 +++ gen2_devel_user/src/userspace/libibverbs/examples/ud_pingpong.c 2007-01-17 11:10:20.000000000 +0200 @@ -311,7 +311,7 @@ static struct pingpong_context *pp_init_ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { - fprintf(stderr, "Couldn't allocate MR\n"); + fprintf(stderr, "Couldn't register MR\n"); return NULL; } @@ -363,6 +363,51 @@ static struct pingpong_context *pp_init_ return ctx; } +int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_destroy_ah(ctx->ah)) { + fprintf(stderr, "Couldn't destroy AH\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release 
context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { @@ -445,6 +490,7 @@ int main(int argc, char *argv[]) int use_event = 0; int routs; int rcnt, scnt; + int num_of_events = 0; srand48(getpid() * time(NULL)); @@ -608,6 +654,8 @@ int main(int argc, char *argv[]) return 1; } + ++num_of_events; + if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; @@ -692,5 +740,13 @@ int main(int argc, char *argv[]) iters, usec / 1000000., usec / iters); } + ibv_ack_cq_events(ctx->cq, num_of_events); + + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + return 0; } From ishai at dev.mellanox.co.il Wed Jan 17 02:19:15 2007 From: ishai at dev.mellanox.co.il (ishai) Date: Wed, 17 Jan 2007 12:19:15 +0200 Subject: [openib-general] [srptools] [PATCH] style fix in asprintf result check In-Reply-To: <1168945019.12444.14.camel@mtls05.yok.mtl.com> References: <1168945019.12444.14.camel@mtls05.yok.mtl.com> Message-ID: <45ADF823.3080900@dev.mellanox.co.il> Applied, Thanks Ishai From ishai at dev.mellanox.co.il Wed Jan 17 02:18:59 2007 From: ishai at dev.mellanox.co.il (ishai) Date: Wed, 17 Jan 2007 12:18:59 +0200 Subject: [openib-general] [srptools] [PATCH] Added checks to memory allocation failure when using asprintf In-Reply-To: <1168945128.12444.17.camel@mtls05.yok.mtl.com> References: <1168945128.12444.17.camel@mtls05.yok.mtl.com> Message-ID: <45ADF813.2040104@dev.mellanox.co.il> Applied, Thanks Ishai From ogerlitz at voltaire.com Wed Jan 17 03:00:41 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 17 Jan 2007 13:00:41 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45AD0D71.9040705@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> 
<45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> <45AD0D71.9040705@ichips.intel.com> Message-ID: <45AE01D9.3000107@voltaire.com> Sean Hefty wrote: >> sure, it can use the rdmacm qkey (0x1234567 etc) when it creates the >> QP and later --if-- the user joins a multicast group modify the qp >> state with the group qkey and report it in the cma event such that the >> consumer of the rdmacm would set this into his IB UD TX WR > Changing the qkey would break its existing UD communication. OK, so we have three use cases here for a UD QP +1 used only for unicast +2 used only for multicast +3 used for both unicast and multicast and my suggestion (default qkey, when join is completed do qp modify with the group qkey) would work fine for use cases 1 - since the user never joins to anything and 2 - same as it works in ipoib so we are left with use case 3. To make things simple, the solution i suggest is that that the RDMA CM would --not-- do this modify QP/QKEY (that is would set the 0x12345678 qkey on the modify qp to init) and rather leave it to the RDMA CM consumer --if-- they wish to do so. However it will use the ipv4 broadcast group qkey for doing mcast joins and report this qkey to the user in the ud param of the event. So users that don't care about their qkey would never bother to do this modify qp and users who do care would do it and have to take caution if their QP is of type 3 (both unicast and mcast). 
If you don't like this direction, your idea from below to have two option for group type - rdmacm or ipoib and have the consumer specify it, so for group type ipoib you will use the ipv4 brd qkey for both join and modify qp and for group type rdmacm you would just use the rdmacm qkey and do no modify qp - this is fine for us as well. >> Bottom line, Looking in the IB SPEC and IPoIB RFC i did not see >> mentioning of privileged QKEY. > > From RFC 4391 (ipoib RFC), 4.1: > > 2. Q_Key > > It is RECOMMENDED that a controlled Q_Key be used with the > high-order bit set. This is to prevent non-privileged > software from fabricating and sending out bogus IP datagrams. > > I don't know what qkey is actually assigned, however. this (what qkey is assigned to the ipv4 broadcast group by different SAs) is orthogonal to the discussion we do here. > I have some path forward related tasks that I would like to complete > before starting on this. I hope to finish that before the end of this > week. I don't want to rush on the multicast support and miss > something. For the rdma cm, we may need to let the user set some > options when joining a multicast group. Maybe something like: join type > (send-only or send-receive), group type (ipoib or rdma defined), etc. As I see it, the group type (or having no types and being always interoperable with ipoib as i suggest above) seems easy to add to the current implementation and would put it in acceptable state for upstream pushing to 2.6.21 and inclusion in OFED 1.2 . As for the join type, as i told you before, I think it should --not-- delay the upstream nor the ofed 1.2 push - if you have the time add this to the user/kernel ABI and have ucma kernel return -EINVAL if someone attempts to to send-only join. And if you don't have the time for that, it can be added later. 
Actually, as you can see in the ipoib code, it never does a send-only-non-member join, so my take here is that till the ipoib issue is resolved there is no reason to have this complexity in the rdmacm. > I do plan on requesting that the core multicast changes to ib_sa and > ib_ipoib be pulled into 2.6.21. This is great news but again I think the "nobody perfect" rule applies well here, the current rdmacm multicast support (which the little fixes we discuss over this thread) can be pushed to 2.6.21 and be enhanced later. Or. From mst at mellanox.co.il Wed Jan 17 04:17:16 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 14:17:16 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45AD0D71.9040705@ichips.intel.com> References: <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> <45AD0D71.9040705@ichips.intel.com> Message-ID: <20070117121716.GF13720@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: multicast code/merge status > > > sure, it can use the rdmacm qkey (0x1234567 etc) when it creates the QP > > and later --if-- the user joins a multicast group modify the qp state > > with the group qkey and report it in the cma event such that the > > consumer of the rdmacm would set this into his IB UD TX WR > > Changing the qkey would break its existing UD communication. > > > Bottom line, Looking in the IB SPEC and IPoIB RFC i did not see > > mentioning of privileged QKEY. > > From RFC 4391 (ipoib RFC), 4.1: > > 2. Q_Key > > It is RECOMMENDED that a controlled Q_Key be used with the > high-order bit set. This is to prevent non-privileged > software from fabricating and sending out bogus IP datagrams. 
BTW, should we be worried that proposed extension (passing qkey in rdma cm param list) seems to expose this qkey to non-privileged software? Maybe a mechanism should be in place to control access to this separately from regular rdma cm for RC QPs? -- MST From ishai at dev.mellanox.co.il Wed Jan 17 04:37:44 2007 From: ishai at dev.mellanox.co.il (ishai) Date: Wed, 17 Jan 2007 14:37:44 +0200 Subject: [openib-general] History change in srptools.git Message-ID: <45AE1898.5030704@dev.mellanox.co.il> Hi I changed the history in srptools.git (some e-mail address errors). The change is only in the log information. The files were not changed. The change: version 19c761889b9bd86abc027a13c1c6d0a96607fe79 becomes version 2088b76f62cd0e94d2c8415a6a328dc818d200f1 If you are working on it you will need to perform a rebase. Sorry for any inconvenience. Thanks Ishai From ogerlitz at voltaire.com Wed Jan 17 04:38:52 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 17 Jan 2007 14:38:52 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <45ADDD11.5040006@dev.mellanox.co.il> References: <45ACF218.5020400@mellanox.co.il> <45ACF622.4060603@voltaire.com> <45ADDD11.5040006@dev.mellanox.co.il> Message-ID: <45AE18DC.4020606@voltaire.com> Tziporet Koren wrote: > Or Gerlitz wrote: >> Tziporet Koren wrote: >> >> >> The bonding package would support: fresh (2.6.20) and some older >> upstream kernels along with SLES10 and RH4 Ux (x=3 for sure) >> >> > OK - please send us all the info once its ready >>> General changes to the package: >>> * Multicast - we wait for Voltaire and Sean to close all technical >>> details - should be ready by the end of the week >>> >> >> I have just sent Sean over the list a clarification email, if needed >> we would be able to help doing the missing patches and i guess in a >> combined effort this would be ready for the end of --next-- week >> >> > 
Thanks - please work with MST & Vlad on integration >> what about the host side QoS code? i did not see an newer RFC nor >> patch other then the RFC that was sent many months ago. > We are going to update our low level driver (mthca) to support it. > Beside there should be a small change in CMA for this, and its specified > in the RFC. I understand that the change involves letting the rdma cm know the SID when the consumer calls --rdma_resolve_route-- where today it get to know the SID when the consumer calls --rdma_connect-- . So this is not an internal RDMA CM change but rather also changes the API. Same for SRP as the api of ib_sa_path_rec_get (that is the structure it gets as input) changes, the SRP code also changes. Any, can you send the mthca and rdmacm/rdmacm-consumers changes as RFC/PATCH over the list before the actual code freeze??? As for the QoS RFC (http://openib.org/pipermail/openib-general/2006-May/022331.html) sent by Eitan, one design issue I see there is how to deal with IB ULPs which do --not-- have a well known SID. So they call ib_cm_listen with IB_CM_ASSIGN_SERVICE_ID and get from the CM a service id to use, then they might do some out of band exchange of this SID before starting their connection establishment. from include/rdma/ib_cm.h > * @service_id: Service identifier matched against incoming connection > * and service ID resolution requests. The service ID should be specified > * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will > * assign a service ID to the caller. Typically this happens with MPI up to the extent that different ranks within the same job may get a different SID. One solution i was thinking of is to +1 define --range-- (eg big enough to serve 1024 CM consumers) per ULP +2 change the CM to support allocating SID in a range +3 change the ULPs which use IB_CM_ASSIGN_SERVICE_ID to ask SID in the relevant range +4 change the QoS manager at the SM side to support ranges Or. 
From ogerlitz at voltaire.com Wed Jan 17 04:44:53 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 17 Jan 2007 14:44:53 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <20070117121716.GF13720@mellanox.co.il> References: <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> <45AD0D71.9040705@ichips.intel.com> <20070117121716.GF13720@mellanox.co.il> Message-ID: <45AE1A45.2060005@voltaire.com> Michael S. Tsirkin wrote: >> Quoting Sean Hefty : >> Subject: Re: multicast code/merge status >> >>> sure, it can use the rdmacm qkey (0x1234567 etc) when it creates the QP >>> and later --if-- the user joins a multicast group modify the qp state >>> with the group qkey and report it in the cma event such that the >>> consumer of the rdmacm would set this into his IB UD TX WR >> Changing the qkey would break its existing UD communication. >> >>> Bottom line, Looking in the IB SPEC and IPoIB RFC i did not see >>> mentioning of privileged QKEY. >> From RFC 4391 (ipoib RFC), 4.1: >> >> 2. Q_Key >> >> It is RECOMMENDED that a controlled Q_Key be used with the >> high-order bit set. This is to prevent non-privileged >> software from fabricating and sending out bogus IP datagrams. > > BTW, should we be worried that proposed extension (passing qkey in rdma cm param > list) seems to expose this qkey to non-privileged software? As was said over related threads here and elsewhere, multicast has its in nature non safeties and having IB implement broadcast over multicast adds more in safety to the party. 
Specifically, as Roland has commented, a user can attach his user space UD QP to the MGID of the ipv4 broadcast (if ipoib is running on this node it will join the group) and start making this IP subnet go crazy. We only want interop with IPoIB and we don't need to join/attach the ipv4 broadcast group just have an option for the rdmacm to use its qkey for joins and later either the rdmacm or the consumer will also set this qkey into the QP and the UD TX WR > Maybe a machanism should be in place to control access to this separately > from regular rdma cm for RC QPs? not following you here, how does qkey relates to RC QPs ? Or. From ogerlitz at voltaire.com Wed Jan 17 04:52:32 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 17 Jan 2007 14:52:32 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <45ACE53A.9010605@voltaire.com> References: <45ACD2F6.8010705@voltaire.com> <20070116133507.GC22940@mellanox.co.il> <45ACE53A.9010605@voltaire.com> Message-ID: <45AE1C10.70003@voltaire.com> Or Gerlitz wrote: > Michael S. Tsirkin wrote: >>>> git log -Sneigh_destructor -- include/net/neighbour.h >>> also, having that at (my) hand does not remove the need that you will >>> set a changelog/signature for the OFED ipoib related backport patch. >> Feel free to add that. > Unless i miss something, we want all OFED kernel patches to meet > **basic** kernel working conversions, specifically that for each patch > there is a change log and an owner. OK, I realize now that in OFED 1.1 out of 438 .patch files under kernel_patches only 103 of them have Signed-Off-by line and assuming this maps 1:1 to the files that have change log, i am not asking you to write now 335 change-logs/signed-off-by section. 
However, since understanding this patch in detail is important to a peer member individual/company of the community (myself/Voltaire) and you being this patch owner and also having the OFED kernel patches maintainer chair, it makes sense that per our request you will put 5 minutes of your time to write a change log. Or. From mst at mellanox.co.il Wed Jan 17 05:39:24 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 15:39:24 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45AE1A45.2060005@voltaire.com> References: <45AE1A45.2060005@voltaire.com> Message-ID: <20070117133924.GH13720@mellanox.co.il> > > Maybe a machanism should be in place to control access to this separately > > from regular rdma cm for RC QPs? > > not following you here, how does qkey relates to RC QPs ? Currently you can block userspace from creating QPs by unloading the uverbs module. Maybe we should make it possible to block creating UD QPs from userspace as a separate security measure. -- MST From mst at mellanox.co.il Wed Jan 17 05:52:04 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 15:52:04 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <45AE1C10.70003@voltaire.com> References: <45ACD2F6.8010705@voltaire.com> <20070116133507.GC22940@mellanox.co.il> <45ACE53A.9010605@voltaire.com> <45AE1C10.70003@voltaire.com> Message-ID: <20070117135204.GJ13720@mellanox.co.il> > However, since understanding this patch in detail is important to a peer > member individual/company of the community (myself/Voltaire) I really would like to help. What is it that you want to know? Here's an explanation from an older mail. Does this help? Work around for neighbour destructor issue for kernels < 2.6.17: keep a global list of all ipoib neighbours. Use it in destructor to 1. Verify that this neighbour belongs to an ipoib device 2. 
Check that the neighbour is the last one to use the destructor, if so reset the destructor pointer -- MST From mst at mellanox.co.il Wed Jan 17 06:04:56 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 16:04:56 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <45AE18DC.4020606@voltaire.com> References: <45ACF218.5020400@mellanox.co.il> <45ACF622.4060603@voltaire.com> <45ADDD11.5040006@dev.mellanox.co.il> <45AE18DC.4020606@voltaire.com> Message-ID: <20070117140455.GK13720@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze > > Tziporet Koren wrote: > > Or Gerlitz wrote: > >> Tziporet Koren wrote: > >> > >> > >> The bonding package would support: fresh (2.6.20) and some older > >> upstream kernels along with SLES10 and RH4 Ux (x=3 for sure) > >> > >> > > OK - please send us all the info once its ready > >>> General changes to the package: > >>> * Multicast - we wait for Voltaire and Sean to close all technical > >>> details - should be ready by the end of the week > >>> > >> > >> I have just sent Sean over the list a clarification email, if needed > >> we would be able to help doing the missing patches and i guess in a > >> combined effort this would be ready for the end of --next-- week > >> > >> > > Thanks - please work with MST & Vlad on integration > >> what about the host side QoS code? i did not see an newer RFC nor > >> patch other then the RFC that was sent many months ago. > > > We are going to update our low level driver (mthca) to support it. > > > Beside there should be a small change in CMA for this, and its specified > > in the RFC. 
> > I understand that the change involves letting the rdma cm know the SID > when the consumer calls --rdma_resolve_route-- where today it get to > know the SID when the consumer calls --rdma_connect-- . So this is not > an internal RDMA CM change but rather also changes the API. > > Same for SRP as the api of ib_sa_path_rec_get (that is the structure it > gets as input) changes, the SRP code also changes. > > Any, can you send the mthca and rdmacm/rdmacm-consumers changes as > RFC/PATCH over the list before the actual code freeze??? I didn't start on this code yet, but it does not look like a huge project, I hope to post code by next week. To avoid major disruptions all over the stack, my preference for OFED 1.2 would be to add new API calls and a module option (off by default) for cma/srp to use them. > As for the QoS RFC > (http://openib.org/pipermail/openib-general/2006-May/022331.html) sent > by Eitan, one design issue I see there is how to deal with IB ULPs which > do --not-- have a well known SID. So they call ib_cm_listen with > IB_CM_ASSIGN_SERVICE_ID and get from the CM a service id to use, then > they might do some out of band exchange of this SID before starting > their connection establishment. > > from include/rdma/ib_cm.h > > > * @service_id: Service identifier matched against incoming connection > > * and service ID resolution requests. The service ID should be specified > > * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will > > * assign a service ID to the caller. > > Typically this happens with MPI up to the extent that different ranks > within the same job may get a different SID. 
One solution i was thinking > of is to > > +1 define --range-- (eg big enough to serve 1024 CM consumers) per ULP > +2 change the CM to support allocating SID in a range > +3 change the ULPs which use IB_CM_ASSIGN_SERVICE_ID to ask SID in the > relevant range > +4 change the QoS manager at the SM side to support ranges For OFED 1.2, I only planned to implement this for SDP and SRP. I do not expect all this to be mergeable in 2.6.21 time frame, so maybe that's enough. So I think I'll opt for an easier +5 don't set SID in path record query for userspace apps -- MST From kliteyn at dev.mellanox.co.il Wed Jan 17 07:01:34 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 17 Jan 2007 17:01:34 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser Message-ID: <45AE3A4E.4050501@dev.mellanox.co.il> Hi Hal The following series of six patches implements QoS policy file parser: 1. QoS parser Lex file 2. QoS parser Lex-generated c file 3. QoS parser grammar (Yacc) file 4. QoS parser Yacc-generated grammar c and h file 5. QoS parser header file that defines parse tree data structures 6. Changes in makefiles and configure.in file for compiling QoS parser files -- Yevgeny Signed-off-by: Yevgeny Kliteynik From kliteyn at dev.mellanox.co.il Wed Jan 17 07:03:58 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 17 Jan 2007 17:03:58 +0200 Subject: [openib-general] [PATCH 1/6] osm: QoS parser Lex file Message-ID: <45AE3ADE.5060609@dev.mellanox.co.il> Hi Hal. This patch is QoS parser Lex file Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_qos_parser.l | 245 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 245 insertions(+), 0 deletions(-) diff --git a/osm/opensm/osm_qos_parser.l b/osm/opensm/osm_qos_parser.l new file mode 100644 index 0000000..73b2a29 --- /dev/null +++ b/osm/opensm/osm_qos_parser.l @@ -0,0 +1,245 @@ +%{ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Lexer of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +#define SAVE_POS save_pos() +static void save_pos(); + +extern int column_num; +extern int line_num; +extern FILE * yyin; + +%} + +XML_VERSION_START <\? 
+XML_VERSION_END \?> +QOS_POLICY_START +QOS_POLICY_END <\/qos\-policy> +PORT_GROUPS_START +PORT_GROUPS_END <\/port\-groups> +PORT_GROUP_START +PORT_GROUP_END <\/port\-group> +NAME_START +NAME_END <\/name> +USE_START +USE_END <\/use> +PORT_GUID_START +PORT_GUID_END <\/port\-guid> +PORT_NAME_START +PORT_NAME_END <\/port\-name> +PARTITION_START +PARTITION_END <\/partition> +NODE_TYPE_START +NODE_TYPE_END <\/node\-type> +QOS_SETUP_START +QOS_SETUP_END <\/qos\-setup> +SL2VL_TABLES_START +SL2VL_TABLES_END <\/sl2vl\-tables> +SL2VL_SCOPE_START +SL2VL_SCOPE_END <\/sl2vl\-scope> +GROUP_START +GROUP_END <\/group> +FROM_START +FROM_END <\/from> +TO_START +TO_END <\/to> +SL2VL_TABLE_START +SL2VL_TABLE_END <\/sl2vl\-table> +ACROSS_START +ACROSS_END <\/across> +ACROSS_FROM_START +ACROSS_FROM_END <\/across\-from> +ACROSS_TO_START +ACROSS_TO_END <\/across\-to> +VLARB_TABLES_START +VLARB_TABLES_END <\/vlarb\-tables> +VLARB_SCOPE_START +VLARB_SCOPE_END <\/vlarb\-scope> +VLARB_HIGH_START +VLARB_HIGH_END <\/vlarb\-high> +VLARB_LOW_START +VLARB_LOW_END <\/vlarb\-low> +VLARB_HIGH_LIMIT_START +VLARB_HIGH_LIMIT_END <\/vl\-high\-limit> +QOS_LEVELS_START +QOS_LEVELS_END <\/qos\-levels> +QOS_LEVEL_START +QOS_LEVEL_END <\/qos\-level> +SN_START +SN_END <\/sn> +SL_START +SL_END <\/sl> +CLASS_START +CLASS_END <\/class> +MTU_LIMIT_START +MTU_LIMIT_END <\/mtu\-limit> +RATE_LIMIT_START +RATE_LIMIT_END <\/rate\-limit> +QOS_MATCH_RULES_START +QOS_MATCH_RULES_END <\/qos\-match\-rules> +QOS_MATCH_RULE_START +QOS_MATCH_RULE_END <\/qos\-match\-rule> +QOS_LEVEL_SN_START +QOS_LEVEL_SN_END <\/qos\-level\-sn> +SOURCE_START +SOURCE_END <\/source> +DESTINATION_START +DESTINATION_END <\/destination> +SERVICE_START +SERVICE_END <\/service> + +WHITE [ \t\n]* +COMMENT + +%% + +{WHITE}{XML_VERSION_START}{WHITE} { SAVE_POS; return TK_XML_VERSION_START; } +{WHITE}{XML_VERSION_END}{WHITE} { SAVE_POS; return TK_XML_VERSION_END; } +{WHITE}{QOS_POLICY_START}{WHITE} { SAVE_POS; return TK_QOS_POLICY_START; } 
+{WHITE}{QOS_POLICY_END}{WHITE} { SAVE_POS; return TK_QOS_POLICY_END; } +{WHITE}{PORT_GROUPS_START}{WHITE} { SAVE_POS; return TK_PORT_GROUPS_START; } +{WHITE}{PORT_GROUPS_END}{WHITE} { SAVE_POS; return TK_PORT_GROUPS_END; } +{WHITE}{PORT_GROUP_START}{WHITE} { SAVE_POS; return TK_PORT_GROUP_START; } +{WHITE}{PORT_GROUP_END}{WHITE} { SAVE_POS; return TK_PORT_GROUP_END; } +{WHITE}{NAME_START}{WHITE} { SAVE_POS; return TK_NAME_START; } +{WHITE}{NAME_END}{WHITE} { SAVE_POS; return TK_NAME_END; } +{WHITE}{USE_START}{WHITE} { SAVE_POS; return TK_USE_START; } +{WHITE}{USE_END}{WHITE} { SAVE_POS; return TK_USE_END; } +{WHITE}{PORT_GUID_START}{WHITE} { SAVE_POS; return TK_PORT_GUID_START; } +{WHITE}{PORT_GUID_END}{WHITE} { SAVE_POS; return TK_PORT_GUID_END; } +{WHITE}{PORT_NAME_START}{WHITE} { SAVE_POS; return TK_PORT_NAME_START; } +{WHITE}{PORT_NAME_END}{WHITE} { SAVE_POS; return TK_PORT_NAME_END; } +{WHITE}{PARTITION_START}{WHITE} { SAVE_POS; return TK_PARTITION_START; } +{WHITE}{PARTITION_END}{WHITE} { SAVE_POS; return TK_PARTITION_END; } +{WHITE}{NODE_TYPE_START}{WHITE} { SAVE_POS; return TK_NODE_TYPE_START; } +{WHITE}{NODE_TYPE_END}{WHITE} { SAVE_POS; return TK_NODE_TYPE_END; } +{WHITE}{QOS_SETUP_START}{WHITE} { SAVE_POS; return TK_QOS_SETUP_START; } +{WHITE}{QOS_SETUP_END}{WHITE} { SAVE_POS; return TK_QOS_SETUP_END; } +{WHITE}{SL2VL_TABLES_START}{WHITE} { SAVE_POS; return TK_SL2VL_TABLES_START; } +{WHITE}{SL2VL_TABLES_END}{WHITE} { SAVE_POS; return TK_SL2VL_TABLES_END; } +{WHITE}{SL2VL_SCOPE_START}{WHITE} { SAVE_POS; return TK_SL2VL_SCOPE_START; } +{WHITE}{SL2VL_SCOPE_END}{WHITE} { SAVE_POS; return TK_SL2VL_SCOPE_END; } +{WHITE}{GROUP_START}{WHITE} { SAVE_POS; return TK_GROUP_START; } +{WHITE}{GROUP_END}{WHITE} { SAVE_POS; return TK_GROUP_END; } +{WHITE}{FROM_START}{WHITE} { SAVE_POS; return TK_FROM_START; } +{WHITE}{FROM_END}{WHITE} { SAVE_POS; return TK_FROM_END; } +{WHITE}{TO_START}{WHITE} { SAVE_POS; return TK_TO_START; } +{WHITE}{TO_END}{WHITE} { SAVE_POS; return 
TK_TO_END; } +{WHITE}{SL2VL_TABLE_START}{WHITE} { SAVE_POS; return TK_SL2VL_TABLE_START; } +{WHITE}{SL2VL_TABLE_END}{WHITE} { SAVE_POS; return TK_SL2VL_TABLE_END; } +{WHITE}{ACROSS_START}{WHITE} { SAVE_POS; return TK_ACROSS_START; } +{WHITE}{ACROSS_END}{WHITE} { SAVE_POS; return TK_ACROSS_END; } +{WHITE}{ACROSS_FROM_START}{WHITE} { SAVE_POS; return TK_ACROSS_FROM_START; } +{WHITE}{ACROSS_FROM_END}{WHITE} { SAVE_POS; return TK_ACROSS_FROM_END; } +{WHITE}{ACROSS_TO_START}{WHITE} { SAVE_POS; return TK_ACROSS_TO_START; } +{WHITE}{ACROSS_TO_END}{WHITE} { SAVE_POS; return TK_ACROSS_TO_END; } +{WHITE}{VLARB_TABLES_START}{WHITE} { SAVE_POS; return TK_VLARB_TABLES_START; } +{WHITE}{VLARB_TABLES_END}{WHITE} { SAVE_POS; return TK_VLARB_TABLES_END; } +{WHITE}{VLARB_SCOPE_START}{WHITE} { SAVE_POS; return TK_VLARB_SCOPE_START; } +{WHITE}{VLARB_SCOPE_END}{WHITE} { SAVE_POS; return TK_VLARB_SCOPE_END; } +{WHITE}{VLARB_HIGH_START}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_START; } +{WHITE}{VLARB_HIGH_END}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_END; } +{WHITE}{VLARB_LOW_START}{WHITE} { SAVE_POS; return TK_VLARB_LOW_START; } +{WHITE}{VLARB_LOW_END}{WHITE} { SAVE_POS; return TK_VLARB_LOW_END; } +{WHITE}{VLARB_HIGH_LIMIT_START}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_LIMIT_START; } +{WHITE}{VLARB_HIGH_LIMIT_END}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_LIMIT_END; } +{WHITE}{QOS_LEVELS_START}{WHITE} { SAVE_POS; return TK_QOS_LEVELS_START; } +{WHITE}{QOS_LEVELS_END}{WHITE} { SAVE_POS; return TK_QOS_LEVELS_END; } +{WHITE}{QOS_LEVEL_START}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_START; } +{WHITE}{QOS_LEVEL_END}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_END; } +{WHITE}{SN_START}{WHITE} { SAVE_POS; return TK_SN_START; } +{WHITE}{SN_END}{WHITE} { SAVE_POS; return TK_SN_END; } +{WHITE}{SL_START}{WHITE} { SAVE_POS; return TK_SL_START; } +{WHITE}{SL_END}{WHITE} { SAVE_POS; return TK_SL_END; } +{WHITE}{CLASS_START}{WHITE} { SAVE_POS; return TK_CLASS_START; } +{WHITE}{CLASS_END}{WHITE} { SAVE_POS; 
return TK_CLASS_END; } +{WHITE}{MTU_LIMIT_START}{WHITE} { SAVE_POS; return TK_MTU_LIMIT_START; } +{WHITE}{MTU_LIMIT_END}{WHITE} { SAVE_POS; return TK_MTU_LIMIT_END; } +{WHITE}{RATE_LIMIT_START}{WHITE} { SAVE_POS; return TK_RATE_LIMIT_START; } +{WHITE}{RATE_LIMIT_END}{WHITE} { SAVE_POS; return TK_RATE_LIMIT_END; } +{WHITE}{QOS_MATCH_RULES_START}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULES_START; } +{WHITE}{QOS_MATCH_RULES_END}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULES_END; } +{WHITE}{QOS_MATCH_RULE_START}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULE_START; } +{WHITE}{QOS_MATCH_RULE_END}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULE_END; } +{WHITE}{QOS_LEVEL_SN_START}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_SN_START; } +{WHITE}{QOS_LEVEL_SN_END}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_SN_END; } +{WHITE}{SOURCE_START}{WHITE} { SAVE_POS; return TK_SOURCE_START; } +{WHITE}{SOURCE_END}{WHITE} { SAVE_POS; return TK_SOURCE_END; } +{WHITE}{DESTINATION_START}{WHITE} { SAVE_POS; return TK_DESTINATION_START; } +{WHITE}{DESTINATION_END}{WHITE} { SAVE_POS; return TK_DESTINATION_END; } +{WHITE}{SERVICE_START}{WHITE} { SAVE_POS; return TK_SERVICE_START; } +{WHITE}{SERVICE_END}{WHITE} { SAVE_POS; return TK_SERVICE_END; } + +{WHITE}{COMMENT}{WHITE} { SAVE_POS; } /* swallow comment */ + +0[xX][0-9a-fA-F]+ { SAVE_POS; yylval = strdup(yytext); return TK_HEX_NUMBER; } +[0-9]+ { SAVE_POS; yylval = strdup(yytext); return TK_DEC_NUMBER; } +[ \t\n]+ { SAVE_POS; yylval = strdup(yytext); return TK_WHITE; } +- { SAVE_POS; yylval = strdup(yytext); return TK_DASH; } +: { SAVE_POS; yylval = strdup(yytext); return TK_DOTDOT; } +, { SAVE_POS; yylval = strdup(yytext); return TK_COMMA; } +. 
{ SAVE_POS; yylval = strdup(yytext); return TK_TEXT;} + +%% + +static void save_pos() +{ + int i; + for (i = 0; i < yyleng; i++) + { + if (yytext[i] == '\n') + { + line_num ++; + column_num = 1; + } + else + column_num ++; + } +} + -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Wed Jan 17 07:04:05 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 17 Jan 2007 17:04:05 +0200 Subject: [openib-general] [PATCH 2/6] osm: QoS parser generated lexer file Message-ID: <45AE3AE5.70109@dev.mellanox.co.il> Hi Hal. This patch is QoS parser generated lexer file Signed-off-by: Yevgeny Kliteynik <kliteyn at dev.mellanox.co.il> --- osm/opensm/osm_qos_parser_l.c | 2637 +++++++++++++++++++++++++++++++++++++++++ 1 files changed, 2637 insertions(+), 0 deletions(-) diff --git a/osm/opensm/osm_qos_parser_l.c b/osm/opensm/osm_qos_parser_l.c new file mode 100644 index 0000000..591671c --- /dev/null +++ b/osm/opensm/osm_qos_parser_l.c @@ -0,0 +1,2637 @@ + +#line 3 "lex.yy.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 33 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. 
+ */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; +#endif /* ! C99 */ + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. 
+ */ +#define BEGIN (yy_start) = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START (((yy_start) - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart(yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE 16384 +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +extern int yyleng; + +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = (yy_hold_char); \ + YY_RESTORE_YY_MORE_OFFSET \ + (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, (yytext_ptr) ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). 
+ */ + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef unsigned int yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* Stack of input buffers. 
*/ +static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ +static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ +static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ + ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ + : NULL) + +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; +static int yy_n_chars; /* number of characters read into yy_ch_buf */ +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 0; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart (FILE *input_file ); +void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ); +YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ); +void yy_delete_buffer (YY_BUFFER_STATE b ); +void yy_flush_buffer (YY_BUFFER_STATE b ); +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ); +void yypop_buffer_state (void ); + +static void yyensure_buffer_stack (void ); +static void yy_load_buffer_state (void ); +static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ); + +#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ) + +YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ); +YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ); +YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len ); + +void *yyalloc (yy_size_t ); +void *yyrealloc (void *,yy_size_t ); +void yyfree (void * ); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +typedef unsigned char YY_CHAR; + +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; + +typedef int yy_state_type; + +extern int yylineno; + +int yylineno = 1; + +extern char *yytext; +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state (void ); +static yy_state_type yy_try_NUL_trans (yy_state_type current_state ); +static int yy_get_next_buffer (void ); +static void yy_fatal_error (yyconst char msg[] ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + (yytext_ptr) = yy_bp; \ + yyleng = (size_t) (yy_cp - yy_bp); \ + (yy_hold_char) = *yy_cp; \ + *yy_cp = '\0'; \ + (yy_c_buf_p) = yy_cp; + +#define YY_NUM_RULES 85 +#define YY_END_OF_BUFFER 86 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static yyconst flex_int16_t yy_accept[580] = + { 0, + 0, 0, 86, 84, 80, 80, 83, 81, 79, 79, + 82, 84, 84, 80, 0, 0, 79, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 78, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 57, 55, 0, + 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 58, 56, 0, 32, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 57, 55, 0, 31, 11, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 58, 56, 0, 32, 12, 0, 0, + 0, 0, 0, 29, 0, 0, 9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, + + 0, 77, 0, 0, 0, 30, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 12, 0, 0, 0, 59, 0, 29, 27, 0, 9, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 77, 0, 60, 0, 30, 28, + 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 35, 59, 0, + 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, + 0, 0, 36, 60, 0, 28, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 72, 0, 0, 0, 0, 0, 0, 0, 35, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 75, 0, 0, 71, 0, 0, 0, 0, 0, + 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 76, 0, 0, 72, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 76, 0, 0, 0, 0, 0, 0, 0, 0, 39, + 0, 61, 19, 17, 0, 13, 15, 0, 53, 0, + 0, 0, 21, 0, 0, 0, 0, 0, 47, 0, + 0, 0, 40, 0, 62, 20, 18, 0, 14, 16, + 0, 54, 0, 0, 0, 22, 0, 0, 0, 0, + 0, 48, 0, 0, 0, 39, 0, 61, 19, 17, + 7, 0, 13, 15, 0, 53, 51, 0, 3, 21, + 63, 0, 0, 0, 45, 47, 0, 0, 0, 40, + 0, 62, 20, 18, 8, 0, 14, 16, 0, 54, + 52, 0, 4, 22, 64, 0, 0, 0, 46, 48, + + 0, 0, 37, 73, 7, 5, 0, 51, 0, 3, + 63, 25, 33, 0, 0, 45, 43, 0, 38, 74, + 8, 6, 0, 52, 0, 4, 64, 26, 34, 0, + 0, 46, 44, 0, 37, 73, 5, 69, 0, 25, + 33, 
23, 0, 43, 41, 38, 74, 6, 70, 0, + 26, 34, 24, 0, 44, 42, 69, 0, 23, 49, + 41, 70, 0, 24, 50, 42, 67, 0, 49, 68, + 0, 50, 67, 65, 68, 66, 65, 66, 0 + } ; + +static yyconst flex_int32_t yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 5, 6, 1, 7, 8, 9, 10, + 9, 9, 9, 9, 9, 9, 9, 11, 1, 12, + 1, 13, 14, 1, 15, 15, 15, 15, 15, 15, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, + 1, 1, 1, 1, 1, 1, 17, 18, 19, 20, + + 21, 22, 23, 24, 25, 1, 1, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 16, + 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst flex_int32_t yy_meta[39] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, + 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, + 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 + } ; + +static yyconst flex_int16_t yy_base[583] = + { 0, + 0, 13, 841, 842, 26, 29, 842, 842, 26, 36, + 842, 43, 827, 78, 0, 826, 43, 0, 832, 76, + 46, 818, 810, 814, 802, 801, 798, 38, 39, 802, + 813, 92, 800, 795, 801, 56, 0, 820, 806, 798, + 802, 790, 789, 786, 55, 65, 790, 801, 96, 788, + 783, 789, 83, 782, 796, 779, 782, 781, 774, 781, + 787, 774, 773, 771, 769, 770, 7, 788, 765, 786, + 777, 20, 85, 791, 764, 778, 761, 764, 763, 756, + 763, 769, 756, 755, 753, 751, 752, 20, 770, 747, + 768, 759, 83, 750, 745, 743, 749, 740, 768, 752, + + 751, 737, 736, 763, 747, 731, 730, 99, 112, 733, + 124, 751, 739, 730, 755, 754, 730, 725, 723, 729, + 720, 748, 732, 731, 717, 716, 743, 727, 711, 710, + 126, 128, 713, 130, 731, 719, 710, 708, 707, 714, + 
725, 707, 710, 722, 728, 708, 726, 108, 725, 705, + 703, 134, 137, 709, 140, 142, 702, 708, 712, 691, + 690, 697, 708, 690, 693, 705, 711, 691, 709, 120, + 708, 688, 686, 146, 149, 692, 152, 154, 685, 691, + 675, 694, 678, 156, 692, 679, 158, 669, 668, 41, + 680, 683, 670, 677, 671, 677, 689, 673, 160, 670, + + 686, 162, 658, 677, 661, 164, 675, 662, 166, 652, + 651, 147, 663, 666, 653, 660, 654, 660, 672, 656, + 169, 653, 669, 48, 171, 657, 174, 176, 646, 178, + 634, 646, 84, 653, 633, 634, 641, 632, 640, 643, + 149, 650, 638, 160, 185, 179, 187, 644, 193, 195, + 633, 197, 621, 633, 169, 640, 620, 621, 628, 619, + 627, 630, 169, 637, 625, 181, 184, 206, 208, 614, + 210, 622, 616, 616, 615, 618, 615, 620, 621, 614, + 603, 610, 623, 616, 617, 214, 627, 607, 602, 611, + 612, 197, 218, 220, 594, 222, 602, 596, 596, 595, + + 598, 595, 600, 601, 594, 583, 590, 603, 596, 597, + 224, 607, 587, 582, 591, 592, 576, 578, 226, 581, + 571, 583, 575, 567, 581, 579, 573, 574, 578, 566, + 570, 230, 565, 575, 232, 566, 568, 553, 560, 570, + 555, 557, 234, 560, 550, 562, 554, 546, 560, 558, + 552, 553, 557, 545, 549, 236, 544, 554, 238, 545, + 547, 532, 539, 549, 537, 552, 535, 550, 549, 548, + 530, 546, 545, 236, 551, 518, 542, 520, 241, 523, + 526, 526, 526, 536, 518, 521, 517, 532, 515, 530, + 529, 528, 510, 526, 525, 239, 531, 498, 522, 500, + + 244, 503, 506, 506, 506, 516, 498, 501, 499, 248, + 497, 251, 253, 255, 235, 257, 259, 491, 261, 510, + 490, 508, 263, 507, 498, 497, 490, 503, 268, 494, + 493, 486, 271, 484, 273, 275, 277, 254, 279, 281, + 478, 283, 497, 477, 495, 286, 494, 485, 484, 477, + 490, 288, 481, 480, 487, 290, 486, 292, 294, 296, + 298, 485, 300, 302, 469, 304, 306, 461, 308, 310, + 312, 482, 303, 469, 315, 317, 480, 459, 478, 319, + 477, 321, 323, 325, 327, 476, 329, 331, 460, 335, + 337, 452, 339, 341, 343, 473, 322, 460, 345, 347, + + 471, 450, 349, 351, 354, 356, 469, 358, 455, 360, + 362, 364, 366, 467, 445, 368, 370, 465, 372, 374, + 376, 378, 464, 
380, 450, 382, 384, 386, 388, 217, + 157, 390, 392, 110, 394, 396, 398, 400, 84, 402, + 404, 406, 84, 408, 410, 412, 414, 416, 418, 70, + 420, 422, 424, 70, 426, 428, 430, 421, 433, 435, + 437, 439, 430, 442, 444, 446, 448, 26, 450, 453, + 0, 455, 457, 459, 462, 464, 466, 468, 842, 471, + 2, 473 + } ; + +static yyconst flex_int16_t yy_def[583] = + { 0, + 580, 580, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 12, 579, 579, 581, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 581, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 582, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 582, 582, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 582, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 
579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 0, 579, + 579, 579 + } ; + +static yyconst flex_int16_t yy_nxt[881] = + { 0, + 579, 5, 6, 37, 7, 8, 579, 9, 10, 10, + 11, 12, 576, 13, 5, 6, 107, 7, 8, 108, + 9, 10, 10, 11, 12, 113, 13, 14, 14, 130, + 14, 14, 131, 17, 17, 17, 114, 15, 574, 16, + 15, 18, 16, 17, 17, 17, 19, 53, 53, 20, + 17, 17, 17, 267, 60, 62, 21, 73, 73, 22, + 268, 23, 24, 233, 25, 26, 61, 63, 234, 27, + 28, 81, 29, 30, 31, 32, 33, 34, 35, 14, + 14, 83, 565, 82, 53, 53, 73, 73, 136, 15, + 563, 16, 39, 84, 40, 41, 560, 42, 43, 137, + + 152, 152, 44, 45, 558, 46, 47, 48, 49, 50, + 51, 52, 66, 153, 153, 275, 87, 67, 276, 68, + 69, 88, 556, 89, 90, 155, 
155, 174, 174, 175, + 175, 177, 177, 191, 192, 152, 152, 193, 153, 153, + 194, 155, 155, 199, 199, 213, 214, 174, 174, 215, + 175, 175, 216, 177, 177, 221, 221, 227, 227, 230, + 230, 199, 199, 245, 245, 249, 249, 252, 252, 255, + 221, 221, 269, 269, 256, 227, 227, 271, 271, 230, + 230, 284, 285, 288, 292, 289, 245, 245, 294, 294, + 554, 293, 290, 291, 249, 249, 296, 296, 252, 252, + + 300, 309, 310, 301, 313, 317, 314, 319, 319, 269, + 269, 271, 271, 315, 316, 335, 335, 318, 341, 343, + 343, 294, 294, 296, 296, 359, 359, 319, 319, 553, + 342, 379, 379, 335, 335, 343, 343, 401, 401, 359, + 359, 418, 379, 379, 441, 401, 401, 461, 419, 456, + 456, 442, 458, 458, 459, 459, 460, 460, 463, 463, + 464, 464, 466, 466, 470, 470, 485, 462, 420, 476, + 476, 443, 480, 480, 482, 482, 483, 483, 484, 484, + 487, 487, 488, 488, 490, 490, 486, 494, 494, 500, + 500, 456, 456, 458, 458, 459, 459, 460, 460, 505, + + 505, 463, 463, 464, 464, 466, 466, 508, 508, 510, + 510, 470, 470, 511, 511, 513, 516, 516, 476, 476, + 480, 480, 482, 482, 483, 483, 484, 484, 521, 521, + 487, 487, 488, 488, 529, 514, 490, 490, 524, 524, + 526, 526, 494, 494, 527, 527, 532, 532, 500, 500, + 535, 535, 536, 536, 530, 505, 505, 537, 537, 508, + 508, 510, 510, 511, 511, 540, 540, 541, 541, 516, + 516, 544, 544, 546, 546, 547, 547, 521, 521, 548, + 548, 524, 524, 526, 526, 527, 527, 551, 551, 552, + 552, 532, 532, 555, 555, 535, 535, 536, 536, 537, + + 537, 557, 557, 540, 540, 541, 541, 559, 559, 544, + 544, 561, 561, 546, 546, 547, 547, 548, 548, 562, + 562, 551, 551, 552, 552, 564, 564, 555, 555, 566, + 566, 557, 557, 567, 559, 559, 569, 569, 561, 561, + 562, 562, 570, 564, 564, 572, 572, 566, 566, 573, + 573, 569, 569, 568, 575, 575, 572, 572, 573, 573, + 577, 577, 571, 575, 575, 578, 578, 577, 577, 578, + 578, 4, 4, 115, 115, 550, 549, 545, 543, 542, + 539, 538, 534, 533, 531, 528, 525, 523, 522, 520, + 519, 518, 517, 515, 512, 509, 507, 506, 504, 503, + + 502, 501, 499, 498, 497, 496, 495, 493, 492, 491, 
+ 489, 481, 479, 478, 477, 475, 474, 473, 472, 471, + 469, 468, 467, 465, 457, 455, 454, 453, 452, 451, + 450, 449, 448, 447, 446, 445, 444, 440, 439, 438, + 437, 436, 435, 434, 433, 432, 431, 430, 429, 428, + 427, 426, 425, 424, 423, 422, 421, 417, 416, 415, + 414, 413, 412, 411, 410, 409, 408, 407, 406, 405, + 404, 403, 402, 400, 399, 398, 397, 396, 395, 394, + 393, 392, 391, 390, 389, 388, 387, 386, 385, 384, + 383, 382, 381, 380, 378, 377, 376, 375, 374, 373, + + 372, 371, 370, 369, 368, 367, 366, 365, 364, 363, + 362, 361, 360, 358, 357, 356, 355, 354, 353, 352, + 351, 350, 349, 348, 347, 346, 345, 344, 340, 339, + 338, 337, 336, 334, 333, 332, 331, 330, 329, 328, + 327, 326, 325, 324, 323, 322, 321, 320, 312, 311, + 308, 307, 306, 305, 304, 303, 302, 299, 298, 297, + 295, 287, 286, 283, 282, 281, 280, 279, 278, 277, + 274, 273, 272, 270, 266, 265, 264, 263, 262, 261, + 260, 259, 258, 257, 254, 253, 251, 250, 248, 247, + 246, 244, 243, 242, 241, 240, 239, 238, 237, 236, + + 235, 232, 231, 229, 228, 226, 225, 224, 223, 222, + 220, 219, 218, 217, 212, 211, 210, 209, 208, 207, + 206, 205, 204, 203, 202, 201, 200, 198, 197, 196, + 195, 190, 189, 188, 187, 186, 185, 184, 183, 182, + 181, 180, 179, 178, 176, 173, 172, 171, 170, 169, + 168, 167, 166, 165, 164, 163, 162, 161, 160, 159, + 116, 158, 157, 156, 154, 151, 150, 149, 148, 147, + 146, 145, 144, 143, 142, 141, 140, 139, 138, 135, + 134, 133, 132, 129, 128, 127, 126, 125, 124, 123, + 122, 121, 120, 119, 118, 117, 116, 112, 111, 110, + + 109, 106, 105, 104, 103, 102, 101, 100, 99, 98, + 97, 96, 95, 94, 93, 92, 91, 86, 85, 80, + 79, 78, 77, 76, 75, 74, 72, 71, 70, 65, + 64, 59, 58, 57, 56, 55, 54, 38, 36, 36, + 579, 3, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579 + } ; + +static yyconst flex_int16_t yy_chk[881] = + { 0, + 0, 1, 1, 581, 1, 1, 0, 1, 1, 1, + 1, 1, 571, 
1, 2, 2, 67, 2, 2, 67, + 2, 2, 2, 2, 2, 72, 2, 5, 5, 88, + 6, 6, 88, 9, 9, 9, 72, 5, 568, 5, + 6, 9, 6, 10, 10, 10, 12, 21, 21, 12, + 17, 17, 17, 224, 28, 29, 12, 36, 36, 12, + 224, 12, 12, 190, 12, 12, 28, 29, 190, 12, + 12, 45, 12, 12, 12, 12, 12, 12, 12, 14, + 14, 46, 554, 45, 53, 53, 73, 73, 93, 14, + 550, 14, 20, 46, 20, 20, 543, 20, 20, 93, + + 108, 108, 20, 20, 539, 20, 20, 20, 20, 20, + 20, 20, 32, 109, 109, 233, 49, 32, 233, 32, + 32, 49, 534, 49, 49, 111, 111, 131, 131, 132, + 132, 134, 134, 148, 148, 152, 152, 148, 153, 153, + 148, 155, 155, 156, 156, 170, 170, 174, 174, 170, + 175, 175, 170, 177, 177, 178, 178, 184, 184, 187, + 187, 199, 199, 202, 202, 206, 206, 209, 209, 212, + 221, 221, 225, 225, 212, 227, 227, 228, 228, 230, + 230, 241, 241, 244, 246, 244, 245, 245, 247, 247, + 531, 246, 244, 244, 249, 249, 250, 250, 252, 252, + + 255, 263, 263, 255, 266, 267, 266, 268, 268, 269, + 269, 271, 271, 266, 266, 286, 286, 267, 292, 293, + 293, 294, 294, 296, 296, 311, 311, 319, 319, 530, + 292, 332, 332, 335, 335, 343, 343, 356, 356, 359, + 359, 374, 379, 379, 396, 401, 401, 415, 374, 410, + 410, 396, 412, 412, 413, 413, 414, 414, 416, 416, + 417, 417, 419, 419, 423, 423, 438, 415, 374, 429, + 429, 396, 433, 433, 435, 435, 436, 436, 437, 437, + 439, 439, 440, 440, 442, 442, 438, 446, 446, 452, + 452, 456, 456, 458, 458, 459, 459, 460, 460, 461, + + 461, 463, 463, 464, 464, 466, 466, 467, 467, 469, + 469, 470, 470, 471, 471, 473, 475, 475, 476, 476, + 480, 480, 482, 482, 483, 483, 484, 484, 485, 485, + 487, 487, 488, 488, 497, 473, 490, 490, 491, 491, + 493, 493, 494, 494, 495, 495, 499, 499, 500, 500, + 503, 503, 504, 504, 497, 505, 505, 506, 506, 508, + 508, 510, 510, 511, 511, 512, 512, 513, 513, 516, + 516, 517, 517, 519, 519, 520, 520, 521, 521, 522, + 522, 524, 524, 526, 526, 527, 527, 528, 528, 529, + 529, 532, 532, 533, 533, 535, 535, 536, 536, 537, + + 537, 538, 538, 540, 540, 541, 541, 542, 542, 544, + 544, 545, 545, 546, 546, 547, 547, 548, 548, 
549, + 549, 551, 551, 552, 552, 553, 553, 555, 555, 556, + 556, 557, 557, 558, 559, 559, 560, 560, 561, 561, + 562, 562, 563, 564, 564, 565, 565, 566, 566, 567, + 567, 569, 569, 558, 570, 570, 572, 572, 573, 573, + 574, 574, 563, 575, 575, 576, 576, 577, 577, 578, + 578, 580, 580, 582, 582, 525, 523, 518, 515, 514, + 509, 507, 502, 501, 498, 496, 492, 489, 486, 481, + 479, 478, 477, 474, 472, 468, 465, 462, 457, 455, + + 454, 453, 451, 450, 449, 448, 447, 445, 444, 443, + 441, 434, 432, 431, 430, 428, 427, 426, 425, 424, + 422, 421, 420, 418, 411, 409, 408, 407, 406, 405, + 404, 403, 402, 400, 399, 398, 397, 395, 394, 393, + 392, 391, 390, 389, 388, 387, 386, 385, 384, 383, + 382, 381, 380, 378, 377, 376, 375, 373, 372, 371, + 370, 369, 368, 367, 366, 365, 364, 363, 362, 361, + 360, 358, 357, 355, 354, 353, 352, 351, 350, 349, + 348, 347, 346, 345, 344, 342, 341, 340, 339, 338, + 337, 336, 334, 333, 331, 330, 329, 328, 327, 326, + + 325, 324, 323, 322, 321, 320, 318, 317, 316, 315, + 314, 313, 312, 310, 309, 308, 307, 306, 305, 304, + 303, 302, 301, 300, 299, 298, 297, 295, 291, 290, + 289, 288, 287, 285, 284, 283, 282, 281, 280, 279, + 278, 277, 276, 275, 274, 273, 272, 270, 265, 264, + 262, 261, 260, 259, 258, 257, 256, 254, 253, 251, + 248, 243, 242, 240, 239, 238, 237, 236, 235, 234, + 232, 231, 229, 226, 223, 222, 220, 219, 218, 217, + 216, 215, 214, 213, 211, 210, 208, 207, 205, 204, + 203, 201, 200, 198, 197, 196, 195, 194, 193, 192, + + 191, 189, 188, 186, 185, 183, 182, 181, 180, 179, + 176, 173, 172, 171, 169, 168, 167, 166, 165, 164, + 163, 162, 161, 160, 159, 158, 157, 154, 151, 150, + 149, 147, 146, 145, 144, 143, 142, 141, 140, 139, + 138, 137, 136, 135, 133, 130, 129, 128, 127, 126, + 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, + 115, 114, 113, 112, 110, 107, 106, 105, 104, 103, + 102, 101, 100, 99, 98, 97, 96, 95, 94, 92, + 91, 90, 89, 87, 86, 85, 84, 83, 82, 81, + 80, 79, 78, 77, 76, 75, 74, 71, 70, 69, + + 68, 66, 65, 64, 63, 62, 61, 60, 59, 
58, + 57, 56, 55, 54, 52, 51, 50, 48, 47, 44, + 43, 42, 41, 40, 39, 38, 35, 34, 33, 31, + 30, 27, 26, 25, 24, 23, 22, 19, 16, 13, + 3, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579, + 579, 579, 579, 579, 579, 579, 579, 579, 579, 579 + } ; + +static yy_state_type yy_last_accepting_state; +static char *yy_last_accepting_cpos; + +extern int yy_flex_debug; +int yy_flex_debug = 0; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +#line 2 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Lexer of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +#define SAVE_POS save_pos() +static void save_pos(); + +extern int column_num; +extern int line_num; +extern FILE * yyin; + +#line 891 "lex.yy.c" + +#define INITIAL 0 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +static int yy_init_globals (void ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap (void ); +#else +extern int yywrap (void ); +#endif +#endif + + static void yyunput (int c,char *buf_ptr ); + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ); +#endif + +#ifndef YY_NO_INPUT + +#ifdef __cplusplus +static int yyinput (void ); +#else +static int input (void ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output.
*/ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + size_t n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex (void); + +#define YY_DECL int yylex (void) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. 
+ */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + +#line 140 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" + + +#line 1047 "lex.yy.c" + + if ( !(yy_init) ) + { + (yy_init) = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! (yy_start) ) + (yy_start) = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ); + } + + yy_load_buffer_state( ); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = (yy_c_buf_p); + + /* Support of yytext. */ + *yy_cp = (yy_hold_char); + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = (yy_start); +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 580 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 842 ); + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + if ( yy_act == 0 ) + { /* have to back up */ + yy_cp = (yy_last_accepting_cpos); + yy_current_state = (yy_last_accepting_state); + yy_act = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. */ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = (yy_hold_char); + yy_cp = (yy_last_accepting_cpos); + yy_current_state = (yy_last_accepting_state); + goto yy_find_action; + +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 142 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_XML_VERSION_START; } + YY_BREAK +case 2: +/* rule 2 can match eol */ +YY_RULE_SETUP +#line 143 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_XML_VERSION_END; } + YY_BREAK +case 3: +/* rule 3 can match eol */ +YY_RULE_SETUP +#line 144 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_POLICY_START; } + YY_BREAK +case 4: +/* rule 4 can match eol */ +YY_RULE_SETUP +#line 145 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_POLICY_END; } + YY_BREAK +case 5: +/* rule 5 can match eol */ +YY_RULE_SETUP +#line 146 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUPS_START; } + YY_BREAK +case 6: +/* rule 6 can match eol */ +YY_RULE_SETUP +#line 147 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUPS_END; } + YY_BREAK +case 7: +/* rule 7 can match eol */ +YY_RULE_SETUP +#line 148 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUP_START; } + YY_BREAK +case 8: +/* rule 8 can match eol */ +YY_RULE_SETUP +#line 149 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUP_END; } + YY_BREAK +case 9: +/* rule 9 can match eol */ +YY_RULE_SETUP +#line 150 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NAME_START; } + YY_BREAK +case 10: +/* rule 10 can match eol */ +YY_RULE_SETUP +#line 151 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NAME_END; } + YY_BREAK +case 11: +/* rule 11 can match eol */ +YY_RULE_SETUP +#line 152 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_USE_START; } + YY_BREAK +case 12: +/* rule 12 can match eol */ +YY_RULE_SETUP +#line 153 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_USE_END; } + YY_BREAK +case 13: +/* rule 13 can match eol */ +YY_RULE_SETUP +#line 154 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GUID_START; } + YY_BREAK +case 14: +/* rule 14 can match eol */ +YY_RULE_SETUP +#line 155 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GUID_END; } + YY_BREAK +case 15: +/* rule 15 can match eol */ +YY_RULE_SETUP +#line 156 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_NAME_START; } + YY_BREAK +case 16: +/* rule 16 can match eol */ +YY_RULE_SETUP +#line 157 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_NAME_END; } + YY_BREAK +case 17: +/* rule 17 can match eol */ +YY_RULE_SETUP +#line 158 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PARTITION_START; } + YY_BREAK +case 18: +/* rule 18 can match eol */ +YY_RULE_SETUP +#line 159 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PARTITION_END; } + YY_BREAK +case 19: +/* rule 19 can match eol */ +YY_RULE_SETUP +#line 160 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NODE_TYPE_START; } + YY_BREAK +case 20: +/* rule 20 can match eol */ +YY_RULE_SETUP +#line 161 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NODE_TYPE_END; } + YY_BREAK +case 21: +/* rule 21 can match eol */ +YY_RULE_SETUP +#line 162 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_SETUP_START; } + YY_BREAK +case 22: +/* rule 22 can match eol */ +YY_RULE_SETUP +#line 163 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_SETUP_END; } + YY_BREAK +case 23: +/* rule 23 can match eol */ +YY_RULE_SETUP +#line 164 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLES_START; } + YY_BREAK +case 24: +/* rule 24 can match eol */ +YY_RULE_SETUP +#line 165 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLES_END; } + YY_BREAK +case 25: +/* rule 25 can match eol */ +YY_RULE_SETUP +#line 166 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_SCOPE_START; } + YY_BREAK +case 26: +/* rule 26 can match eol */ +YY_RULE_SETUP +#line 167 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_SCOPE_END; } + YY_BREAK +case 27: +/* rule 27 can match eol */ +YY_RULE_SETUP +#line 168 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_GROUP_START; } + YY_BREAK +case 28: +/* rule 28 can match eol */ +YY_RULE_SETUP +#line 169 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_GROUP_END; } + YY_BREAK +case 29: +/* rule 29 can match eol */ +YY_RULE_SETUP +#line 170 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_FROM_START; } + YY_BREAK +case 30: +/* rule 30 can match eol */ +YY_RULE_SETUP +#line 171 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_FROM_END; } + YY_BREAK +case 31: +/* rule 31 can match eol */ +YY_RULE_SETUP +#line 172 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_TO_START; } + YY_BREAK +case 32: +/* rule 32 can match eol */ +YY_RULE_SETUP +#line 173 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_TO_END; } + YY_BREAK +case 33: +/* rule 33 can match eol */ +YY_RULE_SETUP +#line 174 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLE_START; } + YY_BREAK +case 34: +/* rule 34 can match eol */ +YY_RULE_SETUP +#line 175 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLE_END; } + YY_BREAK +case 35: +/* rule 35 can match eol */ +YY_RULE_SETUP +#line 176 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_START; } + YY_BREAK +case 36: +/* rule 36 can match eol */ +YY_RULE_SETUP +#line 177 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_END; } + YY_BREAK +case 37: +/* rule 37 can match eol */ +YY_RULE_SETUP +#line 178 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_FROM_START; } + YY_BREAK +case 38: +/* rule 38 can match eol */ +YY_RULE_SETUP +#line 179 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_FROM_END; } + YY_BREAK +case 39: +/* rule 39 can match eol */ +YY_RULE_SETUP +#line 180 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_TO_START; } + YY_BREAK +case 40: +/* rule 40 can match eol */ +YY_RULE_SETUP +#line 181 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_TO_END; } + YY_BREAK +case 41: +/* rule 41 can match eol */ +YY_RULE_SETUP +#line 182 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_TABLES_START; } + YY_BREAK +case 42: +/* rule 42 can match eol */ +YY_RULE_SETUP +#line 183 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_TABLES_END; } + YY_BREAK +case 43: +/* rule 43 can match eol */ +YY_RULE_SETUP +#line 184 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_SCOPE_START; } + YY_BREAK +case 44: +/* rule 44 can match eol */ +YY_RULE_SETUP +#line 185 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_SCOPE_END; } + YY_BREAK +case 45: +/* rule 45 can match eol */ +YY_RULE_SETUP +#line 186 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_START; } + YY_BREAK +case 46: +/* rule 46 can match eol */ +YY_RULE_SETUP +#line 187 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_END; } + YY_BREAK +case 47: +/* rule 47 can match eol */ +YY_RULE_SETUP +#line 188 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_LOW_START; } + YY_BREAK +case 48: +/* rule 48 can match eol */ +YY_RULE_SETUP +#line 189 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_LOW_END; } + YY_BREAK +case 49: +/* rule 49 can match eol */ +YY_RULE_SETUP +#line 
190 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_LIMIT_START; } + YY_BREAK +case 50: +/* rule 50 can match eol */ +YY_RULE_SETUP +#line 191 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_LIMIT_END; } + YY_BREAK +case 51: +/* rule 51 can match eol */ +YY_RULE_SETUP +#line 192 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVELS_START; } + YY_BREAK +case 52: +/* rule 52 can match eol */ +YY_RULE_SETUP +#line 193 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVELS_END; } + YY_BREAK +case 53: +/* rule 53 can match eol */ +YY_RULE_SETUP +#line 194 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_START; } + YY_BREAK +case 54: +/* rule 54 can match eol */ +YY_RULE_SETUP +#line 195 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_END; } + YY_BREAK +case 55: +/* rule 55 can match eol */ +YY_RULE_SETUP +#line 196 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SN_START; } + YY_BREAK +case 56: +/* rule 56 can match eol */ +YY_RULE_SETUP +#line 197 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SN_END; } + YY_BREAK +case 57: +/* rule 57 can match eol */ +YY_RULE_SETUP +#line 198 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL_START; } + YY_BREAK +case 58: +/* rule 58 can match eol */ +YY_RULE_SETUP +#line 199 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL_END; } + YY_BREAK +case 59: +/* rule 59 can match eol */ +YY_RULE_SETUP +#line 200 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_CLASS_START; } + YY_BREAK +case 60: +/* rule 60 can match eol */ +YY_RULE_SETUP +#line 201 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_CLASS_END; } + YY_BREAK +case 61: +/* rule 61 can match eol */ +YY_RULE_SETUP +#line 202 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_MTU_LIMIT_START; } + YY_BREAK +case 62: +/* rule 62 can match eol */ +YY_RULE_SETUP +#line 203 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_MTU_LIMIT_END; } + YY_BREAK +case 63: +/* rule 63 can match eol */ +YY_RULE_SETUP +#line 204 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_RATE_LIMIT_START; } + YY_BREAK +case 64: +/* rule 64 can match eol */ +YY_RULE_SETUP +#line 205 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_RATE_LIMIT_END; } + YY_BREAK +case 65: +/* rule 65 can match eol */ +YY_RULE_SETUP +#line 206 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULES_START; } + YY_BREAK +case 66: +/* rule 66 can match eol */ +YY_RULE_SETUP +#line 207 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULES_END; } + YY_BREAK +case 67: +/* rule 67 can match eol */ +YY_RULE_SETUP +#line 208 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULE_START; } + YY_BREAK +case 68: +/* rule 68 can match eol */ +YY_RULE_SETUP +#line 209 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULE_END; } + YY_BREAK +case 69: +/* rule 69 can match eol */ +YY_RULE_SETUP +#line 210 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_SN_START; } + YY_BREAK +case 70: +/* rule 70 can match eol */ +YY_RULE_SETUP +#line 211 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_SN_END; } + YY_BREAK +case 71: +/* rule 71 can match eol */ 
+YY_RULE_SETUP +#line 212 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SOURCE_START; } + YY_BREAK +case 72: +/* rule 72 can match eol */ +YY_RULE_SETUP +#line 213 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SOURCE_END; } + YY_BREAK +case 73: +/* rule 73 can match eol */ +YY_RULE_SETUP +#line 214 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_DESTINATION_START; } + YY_BREAK +case 74: +/* rule 74 can match eol */ +YY_RULE_SETUP +#line 215 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_DESTINATION_END; } + YY_BREAK +case 75: +/* rule 75 can match eol */ +YY_RULE_SETUP +#line 216 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SERVICE_START; } + YY_BREAK +case 76: +/* rule 76 can match eol */ +YY_RULE_SETUP +#line 217 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SERVICE_END; } + YY_BREAK +case 77: +/* rule 77 can match eol */ +YY_RULE_SETUP +#line 219 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; } /* swallow comment */ + YY_BREAK +case 78: +YY_RULE_SETUP +#line 221 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_HEX_NUMBER; } + YY_BREAK +case 79: +YY_RULE_SETUP +#line 222 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_DEC_NUMBER; } + YY_BREAK +case 80: +/* rule 80 can match eol */ +YY_RULE_SETUP +#line 223 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_WHITE; } + YY_BREAK +case 81: +YY_RULE_SETUP +#line 224 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_DASH; } + YY_BREAK +case 82: +YY_RULE_SETUP +#line 225 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_DOTDOT; } + YY_BREAK +case 83: +YY_RULE_SETUP +#line 226 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_COMMA; } + YY_BREAK +case 84: +YY_RULE_SETUP +#line 227 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_TEXT;} + YY_BREAK +case 85: +YY_RULE_SETUP +#line 229 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +ECHO; + YY_BREAK +#line 1633 "lex.yy.c" +case YY_STATE_EOF(INITIAL): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = (yy_hold_char); + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + { /* This was really a NUL. 
*/ + yy_state_type yy_next_state; + + (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++(yy_c_buf_p); + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = (yy_c_buf_p); + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_END_OF_FILE: + { + (yy_did_buffer_switch_on_eof) = 0; + + if ( yywrap( ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = + (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + (yy_c_buf_p) = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (void) +{ + register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + register char *source = (yytext_ptr); + register int number_to_move, i; + int ret_val; + + if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER; + + int yy_c_buf_p_offset = + (int) ((yy_c_buf_p) - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + (yy_n_chars), num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + if ( (yy_n_chars) == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart(yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + (yy_n_chars) += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; + + (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (void) +{ + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = (yy_start); + + for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 580 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) +{ + register int yy_is_jam; + register char *yy_cp = (yy_c_buf_p); + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 580 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 579); + + return yy_is_jam ? 0 : yy_current_state; +} + + static void yyunput (int c, register char * yy_bp ) +{ + register char *yy_cp; + + yy_cp = (yy_c_buf_p); + + /* undo effects of setting up yytext */ + *yy_cp = (yy_hold_char); + + if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. 
*/ + register int number_to_move = (yy_n_chars) + 2; + register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2]; + register char *source = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]; + + while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size; + + if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + (yytext_ptr) = yy_bp; + (yy_hold_char) = *yy_cp; + (yy_c_buf_p) = yy_cp; +} + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (void) +#else + static int input (void) +#endif + +{ + int c; + + *(yy_c_buf_p) = (yy_hold_char); + + if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + /* This was really a NUL. */ + *(yy_c_buf_p) = '\0'; + + else + { /* need more input */ + int offset = (yy_c_buf_p) - (yytext_ptr); + ++(yy_c_buf_p); + + switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart(yyin ); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( ) ) + return EOF; + + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = (yytext_ptr) + offset; + break; + } + } + } + + c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ + *(yy_c_buf_p) = '\0'; /* preserve yytext */ + (yy_hold_char) = *++(yy_c_buf_p); + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file ) +{ + + if ( ! YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ); + } + + yy_init_buffer(YY_CURRENT_BUFFER,input_file ); + yy_load_buffer_state( ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * + */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) +{ + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + (yy_did_buffer_switch_on_eof) = 1; +} + +static void yy_load_buffer_state (void) +{ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + (yy_hold_char) = *(yy_c_buf_p); +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer(b,file ); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * + */ + void yy_delete_buffer (YY_BUFFER_STATE b ) +{ + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree((void *) b->yy_ch_buf ); + + yyfree((void *) b ); +} + +#ifndef __cplusplus +extern int isatty (int ); +#endif /* __cplusplus */ + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. 
+ */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) + +{ + int oerrno = errno; + + yy_flush_buffer(b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * + */ + void yy_flush_buffer (YY_BUFFER_STATE b ) +{ + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) +{ + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + /* Only push if top exists. Otherwise, replace top. 
*/ + if (YY_CURRENT_BUFFER) + (yy_buffer_stack_top)++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * + */ +void yypop_buffer_state (void) +{ + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + if ((yy_buffer_stack_top) > 0) + --(yy_buffer_stack_top); + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (void) +{ + int num_to_alloc; + + if (!(yy_buffer_stack)) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; + (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + ); + + memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + (yy_buffer_stack_max) = num_to_alloc; + (yy_buffer_stack_top) = 0; + return; + } + + if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + int grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = (yy_buffer_stack_max) + grow_size; + (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc + ((yy_buffer_stack), + num_to_alloc * sizeof(struct yy_buffer_state*) + ); + + /* zero only the new slots.*/ + memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); + (yy_buffer_stack_max) = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. 
+ * @param base the character buffer + * @param size the size in bytes of the character buffer + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size ) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer(b ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param str a NUL-terminated string to scan + * + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (yyconst char * yystr ) +{ + + return yy_scan_bytes(yystr,strlen(yystr) ); +} + +/** Setup the input buffer state to scan the given bytes. The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param bytes the byte buffer to scan + * @param len the number of bytes in the buffer pointed to by @a bytes. + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, int _yybytes_len ) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = _yybytes_len + 2; + buf = (char *) yyalloc(n ); + if ( ! 
buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer(buf,n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yy_fatal_error (yyconst char* msg ) +{ + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = (yy_hold_char); \ + (yy_c_buf_p) = yytext + yyless_macro_arg; \ + (yy_hold_char) = *(yy_c_buf_p); \ + *(yy_c_buf_p) = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the current line number. + * + */ +int yyget_lineno (void) +{ + + return yylineno; +} + +/** Get the input stream. + * + */ +FILE *yyget_in (void) +{ + return yyin; +} + +/** Get the output stream. + * + */ +FILE *yyget_out (void) +{ + return yyout; +} + +/** Get the length of the current token. + * + */ +int yyget_leng (void) +{ + return yyleng; +} + +/** Get the current token. + * + */ + +char *yyget_text (void) +{ + return yytext; +} + +/** Set the current line number. + * @param line_number + * + */ +void yyset_lineno (int line_number ) +{ + + yylineno = line_number; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param in_str A readable stream. 
+ * + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * in_str ) +{ + yyin = in_str ; +} + +void yyset_out (FILE * out_str ) +{ + yyout = out_str ; +} + +int yyget_debug (void) +{ + return yy_flex_debug; +} + +void yyset_debug (int bdebug ) +{ + yy_flex_debug = bdebug ; +} + +static int yy_init_globals (void) +{ + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. + */ + + (yy_buffer_stack) = 0; + (yy_buffer_stack_top) = 0; + (yy_buffer_stack_max) = 0; + (yy_c_buf_p) = (char *) 0; + (yy_init) = 0; + (yy_start) = 0; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = (FILE *) 0; + yyout = (FILE *) 0; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (void) +{ + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(); + } + + /* Destroy the stack itself. */ + yyfree((yy_buffer_stack) ); + (yy_buffer_stack) = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( ); + + return 0; +} + +/* + * Internal utility routines. 
+ */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n ) +{ + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * s ) +{ + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *yyalloc (yy_size_t size ) +{ + return (void *) malloc( size ); +} + +void *yyrealloc (void * ptr, yy_size_t size ) +{ + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); +} + +void yyfree (void * ptr ) +{ + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 229 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" + + + +static void save_pos() +{ + int i; + for (i = 0; i < yyleng; i++) + { + if (yytext[i] == '\n') + { + line_num ++; + column_num = 1; + } + else + column_num ++; + } +} + + -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Wed Jan 17 07:12:42 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 17 Jan 2007 17:12:42 +0200 Subject: [openib-general] [PATCH 3/6] osm: QoS parser grammar (Yacc) file Message-ID: <45AE3CEA.3090300@dev.mellanox.co.il> Hi Hal. This patch is QoS parser grammar (Yacc) file Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_qos_parser.y | 1822 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 1822 insertions(+), 0 deletions(-) diff --git a/osm/opensm/osm_qos_parser.y b/osm/opensm/osm_qos_parser.y new file mode 100644 index 0000000..ba1e374 --- /dev/null +++ b/osm/opensm/osm_qos_parser.y @@ -0,0 +1,1822 @@ +%{ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. 
All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Grammar of OSM QoS parser. 
+ * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +static void __osm_qos_parse_tree_init(); + +static char * __osm_qos_parser_strip_white(char * str); + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str); + +static void __osm_qos_parser_port_group_start(); +static void __osm_qos_parser_port_group_end(); + +static void __osm_qos_parser_sl2vl_scope_start(); +static void __osm_qos_parser_sl2vl_scope_end(); + +static void __osm_qos_parser_vlarb_scope_start(); +static void __osm_qos_parser_vlarb_scope_end(); + +static void __osm_qos_parser_qos_level_start(); +static void __osm_qos_parser_qos_level_end(); + +static void __osm_qos_parser_match_rule_start(); +static void __osm_qos_parser_match_rule_end(); + +extern char * yytext; +extern void yyerror (char *s); +extern int yylex (void); +extern FILE * yyin; + +#define OSM_QOS_MAX_LINE_LEN 10000 +char str_buffer[OSM_QOS_MAX_LINE_LEN]; +#define RESET_BUFFER str_buffer[0] = '\0' + +int column_num; +int line_num; + +osm_qos_parse_tree_t * p_qos_parse_tree = NULL; +osm_qos_port_group_t * p_current_port_group = NULL; +osm_qos_sl2vl_scope_t * p_current_sl2vl_scope = NULL; +osm_qos_vlarb_scope_t * p_current_vlarb_scope = NULL; +osm_qos_level_t * p_current_qos_level = NULL; +osm_qos_match_rule_t * p_current_qos_match_rule = NULL; + +osm_log_t * p_osm_log = NULL; + +/***************************************************/ + +%} + +%token TK_HEX_NUMBER +%token TK_DEC_NUMBER +%token TK_WHITE +%token TK_DASH +%token TK_DOTDOT +%token TK_COMMA +%token TK_TEXT + +%token TK_XML_VERSION_START +%token TK_XML_VERSION_END +%token TK_QOS_POLICY_START +%token TK_QOS_POLICY_END +%token TK_PORT_GROUPS_START +%token TK_PORT_GROUPS_END +%token TK_PORT_GROUP_START +%token TK_PORT_GROUP_END +%token TK_NAME_START +%token TK_NAME_END +%token TK_USE_START +%token TK_USE_END +%token TK_PORT_GUID_START +%token TK_PORT_GUID_END +%token TK_PORT_NAME_START +%token 
TK_PORT_NAME_END +%token TK_PARTITION_START +%token TK_PARTITION_END +%token TK_NODE_TYPE_START +%token TK_NODE_TYPE_END +%token TK_QOS_SETUP_START +%token TK_QOS_SETUP_END +%token TK_SL2VL_TABLES_START +%token TK_SL2VL_TABLES_END +%token TK_SL2VL_SCOPE_START +%token TK_SL2VL_SCOPE_END +%token TK_GROUP_START +%token TK_GROUP_END +%token TK_FROM_START +%token TK_FROM_END +%token TK_TO_START +%token TK_TO_END +%token TK_SL2VL_TABLE_START +%token TK_SL2VL_TABLE_END +%token TK_ACROSS_START +%token TK_ACROSS_END +%token TK_ACROSS_FROM_START +%token TK_ACROSS_FROM_END +%token TK_ACROSS_TO_START +%token TK_ACROSS_TO_END +%token TK_VLARB_TABLES_START +%token TK_VLARB_TABLES_END +%token TK_VLARB_SCOPE_START +%token TK_VLARB_SCOPE_END +%token TK_VLARB_HIGH_START +%token TK_VLARB_HIGH_END +%token TK_VLARB_LOW_START +%token TK_VLARB_LOW_END +%token TK_VLARB_HIGH_LIMIT_START +%token TK_VLARB_HIGH_LIMIT_END +%token TK_QOS_LEVELS_START +%token TK_QOS_LEVELS_END +%token TK_QOS_LEVEL_START +%token TK_QOS_LEVEL_END +%token TK_SN_START +%token TK_SN_END +%token TK_SL_START +%token TK_SL_END +%token TK_CLASS_START +%token TK_CLASS_END +%token TK_MTU_LIMIT_START +%token TK_MTU_LIMIT_END +%token TK_RATE_LIMIT_START +%token TK_RATE_LIMIT_END +%token TK_QOS_MATCH_RULES_START +%token TK_QOS_MATCH_RULES_END +%token TK_QOS_MATCH_RULE_START +%token TK_QOS_MATCH_RULE_END +%token TK_QOS_LEVEL_SN_START +%token TK_QOS_LEVEL_SN_END +%token TK_SOURCE_START +%token TK_SOURCE_END +%token TK_DESTINATION_START +%token TK_DESTINATION_END +%token TK_SERVICE_START +%token TK_SERVICE_END + +%start head + +%% + +head: xml_version qos_policy + ; + +xml_version: /* empty */ + | TK_XML_VERSION_START any_text TK_XML_VERSION_END + ; + +qos_policy: /* empty */ + | qos_policy qos_policy_start qos_policy_entries qos_policy_end + ; + +qos_policy_start: TK_QOS_POLICY_START + ; + +qos_policy_end: TK_QOS_POLICY_END + ; + +qos_policy_entries: /* empty */ + | qos_policy_entries qos_policy_entry + ; + +qos_policy_entry: 
port_groups + | qos_setup + | qos_levels + | qos_match_rules + ; + + /* + * Parsing ... : + * + * + * + * Storage + * our SRP storage targets + * 0x1000000000000001 + * 0x1000000000000002 + * + * + * Virtual Servers + * node desc and IB port # + * vs1/HCA-1/P1 + * vs3/HCA-1/P1 + * vs3/HCA-2/P1 + * + * + * Partition 1 + * default settings + * Part1 + * + * + * Routers + * all routers + * ROUTER + * + * + */ + +port_groups: port_groups_start port_group port_groups_end + ; + +port_groups_start: TK_PORT_GROUPS_START + ; + +port_groups_end: TK_PORT_GROUPS_END + ; + +port_group: single_port_group + | port_group single_port_group + ; + +single_port_group: port_group_start port_group_entries port_group_end + ; + +port_group_start: TK_PORT_GROUP_START { + __osm_qos_parser_port_group_start(); + } + ; + +port_group_end: TK_PORT_GROUP_END { + __osm_qos_parser_port_group_end(); + } + ; + +port_group_entries: /* empty */ + | port_group_entries port_group_entry + ; + +port_group_entry: name + | use + | port_guid + | port_name + | partition + | node_type + ; + + /* + * Parsing ... : + * + * + * + * + * Part1 + * * + * * + * 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 + * + * + * Storage + * StorageXX + * StorageYY + * * + * 1 + * 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1 + * + * + * + * + * + * Storage + * * + * 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 + * 8:255,9:127,10:63,11:31,12:15,13:7,14:3 + * 10 + * + * + * + */ + +qos_setup: qos_setup_start qos_setup_items qos_setup_end + ; + +qos_setup_start: TK_QOS_SETUP_START + ; + +qos_setup_end: TK_QOS_SETUP_END + ; + +qos_setup_items: /* empty */ + | qos_setup_items sl2vl_tables + | qos_setup_items vlarb_tables + ; + + /* + * + * + * ... + * + * ... 
+ * + */ + +sl2vl_tables: sl2vl_tables_start sl2vl_scope_items sl2vl_tables_end + ; + +sl2vl_tables_start: TK_SL2VL_TABLES_START + ; + +sl2vl_tables_end: TK_SL2VL_TABLES_END + ; + +sl2vl_scope_items: /* empty */ + | sl2vl_scope_items sl2vl_scope + ; + +sl2vl_scope: sl2vl_scope_start sl2vl_scope_entries sl2vl_scope_end + ; + +sl2vl_scope_start: TK_SL2VL_SCOPE_START { + __osm_qos_parser_sl2vl_scope_start(); + } + ; + +sl2vl_scope_end: TK_SL2VL_SCOPE_END { + __osm_qos_parser_sl2vl_scope_end(); + } + ; + +sl2vl_scope_entries:/* empty */ + | sl2vl_scope_entries sl2vl_scope_entry + ; + +sl2vl_scope_entry: group + | across + | across_from + | across_to + | from + | to + | sl2vl_table + ; + + /* + * + * + * ... + * + * ... + * + */ + +vlarb_tables: vlarb_tables_start vlarb_scope_items vlarb_tables_end + ; + +vlarb_tables_start: TK_VLARB_TABLES_START + ; + +vlarb_tables_end: TK_VLARB_TABLES_END + ; + +vlarb_scope_items: /* empty */ + | vlarb_scope_items vlarb_scope + ; + +vlarb_scope: vlarb_scope_start vlarb_scope_entries vlarb_scope_end + ; + +vlarb_scope_start: TK_VLARB_SCOPE_START { + __osm_qos_parser_vlarb_scope_start(); + } + ; + +vlarb_scope_end: TK_VLARB_SCOPE_END { + __osm_qos_parser_vlarb_scope_end(); + } + ; + +vlarb_scope_entries:/* empty */ + | vlarb_scope_entries vlarb_scope_entry + ; + +vlarb_scope_entry: group + | across + | vlarb_high + | vlarb_low + | vlarb_high_limit + ; + + /* + * Parsing ... 
: + * + * + * + * 1 + * for the lowest priority comm + * 16 + * + * + * 2 + * low latency best bandwidth + * 0 + * 7 + * + * + * 3 + * just an example + * 0 + * 32 + * 1 + * 1 + * + * + */ + +qos_levels: qos_levels_start qos_level_items qos_levels_end + ; + +qos_levels_start: TK_QOS_LEVELS_START + ; + +qos_levels_end: TK_QOS_LEVELS_END + ; + +qos_level_items: /* empty */ + | qos_level_items qos_level_start qos_level_entries qos_level_end + ; + +qos_level_start: TK_QOS_LEVEL_START { + __osm_qos_parser_qos_level_start(); + } + ; + +qos_level_end: TK_QOS_LEVEL_END { + __osm_qos_parser_qos_level_end(); + } + ; + +qos_level_entries: /* empty */ + | qos_level_entries qos_level_entry + ; + +qos_level_entry: sn + | use + | sl + | qos_level_class + | mtu_limit + | rate_limit + ; + + /* + * Parsing ... : + * + * + * + * + * 1 + * low latency by class 7-9 or 11> + * 7-9,11 + * 1 + * + * + * 2 + * Storage targets connection> + * Storage + * 22,4719 + * 3 + * + * + */ + +qos_match_rules: qos_match_rules_start qos_match_rule_items qos_match_rules_end + ; + +qos_match_rules_start: TK_QOS_MATCH_RULES_START + ; + +qos_match_rules_end: TK_QOS_MATCH_RULES_END + ; + +qos_match_rule_items: /* empty */ + | qos_match_rule_items qos_match_rule_start qos_match_rule_entries qos_match_rule_end + ; + +qos_match_rule_start: TK_QOS_MATCH_RULE_START { + __osm_qos_parser_match_rule_start(); + } + ; + +qos_match_rule_end: TK_QOS_MATCH_RULE_END { + __osm_qos_parser_match_rule_end(); + } + ; + +qos_match_rule_entries: /* empty */ + | qos_match_rule_entries qos_match_rule_entry + ; + +qos_match_rule_entry: use + | match_rule_class + | source + | destination + | service + | qos_level_sn + ; + + /* + * values + */ + +name: name_start any_text TK_NAME_END { + /* of - one instance */ + p_current_port_group->name = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +name_start: TK_NAME_START { + /* of - one instance */ + RESET_BUFFER; + if (p_current_port_group->name) + { + yyerror(" has multiple 
tags"); + return 1; + } + } + ; + +use: use_start any_text TK_USE_END { + /* of ,, - one instance */ + if (p_current_port_group) + p_current_port_group->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_level) + p_current_qos_level->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_match_rule) + p_current_qos_match_rule->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else + CL_ASSERT(0); + } + ; + +use_start: TK_USE_START { + RESET_BUFFER; + if (p_current_port_group) + { + /* of - one instance */ + if (p_current_port_group->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else if (p_current_qos_level) + { + /* of - one instance */ + if (p_current_qos_level->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else if (p_current_qos_match_rule) + { + /* of - one instance */ + if (p_current_qos_match_rule->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else + CL_ASSERT(0); + } + ; + +port_name: port_name_start any_text TK_PORT_NAME_END { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_names, + p_str_item, + NULL); + } + ; + +port_name_start: TK_PORT_NAME_START { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +port_guid: port_guid_start hex_number TK_PORT_GUID_END { + osm_qos_uint64_vector_item_t * p_uint64_item = + (osm_qos_uint64_vector_item_t *)malloc(sizeof(osm_qos_uint64_vector_item_t)); + + __osm_qos_parser_str2uint64(&p_uint64_item->value, + __osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_guids, + p_uint64_item, + NULL); + } + ; + +port_guid_start: TK_PORT_GUID_START { + /* in - any num of instances */ + 
CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +partition: partition_start any_text TK_PARTITION_END { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->partitions, + p_str_item, + NULL); + } + ; + +partition_start: TK_PARTITION_START { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +node_type: node_type_start any_text TK_NODE_TYPE_END { + /* in - any num of instances */ + uint8_t tmp_node_type; + char * clean_str = __osm_qos_parser_strip_white(str_buffer); + + if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_ROUTER) == 0) + tmp_node_type = IB_NODE_TYPE_ROUTER; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_CA) == 0) + tmp_node_type = IB_NODE_TYPE_CA; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_SWITCH) == 0) + tmp_node_type = IB_NODE_TYPE_SWITCH; + else + { + yyerror("wrong value"); + return 1; + } + + osm_qos_uint32_vector_item_t * p_uint32_item = + (osm_qos_uint32_vector_item_t *)malloc(sizeof(osm_qos_uint32_vector_item_t)); + + p_uint32_item->value = tmp_node_type; + cl_ptr_vector_insert(&p_current_port_group->node_types, + p_uint32_item, + NULL); + } + ; + +node_type_start: TK_NODE_TYPE_START { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +group: group_start any_text TK_GROUP_END { + /* in and - any num of instances. 
+ The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + if (p_current_sl2vl_scope) + cl_ptr_vector_insert(&p_current_sl2vl_scope->groups, + p_str_item, + NULL); + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->groups, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + ; + +group_start: TK_GROUP_START { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + ; + +across: across_start any_text TK_ACROSS_END { + /* in and - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + + /* inserting this both to across_to and to across_from */ + if (p_current_sl2vl_scope) { + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->across, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + ; + +across_start: TK_ACROSS_START { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + ; + +across_from: across_from_start any_text TK_ACROSS_FROM_END { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + } + ; + +across_from_start: TK_ACROSS_FROM_START { + /* in - any num of instances. 
+ The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + ; + +across_to: across_to_start any_text TK_ACROSS_TO_END { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + ; + +across_to_start: TK_ACROSS_TO_START { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + ; + +from: from_start any_text TK_FROM_END { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->from, + p_str_item, + NULL); + } + ; + +from_start: TK_FROM_START { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + ; + +to: to_start any_text TK_TO_END { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->to, + p_str_item, + NULL); + } + ; + +to_start: TK_TO_START { + /* in - any num of instances. 
+ The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + ; + +sl2vl_table: sl2vl_table_start whitespace + any_number num_list_wo_whites any_number + whitespace TK_SL2VL_TABLE_END { + /* in - one instance of 16 numbers */ + uint8_t counter = 0; + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + + /* checking the syntax */ + for (i = 0; tmp_str[i] != '\0'; i++) + if (tmp_str[i] == ',') + counter ++; + if (counter != 15) + { + yyerror("wrong number of values in (should be 16)"); + return 1; + } + for (i = 1; tmp_str[i] != '\0'; i++) + if ((tmp_str[i-1] == ',') && (tmp_str[i] == ',')) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str," ,"); + while (tok != NULL && i < 16) + { + p_current_sl2vl_scope->sl2vl_table[i++] = atoi(tok); + tok = strtok (NULL, " ,"); + } + if (tok != NULL) + { + yyerror("wrong syntax in "); + return 1; + } + } + ; + +sl2vl_table_start: TK_SL2VL_TABLE_START { + /* in - one instance of 16 numbers */ + RESET_BUFFER; + } + ; + +sn: sn_start any_number TK_SN_END { + /* in - one instance */ + p_current_qos_level->sn = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +sn_start: TK_SN_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + ; + +sl: sl_start any_number TK_SL_END { + /* in - one instance */ + p_current_qos_level->sl = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +sl_start: TK_SL_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + ; + +mtu_limit: mtu_limit_start any_number TK_MTU_LIMIT_END { + /* in - one instance */ + p_current_qos_level->mtu_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +mtu_limit_start: TK_MTU_LIMIT_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + ; + +rate_limit: rate_limit_start any_number TK_RATE_LIMIT_END { + /* in - one 
instance */ + p_current_qos_level->rate_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +rate_limit_start: TK_RATE_LIMIT_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + ; + +qos_level_class: qos_level_class_start any_number TK_CLASS_END { + /* in - one instance of one number */ + p_current_qos_level->class = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +qos_level_class_start: TK_CLASS_START { + /* in - one instance of one number */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + ; + +vlarb_high_limit: vlarb_high_limit_start any_number TK_VLARB_HIGH_LIMIT_END { + /* in - one instance of one number */ + p_current_vlarb_scope->vl_high_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +vlarb_high_limit_start: TK_VLARB_HIGH_LIMIT_START { + /* in - one instance of one number */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + ; + +vlarb_high: vlarb_high_start whitespace + any_number num_list_with_dotdot_wo_whites any_number + whitespace TK_VLARB_HIGH_END { + /* in - list of pairs of numbers with ':' and ',' */ + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i-1] == ',' || tmp_str[i-1] == ':') && + (tmp_str[i] == ',' || tmp_str[i] == ':') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str,":,"); + while (tok != NULL) + { + p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t)); + p_hl_item->sl = atoi(tok); + tok = strtok (NULL, ":,"); + p_hl_item->credits = atoi(tok); + tok = strtok (NULL, ":,"); + cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_highs, + p_hl_item, + NULL); + } + + } + ; + +vlarb_high_start: TK_VLARB_HIGH_START { + /* in - list of pairs of numbers with ':' and ',' 
*/ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + ; + +vlarb_low: vlarb_low_start whitespace + any_number num_list_with_dotdot_wo_whites any_number + whitespace TK_VLARB_LOW_END { + /* in - list of pairs of numbers with ':' and ',' */ + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i-1] == ',' || tmp_str[i-1] == ':') && + (tmp_str[i] == ',' || tmp_str[i] == ':') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str,":,"); + while (tok != NULL) + { + p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t)); + p_hl_item->sl = atoi(tok); + tok = strtok (NULL, ":,"); + p_hl_item->credits = atoi(tok); + tok = strtok (NULL, ":,"); + cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_lows, + p_hl_item, + NULL); + } + + } + ; + +vlarb_low_start: TK_VLARB_LOW_START { + /* in - list of pairs of numbers with ':' and ',' */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + ; + +source: source_start any_text TK_SOURCE_END { + /* in - text */ + p_current_qos_match_rule->source = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +source_start: TK_SOURCE_START { + /* in - text */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + ; + +destination: destination_start any_text TK_DESTINATION_END { + /* in - text */ + p_current_qos_match_rule->destination = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +destination_start: TK_DESTINATION_START { + /* in - text */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + ; + +match_rule_class: match_rule_class_start whitespace + any_number num_list_with_dash_wo_whites any_number + whitespace TK_CLASS_END { + /* in - "7-9,11" */ + uint32_t i; + uint32_t j; + uint32_t range_low; + uint32_t range_high; + 
char * dash_ptr = NULL; + char * tok = NULL; + char * tokens[10000] = {NULL}; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_uint32_vector_item_t * p_uint32_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i] == ',' || tmp_str[i] == '-') && + (tmp_str[i-1] == ',' || tmp_str[i-1] == '-') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line with ',' being a delimiter */ + tok = strtok (tmp_str," ,"); + for (i = 0; tok != NULL; tok = strtok (NULL, " ,")) + tokens[i++] = tok; + + for (i = 0; tokens[i] != NULL; i++) + { + dash_ptr = strstr(tokens[i],"-"); + if (dash_ptr != NULL) + { + /* this is number range: i-j */ + *dash_ptr = '\0'; + range_low = atoi(tokens[i]); + *dash_ptr = '-'; + range_high = atoi(dash_ptr + 1); + if (range_low > range_high) + { + uint32_t tmp_num = range_low; + range_low = range_high; + range_high = tmp_num; + } + for (j = range_low; j <= range_high; j++) + { + p_uint32_item = (osm_qos_uint32_vector_item_t *) + malloc(sizeof(osm_qos_uint32_vector_item_t)); + p_uint32_item->value = j; + cl_ptr_vector_insert(&p_current_qos_match_rule->classes, + p_uint32_item, + NULL); + } + } + else + { + /* this is a single number */ + p_uint32_item = (osm_qos_uint32_vector_item_t *) + malloc(sizeof(osm_qos_uint32_vector_item_t)); + p_uint32_item->value = atoi(tokens[i]); + cl_ptr_vector_insert(&p_current_qos_match_rule->classes, + p_uint32_item, + NULL); + } + } + } + ; + +match_rule_class_start: TK_CLASS_START { + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + ; + +service: service_start whitespace + any_number num_list_wo_whites any_number + whitespace TK_SERVICE_END { + /* in - list of ',' delimited numbers */ + uint32_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_uint32_vector_item_t * p_uint32_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ((tmp_str[i-1] == 
',') && (tmp_str[i] == ',')) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + tok = strtok (tmp_str," ,"); + while (tok != NULL) + { + p_uint32_item = (osm_qos_uint32_vector_item_t *)malloc(sizeof(osm_qos_uint32_vector_item_t)); + p_uint32_item->value = atoi(tok); + cl_ptr_vector_insert(&p_current_qos_match_rule->services, + p_uint32_item, + NULL); + tok = strtok (NULL, " ,"); + } + + } + ; + +service_start: TK_SERVICE_START { + /* in - list of ',' delimited numbers */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + ; + +qos_level_sn: qos_level_sn_start any_number TK_QOS_LEVEL_SN_END { + /* in - single number */ + + osm_qos_uint32_vector_item_t * p_uint32_item = + (osm_qos_uint32_vector_item_t *)malloc(sizeof(osm_qos_uint32_vector_item_t)); + + p_uint32_item->value = atoi(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_qos_match_rule->qos_level_sns, + p_uint32_item, + NULL); + } + ; + +qos_level_sn_start: TK_QOS_LEVEL_SN_START { + /* in - single number */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + ; + + /* + * Common part + */ + + +any_text: /* empty */ + | any_text text + ; + +text: pure_text + | any_number + ; + +pure_text: TK_TEXT { + strcat(str_buffer,$1); + free($1); + } + | TK_WHITE { + strcat(str_buffer,$1); + free($1); + } + | TK_DASH { + strcat(str_buffer,$1); + free($1); + } + | TK_DOTDOT { + strcat(str_buffer,$1); + free($1); + } + | TK_COMMA { + strcat(str_buffer,$1); + free($1); + } + ; + +num_list_wo_whites: whitespace + | num_list_wo_whites whitespace + | num_list_wo_whites dec_number + | num_list_wo_whites hex_number + | num_list_wo_whites comma + ; + +num_list_with_dotdot_wo_whites: whitespace + | num_list_with_dotdot_wo_whites whitespace + | num_list_with_dotdot_wo_whites dec_number + | num_list_with_dotdot_wo_whites hex_number + | num_list_with_dotdot_wo_whites comma + | num_list_with_dotdot_wo_whites dotdot + ; + +num_list_with_dash_wo_whites: 
whitespace + | num_list_with_dash_wo_whites whitespace + | num_list_with_dash_wo_whites dec_number + | num_list_with_dash_wo_whites hex_number + | num_list_with_dash_wo_whites comma + | num_list_with_dash_wo_whites dash + ; + +comma: TK_COMMA { + strcat(str_buffer,$1); + free($1); + } + ; + +dotdot: TK_DOTDOT { + strcat(str_buffer,$1); + free($1); + } + ; + +dash: TK_DASH { + strcat(str_buffer,$1); + free($1); + } + ; + +any_number: dec_number + | hex_number + ; + +dec_number: TK_DEC_NUMBER { + strcat(str_buffer,$1); + free($1); + } + ; + +hex_number: TK_HEX_NUMBER { + strcat(str_buffer,$1); + free($1); + } + ; + +whitespace: /* empty */ + | TK_WHITE { + /* swallow whitespace */ + free($1); + } + ; + +%% + +/*************************************************** + ***************************************************/ + +int osm_qos_parse_policy_file( + IN osm_log_t * p_log, + IN const char * policy_file, + OUT osm_qos_parse_tree_t ** pp_qos_parse_tree) +{ + int res = 0; + p_osm_log = p_log; + + //~ OSM_LOG_ENTER(p_osm_log, osm_qos_parse); + + *pp_qos_parse_tree = NULL; + yyin = fopen (policy_file, "r"); + if (!yyin) + { + //~ osm_log(p_osm_log, OSM_LOG_ERROR, + //~ "osm_qos_parse: ERR AC01: " + //~ "Failed opening QoS policy file (%s)\n", + //~ policy_file); + printf( "osm_qos_parse: ERR AC01: " + "Failed opening QoS policy file (%s)\n", + policy_file); + res = 1; + goto Exit; + } + column_num = 1; + line_num = 1; + __osm_qos_parse_tree_init(); + str_buffer[0] = '\0'; + + res = yyparse(); + + if (res == 0) + { + *pp_qos_parse_tree = p_qos_parse_tree; + } + else + { + //~ osm_log(p_osm_log, OSM_LOG_ERROR, + //~ "osm_qos_parse: ERR AC02: " + //~ "Failed parsing QoS policy file (%s)\n", + //~ policy_file); + printf( "osm_qos_parse: ERR AC02: " + "Failed parsing QoS policy file (%s)\n", + policy_file); + res = 1; + goto Exit; + } + + Exit: + if (yyin) + fclose(yyin); + //~ OSM_LOG_EXIT(p_osm_log); + return res; +} + +/*************************************************** + 
***************************************************/ + +int yywrap() +{ + return(1); +} + +/*************************************************** + ***************************************************/ + +void yyerror (char *s) +{ + //~ OSM_LOG_ENTER(p_osm_log, yyerror); + //~ osm_log(p_osm_log, OSM_LOG_ERROR, + //~ "yyerror: ERR AC03: " + //~ "Syntax error (line %d:%d): %s. " + //~ "Last text read: \"%s\"\n", + //~ line_num, column_num, s, __osm_qos_parser_strip_white(yytext)); + printf( "yyerror: ERR AC03: " + "Syntax error (line %d:%d): %s. " + "Last text read: \"%s\"\n", + line_num, column_num, s, __osm_qos_parser_strip_white(yytext)); + //~ OSM_LOG_EXIT(p_osm_log); +} + +/*************************************************** + ***************************************************/ + +static char * __osm_qos_parser_strip_white(char * str) +{ + int i; + for (i = (strlen(str)-1); i >= 0; i--) + { + if (isspace(str[i])) + str[i] = '\0'; + else + break; + } + for (i = 0; i < strlen(str); i++) + { + if (!isspace(str[i])) + break; + } + return &(str[i]); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str) +{ +#if __WORDSIZE == 64 + *p_val = strtoul(str, NULL, 0); +#else + *p_val = strtoull(str, NULL, 0); +#endif +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parse_tree_init() +{ + p_qos_parse_tree = (osm_qos_parse_tree_t *) + malloc(sizeof(osm_qos_parse_tree_t)); + + memset(p_qos_parse_tree, 0, sizeof(osm_qos_parse_tree_t)); + + cl_ptr_vector_init(&p_qos_parse_tree->port_groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->sl2vl_tables, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->vlarb_tables, + 0, /* min size */ + 4); /* grow size */ + 
cl_ptr_vector_init(&p_qos_parse_tree->qos_levels, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->qos_match_rules, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_start() +{ + p_current_port_group = (osm_qos_port_group_t *) + malloc(sizeof(osm_qos_port_group_t)); + memset(p_current_port_group, 0, sizeof(osm_qos_port_group_t)); + + cl_ptr_vector_init(&p_current_port_group->port_guids, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->port_names, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->partitions, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->node_types, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->port_groups, + p_current_port_group, + NULL); + p_current_port_group = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_start() +{ + p_current_sl2vl_scope = (osm_qos_sl2vl_scope_t *) + malloc(sizeof(osm_qos_sl2vl_scope_t)); + memset(p_current_sl2vl_scope, 0, sizeof(osm_qos_sl2vl_scope_t)); + + cl_ptr_vector_init(&p_current_sl2vl_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->to, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_to, + 0, /* min size */ + 4); /* grow size 
*/ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->sl2vl_tables, + p_current_sl2vl_scope, + NULL); + p_current_sl2vl_scope = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_start() +{ + p_current_vlarb_scope = (osm_qos_vlarb_scope_t *) + malloc(sizeof(osm_qos_vlarb_scope_t)); + memset(p_current_vlarb_scope, 0, sizeof(osm_qos_vlarb_scope_t)); + + cl_ptr_vector_init(&p_current_vlarb_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->across, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_highs, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_lows, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->vlarb_tables, + p_current_vlarb_scope, + NULL); + p_current_vlarb_scope = NULL; +} + + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_start() +{ + p_current_qos_level = (osm_qos_level_t *) + malloc(sizeof(osm_qos_level_t)); + memset(p_current_qos_level, 0, sizeof(osm_qos_level_t)); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->qos_levels, + p_current_qos_level, + NULL); + p_current_qos_level = NULL; +} + +/*************************************************** + 
***************************************************/ + +static void __osm_qos_parser_match_rule_start() +{ + p_current_qos_match_rule = (osm_qos_match_rule_t *) + malloc(sizeof(osm_qos_match_rule_t)); + memset(p_current_qos_match_rule, 0, sizeof(osm_qos_match_rule_t)); + + cl_ptr_vector_init(&p_current_qos_match_rule->services, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_qos_match_rule->qos_level_sns, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_qos_match_rule->classes, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_match_rule_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->qos_match_rules, + p_current_qos_match_rule, + NULL); + p_current_qos_match_rule = NULL; +} + +/*************************************************** + ***************************************************/ + +/* free all the memory occupied by the parse tree data structure */ +void osm_qos_parser_destroy_parse_tree( + IN osm_qos_parse_tree_t * p_qos_parse_tree) +{ + uint32_t i; + uint32_t j; + + osm_qos_port_group_t * p_port_group = NULL; + osm_qos_sl2vl_scope_t * p_sl2vl_scope = NULL; + osm_qos_vlarb_scope_t * p_vlarb_scope = NULL; + osm_qos_level_t * p_qos_level = NULL; + osm_qos_match_rule_t * p_qos_match_rule = NULL; + + osm_qos_string_vector_item_t * p_str_vector_item = NULL; + osm_qos_uint64_vector_item_t * p_uint64_vector_item = NULL; + osm_qos_uint32_vector_item_t * p_uint32_vector_item = NULL; + osm_qos_vlarb_hl_vector_item_t * p_vlarb_hl_vector_item = NULL; + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->port_groups); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->port_groups, i, (void **)&p_port_group); + + if (p_port_group->name) + free(p_port_group->name); + if (p_port_group->use) + free(p_port_group->use); + + for (j = 0; j < 
cl_ptr_vector_get_size(&p_port_group->port_guids); j++) + { + cl_ptr_vector_at(&p_port_group->port_guids, j, (void **)&p_uint64_vector_item); + free(p_uint64_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->port_guids); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->port_names); j++) + { + cl_ptr_vector_at(&p_port_group->port_names, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->port_names); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->partitions); j++) + { + cl_ptr_vector_at(&p_port_group->partitions, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->partitions); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->node_types); j++) + { + cl_ptr_vector_at(&p_port_group->node_types, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->node_types); + + free(p_port_group); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->port_groups); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->sl2vl_tables); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->sl2vl_tables, i, (void **)&p_sl2vl_scope); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->groups); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->groups, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->groups); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->from); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->from, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->from); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->to); j++) + { + 
cl_ptr_vector_at(&p_sl2vl_scope->to, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->to); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->across_from); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->across_from, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->across_from); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->across_to); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->across_to, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->across_to); + + free(p_sl2vl_scope); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->sl2vl_tables); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->vlarb_tables); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->vlarb_tables, i, (void **)&p_vlarb_scope); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->groups); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->groups, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->groups); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->across); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->across, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->across); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->vlarb_highs); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->vlarb_highs, j, (void **)&p_vlarb_hl_vector_item); + free(p_vlarb_hl_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->vlarb_highs); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->vlarb_lows); 
j++) + { + cl_ptr_vector_at(&p_vlarb_scope->vlarb_lows, j, (void **)&p_vlarb_hl_vector_item); + free(p_vlarb_hl_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->vlarb_lows); + + free(p_vlarb_scope); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->vlarb_tables); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_levels); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->qos_levels, i, (void **)&p_qos_level); + if (p_qos_level->use) + free(p_qos_level->use); + free(p_qos_level); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->qos_levels); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_match_rules); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->qos_match_rules, i, (void **)&p_qos_match_rule); + + if (p_qos_match_rule->use) + free(p_qos_match_rule->use); + if (p_qos_match_rule->source) + free(p_qos_match_rule->source); + if (p_qos_match_rule->destination) + free(p_qos_match_rule->destination); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->services); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->services, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->services); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->qos_level_sns); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->qos_level_sns, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->qos_level_sns); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->classes); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->classes, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->classes); + + free(p_qos_match_rule); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->qos_match_rules); + + free(p_qos_parse_tree); +} + +/*************************************************** + ***************************************************/ -- 1.4.4.1.GIT From kliteyn at 
dev.mellanox.co.il Wed Jan 17 07:19:01 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 17 Jan 2007 17:19:01 +0200 Subject: [openib-general] [PATCH 4/6] osm: QoS parser Yacc-generated C and H grammar files Message-ID: <45AE3E65.9050502@dev.mellanox.co.il> Hi Hal This patch is a QoS parser Yacc-generated C and H grammar files Signed-off-by: Yevgeny Kliteynik --- osm/include/opensm/osm_qos_parser_y.h | 214 +++ osm/opensm/osm_qos_parser_y.c | 3215 +++++++++++++++++++++++++++++++++ 2 files changed, 3429 insertions(+), 0 deletions(-) diff --git a/osm/include/opensm/osm_qos_parser_y.h b/osm/include/opensm/osm_qos_parser_y.h new file mode 100644 index 0000000..40e9205 --- /dev/null +++ b/osm/include/opensm/osm_qos_parser_y.h @@ -0,0 +1,214 @@ +/* A Bison parser, made by GNU Bison 1.875. */ + +/* Skeleton parser for Yacc-like parsing with Bison, + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, when this file is copied by Bison into a + Bison output file, you may use that output file without restriction. + This special exception was added by the Free Software Foundation + in version 1.24 of Bison. */ + +/* Tokens. 
*/ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + TK_HEX_NUMBER = 258, + TK_DEC_NUMBER = 259, + TK_WHITE = 260, + TK_DASH = 261, + TK_DOTDOT = 262, + TK_COMMA = 263, + TK_TEXT = 264, + TK_XML_VERSION_START = 265, + TK_XML_VERSION_END = 266, + TK_QOS_POLICY_START = 267, + TK_QOS_POLICY_END = 268, + TK_PORT_GROUPS_START = 269, + TK_PORT_GROUPS_END = 270, + TK_PORT_GROUP_START = 271, + TK_PORT_GROUP_END = 272, + TK_NAME_START = 273, + TK_NAME_END = 274, + TK_USE_START = 275, + TK_USE_END = 276, + TK_PORT_GUID_START = 277, + TK_PORT_GUID_END = 278, + TK_PORT_NAME_START = 279, + TK_PORT_NAME_END = 280, + TK_PARTITION_START = 281, + TK_PARTITION_END = 282, + TK_NODE_TYPE_START = 283, + TK_NODE_TYPE_END = 284, + TK_QOS_SETUP_START = 285, + TK_QOS_SETUP_END = 286, + TK_SL2VL_TABLES_START = 287, + TK_SL2VL_TABLES_END = 288, + TK_SL2VL_SCOPE_START = 289, + TK_SL2VL_SCOPE_END = 290, + TK_GROUP_START = 291, + TK_GROUP_END = 292, + TK_FROM_START = 293, + TK_FROM_END = 294, + TK_TO_START = 295, + TK_TO_END = 296, + TK_SL2VL_TABLE_START = 297, + TK_SL2VL_TABLE_END = 298, + TK_ACROSS_START = 299, + TK_ACROSS_END = 300, + TK_ACROSS_FROM_START = 301, + TK_ACROSS_FROM_END = 302, + TK_ACROSS_TO_START = 303, + TK_ACROSS_TO_END = 304, + TK_VLARB_TABLES_START = 305, + TK_VLARB_TABLES_END = 306, + TK_VLARB_SCOPE_START = 307, + TK_VLARB_SCOPE_END = 308, + TK_VLARB_HIGH_START = 309, + TK_VLARB_HIGH_END = 310, + TK_VLARB_LOW_START = 311, + TK_VLARB_LOW_END = 312, + TK_VLARB_HIGH_LIMIT_START = 313, + TK_VLARB_HIGH_LIMIT_END = 314, + TK_QOS_LEVELS_START = 315, + TK_QOS_LEVELS_END = 316, + TK_QOS_LEVEL_START = 317, + TK_QOS_LEVEL_END = 318, + TK_SN_START = 319, + TK_SN_END = 320, + TK_SL_START = 321, + TK_SL_END = 322, + TK_CLASS_START = 323, + TK_CLASS_END = 324, + TK_MTU_LIMIT_START = 325, + TK_MTU_LIMIT_END = 326, + TK_RATE_LIMIT_START = 327, + TK_RATE_LIMIT_END = 328, + 
TK_QOS_MATCH_RULES_START = 329, + TK_QOS_MATCH_RULES_END = 330, + TK_QOS_MATCH_RULE_START = 331, + TK_QOS_MATCH_RULE_END = 332, + TK_QOS_LEVEL_SN_START = 333, + TK_QOS_LEVEL_SN_END = 334, + TK_SOURCE_START = 335, + TK_SOURCE_END = 336, + TK_DESTINATION_START = 337, + TK_DESTINATION_END = 338, + TK_SERVICE_START = 339, + TK_SERVICE_END = 340 + }; +#endif +#define TK_HEX_NUMBER 258 +#define TK_DEC_NUMBER 259 +#define TK_WHITE 260 +#define TK_DASH 261 +#define TK_DOTDOT 262 +#define TK_COMMA 263 +#define TK_TEXT 264 +#define TK_XML_VERSION_START 265 +#define TK_XML_VERSION_END 266 +#define TK_QOS_POLICY_START 267 +#define TK_QOS_POLICY_END 268 +#define TK_PORT_GROUPS_START 269 +#define TK_PORT_GROUPS_END 270 +#define TK_PORT_GROUP_START 271 +#define TK_PORT_GROUP_END 272 +#define TK_NAME_START 273 +#define TK_NAME_END 274 +#define TK_USE_START 275 +#define TK_USE_END 276 +#define TK_PORT_GUID_START 277 +#define TK_PORT_GUID_END 278 +#define TK_PORT_NAME_START 279 +#define TK_PORT_NAME_END 280 +#define TK_PARTITION_START 281 +#define TK_PARTITION_END 282 +#define TK_NODE_TYPE_START 283 +#define TK_NODE_TYPE_END 284 +#define TK_QOS_SETUP_START 285 +#define TK_QOS_SETUP_END 286 +#define TK_SL2VL_TABLES_START 287 +#define TK_SL2VL_TABLES_END 288 +#define TK_SL2VL_SCOPE_START 289 +#define TK_SL2VL_SCOPE_END 290 +#define TK_GROUP_START 291 +#define TK_GROUP_END 292 +#define TK_FROM_START 293 +#define TK_FROM_END 294 +#define TK_TO_START 295 +#define TK_TO_END 296 +#define TK_SL2VL_TABLE_START 297 +#define TK_SL2VL_TABLE_END 298 +#define TK_ACROSS_START 299 +#define TK_ACROSS_END 300 +#define TK_ACROSS_FROM_START 301 +#define TK_ACROSS_FROM_END 302 +#define TK_ACROSS_TO_START 303 +#define TK_ACROSS_TO_END 304 +#define TK_VLARB_TABLES_START 305 +#define TK_VLARB_TABLES_END 306 +#define TK_VLARB_SCOPE_START 307 +#define TK_VLARB_SCOPE_END 308 +#define TK_VLARB_HIGH_START 309 +#define TK_VLARB_HIGH_END 310 +#define TK_VLARB_LOW_START 311 +#define TK_VLARB_LOW_END 312 +#define 
TK_VLARB_HIGH_LIMIT_START 313 +#define TK_VLARB_HIGH_LIMIT_END 314 +#define TK_QOS_LEVELS_START 315 +#define TK_QOS_LEVELS_END 316 +#define TK_QOS_LEVEL_START 317 +#define TK_QOS_LEVEL_END 318 +#define TK_SN_START 319 +#define TK_SN_END 320 +#define TK_SL_START 321 +#define TK_SL_END 322 +#define TK_CLASS_START 323 +#define TK_CLASS_END 324 +#define TK_MTU_LIMIT_START 325 +#define TK_MTU_LIMIT_END 326 +#define TK_RATE_LIMIT_START 327 +#define TK_RATE_LIMIT_END 328 +#define TK_QOS_MATCH_RULES_START 329 +#define TK_QOS_MATCH_RULES_END 330 +#define TK_QOS_MATCH_RULE_START 331 +#define TK_QOS_MATCH_RULE_END 332 +#define TK_QOS_LEVEL_SN_START 333 +#define TK_QOS_LEVEL_SN_END 334 +#define TK_SOURCE_START 335 +#define TK_SOURCE_END 336 +#define TK_DESTINATION_START 337 +#define TK_DESTINATION_END 338 +#define TK_SERVICE_START 339 +#define TK_SERVICE_END 340 + + + + +#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED) +typedef int YYSTYPE; +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +# define YYSTYPE_IS_TRIVIAL 1 +#endif + +extern YYSTYPE yylval; + + + diff --git a/osm/opensm/osm_qos_parser_y.c b/osm/opensm/osm_qos_parser_y.c new file mode 100644 index 0000000..2b16ccc --- /dev/null +++ b/osm/opensm/osm_qos_parser_y.c @@ -0,0 +1,3215 @@ +/* A Bison parser, made by GNU Bison 1.875. */ + +/* Skeleton parser for Yacc-like parsing with Bison, + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, when this file is copied by Bison into a + Bison output file, you may use that output file without restriction. + This special exception was added by the Free Software Foundation + in version 1.24 of Bison. */ + +/* Written by Richard Stallman by simplifying the original so called + ``semantic'' parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 0 + +/* Using locations. */ +#define YYLSP_NEEDED 0 + + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. 
*/ + enum yytokentype { + TK_HEX_NUMBER = 258, + TK_DEC_NUMBER = 259, + TK_WHITE = 260, + TK_DASH = 261, + TK_DOTDOT = 262, + TK_COMMA = 263, + TK_TEXT = 264, + TK_XML_VERSION_START = 265, + TK_XML_VERSION_END = 266, + TK_QOS_POLICY_START = 267, + TK_QOS_POLICY_END = 268, + TK_PORT_GROUPS_START = 269, + TK_PORT_GROUPS_END = 270, + TK_PORT_GROUP_START = 271, + TK_PORT_GROUP_END = 272, + TK_NAME_START = 273, + TK_NAME_END = 274, + TK_USE_START = 275, + TK_USE_END = 276, + TK_PORT_GUID_START = 277, + TK_PORT_GUID_END = 278, + TK_PORT_NAME_START = 279, + TK_PORT_NAME_END = 280, + TK_PARTITION_START = 281, + TK_PARTITION_END = 282, + TK_NODE_TYPE_START = 283, + TK_NODE_TYPE_END = 284, + TK_QOS_SETUP_START = 285, + TK_QOS_SETUP_END = 286, + TK_SL2VL_TABLES_START = 287, + TK_SL2VL_TABLES_END = 288, + TK_SL2VL_SCOPE_START = 289, + TK_SL2VL_SCOPE_END = 290, + TK_GROUP_START = 291, + TK_GROUP_END = 292, + TK_FROM_START = 293, + TK_FROM_END = 294, + TK_TO_START = 295, + TK_TO_END = 296, + TK_SL2VL_TABLE_START = 297, + TK_SL2VL_TABLE_END = 298, + TK_ACROSS_START = 299, + TK_ACROSS_END = 300, + TK_ACROSS_FROM_START = 301, + TK_ACROSS_FROM_END = 302, + TK_ACROSS_TO_START = 303, + TK_ACROSS_TO_END = 304, + TK_VLARB_TABLES_START = 305, + TK_VLARB_TABLES_END = 306, + TK_VLARB_SCOPE_START = 307, + TK_VLARB_SCOPE_END = 308, + TK_VLARB_HIGH_START = 309, + TK_VLARB_HIGH_END = 310, + TK_VLARB_LOW_START = 311, + TK_VLARB_LOW_END = 312, + TK_VLARB_HIGH_LIMIT_START = 313, + TK_VLARB_HIGH_LIMIT_END = 314, + TK_QOS_LEVELS_START = 315, + TK_QOS_LEVELS_END = 316, + TK_QOS_LEVEL_START = 317, + TK_QOS_LEVEL_END = 318, + TK_SN_START = 319, + TK_SN_END = 320, + TK_SL_START = 321, + TK_SL_END = 322, + TK_CLASS_START = 323, + TK_CLASS_END = 324, + TK_MTU_LIMIT_START = 325, + TK_MTU_LIMIT_END = 326, + TK_RATE_LIMIT_START = 327, + TK_RATE_LIMIT_END = 328, + TK_QOS_MATCH_RULES_START = 329, + TK_QOS_MATCH_RULES_END = 330, + TK_QOS_MATCH_RULE_START = 331, + TK_QOS_MATCH_RULE_END = 332, + 
TK_QOS_LEVEL_SN_START = 333, + TK_QOS_LEVEL_SN_END = 334, + TK_SOURCE_START = 335, + TK_SOURCE_END = 336, + TK_DESTINATION_START = 337, + TK_DESTINATION_END = 338, + TK_SERVICE_START = 339, + TK_SERVICE_END = 340 + }; +#endif +#define TK_HEX_NUMBER 258 +#define TK_DEC_NUMBER 259 +#define TK_WHITE 260 +#define TK_DASH 261 +#define TK_DOTDOT 262 +#define TK_COMMA 263 +#define TK_TEXT 264 +#define TK_XML_VERSION_START 265 +#define TK_XML_VERSION_END 266 +#define TK_QOS_POLICY_START 267 +#define TK_QOS_POLICY_END 268 +#define TK_PORT_GROUPS_START 269 +#define TK_PORT_GROUPS_END 270 +#define TK_PORT_GROUP_START 271 +#define TK_PORT_GROUP_END 272 +#define TK_NAME_START 273 +#define TK_NAME_END 274 +#define TK_USE_START 275 +#define TK_USE_END 276 +#define TK_PORT_GUID_START 277 +#define TK_PORT_GUID_END 278 +#define TK_PORT_NAME_START 279 +#define TK_PORT_NAME_END 280 +#define TK_PARTITION_START 281 +#define TK_PARTITION_END 282 +#define TK_NODE_TYPE_START 283 +#define TK_NODE_TYPE_END 284 +#define TK_QOS_SETUP_START 285 +#define TK_QOS_SETUP_END 286 +#define TK_SL2VL_TABLES_START 287 +#define TK_SL2VL_TABLES_END 288 +#define TK_SL2VL_SCOPE_START 289 +#define TK_SL2VL_SCOPE_END 290 +#define TK_GROUP_START 291 +#define TK_GROUP_END 292 +#define TK_FROM_START 293 +#define TK_FROM_END 294 +#define TK_TO_START 295 +#define TK_TO_END 296 +#define TK_SL2VL_TABLE_START 297 +#define TK_SL2VL_TABLE_END 298 +#define TK_ACROSS_START 299 +#define TK_ACROSS_END 300 +#define TK_ACROSS_FROM_START 301 +#define TK_ACROSS_FROM_END 302 +#define TK_ACROSS_TO_START 303 +#define TK_ACROSS_TO_END 304 +#define TK_VLARB_TABLES_START 305 +#define TK_VLARB_TABLES_END 306 +#define TK_VLARB_SCOPE_START 307 +#define TK_VLARB_SCOPE_END 308 +#define TK_VLARB_HIGH_START 309 +#define TK_VLARB_HIGH_END 310 +#define TK_VLARB_LOW_START 311 +#define TK_VLARB_LOW_END 312 +#define TK_VLARB_HIGH_LIMIT_START 313 +#define TK_VLARB_HIGH_LIMIT_END 314 +#define TK_QOS_LEVELS_START 315 +#define TK_QOS_LEVELS_END 316 
+#define TK_QOS_LEVEL_START 317 +#define TK_QOS_LEVEL_END 318 +#define TK_SN_START 319 +#define TK_SN_END 320 +#define TK_SL_START 321 +#define TK_SL_END 322 +#define TK_CLASS_START 323 +#define TK_CLASS_END 324 +#define TK_MTU_LIMIT_START 325 +#define TK_MTU_LIMIT_END 326 +#define TK_RATE_LIMIT_START 327 +#define TK_RATE_LIMIT_END 328 +#define TK_QOS_MATCH_RULES_START 329 +#define TK_QOS_MATCH_RULES_END 330 +#define TK_QOS_MATCH_RULE_START 331 +#define TK_QOS_MATCH_RULE_END 332 +#define TK_QOS_LEVEL_SN_START 333 +#define TK_QOS_LEVEL_SN_END 334 +#define TK_SOURCE_START 335 +#define TK_SOURCE_END 336 +#define TK_DESTINATION_START 337 +#define TK_DESTINATION_END 338 +#define TK_SERVICE_START 339 +#define TK_SERVICE_END 340 + + + + +/* Copy the first part of user declarations. */ +#line 1 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Grammar of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +static void __osm_qos_parse_tree_init(); + +static char * __osm_qos_parser_strip_white(char * str); + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str); + +static void __osm_qos_parser_port_group_start(); +static void __osm_qos_parser_port_group_end(); + +static void __osm_qos_parser_sl2vl_scope_start(); +static void __osm_qos_parser_sl2vl_scope_end(); + +static void __osm_qos_parser_vlarb_scope_start(); +static void __osm_qos_parser_vlarb_scope_end(); + +static void __osm_qos_parser_qos_level_start(); +static void __osm_qos_parser_qos_level_end(); + +static void __osm_qos_parser_match_rule_start(); +static void __osm_qos_parser_match_rule_end(); + +extern char * yytext; +extern void yyerror (char *s); +extern int yylex (void); +extern FILE * yyin; + +#define OSM_QOS_MAX_LINE_LEN 10000 +char str_buffer[OSM_QOS_MAX_LINE_LEN]; +#define RESET_BUFFER str_buffer[0] = '\0' + +int column_num; +int line_num; + +osm_qos_parse_tree_t * p_qos_parse_tree = NULL; +osm_qos_port_group_t * p_current_port_group = NULL; +osm_qos_sl2vl_scope_t * p_current_sl2vl_scope = NULL; +osm_qos_vlarb_scope_t * p_current_vlarb_scope = NULL; +osm_qos_level_t * p_current_qos_level = NULL; +osm_qos_match_rule_t * p_current_qos_match_rule = NULL; + +osm_log_t * p_osm_log = NULL; + 
+/***************************************************/ + + + +/* Enabling traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif + +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED) +typedef int YYSTYPE; +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +# define YYSTYPE_IS_TRIVIAL 1 +#endif + + + +/* Copy the second part of user declarations. */ + + +/* Line 214 of yacc.c. */ +#line 352 "y.tab.c" + +#if ! defined (yyoverflow) || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# if YYSTACK_USE_ALLOCA +# define YYSTACK_ALLOC alloca +# else +# ifndef YYSTACK_USE_ALLOCA +# if defined (alloca) || defined (_ALLOCA_H) +# define YYSTACK_ALLOC alloca +# else +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's `empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# else +# if defined (__STDC__) || defined (__cplusplus) +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# endif +# define YYSTACK_ALLOC malloc +# define YYSTACK_FREE free +# endif +#endif /* ! defined (yyoverflow) || YYERROR_VERBOSE */ + + +#if (! defined (yyoverflow) \ + && (! defined (__cplusplus) \ + || (YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + short yyss; + YYSTYPE yyvs; + }; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (short) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +/* Copy COUNT objects from FROM to TO. 
The source and destination do + not overlap. */ +# ifndef YYCOPY +# if 1 < __GNUC__ +# define YYCOPY(To, From, Count) \ + __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# else +# define YYCOPY(To, From, Count) \ + do \ + { \ + register YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (To)[yyi] = (From)[yyi]; \ + } \ + while (0) +# endif +# endif + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack, Stack, yysize); \ + Stack = &yyptr->Stack; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (0) + +#endif + +#if defined (__STDC__) || defined (__cplusplus) + typedef signed char yysigned_char; +#else + typedef short yysigned_char; +#endif + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 5 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 308 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 86 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 120 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 185 +/* YYNRULES -- Number of states. */ +#define YYNSTATES 271 + +/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 340 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ +static const unsigned char yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85 +}; + +#if YYDEBUG +/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in + YYRHS. 
*/ +static const unsigned short yyprhs[] = +{ + 0, 0, 3, 6, 7, 11, 12, 17, 19, 21, + 22, 25, 27, 29, 31, 33, 37, 39, 41, 43, + 46, 50, 52, 54, 55, 58, 60, 62, 64, 66, + 68, 70, 74, 76, 78, 79, 82, 85, 89, 91, + 93, 94, 97, 101, 103, 105, 106, 109, 111, 113, + 115, 117, 119, 121, 123, 127, 129, 131, 132, 135, + 139, 141, 143, 144, 147, 149, 151, 153, 155, 157, + 161, 163, 165, 166, 171, 173, 175, 176, 179, 181, + 183, 185, 187, 189, 191, 195, 197, 199, 200, 205, + 207, 209, 210, 213, 215, 217, 219, 221, 223, 225, + 229, 231, 235, 237, 241, 243, 247, 249, 253, 255, + 259, 261, 265, 267, 271, 273, 277, 279, 283, 285, + 289, 291, 295, 297, 305, 307, 311, 313, 317, 319, + 323, 325, 329, 331, 335, 337, 341, 343, 351, 353, + 361, 363, 367, 369, 373, 375, 383, 385, 393, 395, + 399, 401, 402, 405, 407, 409, 411, 413, 415, 417, + 419, 421, 424, 427, 430, 433, 435, 438, 441, 444, + 447, 450, 452, 455, 458, 461, 464, 467, 469, 471, + 473, 475, 477, 479, 481, 482 +}; + +/* YYRHS -- A `-1'-separated list of the rules' RHS. 
*/ +static const short yyrhs[] = +{ + 87, 0, -1, 88, 89, -1, -1, 10, 193, 11, + -1, -1, 89, 90, 92, 91, -1, 12, -1, 13, + -1, -1, 92, 93, -1, 94, -1, 103, -1, 125, + -1, 133, -1, 95, 97, 96, -1, 14, -1, 15, + -1, 98, -1, 97, 98, -1, 99, 101, 100, -1, + 16, -1, 17, -1, -1, 101, 102, -1, 141, -1, + 143, -1, 147, -1, 145, -1, 149, -1, 151, -1, + 104, 106, 105, -1, 30, -1, 31, -1, -1, 106, + 107, -1, 106, 116, -1, 108, 110, 109, -1, 32, + -1, 33, -1, -1, 110, 111, -1, 112, 114, 113, + -1, 34, -1, 35, -1, -1, 114, 115, -1, 153, + -1, 155, -1, 157, -1, 159, -1, 161, -1, 163, + -1, 165, -1, 117, 119, 118, -1, 50, -1, 51, + -1, -1, 119, 120, -1, 121, 123, 122, -1, 52, + -1, 53, -1, -1, 123, 124, -1, 153, -1, 155, + -1, 179, -1, 181, -1, 177, -1, 126, 128, 127, + -1, 60, -1, 61, -1, -1, 128, 129, 131, 130, + -1, 62, -1, 63, -1, -1, 131, 132, -1, 167, + -1, 143, -1, 169, -1, 175, -1, 171, -1, 173, + -1, 134, 136, 135, -1, 74, -1, 75, -1, -1, + 136, 137, 139, 138, -1, 76, -1, 77, -1, -1, + 139, 140, -1, 143, -1, 187, -1, 183, -1, 185, + -1, 189, -1, 191, -1, 142, 193, 19, -1, 18, + -1, 144, 193, 21, -1, 20, -1, 146, 193, 25, + -1, 24, -1, 148, 204, 23, -1, 22, -1, 150, + 193, 27, -1, 26, -1, 152, 193, 29, -1, 28, + -1, 154, 193, 37, -1, 36, -1, 156, 193, 45, + -1, 44, -1, 158, 193, 47, -1, 46, -1, 160, + 193, 49, -1, 48, -1, 162, 193, 39, -1, 38, + -1, 164, 193, 41, -1, 40, -1, 166, 205, 202, + 196, 202, 205, 43, -1, 42, -1, 168, 202, 65, + -1, 64, -1, 170, 202, 67, -1, 66, -1, 172, + 202, 71, -1, 70, -1, 174, 202, 73, -1, 72, + -1, 176, 202, 69, -1, 68, -1, 178, 202, 59, + -1, 58, -1, 180, 205, 202, 197, 202, 205, 55, + -1, 54, -1, 182, 205, 202, 197, 202, 205, 57, + -1, 56, -1, 184, 193, 81, -1, 80, -1, 186, + 193, 83, -1, 82, -1, 188, 205, 202, 198, 202, + 205, 69, -1, 68, -1, 190, 205, 202, 196, 202, + 205, 85, -1, 84, -1, 192, 202, 79, -1, 78, + -1, -1, 193, 194, -1, 195, -1, 202, -1, 9, + -1, 5, -1, 6, -1, 7, -1, 8, -1, 205, + -1, 196, 205, -1, 196, 203, -1, 196, 204, 
-1, + 196, 199, -1, 205, -1, 197, 205, -1, 197, 203, + -1, 197, 204, -1, 197, 199, -1, 197, 200, -1, + 205, -1, 198, 205, -1, 198, 203, -1, 198, 204, + -1, 198, 199, -1, 198, 201, -1, 8, -1, 7, + -1, 6, -1, 203, -1, 204, -1, 4, -1, 3, + -1, -1, 5, -1 +}; + +/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ +static const unsigned short yyrline[] = +{ + 0, 186, 186, 189, 190, 193, 194, 197, 200, 203, + 204, 207, 208, 209, 210, 243, 246, 249, 252, 253, + 256, 259, 264, 269, 270, 273, 274, 275, 276, 277, + 278, 314, 317, 320, 323, 324, 325, 337, 340, 343, + 346, 347, 350, 353, 358, 363, 364, 367, 368, 369, + 370, 371, 372, 373, 385, 388, 391, 394, 395, 398, + 401, 406, 411, 412, 415, 416, 417, 418, 419, 448, + 451, 454, 457, 458, 461, 466, 471, 472, 475, 476, + 477, 478, 479, 480, 504, 507, 510, 513, 514, 517, + 522, 527, 528, 531, 532, 533, 534, 535, 536, 543, + 549, 560, 573, 607, 619, 626, 638, 645, 657, 664, + 691, 698, 718, 725, 750, 757, 770, 778, 791, 799, + 812, 820, 833, 841, 882, 888, 894, 901, 907, 914, + 920, 927, 933, 940, 946, 953, 959, 966, 1002, 1009, + 1045, 1052, 1058, 1065, 1071, 1078, 1146, 1152, 1184, 1191, + 1204, 1216, 1217, 1220, 1221, 1224, 1228, 1232, 1236, 1240, + 1246, 1247, 1248, 1249, 1250, 1253, 1254, 1255, 1256, 1257, + 1258, 1261, 1262, 1263, 1264, 1265, 1266, 1269, 1275, 1281, + 1287, 1288, 1291, 1297, 1303, 1304 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE +/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "TK_HEX_NUMBER", "TK_DEC_NUMBER", + "TK_WHITE", "TK_DASH", "TK_DOTDOT", "TK_COMMA", "TK_TEXT", + "TK_XML_VERSION_START", "TK_XML_VERSION_END", "TK_QOS_POLICY_START", + "TK_QOS_POLICY_END", "TK_PORT_GROUPS_START", "TK_PORT_GROUPS_END", + "TK_PORT_GROUP_START", "TK_PORT_GROUP_END", "TK_NAME_START", + "TK_NAME_END", "TK_USE_START", "TK_USE_END", "TK_PORT_GUID_START", + "TK_PORT_GUID_END", "TK_PORT_NAME_START", "TK_PORT_NAME_END", + "TK_PARTITION_START", "TK_PARTITION_END", "TK_NODE_TYPE_START", + "TK_NODE_TYPE_END", "TK_QOS_SETUP_START", "TK_QOS_SETUP_END", + "TK_SL2VL_TABLES_START", "TK_SL2VL_TABLES_END", "TK_SL2VL_SCOPE_START", + "TK_SL2VL_SCOPE_END", "TK_GROUP_START", "TK_GROUP_END", "TK_FROM_START", + "TK_FROM_END", "TK_TO_START", "TK_TO_END", "TK_SL2VL_TABLE_START", + "TK_SL2VL_TABLE_END", "TK_ACROSS_START", "TK_ACROSS_END", + "TK_ACROSS_FROM_START", "TK_ACROSS_FROM_END", "TK_ACROSS_TO_START", + "TK_ACROSS_TO_END", "TK_VLARB_TABLES_START", "TK_VLARB_TABLES_END", + "TK_VLARB_SCOPE_START", "TK_VLARB_SCOPE_END", "TK_VLARB_HIGH_START", + "TK_VLARB_HIGH_END", "TK_VLARB_LOW_START", "TK_VLARB_LOW_END", + "TK_VLARB_HIGH_LIMIT_START", "TK_VLARB_HIGH_LIMIT_END", + "TK_QOS_LEVELS_START", "TK_QOS_LEVELS_END", "TK_QOS_LEVEL_START", + "TK_QOS_LEVEL_END", "TK_SN_START", "TK_SN_END", "TK_SL_START", + "TK_SL_END", "TK_CLASS_START", "TK_CLASS_END", "TK_MTU_LIMIT_START", + "TK_MTU_LIMIT_END", "TK_RATE_LIMIT_START", "TK_RATE_LIMIT_END", + "TK_QOS_MATCH_RULES_START", "TK_QOS_MATCH_RULES_END", + "TK_QOS_MATCH_RULE_START", "TK_QOS_MATCH_RULE_END", + "TK_QOS_LEVEL_SN_START", "TK_QOS_LEVEL_SN_END", "TK_SOURCE_START", + "TK_SOURCE_END", "TK_DESTINATION_START", "TK_DESTINATION_END", + "TK_SERVICE_START", "TK_SERVICE_END", "$accept", "head", "xml_version", + "qos_policy", "qos_policy_start", "qos_policy_end", + "qos_policy_entries", "qos_policy_entry", "port_groups", + "port_groups_start", "port_groups_end", 
"port_group", + "single_port_group", "port_group_start", "port_group_end", + "port_group_entries", "port_group_entry", "qos_setup", + "qos_setup_start", "qos_setup_end", "qos_setup_items", "sl2vl_tables", + "sl2vl_tables_start", "sl2vl_tables_end", "sl2vl_scope_items", + "sl2vl_scope", "sl2vl_scope_start", "sl2vl_scope_end", + "sl2vl_scope_entries", "sl2vl_scope_entry", "vlarb_tables", + "vlarb_tables_start", "vlarb_tables_end", "vlarb_scope_items", + "vlarb_scope", "vlarb_scope_start", "vlarb_scope_end", + "vlarb_scope_entries", "vlarb_scope_entry", "qos_levels", + "qos_levels_start", "qos_levels_end", "qos_level_items", + "qos_level_start", "qos_level_end", "qos_level_entries", + "qos_level_entry", "qos_match_rules", "qos_match_rules_start", + "qos_match_rules_end", "qos_match_rule_items", "qos_match_rule_start", + "qos_match_rule_end", "qos_match_rule_entries", "qos_match_rule_entry", + "name", "name_start", "use", "use_start", "port_name", + "port_name_start", "port_guid", "port_guid_start", "partition", + "partition_start", "node_type", "node_type_start", "group", + "group_start", "across", "across_start", "across_from", + "across_from_start", "across_to", "across_to_start", "from", + "from_start", "to", "to_start", "sl2vl_table", "sl2vl_table_start", + "sn", "sn_start", "sl", "sl_start", "mtu_limit", "mtu_limit_start", + "rate_limit", "rate_limit_start", "qos_level_class", + "qos_level_class_start", "vlarb_high_limit", "vlarb_high_limit_start", + "vlarb_high", "vlarb_high_start", "vlarb_low", "vlarb_low_start", + "source", "source_start", "destination", "destination_start", + "match_rule_class", "match_rule_class_start", "service", + "service_start", "qos_level_sn", "qos_level_sn_start", "any_text", + "text", "pure_text", "num_list_wo_whites", + "num_list_with_dotdot_wo_whites", "num_list_with_dash_wo_whites", + "comma", "dotdot", "dash", "any_number", "dec_number", "hex_number", + "whitespace", 0 +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[YYLEX-NUM] -- 
Internal token number corresponding to + token YYLEX-NUM. */ +static const unsigned short yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, + 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, + 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, + 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, + 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, + 335, 336, 337, 338, 339, 340 +}; +# endif + +/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +static const unsigned char yyr1[] = +{ + 0, 86, 87, 88, 88, 89, 89, 90, 91, 92, + 92, 93, 93, 93, 93, 94, 95, 96, 97, 97, + 98, 99, 100, 101, 101, 102, 102, 102, 102, 102, + 102, 103, 104, 105, 106, 106, 106, 107, 108, 109, + 110, 110, 111, 112, 113, 114, 114, 115, 115, 115, + 115, 115, 115, 115, 116, 117, 118, 119, 119, 120, + 121, 122, 123, 123, 124, 124, 124, 124, 124, 125, + 126, 127, 128, 128, 129, 130, 131, 131, 132, 132, + 132, 132, 132, 132, 133, 134, 135, 136, 136, 137, + 138, 139, 139, 140, 140, 140, 140, 140, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, + 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, + 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 193, 194, 194, 195, 195, 195, 195, 195, + 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, + 197, 198, 198, 198, 198, 198, 198, 199, 200, 201, + 202, 202, 203, 204, 205, 205 +}; + +/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. 
*/ +static const unsigned char yyr2[] = +{ + 0, 2, 2, 0, 3, 0, 4, 1, 1, 0, + 2, 1, 1, 1, 1, 3, 1, 1, 1, 2, + 3, 1, 1, 0, 2, 1, 1, 1, 1, 1, + 1, 3, 1, 1, 0, 2, 2, 3, 1, 1, + 0, 2, 3, 1, 1, 0, 2, 1, 1, 1, + 1, 1, 1, 1, 3, 1, 1, 0, 2, 3, + 1, 1, 0, 2, 1, 1, 1, 1, 1, 3, + 1, 1, 0, 4, 1, 1, 0, 2, 1, 1, + 1, 1, 1, 1, 3, 1, 1, 0, 4, 1, + 1, 0, 2, 1, 1, 1, 1, 1, 1, 3, + 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, + 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, + 1, 3, 1, 7, 1, 3, 1, 3, 1, 3, + 1, 3, 1, 3, 1, 3, 1, 7, 1, 7, + 1, 3, 1, 3, 1, 7, 1, 7, 1, 3, + 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, + 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, + 1, 1, 1, 1, 0, 1 +}; + +/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state + STATE-NUM when YYTABLE doesn't specify something else to do. Zero + means the default is an error. */ +static const unsigned char yydefact[] = +{ + 3, 151, 0, 5, 0, 1, 2, 183, 182, 156, + 157, 158, 159, 155, 4, 152, 153, 154, 180, 181, + 7, 9, 0, 8, 16, 32, 70, 85, 6, 10, + 11, 0, 12, 34, 13, 72, 14, 87, 21, 0, + 18, 23, 0, 0, 0, 17, 15, 19, 0, 33, + 38, 55, 31, 35, 40, 36, 57, 71, 74, 69, + 76, 86, 89, 84, 91, 22, 100, 102, 106, 104, + 108, 110, 20, 24, 25, 151, 26, 151, 28, 151, + 27, 0, 29, 151, 30, 151, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 39, 43, 37, 41, + 45, 56, 60, 54, 58, 62, 75, 126, 128, 134, + 130, 132, 73, 77, 79, 78, 0, 80, 0, 82, + 0, 83, 0, 81, 0, 146, 90, 150, 142, 144, + 148, 88, 92, 93, 95, 151, 96, 151, 94, 184, + 97, 184, 98, 0, 99, 101, 103, 105, 107, 109, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 185, + 0, 0, 0, 44, 112, 120, 122, 124, 114, 116, + 118, 42, 46, 47, 151, 48, 151, 49, 151, 50, + 151, 51, 151, 52, 151, 53, 184, 61, 138, 140, + 136, 59, 63, 64, 65, 68, 0, 66, 184, 67, + 184, 125, 127, 129, 131, 133, 141, 143, 184, 184, + 149, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 171, 0, 160, 111, 113, 115, 117, 119, + 121, 184, 135, 184, 184, 179, 177, 175, 176, 184, + 173, 174, 172, 164, 184, 162, 163, 161, 0, 0, + 165, 0, 0, 0, 184, 178, 169, 170, 184, 167, + 168, 
166, 184, 145, 147, 0, 0, 0, 123, 137, + 139 +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const short yydefgoto[] = +{ + -1, 2, 3, 6, 21, 28, 22, 29, 30, 31, + 46, 39, 40, 41, 72, 48, 73, 32, 33, 52, + 42, 53, 54, 98, 86, 99, 100, 171, 150, 172, + 55, 56, 103, 87, 104, 105, 191, 151, 192, 34, + 35, 59, 43, 60, 112, 88, 113, 36, 37, 63, + 44, 64, 131, 89, 132, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, + 186, 115, 116, 117, 118, 119, 120, 121, 122, 123, + 124, 195, 196, 197, 198, 199, 200, 134, 135, 136, + 137, 138, 139, 140, 141, 142, 143, 4, 15, 16, + 223, 249, 221, 243, 257, 238, 17, 18, 19, 224 +}; + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +#define YYPACT_NINF -180 +static const short yypact[] = +{ + -6, -180, 8, -180, 297, -180, 2, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, 13, -180, -180, -180, -180, -180, -180, -180, + -180, 1, -180, -180, -180, -180, -180, -180, -180, 17, + -180, -180, -20, 6, 14, -180, -180, -180, 271, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, 38, -180, -180, -180, -180, 80, 72, 81, -18, + 263, 241, 233, 30, 226, 199, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, 126, -180, 126, -180, + 126, -180, 126, -180, 126, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, 51, + -180, 51, -180, 126, -180, -180, -180, -180, -180, -180, + 239, 207, -7, -2, -10, 3, 5, 31, 16, -180, + 126, 126, -8, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, 51, -180, -180, -180, + -180, -180, -180, -180, -180, -180, 126, -180, 51, -180, + 51, 
-180, -180, -180, -180, -180, -180, -180, 51, 51, + -180, 189, 168, 154, 88, 182, 175, 126, 27, 126, + 126, 114, -180, 75, -180, -180, -180, -180, -180, -180, + -180, 51, -180, 51, 51, -180, -180, -180, -180, 51, + 19, 34, -180, -180, 51, -37, -33, -180, 75, 215, + -180, 215, 46, 42, 51, -180, -180, -180, 51, -42, + -26, -180, 51, -180, -180, 89, 83, 84, -180, -180, + -180 +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const short yypgoto[] = +{ + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, 104, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, 76, -180, -180, + -180, -180, -180, -180, -180, -180, -180, 4, -180, 15, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, + -180, -180, -180, -180, -180, -180, -180, -28, -180, -180, + -62, -35, -180, -167, -180, -180, -115, -179, -81, -123 +}; + +/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule which + number is the opposite. If zero, do what YYDEFACT says. + If YYTABLE_NINF, syntax error. 
*/ +#define YYTABLE_NINF -182 +static const short yytable[] = +{ + 93, 152, 67, 153, 1, 154, -180, 155, 5, 156, + -181, 49, 50, -180, 20, -180, 160, 38, 161, 7, + 8, 9, 10, 11, 12, 13, 23, 24, 162, -181, + 51, -181, 45, 38, 7, 8, 9, 10, 11, 12, + 13, 7, 240, 25, 245, 208, 209, 90, -180, 91, + 125, 92, -181, 147, 237, 94, 159, 95, 201, 126, + 127, 203, 128, 217, 129, 202, 130, 57, 58, 245, + 259, 210, 259, 26, 205, 219, 204, 220, 7, 8, + 159, 218, 256, 236, 256, 222, 232, 27, -180, 61, + 62, 7, 8, 9, 10, 11, 12, 13, 242, 207, + 247, 67, 231, -181, 233, 234, 239, 157, 244, 158, + 250, 250, 206, 96, 97, 263, 252, 7, 8, 159, + 235, 253, 236, 101, 102, 247, 261, 264, 261, 7, + 8, 265, 268, 254, 258, 266, 262, 228, 269, 267, + 241, 270, 246, 47, 106, 107, 211, 108, 212, 109, + 213, 110, 214, 111, 215, 193, 216, 7, 8, 9, + 10, 11, 12, 13, 114, 133, 194, 246, 260, 248, + 260, 7, 8, 9, 10, 11, 12, 13, 7, 8, + 9, 10, 11, 12, 13, 7, 8, 9, 10, 11, + 12, 13, 7, 8, 9, 10, 11, 12, 13, 251, + 0, 227, 7, 8, 9, 10, 11, 12, 13, 0, + 0, 0, 0, 226, 0, 0, 230, 0, 7, 8, + 159, 229, 255, 236, 0, 0, 225, 0, 149, 7, + 8, 9, 10, 11, 12, 13, 7, 8, 9, 10, + 11, 12, 13, 164, 7, 8, 9, 10, 11, 12, + 13, 168, 0, 148, 0, 0, 0, 0, 146, 0, + 187, 188, 145, 189, 0, 190, 7, 8, 9, 10, + 11, 12, 13, 0, 163, 164, 0, 165, 0, 166, + 0, 167, 144, 168, 0, 169, 0, 170, 65, 66, + 0, 67, 0, 68, 0, 69, 0, 70, 0, 71, + 7, 8, 9, 10, 11, 12, 13, 0, 14 +}; + +static const short yycheck[] = +{ + 81, 116, 20, 118, 10, 120, 43, 122, 0, 124, + 43, 31, 32, 55, 12, 57, 139, 16, 141, 3, + 4, 5, 6, 7, 8, 9, 13, 14, 143, 55, + 50, 57, 15, 16, 3, 4, 5, 6, 7, 8, + 9, 3, 221, 30, 223, 160, 161, 75, 85, 77, + 68, 79, 85, 23, 221, 83, 5, 85, 65, 77, + 78, 71, 80, 186, 82, 67, 84, 61, 62, 248, + 249, 79, 251, 60, 69, 198, 73, 200, 3, 4, + 5, 196, 249, 8, 251, 208, 59, 74, 69, 75, + 76, 3, 4, 5, 6, 7, 8, 9, 221, 83, + 223, 20, 217, 69, 219, 220, 221, 135, 223, 137, + 233, 234, 81, 33, 34, 69, 239, 3, 4, 5, + 6, 244, 8, 51, 52, 248, 
249, 85, 251, 3, + 4, 254, 43, 248, 249, 258, 251, 49, 55, 262, + 221, 57, 223, 39, 63, 64, 174, 66, 176, 68, + 178, 70, 180, 72, 182, 151, 184, 3, 4, 5, + 6, 7, 8, 9, 88, 89, 151, 248, 249, 231, + 251, 3, 4, 5, 6, 7, 8, 9, 3, 4, + 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, + 8, 9, 3, 4, 5, 6, 7, 8, 9, 234, + -1, 47, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, 45, -1, -1, 41, -1, 3, 4, + 5, 39, 7, 8, -1, -1, 37, -1, 29, 3, + 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, + 7, 8, 9, 36, 3, 4, 5, 6, 7, 8, + 9, 44, -1, 27, -1, -1, -1, -1, 25, -1, + 53, 54, 21, 56, -1, 58, 3, 4, 5, 6, + 7, 8, 9, -1, 35, 36, -1, 38, -1, 40, + -1, 42, 19, 44, -1, 46, -1, 48, 17, 18, + -1, 20, -1, 22, -1, 24, -1, 26, -1, 28, + 3, 4, 5, 6, 7, 8, 9, -1, 11 +}; + +/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const unsigned char yystos[] = +{ + 0, 10, 87, 88, 193, 0, 89, 3, 4, 5, + 6, 7, 8, 9, 11, 194, 195, 202, 203, 204, + 12, 90, 92, 13, 14, 30, 60, 74, 91, 93, + 94, 95, 103, 104, 125, 126, 133, 134, 16, 97, + 98, 99, 106, 128, 136, 15, 96, 98, 101, 31, + 32, 50, 105, 107, 108, 116, 117, 61, 62, 127, + 129, 75, 76, 135, 137, 17, 18, 20, 22, 24, + 26, 28, 100, 102, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 110, 119, 131, 139, + 193, 193, 193, 204, 193, 193, 33, 34, 109, 111, + 112, 51, 52, 118, 120, 121, 63, 64, 66, 68, + 70, 72, 130, 132, 143, 167, 168, 169, 170, 171, + 172, 173, 174, 175, 176, 68, 77, 78, 80, 82, + 84, 138, 140, 143, 183, 184, 185, 186, 187, 188, + 189, 190, 191, 192, 19, 21, 25, 23, 27, 29, + 114, 123, 202, 202, 202, 202, 202, 193, 193, 5, + 205, 205, 202, 35, 36, 38, 40, 42, 44, 46, + 48, 113, 115, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 53, 54, 56, + 58, 122, 124, 153, 155, 177, 178, 179, 180, 181, + 182, 65, 67, 71, 73, 69, 81, 83, 202, 202, + 79, 193, 193, 193, 193, 193, 193, 205, 202, 205, + 205, 198, 205, 196, 205, 37, 45, 47, 49, 39, + 41, 202, 59, 202, 202, 6, 8, 199, 201, 202, + 203, 204, 
205, 199, 202, 203, 204, 205, 196, 197, + 205, 197, 205, 205, 202, 7, 199, 200, 202, 203, + 204, 205, 202, 69, 85, 205, 205, 205, 43, 55, + 57 +}; + +#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__) +# define YYSIZE_T __SIZE_TYPE__ +#endif +#if ! defined (YYSIZE_T) && defined (size_t) +# define YYSIZE_T size_t +#endif +#if ! defined (YYSIZE_T) +# if defined (__STDC__) || defined (__cplusplus) +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# endif +#endif +#if ! defined (YYSIZE_T) +# define YYSIZE_T unsigned int +#endif + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrlab1 + +/* Like YYERROR except do call yyerror. This remains here temporarily + to ease the transition to the new meaning of YYERROR, for GCC. + Once GCC version 2 has supplanted version 1, this can go. */ + +#define YYFAIL goto yyerrlab + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY && yylen == 1) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + yytoken = YYTRANSLATE (yychar); \ + YYPOPSTACK; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror ("syntax error: cannot back up");\ + YYERROR; \ + } \ +while (0) + +#define YYTERROR 1 +#define YYERRCODE 256 + +/* YYLLOC_DEFAULT -- Compute the default location (before the actions + are run). */ + +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + Current.first_line = Rhs[1].first_line; \ + Current.first_column = Rhs[1].first_column; \ + Current.last_line = Rhs[N].last_line; \ + Current.last_column = Rhs[N].last_column; +#endif + +/* YYLEX -- calling `yylex' with the right arguments. */ + +#ifdef YYLEX_PARAM +# define YYLEX yylex (YYLEX_PARAM) +#else +# define YYLEX yylex () +#endif + +/* Enable debugging if requested. 
*/ +#if YYDEBUG + +# ifndef YYFPRINTF +# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + +# define YYDSYMPRINT(Args) \ +do { \ + if (yydebug) \ + yysymprint Args; \ +} while (0) + +# define YYDSYMPRINTF(Title, Token, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yysymprint (stderr, \ + Token, Value); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yy_stack_print (short *bottom, short *top) +#else +static void +yy_stack_print (bottom, top) + short *bottom; + short *top; +#endif +{ + YYFPRINTF (stderr, "Stack now"); + for (/* Nothing. */; bottom <= top; ++bottom) + YYFPRINTF (stderr, " %d", *bottom); + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yy_reduce_print (int yyrule) +#else +static void +yy_reduce_print (yyrule) + int yyrule; +#endif +{ + int yyi; + unsigned int yylineno = yyrline[yyrule]; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %u), ", + yyrule - 1, yylineno); + /* Print the symbols being reduced, and their result. 
*/ + for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++) + YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]); + YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]); +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (Rule); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YYDSYMPRINT(Args) +# define YYDSYMPRINTF(Title, Token, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#if YYMAXDEPTH == 0 +# undef YYMAXDEPTH +#endif + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined (__GLIBC__) && defined (_STRING_H) +# define yystrlen strlen +# else +/* Return the length of YYSTR. */ +static YYSIZE_T +# if defined (__STDC__) || defined (__cplusplus) +yystrlen (const char *yystr) +# else +yystrlen (yystr) + const char *yystr; +# endif +{ + register const char *yys = yystr; + + while (*yys++ != '\0') + continue; + + return yys - yystr - 1; +} +# endif +# endif + +# ifndef yystpcpy +# if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE) +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. 
*/ +static char * +# if defined (__STDC__) || defined (__cplusplus) +yystpcpy (char *yydest, const char *yysrc) +# else +yystpcpy (yydest, yysrc) + char *yydest; + const char *yysrc; +# endif +{ + register char *yyd = yydest; + register const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +#endif /* !YYERROR_VERBOSE */ + + + +#if YYDEBUG +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yysymprint (FILE *yyoutput, int yytype, YYSTYPE *yyvaluep) +#else +static void +yysymprint (yyoutput, yytype, yyvaluep) + FILE *yyoutput; + int yytype; + YYSTYPE *yyvaluep; +#endif +{ + /* Pacify ``unused variable'' warnings. */ + (void) yyvaluep; + + if (yytype < YYNTOKENS) + { + YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); +# ifdef YYPRINT + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# endif + } + else + YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); + + switch (yytype) + { + default: + break; + } + YYFPRINTF (yyoutput, ")"); +} + +#endif /* ! YYDEBUG */ +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yydestruct (int yytype, YYSTYPE *yyvaluep) +#else +static void +yydestruct (yytype, yyvaluep) + int yytype; + YYSTYPE *yyvaluep; +#endif +{ + /* Pacify ``unused variable'' warnings. */ + (void) yyvaluep; + + switch (yytype) + { + + default: + break; + } +} + + +/* Prevent warnings from -Wmissing-prototypes. */ + +#ifdef YYPARSE_PARAM +# if defined (__STDC__) || defined (__cplusplus) +int yyparse (void *YYPARSE_PARAM); +# else +int yyparse (); +# endif +#else /* ! YYPARSE_PARAM */ +#if defined (__STDC__) || defined (__cplusplus) +int yyparse (void); +#else +int yyparse (); +#endif +#endif /* ! 
YYPARSE_PARAM */ + + + +/* The lookahead symbol. */ +int yychar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; + +/* Number of syntax errors so far. */ +int yynerrs; + + + +/*----------. +| yyparse. | +`----------*/ + +#ifdef YYPARSE_PARAM +# if defined (__STDC__) || defined (__cplusplus) +int yyparse (void *YYPARSE_PARAM) +# else +int yyparse (YYPARSE_PARAM) + void *YYPARSE_PARAM; +# endif +#else /* ! YYPARSE_PARAM */ +#if defined (__STDC__) || defined (__cplusplus) +int +yyparse (void) +#else +int +yyparse () + +#endif +#endif +{ + + register int yystate; + register int yyn; + int yyresult; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + /* Lookahead token as an internal (translated) token number. */ + int yytoken = 0; + + /* Three stacks and their tools: + `yyss': related to states, + `yyvs': related to semantic values, + `yyls': related to locations. + + Refer to the stacks thru separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + short yyssa[YYINITDEPTH]; + short *yyss = yyssa; + register short *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + register YYSTYPE *yyvsp; + + + +#define YYPOPSTACK (yyvsp--, yyssp--) + + YYSIZE_T yystacksize = YYINITDEPTH; + + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + + + /* When reducing, the number of symbols on the RHS of the reduced + rule. */ + int yylen; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + + /* Initialize stack pointers. + Waste one element of value and location stack + so that they stay on the same level as the state stack. + The wasted elements are never initialized. 
*/ + + yyssp = yyss; + yyvsp = yyvs; + + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. so pushing a state here evens the stacks. + */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + short *yyss1 = yyss; + + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow ("parser stack overflow", + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyoverflowlab; +# else + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyoverflowlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + short *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! 
yyptr) + goto yyoverflowlab; + YYSTACK_RELOCATE (yyss); + YYSTACK_RELOCATE (yyvs); + +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + +/* Do appropriate processing given the current state. */ +/* Read a lookahead token if we need one and don't already have one. */ +/* yyresume: */ + + /* First try to decide what to do without reference to lookahead token. */ + + yyn = yypact[yystate]; + if (yyn == YYPACT_NINF) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = YYLEX; + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yyn == 0 || yyn == YYTABLE_NINF) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + if (yyn == YYFINAL) + YYACCEPT; + + /* Shift the lookahead token. */ + YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken])); + + /* Discard the token being shifted unless it is eof. */ + if (yychar != YYEOF) + yychar = YYEMPTY; + + *++yyvsp = yylval; + + + /* Count tokens shifted since error; after three, turn off error + status. 
*/ + if (yyerrstatus) + yyerrstatus--; + + yystate = yyn; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + `$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 21: +#line 259 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_port_group_start(); + } + break; + + case 22: +#line 264 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_port_group_end(); + } + break; + + case 43: +#line 353 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_sl2vl_scope_start(); + } + break; + + case 44: +#line 358 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_sl2vl_scope_end(); + } + break; + + case 60: +#line 401 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_vlarb_scope_start(); + } + break; + + case 61: +#line 406 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_vlarb_scope_end(); + } + break; + + case 74: +#line 461 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_qos_level_start(); + } + break; + + case 75: 
+#line 466 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_qos_level_end(); + } + break; + + case 89: +#line 517 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_match_rule_start(); + } + break; + + case 90: +#line 522 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_match_rule_end(); + } + break; + + case 99: +#line 543 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* of - one instance */ + p_current_port_group->name = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 100: +#line 549 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* of - one instance */ + RESET_BUFFER; + if (p_current_port_group->name) + { + yyerror(" has multiple tags"); + return 1; + } + } + break; + + case 101: +#line 560 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* of ,, - one instance */ + if (p_current_port_group) + p_current_port_group->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_level) + p_current_qos_level->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_match_rule) + p_current_qos_match_rule->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else + CL_ASSERT(0); + } + break; + + case 102: +#line 573 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + RESET_BUFFER; + if (p_current_port_group) + { + /* of - one instance */ + if (p_current_port_group->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else if (p_current_qos_level) + { + /* of - one instance */ + if (p_current_qos_level->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else if (p_current_qos_match_rule) + { + /* of - one instance */ + if (p_current_qos_match_rule->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else + CL_ASSERT(0); + } + break; + + case 103: +#line 607 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_names, + p_str_item, + NULL); + } + break; + + case 104: +#line 619 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 105: +#line 626 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + osm_qos_uint64_vector_item_t * p_uint64_item = + (osm_qos_uint64_vector_item_t *)malloc(sizeof(osm_qos_uint64_vector_item_t)); + + __osm_qos_parser_str2uint64(&p_uint64_item->value, + __osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_guids, + p_uint64_item, + NULL); + } + break; + + case 106: +#line 638 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 107: +#line 645 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->partitions, + p_str_item, + NULL); + } + break; + + case 108: +#line 657 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 109: +#line 664 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + uint8_t tmp_node_type; + char * clean_str = 
__osm_qos_parser_strip_white(str_buffer); + + if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_ROUTER) == 0) + tmp_node_type = IB_NODE_TYPE_ROUTER; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_CA) == 0) + tmp_node_type = IB_NODE_TYPE_CA; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_SWITCH) == 0) + tmp_node_type = IB_NODE_TYPE_SWITCH; + else + { + yyerror("wrong value"); + return 1; + } + + osm_qos_uint32_vector_item_t * p_uint32_item = + (osm_qos_uint32_vector_item_t *)malloc(sizeof(osm_qos_uint32_vector_item_t)); + + p_uint32_item->value = tmp_node_type; + cl_ptr_vector_insert(&p_current_port_group->node_types, + p_uint32_item, + NULL); + } + break; + + case 110: +#line 691 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 111: +#line 698 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + if (p_current_sl2vl_scope) + cl_ptr_vector_insert(&p_current_sl2vl_scope->groups, + p_str_item, + NULL); + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->groups, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + break; + + case 112: +#line 718 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + break; + + case 113: +#line 725 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. 
+ The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + + /* inserting this both to across_to and to across_from */ + if (p_current_sl2vl_scope) { + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->across, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + break; + + case 114: +#line 750 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + break; + + case 115: +#line 757 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + } + break; + + case 116: +#line 770 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 117: +#line 778 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. 
+ The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + break; + + case 118: +#line 791 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 119: +#line 799 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->from, + p_str_item, + NULL); + } + break; + + case 120: +#line 812 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 121: +#line 820 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->to, + p_str_item, + NULL); + } + break; + + case 122: +#line 833 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. 
+ The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 123: +#line 843 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of 16 numbers */ + uint8_t counter = 0; + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + + /* checking the syntax */ + for (i = 0; tmp_str[i] != '\0'; i++) + if (tmp_str[i] == ',') + counter ++; + if (counter != 15) + { + yyerror("wrong number of values in (should be 16)"); + return 1; + } + for (i = 1; tmp_str[i] != '\0'; i++) + if ((tmp_str[i-1] == ',') && (tmp_str[i] == ',')) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str," ,"); + while (tok != NULL && i < 16) + { + p_current_sl2vl_scope->sl2vl_table[i++] = atoi(tok); + tok = strtok (NULL, " ,"); + } + if (tok != NULL) + { + yyerror("wrong syntax in "); + return 1; + } + } + break; + + case 124: +#line 882 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of 16 numbers */ + RESET_BUFFER; + } + break; + + case 125: +#line 888 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->sn = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 126: +#line 894 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + break; + + case 127: +#line 901 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->sl = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 128: +#line 907 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + break; + + case 129: +#line 914 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->mtu_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 130: +#line 920 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + break; + + case 131: +#line 927 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->rate_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 132: +#line 933 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + break; + + case 133: +#line 940 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + p_current_qos_level->class = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 134: +#line 946 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + break; + + case 135: +#line 953 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + p_current_vlarb_scope->vl_high_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 136: +#line 959 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + break; + + case 137: +#line 968 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( 
(tmp_str[i-1] == ',' || tmp_str[i-1] == ':') && + (tmp_str[i] == ',' || tmp_str[i] == ':') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str,":,"); + while (tok != NULL) + { + p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t)); + p_hl_item->sl = atoi(tok); + tok = strtok (NULL, ":,"); + p_hl_item->credits = atoi(tok); + tok = strtok (NULL, ":,"); + cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_highs, + p_hl_item, + NULL); + } + + } + break; + + case 138: +#line 1002 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + break; + + case 139: +#line 1011 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i-1] == ',' || tmp_str[i-1] == ':') && + (tmp_str[i] == ',' || tmp_str[i] == ':') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str,":,"); + while (tok != NULL) + { + p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t)); + p_hl_item->sl = atoi(tok); + tok = strtok (NULL, ":,"); + p_hl_item->credits = atoi(tok); + tok = strtok (NULL, ":,"); + cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_lows, + p_hl_item, + NULL); + } + + } + break; + + case 140: +#line 1045 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + break; + + case 141: +#line 1052 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + p_current_qos_match_rule->source = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 142: +#line 1058 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 143: +#line 1065 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + p_current_qos_match_rule->destination = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 144: +#line 1071 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 145: +#line 1080 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - "7-9,11" */ + uint32_t i; + uint32_t j; + uint32_t range_low; + uint32_t range_high; + char * dash_ptr = NULL; + char * tok = NULL; + char * tokens[10000] = {NULL}; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_uint32_vector_item_t * p_uint32_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i] == ',' || tmp_str[i] == '-') && + (tmp_str[i-1] == ',' || tmp_str[i-1] == '-') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line with ',' being a delimiter */ + tok = strtok (tmp_str," ,"); + for (i = 0; tok != NULL; tok = strtok (NULL, " ,")) + tokens[i++] = tok; + + for (i = 0; tokens[i] != NULL; i++) + { + dash_ptr = strstr(tokens[i],"-"); + if (dash_ptr != NULL) + { + /* this is number range: i-j */ + *dash_ptr = '\0'; + range_low = atoi(tokens[i]); + *dash_ptr = '-'; + range_high = atoi(dash_ptr + 1); + if (range_low > range_high) + { + uint32_t tmp_num = range_low; + range_low = range_high; + range_high = tmp_num; + } + for (j = range_low; j <= range_high; j++) + { + p_uint32_item = (osm_qos_uint32_vector_item_t *) + 
malloc(sizeof(osm_qos_uint32_vector_item_t)); + p_uint32_item->value = j; + cl_ptr_vector_insert(&p_current_qos_match_rule->classes, + p_uint32_item, + NULL); + } + } + else + { + /* this is a single number */ + p_uint32_item = (osm_qos_uint32_vector_item_t *) + malloc(sizeof(osm_qos_uint32_vector_item_t)); + p_uint32_item->value = atoi(tokens[i]); + cl_ptr_vector_insert(&p_current_qos_match_rule->classes, + p_uint32_item, + NULL); + } + } + } + break; + + case 146: +#line 1146 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 147: +#line 1154 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of ',' delimited numbers */ + uint32_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_uint32_vector_item_t * p_uint32_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ((tmp_str[i-1] == ',') && (tmp_str[i] == ',')) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + tok = strtok (tmp_str," ,"); + while (tok != NULL) + { + p_uint32_item = (osm_qos_uint32_vector_item_t *)malloc(sizeof(osm_qos_uint32_vector_item_t)); + p_uint32_item->value = atoi(tok); + cl_ptr_vector_insert(&p_current_qos_match_rule->services, + p_uint32_item, + NULL); + tok = strtok (NULL, " ,"); + } + + } + break; + + case 148: +#line 1184 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of ',' delimited numbers */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 149: +#line 1191 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - single number */ + + osm_qos_uint32_vector_item_t * p_uint32_item = + (osm_qos_uint32_vector_item_t *)malloc(sizeof(osm_qos_uint32_vector_item_t)); + + p_uint32_item->value = atoi(__osm_qos_parser_strip_white(str_buffer)); + 
cl_ptr_vector_insert(&p_current_qos_match_rule->qos_level_sns, + p_uint32_item, + NULL); + } + break; + + case 150: +#line 1204 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - single number */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 155: +#line 1224 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 156: +#line 1228 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 157: +#line 1232 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 158: +#line 1236 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 159: +#line 1240 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 177: +#line 1269 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 178: +#line 1275 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 179: +#line 1281 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 182: +#line 1291 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 183: +#line 1297 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 185: +#line 1304 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* swallow whitespace */ + free(yyvsp[0]); + } + 
break; + + + } + +/* Line 991 of yacc.c. */ +#line 2491 "y.tab.c" + + yyvsp -= yylen; + yyssp -= yylen; + + + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + + /* Now `shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*------------------------------------. +| yyerrlab -- here on detecting error | +`------------------------------------*/ +yyerrlab: + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if YYERROR_VERBOSE + yyn = yypact[yystate]; + + if (YYPACT_NINF < yyn && yyn < YYLAST) + { + YYSIZE_T yysize = 0; + int yytype = YYTRANSLATE (yychar); + char *yymsg; + int yyx, yycount; + + yycount = 0; + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. */ + for (yyx = yyn < 0 ? -yyn : 0; + yyx < (int) (sizeof (yytname) / sizeof (char *)); yyx++) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) + yysize += yystrlen (yytname[yyx]) + 15, yycount++; + yysize += yystrlen ("syntax error, unexpected ") + 1; + yysize += yystrlen (yytname[yytype]); + yymsg = (char *) YYSTACK_ALLOC (yysize); + if (yymsg != 0) + { + char *yyp = yystpcpy (yymsg, "syntax error, unexpected "); + yyp = yystpcpy (yyp, yytname[yytype]); + + if (yycount < 5) + { + yycount = 0; + for (yyx = yyn < 0 ? -yyn : 0; + yyx < (int) (sizeof (yytname) / sizeof (char *)); + yyx++) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) + { + const char *yyq = ! yycount ? 
", expecting " : " or "; + yyp = yystpcpy (yyp, yyq); + yyp = yystpcpy (yyp, yytname[yyx]); + yycount++; + } + } + yyerror (yymsg); + YYSTACK_FREE (yymsg); + } + else + yyerror ("syntax error; also virtual memory exhausted"); + } + else +#endif /* YYERROR_VERBOSE */ + yyerror ("syntax error"); + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + /* Return failure if at end of input. */ + if (yychar == YYEOF) + { + /* Pop the error token. */ + YYPOPSTACK; + /* Pop the rest of the stack. */ + while (yyss < yyssp) + { + YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); + yydestruct (yystos[*yyssp], yyvsp); + YYPOPSTACK; + } + YYABORT; + } + + YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc); + yydestruct (yytoken, &yylval); + yychar = YYEMPTY; + + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab2; + + +/*----------------------------------------------------. +| yyerrlab1 -- error raised explicitly by an action. | +`----------------------------------------------------*/ +yyerrlab1: + + /* Suppress GCC warning that yyerrlab1 is unused when no action + invokes YYERROR. */ +#if defined (__GNUC_MINOR__) && 2093 <= (__GNUC__ * 1000 + __GNUC_MINOR__) \ + && !defined __cplusplus + __attribute__ ((__unused__)) +#endif + + + goto yyerrlab2; + + +/*---------------------------------------------------------------. +| yyerrlab2 -- pop states until the error token can be shifted. | +`---------------------------------------------------------------*/ +yyerrlab2: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (yyn != YYPACT_NINF) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. 
*/ + if (yyssp == yyss) + YYABORT; + + YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); + yydestruct (yystos[yystate], yyvsp); + yyvsp--; + yystate = *--yyssp; + + YY_STACK_PRINT (yyss, yyssp); + } + + if (yyn == YYFINAL) + YYACCEPT; + + YYDPRINTF ((stderr, "Shifting error token, ")); + + *++yyvsp = yylval; + + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#ifndef yyoverflow +/*----------------------------------------------. +| yyoverflowlab -- parser overflow comes here. | +`----------------------------------------------*/ +yyoverflowlab: + yyerror ("parser stack overflow"); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif + return yyresult; +} + + +#line 1310 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + + +/*************************************************** + ***************************************************/ + +int osm_qos_parse_policy_file( + IN osm_log_t * p_log, + IN const char * policy_file, + OUT osm_qos_parse_tree_t ** pp_qos_parse_tree) +{ + int res = 0; + p_osm_log = p_log; + + //~ OSM_LOG_ENTER(p_osm_log, osm_qos_parse); + + *pp_qos_parse_tree = NULL; + yyin = fopen (policy_file, "r"); + if (!yyin) + { + //~ osm_log(p_osm_log, OSM_LOG_ERROR, + //~ "osm_qos_parse: ERR AC01: " + //~ "Failed opening QoS policy file (%s)\n", + //~ policy_file); + printf( "osm_qos_parse: ERR AC01: " + "Failed opening QoS policy file (%s)\n", + policy_file); + res = 1; + goto Exit; + } + column_num = 1; + line_num = 1; + __osm_qos_parse_tree_init(); + str_buffer[0] = '\0'; + + res = yyparse(); + + if (res == 0) + { + 
*pp_qos_parse_tree = p_qos_parse_tree; + } + else + { + //~ osm_log(p_osm_log, OSM_LOG_ERROR, + //~ "osm_qos_parse: ERR AC02: " + //~ "Failed parsing QoS policy file (%s)\n", + //~ policy_file); + printf( "osm_qos_parse: ERR AC02: " + "Failed parsing QoS policy file (%s)\n", + policy_file); + res = 1; + goto Exit; + } + + Exit: + if (yyin) + fclose(yyin); + //~ OSM_LOG_EXIT(p_osm_log); + return res; +} + +/*************************************************** + ***************************************************/ + +int yywrap() +{ + return(1); +} + +/*************************************************** + ***************************************************/ + +void yyerror (char *s) +{ + //~ OSM_LOG_ENTER(p_osm_log, yyerror); + //~ osm_log(p_osm_log, OSM_LOG_ERROR, + //~ "yyerror: ERR AC03: " + //~ "Syntax error (line %d:%d): %s. " + //~ "Last text read: \"%s\"\n", + //~ line_num, column_num, s, __osm_qos_parser_strip_white(yytext)); + printf( "yyerror: ERR AC03: " + "Syntax error (line %d:%d): %s. 
" + "Last text read: \"%s\"\n", + line_num, column_num, s, __osm_qos_parser_strip_white(yytext)); + //~ OSM_LOG_EXIT(p_osm_log); +} + +/*************************************************** + ***************************************************/ + +static char * __osm_qos_parser_strip_white(char * str) +{ + int i; + for (i = (strlen(str)-1); i >= 0; i--) + { + if (isspace(str[i])) + str[i] = '\0'; + else + break; + } + for (i = 0; i < strlen(str); i++) + { + if (!isspace(str[i])) + break; + } + return &(str[i]); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str) +{ +#if __WORDSIZE == 64 + *p_val = strtoul(str, NULL, 0); +#else + *p_val = strtoull(str, NULL, 0); +#endif +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parse_tree_init() +{ + p_qos_parse_tree = (osm_qos_parse_tree_t *) + malloc(sizeof(osm_qos_parse_tree_t)); + + memset(p_qos_parse_tree, 0, sizeof(osm_qos_parse_tree_t)); + + cl_ptr_vector_init(&p_qos_parse_tree->port_groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->sl2vl_tables, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->vlarb_tables, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->qos_levels, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->qos_match_rules, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_start() +{ + p_current_port_group = (osm_qos_port_group_t *) + malloc(sizeof(osm_qos_port_group_t)); + memset(p_current_port_group, 0, sizeof(osm_qos_port_group_t)); + + cl_ptr_vector_init(&p_current_port_group->port_guids, + 0, /* min size 
*/ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->port_names, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->partitions, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->node_types, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->port_groups, + p_current_port_group, + NULL); + p_current_port_group = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_start() +{ + p_current_sl2vl_scope = (osm_qos_sl2vl_scope_t *) + malloc(sizeof(osm_qos_sl2vl_scope_t)); + memset(p_current_sl2vl_scope, 0, sizeof(osm_qos_sl2vl_scope_t)); + + cl_ptr_vector_init(&p_current_sl2vl_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->to, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_to, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->sl2vl_tables, + p_current_sl2vl_scope, + NULL); + p_current_sl2vl_scope = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_start() +{ + p_current_vlarb_scope = (osm_qos_vlarb_scope_t *) + malloc(sizeof(osm_qos_vlarb_scope_t)); + memset(p_current_vlarb_scope, 0, 
sizeof(osm_qos_vlarb_scope_t)); + + cl_ptr_vector_init(&p_current_vlarb_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->across, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_highs, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_lows, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->vlarb_tables, + p_current_vlarb_scope, + NULL); + p_current_vlarb_scope = NULL; +} + + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_start() +{ + p_current_qos_level = (osm_qos_level_t *) + malloc(sizeof(osm_qos_level_t)); + memset(p_current_qos_level, 0, sizeof(osm_qos_level_t)); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->qos_levels, + p_current_qos_level, + NULL); + p_current_qos_level = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_match_rule_start() +{ + p_current_qos_match_rule = (osm_qos_match_rule_t *) + malloc(sizeof(osm_qos_match_rule_t)); + memset(p_current_qos_match_rule, 0, sizeof(osm_qos_match_rule_t)); + + cl_ptr_vector_init(&p_current_qos_match_rule->services, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_qos_match_rule->qos_level_sns, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_qos_match_rule->classes, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + 
***************************************************/ + +static void __osm_qos_parser_match_rule_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->qos_match_rules, + p_current_qos_match_rule, + NULL); + p_current_qos_match_rule = NULL; +} + +/*************************************************** + ***************************************************/ + +/* free all the memory occupied by the parse tree data structure */ +void osm_qos_parser_destroy_parse_tree( + IN osm_qos_parse_tree_t * p_qos_parse_tree) +{ + uint32_t i; + uint32_t j; + + osm_qos_port_group_t * p_port_group = NULL; + osm_qos_sl2vl_scope_t * p_sl2vl_scope = NULL; + osm_qos_vlarb_scope_t * p_vlarb_scope = NULL; + osm_qos_level_t * p_qos_level = NULL; + osm_qos_match_rule_t * p_qos_match_rule = NULL; + + osm_qos_string_vector_item_t * p_str_vector_item = NULL; + osm_qos_uint64_vector_item_t * p_uint64_vector_item = NULL; + osm_qos_uint32_vector_item_t * p_uint32_vector_item = NULL; + osm_qos_vlarb_hl_vector_item_t * p_vlarb_hl_vector_item = NULL; + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->port_groups); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->port_groups, i, (void **)&p_port_group); + + if (p_port_group->name) + free(p_port_group->name); + if (p_port_group->use) + free(p_port_group->use); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->port_guids); j++) + { + cl_ptr_vector_at(&p_port_group->port_guids, j, (void **)&p_uint64_vector_item); + free(p_uint64_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->port_guids); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->port_names); j++) + { + cl_ptr_vector_at(&p_port_group->port_names, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->port_names); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->partitions); j++) + { + cl_ptr_vector_at(&p_port_group->partitions, j, (void 
**)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->partitions); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->node_types); j++) + { + cl_ptr_vector_at(&p_port_group->node_types, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->node_types); + + free(p_port_group); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->port_groups); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->sl2vl_tables); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->sl2vl_tables, i, (void **)&p_sl2vl_scope); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->groups); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->groups, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->groups); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->from); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->from, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->from); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->to); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->to, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->to); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->across_from); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->across_from, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->across_from); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->across_to); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->across_to, j, (void 
**)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->across_to); + + free(p_sl2vl_scope); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->sl2vl_tables); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->vlarb_tables); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->vlarb_tables, i, (void **)&p_vlarb_scope); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->groups); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->groups, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->groups); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->across); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->across, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->across); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->vlarb_highs); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->vlarb_highs, j, (void **)&p_vlarb_hl_vector_item); + free(p_vlarb_hl_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->vlarb_highs); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->vlarb_lows); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->vlarb_lows, j, (void **)&p_vlarb_hl_vector_item); + free(p_vlarb_hl_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->vlarb_lows); + + free(p_vlarb_scope); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->vlarb_tables); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_levels); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->qos_levels, i, (void **)&p_qos_level); + if (p_qos_level->use) + free(p_qos_level->use); + free(p_qos_level); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->qos_levels); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_match_rules); i++) + { + 
cl_ptr_vector_at(&p_qos_parse_tree->qos_match_rules, i, (void **)&p_qos_match_rule); + + if (p_qos_match_rule->use) + free(p_qos_match_rule->use); + if (p_qos_match_rule->source) + free(p_qos_match_rule->source); + if (p_qos_match_rule->destination) + free(p_qos_match_rule->destination); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->services); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->services, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->services); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->qos_level_sns); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->qos_level_sns, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->qos_level_sns); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->classes); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->classes, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->classes); + + free(p_qos_match_rule); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->qos_match_rules); + + free(p_qos_parse_tree); +} + +/*************************************************** + ***************************************************/ + -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Wed Jan 17 07:22:27 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 17 Jan 2007 17:22:27 +0200 Subject: [openib-general] [PATCH 5/6] osm: QoS parser header file that defines parse tree data structures Message-ID: <45AE3F33.5080509@dev.mellanox.co.il> Hi Hal This patch is a QoS parser header file that defines parse tree data structures. 
Signed-off-by: Yevgeny Kliteynik --- osm/include/opensm/osm_qos_parser.h | 172 +++++++++++++++++++++++++++++++++++ 1 files changed, 172 insertions(+), 0 deletions(-) diff --git a/osm/include/opensm/osm_qos_parser.h b/osm/include/opensm/osm_qos_parser.h new file mode 100644 index 0000000..4e27c8c --- /dev/null +++ b/osm/include/opensm/osm_qos_parser.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of OSM QoS parser. 
+ * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#ifndef OSM_QOS_PARSER_H +#define OSM_QOS_PARSER_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#define YYSTYPE char * + +#define OSM_QOS_NODE_TYPE_CA "CA" +#define OSM_QOS_NODE_TYPE_SWITCH "SWITCH" +#define OSM_QOS_NODE_TYPE_ROUTER "ROUTER" + +typedef enum {UNDEF, ROUTER, CA, SWITCH} node_type_t; + +/***************************************************/ + +typedef struct osm_qos_string_vector_item_t_ { + cl_map_item_t map_item; + char * str; +} osm_qos_string_vector_item_t; + +typedef struct osm_qos_uint64_vector_item_t_ { + cl_map_item_t map_item; + uint64_t value; +} osm_qos_uint64_vector_item_t; + +typedef struct osm_qos_uint32_vector_item_t_ { + cl_map_item_t map_item; + uint32_t value; +} osm_qos_uint32_vector_item_t; + +typedef struct osm_qos_vlarb_hl_vector_item_t_ { + cl_map_item_t map_item; + uint32_t sl; + uint32_t credits; +} osm_qos_vlarb_hl_vector_item_t; + +/***************************************************/ + +typedef struct osm_qos_port_group_t_ { + cl_map_item_t map_item; + char * name; + char * use; + cl_ptr_vector_t port_guids; /* vector of uint64_t */ + cl_ptr_vector_t port_names; /* vector of string */ + cl_ptr_vector_t partitions; /* vector of string */ + cl_ptr_vector_t node_types; /* vector of uint8_t */ +} osm_qos_port_group_t; + +/***************************************************/ + +typedef struct osm_qos_sl2vl_scope_t_ { + cl_map_item_t map_item; + cl_ptr_vector_t groups; /* vector of string */ + cl_ptr_vector_t from; /* vector of string */ + cl_ptr_vector_t to; /* vector of string */ + cl_ptr_vector_t across_from; /* vector of string */ + cl_ptr_vector_t across_to; /* vector of string */ + uint8_t sl2vl_table[16]; +} osm_qos_sl2vl_scope_t; + +/***************************************************/ + +typedef struct osm_qos_vlarb_scope_t_ { + cl_map_item_t map_item; + cl_ptr_vector_t groups; /* vector 
of string */ + cl_ptr_vector_t across; /* vector of string */ + cl_ptr_vector_t vlarb_highs; /* vector of string */ + cl_ptr_vector_t vlarb_lows; /* vector of string */ + uint32_t vl_high_limit; +} osm_qos_vlarb_scope_t; + +/***************************************************/ + +typedef struct osm_qos_level_t_ { + cl_map_item_t map_item; + char * use; + uint32_t sn; + uint32_t sl; + uint32_t mtu_limit; + uint32_t rate_limit; + uint32_t class; +} osm_qos_level_t; + +/***************************************************/ + +typedef struct osm_qos_match_rule_t_ { + cl_map_item_t map_item; + char * use; + char * source; + char * destination; + cl_ptr_vector_t services; /* vector of uint32 */ + cl_ptr_vector_t qos_level_sns; /* vector of uint32 */ + cl_ptr_vector_t classes; /* vector of uint32 */ +} osm_qos_match_rule_t; + +/***************************************************/ + +typedef struct osm_qos_parse_tree_t_ { + cl_map_item_t map_item; + cl_ptr_vector_t port_groups; /* vector of osm_qos_port_group_t */ + cl_ptr_vector_t sl2vl_tables; /* vector of osm_qos_sl2vl_scope_t */ + cl_ptr_vector_t vlarb_tables; /* vector of osm_qos_vlarb_scope_t */ + cl_ptr_vector_t qos_levels; /* vector of osm_qos_level_t */ + cl_ptr_vector_t qos_match_rules; /* vector of osm_qos_match_rule_t */ +} osm_qos_parse_tree_t; + +/***************************************************/ + +int osm_qos_parse_policy_file( + IN osm_log_t * p_log, + IN const char * policy_file, + OUT osm_qos_parse_tree_t ** pp_qos_parse_tree); + +void osm_qos_parser_destroy_parse_tree( + IN osm_qos_parse_tree_t * p_qos_parse_tree); + +/***************************************************/ + +#endif /* ifndef OSM_QOS_PARSER_H */ -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Wed Jan 17 07:25:27 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 17 Jan 2007 17:25:27 +0200 Subject: [openib-general] [PATCH 6/6] osm: Compiling new QoS parser files Message-ID: <45AE3FE7.4070601@dev.mellanox.co.il> Hi 
Hal Patch for compiling new QoS parser files. Signed-off-by: Yevgeny Kliteynik --- osm/include/Makefile.am | 2 ++ osm/opensm/Makefile.am | 16 +++++++++++++++- osm/opensm/configure.in | 4 ++++ 3 files changed, 21 insertions(+), 1 deletions(-) diff --git a/osm/include/Makefile.am b/osm/include/Makefile.am index d6bdd84..62db6c2 100644 --- a/osm/include/Makefile.am +++ b/osm/include/Makefile.am @@ -118,6 +118,8 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_drop_mgr.h \ $(srcdir)/opensm/osm_port_info_rcv.h \ $(srcdir)/opensm/osm_state_mgr_ctrl.h \ + $(srcdir)/opensm/osm_qos_parser.h \ + $(srcdir)/opensm/osm_qos_parser_y.h \ $(srcdir)/complib/cl_thread_osd.h \ $(srcdir)/complib/cl_packon.h \ $(srcdir)/complib/cl_atomic_osd.h \ diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am index dd6dbae..7fff9d7 100644 --- a/osm/opensm/Makefile.am +++ b/osm/opensm/Makefile.am @@ -71,7 +71,21 @@ opensm_SOURCES = main.c osm_console.c os osm_ucast_mgr.c osm_ucast_updn.c \ osm_ucast_file.c osm_ucast_ftree.c \ osm_vl15intf.c osm_vl_arb_rcv.c \ - osm_vl_arb_rcv_ctrl.c st.c + osm_vl_arb_rcv_ctrl.c st.c \ + osm_qos_parser_y.c osm_qos_parser_l.c + +osm_qos_parser_y.c: @MAINTAINER_MODE_TRUE@ $(srcdir)/osm_qos_parser.y + $(YACC) -y -d $(srcdir)/osm_qos_parser.y + mv y.tab.c osm_qos_parser_y.c + mv y.tab.h osm_qos_parser_y.h + cp -f osm_qos_parser_y.c $(srcdir)/ + cp -f osm_qos_parser_y.h $(srcdir)/../include/opensm/ + +osm_qos_parser_l.c: @MAINTAINER_MODE_TRUE@ $(srcdir)/osm_qos_parser.l + $(LEX) $(srcdir)/osm_qos_parser.l + mv lex.yy.c osm_qos_parser_l.c + cp -f osm_qos_parser_l.c $(srcdir)/ + if OSMV_OPENIB opensm_CFLAGS = -Wall $(OSMV_CFLAGS) -fno-strict-aliasing -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1 opensm_CXXFLAGS = -Wall $(OSMV_CFLAGS) -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1 diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in index cecf932..496e806 100644 --- 
a/osm/opensm/configure.in +++ b/osm/opensm/configure.in @@ -14,6 +14,8 @@ if test -z $opensm_api_version; then fi AC_SUBST(opensm_api_version) +AM_MAINTAINER_MODE + dnl Checks for programs AC_PROG_CXX AC_PROG_CC @@ -22,6 +24,8 @@ AC_PROG_INSTALL AC_PROG_LN_S AC_PROG_MAKE_SET AC_PROG_LIBTOOL +AM_PROG_LEX +AC_PROG_YACC dnl Checks for libraries -- 1.4.4.1.GIT From vlad at dev.mellanox.co.il Wed Jan 17 07:37:29 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 17 Jan 2007 17:37:29 +0200 Subject: [openib-general] OpenIB Wiki pages updated Message-ID: <45AE42B9.4020602@dev.mellanox.co.il> Hi, The following links were added to https://wiki.openfabrics.org/tiki-index.php: OFED 1.2 HowTo HOWTO Build OFA user package HOWTO Build OFA kernel package HOWTO add userspace package to ofa_user Regards, Vladimir From mst at mellanox.co.il Wed Jan 17 08:00:47 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 18:00:47 +0200 Subject: [openib-general] [PATCH] IB/ipoib_cm: reduce MTU on connected->datagram mode change Message-ID: <20070117160047.GA10889@mellanox.co.il> UD mode requires MTU <= broadcast group MTU, make sure MTU is valid when switching back from connected mode. Signed-off-by: Michael S. Tsirkin --- Incremental patch on top of the IPoIB CM. Pls apply in for-mm. 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index bb6a9b1..43500fd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1127,6 +1127,7 @@ static ssize_t set_mode(struct class_device *cdev, if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + dev->mtu = min(priv->mcast_mtu, dev->mtu); ipoib_flush_paths(dev); return count; } -- MST From swise at opengridcomputing.com Wed Jan 17 08:16:32 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Wed, 17 Jan 2007 10:16:32 -0600 Subject: [openib-general] [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 In-Reply-To: <20070116084257.GT22940@mellanox.co.il> References: <20070115211917.10511.83814.stgit@dell3.ogc.int> <20070116084257.GT22940@mellanox.co.il> Message-ID: <1169050593.27530.0.camel@linux-q667.site> > BTW, Steve, I wouldn't start working on backports from SLES9. > I'd start with 2.6.19 and go back to 2.6.11 over kernel.org versions, > just making sure they build (we have this build environment on openfabrics.org, > or its easy to check all versions out from kernel.org git). > In this way you get incrementtall a host of data on what is and what isn't in > which kernel version. > > Then adding support for a distro is just a matter of finding a closest > kernel.org kernel and mostly removing stuff that distro backported from newer kernels. > Michael, Is there something magic about stopping at 2.6.11? 
From robert.j.woodruff at intel.com Wed Jan 17 09:41:39 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Wed, 17 Jan 2007 09:41:39 -0800 Subject: [openib-general] OpenIB Wiki pages updated In-Reply-To: <45AE42B9.4020602@dev.mellanox.co.il> Message-ID: Vladimir wrote, >Hi, >The following links were added to https://wiki.openfabrics.org/tiki-index.php: >OFED 1.2 HowTo >HOWTO Build OFA user package >HOWTO Build OFA kernel package >HOWTO add userspace package to ofa_user >Regards, >Vladimir How do I make it build the rdma_cm and rdma_ucm, there appears to be no configure option in the ./configure script of the daily builds even though the code is in the core directory. ( I tried the 2/17/2007 kernel daily build). woody _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From raleigh at systemfabricworks.com Wed Jan 17 09:48:17 2007 From: raleigh at systemfabricworks.com (Raleigh F Rinehart) Date: Wed, 17 Jan 2007 11:48:17 -0600 Subject: [openib-general] madeye Message-ID: <45AE6161.40300@systemfabricworks.com> I'm trying to use madeye in OFED 1.1 Release to do some debugging but it does not seem to be present. I cracked open src tarball and all the right bits seem to be there (Kconfig, makefile, src) but it doesn't seem to get built and installed as part of the normal installation procedure (running install.sh). Has anyone had any success at building, installing and using madeye in a release version of OFED? 
thanks, -raleigh cat /usr/local/ofed/BUILD_ID OFED-1.1 openib-1.1 (REV=9905) # User space https://openib.org/svn/gen2/branches/1.1/src/userspace Git: ref: refs/heads/ofed_1_1 commit a083ec1174cb4b5a5052ef5de9a8175df82e864a # MPI mpi_osu-0.9.7-mlx2.2.0.tgz openmpi-1.1.1-1.src.rpm mpitests-2.0-0.src.rpm uname -a Linux merrill2 2.6.16.21-0.8-smp #1 SMP Mon Jul 3 18:25:39 UTC 2006 x86_64 x86_64 x86_64 GNU/Linux cat /etc/SuSE-release SUSE Linux Enterprise Server 10 (x86_64) VERSION = 10 From halr at voltaire.com Wed Jan 17 09:44:55 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 17 Jan 2007 12:44:55 -0500 Subject: [openib-general] nightly osm_sim report 2007-01-15:normal completion In-Reply-To: <200701150517.l0F5HYYB014461@sw053.yok.mtl.com> References: <200701150517.l0F5HYYB014461@sw053.yok.mtl.com> Message-ID: <1169055887.31746.505.camel@hal.voltaire.com> Hi Eitan, On Mon, 2007-01-15 at 00:17, Eitan Zahavi wrote: > OSM Simulation Regression Summary > OpenSM rev = Sat_Jan_13_09:43:14_2007 1f8015 > ibutils rev = Wed_Jan_3_11:42:12_2007 913448 > Total=410 Pass=409 Fail=1 > > Pass: > 30 Stability IS1-16.topo > 30 Pkey IS1-16.topo > 30 OsmTest IS1-16.topo > 30 OsmStress IS1-16.topo > 30 Multicast IS1-16.topo > 30 LidMgr IS1-16.topo > 10 Stability IS3-loop.topo > 10 Stability IS3-128.topo > 10 Pkey IS3-128.topo > 10 OsmTest IS3-loop.topo > 10 OsmTest IS3-128.topo > 10 Multicast IS3-loop.topo > 10 Multicast IS3-128.topo > 10 LidMgr IS3-128.topo > 10 FatTree part-4-ary-3-tree.topo > 10 FatTree merge-roots-reorder-4-ary-2-tree.topo > 10 FatTree merge-roots-4-ary-2-tree.topo > 10 FatTree merge-root-4-ary-3-tree.topo > 10 FatTree merge-root-12-ary-2-tree.topo > 10 FatTree merge-2-ary-4-tree.topo > 10 FatTree half-4-ary-3-tree.topo > 10 FatTree blend-4-ary-2-tree.topo > 10 FatTree 4-ary-4-tree.topo > 10 FatTree 4-ary-3-tree.topo > 10 FatTree 32nodes-3lvl-is1.topo > 10 FatTree 2-ary-4-tree.topo > 10 FatTree 12-node-spaced.topo > 10 FatTree 12-ary-2-tree.topo > 9 OsmStress 
IS3-128.topo > > Failures: > 1 OsmStress IS3-128.topo Any idea on this failure from a couple of days ago ? Will the other previous failures still be investigated at some point ? -- Hal From mshefty at ichips.intel.com Wed Jan 17 09:49:39 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 17 Jan 2007 09:49:39 -0800 Subject: [openib-general] multicast code/merge status In-Reply-To: <45AE01D9.3000107@voltaire.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <20070104225319.GI4155@mellanox.co.il> <45A396AC.3050306@voltaire.com> <45A3D9BB.2080408@ichips.intel.com> <45A49174.5040009@voltaire.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> <45AD0D71.9040705@ichips.intel.com> <45AE01D9.3000107@voltaire.com> Message-ID: <45AE61B3.3010002@ichips.intel.com> > +1 used only for unicast > +2 used only for multicast > +3 used for both unicast and multicast If you view this as the use case for one side only, we also have option 3 communicating with options 1 and 2. I would list these as: +4 unicast QP to unicast and multicast QP +5 multicast QP to unicast and multicast QP Today, all of these work. What you're wanting to add is the ability to communicate with an ipoib multicast group. I'd like to do this without breaking any of the existing communications, or treat ipoib separately for security reasons. > To make things simple, the solution i suggest is that that the RDMA CM > would --not-- do this modify QP/QKEY (that is would set the 0x12345678 > qkey on the modify qp to init) and rather leave it to the RDMA CM > consumer --if-- they wish to do so. However it will use the ipv4 > broadcast group qkey for doing mcast joins and report this qkey to the > user in the ud param of the event. We need to be able to handle options 4 and 5 as well. 
> this (what qkey is assigned to the ipv4 broadcast group by different > SAs) is orthogonal to the discussion we do here. This depends on whether verbs allows, or if it should allow, a user to specify a controlled qkey when configuring their QP. - Sean From sweitzen at cisco.com Wed Jan 17 09:54:45 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Wed, 17 Jan 2007 09:54:45 -0800 Subject: [openib-general] madeye In-Reply-To: <45AE6161.40300@systemfabricworks.com> Message-ID: It's not well integrated into install.sh, you have to run: OPENIB_PARAMS="--with-madeye-mod" ./install.sh Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of > Raleigh F Rinehart > Sent: Wednesday, January 17, 2007 9:48 AM > To: openib-general at openib.org > Subject: [openib-general] madeye > > I'm trying to use madeye in OFED 1.1 Release to do some > debugging but it > does not seem to be present. I cracked open src tarball and all the > right bits seem to be there (Kconfig, makefile, src) but it > doesn't seem > to get built and installed as part of the normal installation > procedure > (running install.sh). Has anyone had any success at building, > installing and using madeye in a release version of OFED? 
> > thanks, > -raleigh > > > cat /usr/local/ofed/BUILD_ID > OFED-1.1 > > openib-1.1 (REV=9905) > # User space > https://openib.org/svn/gen2/branches/1.1/src/userspace > Git: > ref: refs/heads/ofed_1_1 > commit a083ec1174cb4b5a5052ef5de9a8175df82e864a > > # MPI > mpi_osu-0.9.7-mlx2.2.0.tgz > openmpi-1.1.1-1.src.rpm > mpitests-2.0-0.src.rpm > > uname -a > Linux merrill2 2.6.16.21-0.8-smp #1 SMP Mon Jul 3 18:25:39 UTC 2006 > x86_64 x86_64 x86_64 GNU/Linux > > cat /etc/SuSE-release > SUSE Linux Enterprise Server 10 (x86_64) > VERSION = 10 > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From swise at opengridcomputing.com Wed Jan 17 10:02:47 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Wed, 17 Jan 2007 12:02:47 -0600 Subject: [openib-general] OpenIB Wiki pages updated In-Reply-To: References: Message-ID: <1169056968.27530.7.camel@linux-q667.site> try --with-addr_trans-mod On Wed, 2007-01-17 at 09:41 -0800, Woodruff, Robert J wrote: > Vladimir wrote, > >Hi, > >The following links were added to > https://wiki.openfabrics.org/tiki-index.php: > > >OFED 1.2 HowTo > >HOWTO Build OFA user package > >HOWTO Build OFA kernel package > >HOWTO add userspace package to ofa_user > > > >Regards, > >Vladimir > > How do I make it build the rdma_cm and rdma_ucm, there appears to be no > configure > option in the ./configure script of the daily builds even though the > code is in > the core directory. ( I tried the 2/17/2007 kernel > daily build). 
> > woody > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From eeb at bartonsoftware.com Wed Jan 17 10:07:44 2007 From: eeb at bartonsoftware.com (Eric Barton) Date: Wed, 17 Jan 2007 18:07:44 -0000 Subject: [openib-general] SDP performance Message-ID: <048f01c73a62$6333e5a0$0281a8c0@ebpc> Hi, Does anyone have any performance measurements of SDP over OpenFabrics? Can any of the SDP developers confirm that SDP can be zero-copy both on sending and receiving? If so, are there tunables that affect when to copy and when not? Cheers, Eric --------------------------------------------------- |Eric Barton Barton Software | |9 York Gardens Tel: +44 (117) 330 1575 | |Clifton Mobile: +44 (7909) 680 356 | |Bristol BS8 4LL Fax: call first | |United Kingdom E-Mail: eeb at bartonsoftware.com| --------------------------------------------------- From or.gerlitz at gmail.com Wed Jan 17 10:08:42 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 17 Jan 2007 20:08:42 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <20070117133924.GH13720@mellanox.co.il> References: <45AE1A45.2060005@voltaire.com> <20070117133924.GH13720@mellanox.co.il> Message-ID: <15ddcffd0701171008xf73c79fp5052cad9177d1804@mail.gmail.com> On 1/17/07, Michael S. Tsirkin wrote: > > not following you here, how does qkey relates to RC QPs ? > Currently you can block userspace from creating QPs by unloading uverbs module. > Maybe we should make it possible to block creating UD QPs from userspace > as a separate security measure. 
I don't think this is a valid option for most of the IB production env. But if you want to add blocking UD QP creation to ib_uverbs as a mod param whose default value is --unset--, I don't really care. Or. From or.gerlitz at gmail.com Wed Jan 17 10:18:29 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 17 Jan 2007 20:18:29 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45AE61B3.3010002@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> <45AD0D71.9040705@ichips.intel.com> <45AE01D9.3000107@voltaire.com> <45AE61B3.3010002@ichips.intel.com> Message-ID: <15ddcffd0701171018i14069211v6d7ee653e32b3e6a@mail.gmail.com> On 1/17/07, Sean Hefty wrote: > > +1 used only for unicast > > +2 used only for multicast > > +3 used for both unicast and multicast > If you view this as the use case for one side only, we also have option 3 > communicating with options 1 and 2. I would list these as: OK > +4 unicast QP to unicast and multicast QP I think you mean 3 <--> 1, that is, unicast and multicast QP to unicast QP > +5 multicast QP to unicast and multicast QP I think you mean 3 <--> 2, that is, unicast and multicast QP to multicast QP > Today, all of these work. What you're wanting to add is the ability to > communicate with an ipoib multicast group. I'd like to do this without breaking > any of the existing communications, or treat ipoib separately for security reasons. Makes sense, so my suggestion is "leave this (using the ipoib qkey) to the user". If you prefer to have two group types: rdmacm and ipoib - that's fine.
we would use ipoib type groups and in the envs that seting the qkey to be the ipoib would not break our communication (that is where we do need to interop with IPoIB) - we would do it, else we would do nothing. > > To make things simple, the solution i suggest is that that the RDMA CM > > would --not-- do this modify QP/QKEY (that is would set the 0x12345678 > > qkey on the modify qp to init) and rather leave it to the RDMA CM > > consumer --if-- they wish to do so. However it will use the ipv4 > > broadcast group qkey for doing mcast joins and report this qkey to the > > user in the ud param of the event. > > We need to be able to handle options 4 and 5 as well. indeed, i have addressed that above. > > this (what qkey is assigned to the ipv4 broadcast group by different > > SAs) is orthogonal to the discussion we do here. > This depends on whether verbs allows, or if it should allow, a user to specify a > controlled qkey when configuring their QP. I don't think there is any limitation today in the verbs layer, actually for our testing so far we patches the rdmacm not set the sig byte and use the ipoib (ie not override it in core/cma.c) and we manage to interop fine with ipoib. From or.gerlitz at gmail.com Wed Jan 17 10:27:14 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 17 Jan 2007 20:27:14 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <20070117140455.GK13720@mellanox.co.il> References: <45ACF218.5020400@mellanox.co.il> <45ACF622.4060603@voltaire.com> <45ADDD11.5040006@dev.mellanox.co.il> <45AE18DC.4020606@voltaire.com> <20070117140455.GK13720@mellanox.co.il> Message-ID: <15ddcffd0701171027h39d089dfi15b4499b0fa34782@mail.gmail.com> On 1/17/07, Michael S. 
Tsirkin wrote: > > Quoting Or Gerlitz : > > I understand that the change involves letting the rdma cm know the SID > > when the consumer calls --rdma_resolve_route-- where today it get to > > know the SID when the consumer calls --rdma_connect-- . So this is not > > an internal RDMA CM change but rather also changes the API. > > Same for SRP as the api of ib_sa_path_rec_get (that is the structure it > > gets as input) changes, the SRP code also changes. > > Any, can you send the mthca and rdmacm/rdmacm-consumers changes as > > RFC/PATCH over the list before the actual code freeze??? > I didn't start on this code yet, but it does not look like a > huge project, I hope to post code by next week. > To avoid major disruptions all over the stack, my preference for OFED 1.2 > would be to add new API calls and a module option (off by default) for cma/srp > to use them. The rdmacm API change is not such a big deal, and if you want to change it only for the kernel portion for OFED 1.2, it makes sense to me. I really don't think --adding-- a special API is the way to go. Do it in an "end in mind" fashion: work on a patch, send it to the rdmacm maintainer/list for RFC and so on. > For OFED 1.2, I only planned to implement this for SDP and SRP. > I do not expect all this to be mergeable in 2.6.21 time frame, > so maybe that's enough. SDP is coded over the RDMA CM and, as I said above, my suggestion is not to add a special API, so just do the same QoS patching you do to SDP to iSER etc. Or. From halr at voltaire.com Wed Jan 17 10:35:24 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 17 Jan 2007 13:35:24 -0500 Subject: [openib-general] [PATCH] opensm: fix segfault with up/down and root nodes file.
In-Reply-To: <20070109232129.GP17240@sashak.voltaire.com> References: <20070109232129.GP17240@sashak.voltaire.com> Message-ID: <1169058906.31746.3403.camel@hal.voltaire.com> On Tue, 2007-01-09 at 18:21, Sasha Khapyorsky wrote: > Segfault happens when yet non-initialized lid_matrix tables are > accessed - with up/down routing engine when root nodes are provided by > user and lid matrices are not pre-created. There is the fix. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From mshefty at ichips.intel.com Wed Jan 17 11:23:37 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 17 Jan 2007 11:23:37 -0800 Subject: [openib-general] multicast code/merge status In-Reply-To: <15ddcffd0701171018i14069211v6d7ee653e32b3e6a@mail.gmail.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> <45AD0D71.9040705@ichips.intel.com> <45AE01D9.3000107@voltaire.com> <45AE61B3.3010002@ichips.intel.com> <15ddcffd0701171018i14069211v6d7ee653e32b3e6a@mail.gmail.com> Message-ID: <45AE77B9.1080508@ichips.intel.com> > makes sense, so my suggestion is "leave this (using the ipoib qkey) to > the user" This is fine, but it may change when the user needs to make this choice. E.g. when creating the QP, versus joining the multicast group, in order to support the valid options. The selection also needs to be conveyed to the kernel somehow. At this point, maybe we just need to start looking at specific implementations. > I don't think there is any limitation today in the verbs layer, > actually for our testing so far we patches the rdmacm not set the sig > byte and use the ipoib (ie not override it in core/cma.c) and we > manage to interop fine with ipoib. Maybe this shouldn't be allowed for any random application. 
Perhaps there's a way to treat controlled qkeys similar to the privileged port space. - Sean From swise at opengridcomputing.com Wed Jan 17 11:49:49 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:49:49 -0600 Subject: [openib-general] [PATCH RFC 00/10] ofed_1_2 - Chelsio backports Message-ID: <20070117194949.30830.55013.stgit@dell3.ogc.int> Michael, I took your advice and backported the chelsio code to kernel.org kernels 2.6.19 through 2.6.11 and then to sles9sp3. Attached are the patches, one for each kernel.org version. Please review. Thanks, Steve. From swise at opengridcomputing.com Wed Jan 17 11:49:51 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:49:51 -0600 Subject: [openib-general] [PATCH RFC 01/10] ofed_1_2 Chelsio backport to 2.6.19 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117194951.30830.98084.stgit@dell3.ogc.int> Chelsio backport to 2.6.19 Signed-off-by: Steve Wise --- .../backport/2.6.19/include/linux/genalloc.h | 42 +++++ .../backport/2.6.19/include/linux/workqueue.h | 9 + .../backport/2.6.19/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.19/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.19/linux_genalloc_to_2_6_20.patch | 17 ++ 5 files changed, 277 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.19/include/linux/genalloc.h b/kernel_addons/backport/2.6.19/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.19/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. 
See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.19/include/linux/workqueue.h b/kernel_addons/backport/2.6.19/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.19/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.19/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct 
work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.19/include/src/genalloc.c b/kernel_addons/backport/2.6.19/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.19/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_patches/backport/2.6.19/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.19/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.19/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.19/linux_genalloc_to_2_6_20.patch b/kernel_patches/backport/2.6.19/linux_genalloc_to_2_6_20.patch new file mode 100644 index 0000000..93fee2b --- /dev/null +++ 
b/kernel_patches/backport/2.6.19/linux_genalloc_to_2_6_20.patch @@ -0,0 +1,17 @@ +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 163d991..2cd239f 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" From swise at opengridcomputing.com Wed Jan 17 11:49:58 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:49:58 -0600 Subject: [openib-general] [PATCH RFC 04/10] ofed_1_2 Chelsio backport to 2.6.16 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117194958.30830.21707.stgit@dell3.ogc.int> Chelsio backport to 2.6.16 Signed-off-by: Steve Wise --- .../backport/2.6.16/include/linux/genalloc.h | 42 +++++ .../backport/2.6.16/include/linux/interrupt.h | 17 ++ .../backport/2.6.16/include/linux/netdevice.h | 4 .../backport/2.6.16/include/linux/random.h | 15 ++ .../backport/2.6.16/include/linux/skbuff.h | 3 .../backport/2.6.16/include/linux/workqueue.h | 9 + .../backport/2.6.16/include/net/netevent.h | 33 ++++ .../backport/2.6.16/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.16/include/src/netevent.c | 71 ++++++++ .../backport/2.6.16/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.16/linux_stuff_to_2_6_17.patch | 24 +++ 11 files changed, 427 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.16/include/linux/genalloc.h b/kernel_addons/backport/2.6.16/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ 
b/kernel_addons/backport/2.6.16/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.16/include/linux/interrupt.h b/kernel_addons/backport/2.6.16/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.16/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t 
(*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.16/include/linux/netdevice.h b/kernel_addons/backport/2.6.16/include/linux/netdevice.h index 5641019..225eeda 100644 --- a/kernel_addons/backport/2.6.16/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.16/include/linux/netdevice.h @@ -15,4 +15,8 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.16/include/linux/random.h b/kernel_addons/backport/2.6.16/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.16/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.16/include/linux/skbuff.h b/kernel_addons/backport/2.6.16/include/linux/skbuff.h index 4845283..70bf011 100644 --- a/kernel_addons/backport/2.6.16/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.16/include/linux/skbuff.h @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.16/include/linux/workqueue.h b/kernel_addons/backport/2.6.16/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.16/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.16/include/linux/workqueue.h @@ -26,6 
+26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.16/include/net/netevent.h b/kernel_addons/backport/2.6.16/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.16/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.16/include/src/genalloc.c b/kernel_addons/backport/2.6.16/include/src/genalloc.c new 
file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.16/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. 
+ */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. 
+ */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. 
+ */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.16/include/src/netevent.c b/kernel_addons/backport/2.6.16/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.16/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.16/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.16/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.16/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.16/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.16/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ 
b/kernel_patches/backport/2.6.16/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o From swise at opengridcomputing.com Wed Jan 17 11:49:53 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:49:53 -0600 Subject: [openib-general] [PATCH RFC 02/10] ofed_1_2 Chelsio backport to 2.6.18 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117194953.30830.97017.stgit@dell3.ogc.int> Chelsio backport to 2.6.18 Signed-off-by: Steve Wise --- .../backport/2.6.18/include/linux/genalloc.h | 42 +++++ .../backport/2.6.18/include/linux/interrupt.h | 17 ++ .../backport/2.6.18/include/linux/netdevice.h | 9 + .../backport/2.6.18/include/linux/random.h | 15 ++ .../backport/2.6.18/include/linux/skbuff.h | 1 .../backport/2.6.18/include/linux/workqueue.h | 9 + .../backport/2.6.18/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.18/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.18/linux_genalloc_to_2_6_20.patch | 17 ++ 9 files changed, 319 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.18/include/linux/genalloc.h 
b/kernel_addons/backport/2.6.18/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.18/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.18/include/linux/interrupt.h b/kernel_addons/backport/2.6.18/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.18/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define 
BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.18/include/linux/netdevice.h b/kernel_addons/backport/2.6.18/include/linux/netdevice.h new file mode 100644 index 0000000..61a6deb --- /dev/null +++ b/kernel_addons/backport/2.6.18/include/linux/netdevice.h @@ -0,0 +1,9 @@ +#ifndef BACKPORT_LINUX_NETDEVICE_TO_2_6_18 +#define BACKPORT_LINUX_NETDEVICE_TO_2_6_18 +#include_next + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + +#endif diff --git a/kernel_addons/backport/2.6.18/include/linux/random.h b/kernel_addons/backport/2.6.18/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.18/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.18/include/linux/skbuff.h b/kernel_addons/backport/2.6.18/include/linux/skbuff.h index 4845283..ca5edc0 100644 --- a/kernel_addons/backport/2.6.18/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.18/include/linux/skbuff.h @@ -4,5 +4,6 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW #endif diff --git a/kernel_addons/backport/2.6.18/include/linux/workqueue.h b/kernel_addons/backport/2.6.18/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- 
a/kernel_addons/backport/2.6.18/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.18/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.18/include/src/genalloc.c b/kernel_addons/backport/2.6.18/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.18/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_patches/backport/2.6.18/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.18/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.18/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.18/linux_genalloc_to_2_6_20.patch b/kernel_patches/backport/2.6.18/linux_genalloc_to_2_6_20.patch new file mode 100644 index 0000000..93fee2b --- /dev/null +++ 
b/kernel_patches/backport/2.6.18/linux_genalloc_to_2_6_20.patch @@ -0,0 +1,17 @@ +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 163d991..2cd239f 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" From swise at opengridcomputing.com Wed Jan 17 11:49:56 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:49:56 -0600 Subject: [openib-general] [PATCH RFC 03/10] ofed_1_2 Chelsio backport to 2.6.17 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117194955.30830.69761.stgit@dell3.ogc.int> Chelsio backport to 2.6.17 Signed-off-by: Steve Wise --- .../backport/2.6.17/include/linux/genalloc.h | 42 +++++ .../backport/2.6.17/include/linux/interrupt.h | 17 ++ .../backport/2.6.17/include/linux/netdevice.h | 4 .../backport/2.6.17/include/linux/random.h | 15 ++ .../backport/2.6.17/include/linux/skbuff.h | 3 .../backport/2.6.17/include/linux/workqueue.h | 9 + .../backport/2.6.17/include/net/netevent.h | 33 ++++ .../backport/2.6.17/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.17/include/src/netevent.c | 69 ++++++++ .../backport/2.6.17/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.17/linux_stuff_to_2_6_17.patch | 24 +++ 11 files changed, 425 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.17/include/linux/genalloc.h b/kernel_addons/backport/2.6.17/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ 
b/kernel_addons/backport/2.6.17/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.17/include/linux/interrupt.h b/kernel_addons/backport/2.6.17/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.17/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t 
(*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.17/include/linux/netdevice.h b/kernel_addons/backport/2.6.17/include/linux/netdevice.h index 5641019..225eeda 100644 --- a/kernel_addons/backport/2.6.17/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.17/include/linux/netdevice.h @@ -15,4 +15,8 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.17/include/linux/random.h b/kernel_addons/backport/2.6.17/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.17/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next <linux/random.h> + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.17/include/linux/skbuff.h b/kernel_addons/backport/2.6.17/include/linux/skbuff.h index 4845283..70bf011 100644 --- a/kernel_addons/backport/2.6.17/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.17/include/linux/skbuff.h @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next <linux/skbuff.h> #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.17/include/linux/workqueue.h b/kernel_addons/backport/2.6.17/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.17/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.17/include/linux/workqueue.h @@ -26,6
+26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.17/include/net/netevent.h b/kernel_addons/backport/2.6.17/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.17/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.17/include/src/genalloc.c b/kernel_addons/backport/2.6.17/include/src/genalloc.c new 
file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.17/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. 
+ */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. 
+ */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. 
+ */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.17/include/src/netevent.c b/kernel_addons/backport/2.6.17/include/src/netevent.c new file mode 100644 index 0000000..35d02c3 --- /dev/null +++ b/kernel_addons/backport/2.6.17/include/src/netevent.c @@ -0,0 +1,69 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include + +static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.17/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.17/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.17/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ 
b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o From swise at opengridcomputing.com Wed Jan 17 11:50:02 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:50:02 -0600 Subject: [openib-general] [PATCH RFC 06/10] ofed_1_2 Backport chelsio to 2.6.14 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117195002.30830.75306.stgit@dell3.ogc.int> Backport chelsio to 2.6.14 Signed-off-by: Steve Wise --- .../backport/2.6.14/include/linux/genalloc.h | 42 +++++ .../backport/2.6.14/include/linux/interrupt.h | 17 ++ .../backport/2.6.14/include/linux/netdevice.h | 9 + .../backport/2.6.14/include/linux/random.h | 15 ++ .../backport/2.6.14/include/linux/skbuff.h | 3 .../backport/2.6.14/include/linux/types.h | 6 + .../backport/2.6.14/include/linux/workqueue.h | 9 + .../backport/2.6.14/include/net/netevent.h | 33 ++++ .../backport/2.6.14/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.14/include/src/netevent.c | 71 ++++++++ .../backport/2.6.14/cxgb3_makefile_to_2_6_19.patch | 12 + 
.../backport/2.6.14/linux_stuff_to_2_6_17.patch | 24 +++ 12 files changed, 438 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.14/include/linux/genalloc.h b/kernel_addons/backport/2.6.14/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.14/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.14/include/linux/interrupt.h b/kernel_addons/backport/2.6.14/include/linux/interrupt.h new file mode 100644 index 
0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.14/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next <linux/interrupt.h> + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.14/include/linux/netdevice.h b/kernel_addons/backport/2.6.14/include/linux/netdevice.h index 5641019..2f12781 100644 --- a/kernel_addons/backport/2.6.14/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.14/include/linux/netdevice.h @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.14/include/linux/random.h b/kernel_addons/backport/2.6.14/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.14/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next <linux/random.h> + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.14/include/linux/skbuff.h b/kernel_addons/backport/2.6.14/include/linux/skbuff.h index 4845283..70bf011 100644 --- a/kernel_addons/backport/2.6.14/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.14/include/linux/skbuff.h @@ -4,5 +4,8 @@ #define
LINUX_SKBUFF_H_BACKPORT #include_next <linux/skbuff.h> #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.14/include/linux/types.h b/kernel_addons/backport/2.6.14/include/linux/types.h new file mode 100644 index 0000000..86e334f --- /dev/null +++ b/kernel_addons/backport/2.6.14/include/linux/types.h @@ -0,0 +1,6 @@ +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 +#define BACKPORT_LINUX_TYPES_TO_2_6_15 +#include_next <linux/types.h> + +#define BITS_PER_BYTE 8 +#endif diff --git a/kernel_addons/backport/2.6.14/include/linux/workqueue.h b/kernel_addons/backport/2.6.14/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.14/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.14/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.14/include/net/netevent.h b/kernel_addons/backport/2.6.14/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.14/include/net/netevent.h @@ -0,0 +1,33 @@
+#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.14/include/src/genalloc.c b/kernel_addons/backport/2.6.14/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.14/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.14/include/src/netevent.c b/kernel_addons/backport/2.6.14/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.14/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). 
+ */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.14/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.14/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.14/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.14/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.14/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ b/kernel_patches/backport/2.6.14/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o From swise at opengridcomputing.com Wed 
Jan 17 11:50:06 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:50:06 -0600 Subject: [openib-general] [PATCH RFC 08/10] ofed_1_2 Backport Chelsio to 2.6.12 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117195006.30830.96595.stgit@dell3.ogc.int> Backport Chelsio to 2.6.12 Signed-off-by: Steve Wise --- .../backport/2.6.12/include/linux/ethtool.h | 9 + .../backport/2.6.12/include/linux/genalloc.h | 42 +++++ .../backport/2.6.12/include/linux/interrupt.h | 17 ++ .../backport/2.6.12/include/linux/netdevice.h | 9 + .../backport/2.6.12/include/linux/random.h | 15 ++ .../backport/2.6.12/include/linux/skbuff.h | 3 .../backport/2.6.12/include/linux/types.h | 2 .../backport/2.6.12/include/linux/workqueue.h | 9 + .../backport/2.6.12/include/net/netevent.h | 33 ++++ .../backport/2.6.12/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.12/include/src/netevent.c | 71 ++++++++ .../backport/2.6.12/cxgb3_main_to_2_6_13.patch | 12 + .../backport/2.6.12/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.12/linux_stuff_to_2_6_17.patch | 24 +++ .../backport/2.6.12/t3_hw_to_2_6_13.patch | 13 ++ 15 files changed, 468 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.12/include/linux/ethtool.h b/kernel_addons/backport/2.6.12/include/linux/ethtool.h new file mode 100644 index 0000000..d03127c --- /dev/null +++ b/kernel_addons/backport/2.6.12/include/linux/ethtool.h @@ -0,0 +1,9 @@ +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 + +#include_next <linux/ethtool.h> + +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#endif diff --git a/kernel_addons/backport/2.6.12/include/linux/genalloc.h b/kernel_addons/backport/2.6.12/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.12/include/linux/genalloc.h @@ -0,0 +1,42 @@
+/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.12/include/linux/interrupt.h b/kernel_addons/backport/2.6.12/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.12/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next <linux/interrupt.h> + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) 
+{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.12/include/linux/netdevice.h b/kernel_addons/backport/2.6.12/include/linux/netdevice.h index 5641019..2f12781 100644 --- a/kernel_addons/backport/2.6.12/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.12/include/linux/netdevice.h @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.12/include/linux/random.h b/kernel_addons/backport/2.6.12/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.12/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next <linux/random.h> + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.12/include/linux/skbuff.h b/kernel_addons/backport/2.6.12/include/linux/skbuff.h index 4845283..70bf011 100644 --- a/kernel_addons/backport/2.6.12/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.12/include/linux/skbuff.h @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next <linux/skbuff.h> #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.12/include/linux/types.h b/kernel_addons/backport/2.6.12/include/linux/types.h index c06977a..53c7a33 100644 --- a/kernel_addons/backport/2.6.12/include/linux/types.h +++ 
b/kernel_addons/backport/2.6.12/include/linux/types.h @@ -7,4 +7,6 @@ #ifdef __KERNEL__ typedef unsigned int gfp_t; #endif +#define BITS_PER_BYTE 8 + #endif diff --git a/kernel_addons/backport/2.6.12/include/linux/workqueue.h b/kernel_addons/backport/2.6.12/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.12/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.12/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.12/include/net/netevent.h b/kernel_addons/backport/2.6.12/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.12/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, 
/* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.12/include/src/genalloc.c b/kernel_addons/backport/2.6.12/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.12/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.12/include/src/netevent.c b/kernel_addons/backport/2.6.12/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.12/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). 
+ */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.12/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.12/cxgb3_main_to_2_6_13.patch new file mode 100644 index 0000000..e6781f3 --- /dev/null +++ b/kernel_patches/backport/2.6.12/cxgb3_main_to_2_6_13.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c +index dfa035a..414ea84 100755 +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth + .get_wol = get_wol, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +- .get_perm_addr = ethtool_op_get_perm_addr + }; + + static int in_range(int val, int lo, int hi) diff --git a/kernel_patches/backport/2.6.12/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.12/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.12/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.12/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.12/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ b/kernel_patches/backport/2.6.12/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ 
b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o diff --git a/kernel_patches/backport/2.6.12/t3_hw_to_2_6_13.patch b/kernel_patches/backport/2.6.12/t3_hw_to_2_6_13.patch new file mode 100644 index 0000000..611e9dc --- /dev/null +++ b/kernel_patches/backport/2.6.12/t3_hw_to_2_6_13.patch @@ -0,0 +1,13 @@ +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c +index 14ea6b9..f13f581 100755 +--- a/drivers/net/cxgb3/t3_hw.c ++++ b/drivers/net/cxgb3/t3_hw.c +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada + + memcpy(adapter->port[i]->dev_addr, hw_addr, + ETH_ALEN); +- memcpy(adapter->port[i]->perm_addr, hw_addr, +- ETH_ALEN); + init_link_config(&p->link_config, p->port_type->caps); + p->phy.ops->power_down(&p->phy, 1); + if (!(p->port_type->caps & SUPPORTED_IRQ)) From swise at opengridcomputing.com Wed Jan 17 11:50:00 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:50:00 -0600 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to 2.6.15 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117195000.30830.84557.stgit@dell3.ogc.int> Backport cxgb3 to 2.6.15 Signed-off-by: Steve Wise --- .../backport/2.6.15/include/linux/genalloc.h | 42 +++++ .../backport/2.6.15/include/linux/interrupt.h | 17 ++ 
.../backport/2.6.15/include/linux/netdevice.h | 9 + .../backport/2.6.15/include/linux/random.h | 15 ++ .../backport/2.6.15/include/linux/skbuff.h | 3 .../backport/2.6.15/include/linux/types.h | 6 + .../backport/2.6.15/include/linux/workqueue.h | 9 + .../backport/2.6.15/include/net/netevent.h | 33 ++++ .../backport/2.6.15/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.15/include/src/netevent.c | 71 ++++++++ .../backport/2.6.15/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.15/linux_stuff_to_2_6_17.patch | 24 +++ 12 files changed, 438 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.15/include/linux/genalloc.h b/kernel_addons/backport/2.6.15/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.15/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.15/include/linux/interrupt.h b/kernel_addons/backport/2.6.15/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.15/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next <linux/interrupt.h> + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.15/include/linux/netdevice.h b/kernel_addons/backport/2.6.15/include/linux/netdevice.h index 5641019..2f12781 100644 --- a/kernel_addons/backport/2.6.15/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.15/include/linux/netdevice.h @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return 
!test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.15/include/linux/random.h b/kernel_addons/backport/2.6.15/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.15/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next <linux/random.h> + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.15/include/linux/skbuff.h b/kernel_addons/backport/2.6.15/include/linux/skbuff.h index 4845283..70bf011 100644 --- a/kernel_addons/backport/2.6.15/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.15/include/linux/skbuff.h @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next <linux/skbuff.h> #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.15/include/linux/types.h b/kernel_addons/backport/2.6.15/include/linux/types.h new file mode 100644 index 0000000..86e334f --- /dev/null +++ b/kernel_addons/backport/2.6.15/include/linux/types.h @@ -0,0 +1,6 @@ +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 +#define BACKPORT_LINUX_TYPES_TO_2_6_15 +#include_next <linux/types.h> + +#define BITS_PER_BYTE 8 +#endif diff --git a/kernel_addons/backport/2.6.15/include/linux/workqueue.h b/kernel_addons/backport/2.6.15/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.15/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.15/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, 
struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.15/include/net/netevent.h b/kernel_addons/backport/2.6.15/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.15/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.15/include/src/genalloc.c b/kernel_addons/backport/2.6.15/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.15/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing 
special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. 
+ */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. 
+ */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. 
+ */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.15/include/src/netevent.c b/kernel_addons/backport/2.6.15/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.15/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ 
b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o From swise at opengridcomputing.com Wed Jan 17 11:50:04 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:50:04 -0600 Subject: [openib-general] [PATCH RFC 07/10] ofed_1_2 Backport Chelsio to 2.6.13 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117195004.30830.52168.stgit@dell3.ogc.int> Backport Chelsio to 2.6.13 Signed-off-by: Steve Wise --- .../backport/2.6.13/include/linux/ethtool.h | 9 + .../backport/2.6.13/include/linux/genalloc.h | 42 +++++ .../backport/2.6.13/include/linux/interrupt.h | 17 ++ .../backport/2.6.13/include/linux/netdevice.h | 9 + .../backport/2.6.13/include/linux/random.h | 15 ++ .../backport/2.6.13/include/linux/skbuff.h | 3 .../backport/2.6.13/include/linux/types.h | 2 .../backport/2.6.13/include/linux/workqueue.h | 9 + .../backport/2.6.13/include/net/netevent.h | 33 ++++ .../backport/2.6.13/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.13/include/src/netevent.c | 71 ++++++++ 
.../backport/2.6.13/cxgb3_main_to_2_6_13.patch | 12 + .../backport/2.6.13/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.13/linux_stuff_to_2_6_17.patch | 24 +++ .../backport/2.6.13/t3_hw_to_2_6_13.patch | 13 ++ 15 files changed, 468 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.13/include/linux/ethtool.h b/kernel_addons/backport/2.6.13/include/linux/ethtool.h new file mode 100644 index 0000000..d03127c --- /dev/null +++ b/kernel_addons/backport/2.6.13/include/linux/ethtool.h @@ -0,0 +1,9 @@ +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 + +#include_next + +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#endif diff --git a/kernel_addons/backport/2.6.13/include/linux/genalloc.h b/kernel_addons/backport/2.6.13/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.13/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.13/include/linux/interrupt.h b/kernel_addons/backport/2.6.13/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.13/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.13/include/linux/netdevice.h b/kernel_addons/backport/2.6.13/include/linux/netdevice.h index 5641019..2f12781 100644 --- a/kernel_addons/backport/2.6.13/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.13/include/linux/netdevice.h @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return 
!test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.13/include/linux/random.h b/kernel_addons/backport/2.6.13/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.13/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.13/include/linux/skbuff.h b/kernel_addons/backport/2.6.13/include/linux/skbuff.h index 4845283..70bf011 100644 --- a/kernel_addons/backport/2.6.13/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.13/include/linux/skbuff.h @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.13/include/linux/types.h b/kernel_addons/backport/2.6.13/include/linux/types.h index c06977a..53c7a33 100644 --- a/kernel_addons/backport/2.6.13/include/linux/types.h +++ b/kernel_addons/backport/2.6.13/include/linux/types.h @@ -7,4 +7,6 @@ #ifdef __KERNEL__ typedef unsigned int gfp_t; #endif +#define BITS_PER_BYTE 8 + #endif diff --git a/kernel_addons/backport/2.6.13/include/linux/workqueue.h b/kernel_addons/backport/2.6.13/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.13/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.13/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct 
delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.13/include/net/netevent.h b/kernel_addons/backport/2.6.13/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.13/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.13/include/src/genalloc.c b/kernel_addons/backport/2.6.13/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.13/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special 
purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. 
+ */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. 
+ */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. 
+ */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.13/include/src/netevent.c b/kernel_addons/backport/2.6.13/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.13/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * unregister_netevent_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_netevent_notifier(). The notifier is unlinked from the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all netevent notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.13/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.13/cxgb3_main_to_2_6_13.patch new file mode 100644 index 0000000..e6781f3 --- /dev/null +++ b/kernel_patches/backport/2.6.13/cxgb3_main_to_2_6_13.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c +index dfa035a..414ea84 100755 +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth + .get_wol = get_wol, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +- .get_perm_addr = ethtool_op_get_perm_addr + }; + + static int in_range(int val, int lo, int hi) diff --git a/kernel_patches/backport/2.6.13/cxgb3_makefile_to_2_6_19.patch 
b/kernel_patches/backport/2.6.13/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.13/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.13/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.13/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ b/kernel_patches/backport/2.6.13/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o diff --git a/kernel_patches/backport/2.6.13/t3_hw_to_2_6_13.patch b/kernel_patches/backport/2.6.13/t3_hw_to_2_6_13.patch new file mode 100644 index 0000000..611e9dc --- /dev/null +++ b/kernel_patches/backport/2.6.13/t3_hw_to_2_6_13.patch @@ -0,0 +1,13 @@ +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c +index 14ea6b9..f13f581 100755 +--- a/drivers/net/cxgb3/t3_hw.c ++++ 
b/drivers/net/cxgb3/t3_hw.c +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada + + memcpy(adapter->port[i]->dev_addr, hw_addr, + ETH_ALEN); +- memcpy(adapter->port[i]->perm_addr, hw_addr, +- ETH_ALEN); + init_link_config(&p->link_config, p->port_type->caps); + p->phy.ops->power_down(&p->phy, 1); + if (!(p->port_type->caps & SUPPORTED_IRQ)) From swise at opengridcomputing.com Wed Jan 17 11:50:10 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:50:10 -0600 Subject: [openib-general] [PATCH RFC 10/10] ofed_1_2 Backport Chelsio to sles9sp3 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117195010.30830.98048.stgit@dell3.ogc.int> Backport Chelsio to sles9sp3 Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/linux/ethtool.h | 9 + .../2.6.5_sles9_sp3/include/linux/genalloc.h | 42 +++++ .../2.6.5_sles9_sp3/include/linux/interrupt.h | 17 ++ .../backport/2.6.5_sles9_sp3/include/linux/kfifo.h | 157 ++++++++++++++++++ .../backport/2.6.5_sles9_sp3/include/linux/mii.h | 18 ++ .../backport/2.6.5_sles9_sp3/include/linux/mm.h | 20 ++ .../2.6.5_sles9_sp3/include/linux/netdevice.h | 13 ++ .../backport/2.6.5_sles9_sp3/include/linux/pci.h | 2 .../2.6.5_sles9_sp3/include/linux/random.h | 15 ++ .../2.6.5_sles9_sp3/include/linux/skbuff.h | 3 .../backport/2.6.5_sles9_sp3/include/linux/slab.h | 19 -- .../2.6.5_sles9_sp3/include/linux/spinlock.h | 8 + .../backport/2.6.5_sles9_sp3/include/linux/types.h | 2 .../2.6.5_sles9_sp3/include/linux/workqueue.h | 8 + .../backport/2.6.5_sles9_sp3/include/net/dst.h | 17 ++ .../2.6.5_sles9_sp3/include/net/neighbour.h | 7 + .../2.6.5_sles9_sp3/include/net/netevent.h | 33 ++++ .../2.6.5_sles9_sp3/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.5_sles9_sp3/include/src/kfifo.c | 196 +++++++++++++++++++++++ .../2.6.5_sles9_sp3/include/src/netevent.c | 71 ++++++++ 
.../2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch | 12 + .../2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch | 12 + .../2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch | 35 ++++ .../linux_stream_idr_to_2_6_5-7_244.patch | 25 --- .../linux_stuff_to_2_6_5-7_244.patch | 46 +++++ .../mthca_provider_3465_to_2_6_9.patch | 15 -- .../2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch | 43 +++++ 27 files changed, 985 insertions(+), 58 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h new file mode 100644 index 0000000..d03127c --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h @@ -0,0 +1,9 @@ +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 + +#include_next + +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/interrupt.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h new file mode 100644 index 0000000..48eccd8 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h @@ -0,0 +1,157 @@ +/* + * A simple kernel FIFO implementation. 
+ * + * Copyright (C) 2004 Stelian Pop + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ +#ifndef _LINUX_KFIFO_H +#define _LINUX_KFIFO_H + +#ifdef __KERNEL__ + +#include +#include + +struct kfifo { + unsigned char *buffer; /* the buffer holding the data */ + unsigned int size; /* the size of the allocated buffer */ + unsigned int in; /* data is added at offset (in % size) */ + unsigned int out; /* data is extracted from off. (out % size) */ + spinlock_t *lock; /* protects concurrent modifications */ +}; + +extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock); +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, + spinlock_t *lock); +extern void kfifo_free(struct kfifo *fifo); +extern unsigned int __kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len); +extern unsigned int __kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len); + +/** + * __kfifo_reset - removes the entire FIFO contents, no locking version + * @fifo: the fifo to be emptied. + */ +static inline void __kfifo_reset(struct kfifo *fifo) +{ + fifo->in = fifo->out = 0; +} + +/** + * kfifo_reset - removes the entire FIFO contents + * @fifo: the fifo to be emptied. 
+ */ +static inline void kfifo_reset(struct kfifo *fifo) +{ + unsigned long flags; + + spin_lock_irqsave(fifo->lock, flags); + + __kfifo_reset(fifo); + + spin_unlock_irqrestore(fifo->lock, flags); +} + +/** + * kfifo_put - puts some data into the FIFO + * @fifo: the fifo to be used. + * @buffer: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most 'len' bytes from the 'buffer' into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + */ +static inline unsigned int kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_put(fifo, buffer, len); + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +/** + * kfifo_get - gets some data from the FIFO + * @fifo: the fifo to be used. + * @buffer: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most 'len' bytes from the FIFO into the + * 'buffer' and returns the number of copied bytes. + */ +static inline unsigned int kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_get(fifo, buffer, len); + + /* + * optimization: if the FIFO is empty, set the indices to 0 + * so we don't wrap the next time + */ + if (fifo->in == fifo->out) + fifo->in = fifo->out = 0; + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +/** + * __kfifo_len - returns the number of bytes available in the FIFO, no locking version + * @fifo: the fifo to be used. + */ +static inline unsigned int __kfifo_len(struct kfifo *fifo) +{ + return fifo->in - fifo->out; +} + +/** + * kfifo_len - returns the number of bytes available in the FIFO + * @fifo: the fifo to be used. 
+ */ +static inline unsigned int kfifo_len(struct kfifo *fifo) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_len(fifo); + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +#else +#warning "don't include kernel headers in userspace" +#endif /* __KERNEL__ */ +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h new file mode 100644 index 0000000..3ba8e73 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h @@ -0,0 +1,18 @@ +#ifndef BACKPORT_LINUX_MII_TO_SLES9SP3 +#define BACKPORT_LINUX_MII_TO_SLES9SP3 + +#include_next + +#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ +#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ +#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymetric pause */ +#define MII_CTRL1000 0x09 /* 1000BASE-T control */ +#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ +#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */ + +static inline struct mii_ioctl_data *if_mii(struct ifreq *rq) +{ + return (struct mii_ioctl_data *) &rq->ifr_ifru; +} + +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mm.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mm.h new file mode 100644 index 0000000..77ee6fc --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mm.h @@ -0,0 +1,20 @@ +#ifndef BACKPORT_LINUX_MM_TO_SLES9SP3 +#define BACKPORT_LINUX_MM_TO_SLES9SP3 + +#include_next + +static inline int +remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + return remap_page_range(vma, addr, pfn << PAGE_SHIFT, size, prot); +} + +static inline int +io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + return io_remap_page_range(vma, addr, pfn 
<< PAGE_SHIFT, size, prot); +} + +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h index 5641019..2e18642 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h @@ -15,4 +15,17 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + +#define NETDEV_TX_OK 0 /* driver took care of packet */ +#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ +#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ + #endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h index b43b19c..beb954b 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h @@ -6,6 +6,8 @@ #include_next #define PCI_EXP_DEVCTL 8 /* Device Control */ #define PCI_EXP_LNKCTL 16 /* Link Control */ #define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ struct msix_entry { u16 vector; /* kernel uses to write allocated vector */ diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/random.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + 
+ get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h index cc56236..0d91d86 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h @@ -4,6 +4,7 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW /** * skb_header_release - release reference to header @@ -41,4 +42,6 @@ static inline int skb_can_coalesce(struc return 0; } +#define gso_size tso_size + #endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h index 0540cc6..c8285ac 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h @@ -12,12 +12,6 @@ static inline void *kzalloc(size_t size, return ret; } -#endif -#include_next - -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 -#define BACKPORT_LINUX_STRING_TO_2_6_18 - static inline void *kmemdup(const void *src, size_t len, gfp_t gfp) { @@ -29,19 +23,10 @@ void *kmemdup(const void *src, size_t le return p; } -#endif -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 -#define BACKPORT_LINUX_STRING_TO_2_6_18 - static inline -void *kmemdup(const void *src, size_t len, gfp_t gfp) +void *kmalloc_node(size_t size, gfp_t flags, int nid) { - void *p; - - p = kmalloc(len, gfp); - if (p) - memcpy(p, src, len); - return p; + return kmalloc(size, flags); } #endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h index 4644d50..00506f4 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h @@ -3,6 +3,7 @@ 
#define BACKPORT_LINUX_SPINLOCK_H #include_next #define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED +#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED #define spin_trylock_irqsave(lock, flags) \ ({ \ @@ -13,4 +14,11 @@ ({ \ #define spin_lock_nested(lock, subclass) spin_lock(lock) +#define spin_trylock_irq(lock) \ +({ \ + local_irq_disable(); \ + spin_trylock(lock) ? \ + 1 : ({ local_irq_enable(); 0; }); \ +}) + #endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h index c06977a..53c7a33 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h @@ -7,4 +7,6 @@ #ifdef __KERNEL__ typedef unsigned int gfp_t; #endif +#define BITS_PER_BYTE 8 + #endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h index 330f47f..c054ed2 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, + struct delayed_work *dwork) +{ + while (!cancel_delayed_work(&dwork->work)) + flush_workqueue(wq); +} #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,7 +39,7 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h 
b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h new file mode 100644 index 0000000..69cca51 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_NET_DST_2_6_11 +#define BACKPORT_NET_DST_2_6_11 + +#include +#include_next + +static inline u32 dst_mtu(struct dst_entry *dst) +{ + u32 mtu = dst_metric(dst, RTAX_MTU); + /* + * Alexey put it here, so ask him about it :) + */ + barrier(); + return mtu; +} + +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h new file mode 100644 index 0000000..573320d --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h @@ -0,0 +1,7 @@ +#ifndef BACKPORT_LINUX_NEIGHBOUR_TO_SLES9SP3 +#define BACKPORT_LINUX_NEIGHBOUR_TO_SLES9SP3 + +#include +#include_next + +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c 
b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. 
+ */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. 
+ */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. 
+ */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c new file mode 100644 index 0000000..5d1d907 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c @@ -0,0 +1,196 @@ +/* + * A simple kernel FIFO implementation. + * + * Copyright (C) 2004 Stelian Pop + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#include +#include +#include +#include +#include + +/** + * kfifo_init - allocates a new FIFO using a preallocated buffer + * @buffer: the preallocated buffer to be used. + * @size: the size of the internal buffer, this has to be a power of 2. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the + * struct kfifo with kfree(). + */ +struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock) +{ + struct kfifo *fifo; + + /* size must be a power of 2 */ + BUG_ON(size & (size - 1)); + + fifo = kmalloc(sizeof(struct kfifo), gfp_mask); + if (!fifo) + return ERR_PTR(-ENOMEM); + + fifo->buffer = buffer; + fifo->size = size; + fifo->in = fifo->out = 0; + fifo->lock = lock; + + return fifo; +} +EXPORT_SYMBOL(kfifo_init); + +/** + * kfifo_alloc - allocates a new FIFO and its internal buffer + * @size: the size of the internal buffer to be allocated. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * The size will be rounded-up to a power of 2. + */ +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) +{ + unsigned char *buffer; + struct kfifo *ret; + + /* + * round up to the next power of 2, since our 'let the indices + * wrap' technique works only in this case. + */ + if (size & (size - 1)) { + BUG_ON(size > 0x80000000); + size = roundup_pow_of_two(size); + } + + buffer = kmalloc(size, gfp_mask); + if (!buffer) + return ERR_PTR(-ENOMEM); + + ret = kfifo_init(buffer, size, gfp_mask, lock); + + if (IS_ERR(ret)) + kfree(buffer); + + return ret; +} +EXPORT_SYMBOL(kfifo_alloc); + +/** + * kfifo_free - frees the FIFO + * @fifo: the fifo to be freed. 
+ */ +void kfifo_free(struct kfifo *fifo) +{ + kfree(fifo->buffer); + kfree(fifo); +} +EXPORT_SYMBOL(kfifo_free); + +/** + * __kfifo_put - puts some data into the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most 'len' bytes from the 'buffer' into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int __kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->size - fifo->in + fifo->out); + + /* + * Ensure that we sample the fifo->out index -before- we + * start putting bytes into the kfifo. + */ + + smp_mb(); + + /* first put the data starting from fifo->in to buffer end */ + l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); + + /* then put the rest (if any) at the beginning of the buffer */ + memcpy(fifo->buffer, buffer + l, len - l); + + /* + * Ensure that we add the bytes to the kfifo -before- + * we update the fifo->in index. + */ + + smp_wmb(); + + fifo->in += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_put); + +/** + * __kfifo_get - gets some data from the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most 'len' bytes from the FIFO into the + * 'buffer' and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. 
+ */ +unsigned int __kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->in - fifo->out); + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. + */ + + smp_rmb(); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); + memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + + /* then get the rest (if any) from the beginning of the buffer */ + memcpy(buffer + l, fifo->buffer, len - l); + + /* + * Ensure that we remove the bytes from the kfifo -before- + * we update the fifo->out index. + */ + + smp_mb(); + + fifo->out += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_get); diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * unregister_netevent_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_netevent_notifier(). The notifier is unlinked from the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all netevent notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch new file mode 100644 index 0000000..e6781f3 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c +index dfa035a..414ea84 100755 +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth + .get_wol = get_wol, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +- .get_perm_addr = ethtool_op_get_perm_addr + }; + + static int in_range(int val, int lo, int hi) diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch 
b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch new file mode 100644 index 0000000..af468f7 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch @@ -0,0 +1,35 @@ +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c +index 3237fc8..2a38953 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c +@@ -234,7 +234,7 @@ static void *alloc_ep(int size, gfp_t gf + epc = kmalloc(size, gfp); + if (epc) { + memset(epc, 0, size); +- kref_init(&epc->kref); ++ kref_init(&epc->kref, __free_ep); + spin_lock_init(&epc->lock); + init_waitqueue_head(&epc->waitq); + } +@@ -338,7 +338,7 @@ static struct rtable *find_route(struct + } + }; + +- if (ip_route_output_flow(&rt, &fl, NULL, 0)) ++ if (ip_route_output_key(&rt, &fl)) + return NULL; + return rt; + } +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h +index 893f9d0..e54e202 100644 +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h +@@ -57,7 +57,7 @@ #define MPA_FLAGS_MASK 0xE0 + #define put_ep(ep) { \ + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ +- kref_put(&((ep)->kref), __free_ep); \ ++ kref_put(&((ep)->kref)); \ + } + + #define 
get_ep(ep) { \ diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch deleted file mode 100644 index 74d8403..0000000 --- a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch +++ /dev/null @@ -1,25 +0,0 @@ -diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile -index 163d991..2cd239f 100644 ---- a/drivers/infiniband/core/Makefile -+++ b/drivers/infiniband/core/Makefile -@@ -26,3 +26,6 @@ ib_ucm-y := ucm.o - - ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ - uverbs_marshall.o -+ -+obj-$(CONFIG_INFINIBAND) += stream.o -+ib_core-y += stream.o ib_idr.o -diff --git a/drivers/infiniband/core/stream.c b/drivers/infiniband/core/stream.c -new file mode 100644 -index 0000000..96a48fe ---- /dev/null -+++ b/drivers/infiniband/core/stream.c -@@ -0,0 +1 @@ -+#include "src/stream.c" -diff --git a/drivers/infiniband/core/ib_idr.c b/drivers/infiniband/core/ib_idr.c -new file mode 100644 -index 0000000..58cf933 ---- /dev/null -+++ b/drivers/infiniband/core/ib_idr.c -@@ -0,0 +1 @@ -+#include "src/ib_idr.c" diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch new file mode 100644 index 0000000..8733e1a --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch @@ -0,0 +1,46 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git 
a/drivers/infiniband/core/stream.c b/drivers/infiniband/core/stream.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ b/drivers/infiniband/core/stream.c +@@ -0,0 +1 @@ ++#include "src/stream.c" +diff --git a/drivers/infiniband/core/ib_idr.c b/drivers/infiniband/core/ib_idr.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/ib_idr.c +@@ -0,0 +1 @@ ++#include "src/ib_idr.c" +diff --git a/drivers/infiniband/core/kfifo.c b/drivers/infiniband/core/kfifo.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/kfifo.c +@@ -0,0 +1 @@ ++#include "src/kfifo.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,6 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++obj-$(CONFIG_INFINIBAND) += stream.o ++ib_core-y += stream.o ib_idr.o genalloc.o netevent.o kfifo.o diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/mthca_provider_3465_to_2_6_9.patch b/kernel_patches/backport/2.6.5_sles9_sp3/mthca_provider_3465_to_2_6_9.patch deleted file mode 100644 index a3febff..0000000 --- a/kernel_patches/backport/2.6.5_sles9_sp3/mthca_provider_3465_to_2_6_9.patch +++ /dev/null @@ -1,15 +0,0 @@ -Index: linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c -=================================================================== ---- linux-kernel.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:01:20.000000000 +0200 -+++ linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:03:14.000000000 +0200 -@@ -359,8 +359,8 @@ static int mthca_mmap_uar(struct ib_ucon - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - -- if (io_remap_pfn_range(vma, vma->vm_start, -- to_mucontext(context)->uar.pfn, -+ if (remap_page_range(vma, vma->vm_start, -+ 
(to_mucontext(context)->uar.pfn) << PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch new file mode 100644 index 0000000..a667be0 --- /dev/null +++ b/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch @@ -0,0 +1,43 @@ +diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h +index 463ca32..b931fd2 100755 +--- a/drivers/net/cxgb3/adapter.h ++++ b/drivers/net/cxgb3/adapter.h +@@ -179,6 +179,7 @@ struct adapter { + struct list_head adapter_list; + void __iomem *regs; + struct pci_dev *pdev; ++ u32 saved_pci_state[16]; + unsigned long registered_device_map; + unsigned long open_device_map; + unsigned long flags; +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c +index 14ea6b9..f13f581 100755 +--- a/drivers/net/cxgb3/t3_hw.c ++++ b/drivers/net/cxgb3/t3_hw.c +@@ -3250,7 +3250,7 @@ int t3_reset_adapter(struct adapter *ada + uint16_t devid = 0; + + if (is_pcie(adapter)) +- pci_save_state(adapter->pdev); ++ pci_save_state(adapter->pdev, adapter->saved_pci_state); + t3_write_reg(adapter, A_PL_RST, F_CRSTWRM | F_CRSTWRMMODE); + + /* +@@ -3268,7 +3268,7 @@ int t3_reset_adapter(struct adapter *ada + return -1; + + if (is_pcie(adapter)) +- pci_restore_state(adapter->pdev); ++ pci_restore_state(adapter->pdev, adapter->saved_pci_state); + return 0; + } + +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada + + memcpy(adapter->port[i]->dev_addr, hw_addr, + ETH_ALEN); +- memcpy(adapter->port[i]->perm_addr, hw_addr, +- ETH_ALEN); + init_link_config(&p->link_config, p->port_type->caps); + p->phy.ops->power_down(&p->phy, 1); + if (!(p->port_type->caps & SUPPORTED_IRQ)) From swise at opengridcomputing.com Wed Jan 17 11:50:08 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 17 Jan 2007 13:50:08 -0600 Subject: [openib-general] [PATCH RFC 09/10] 
ofed_1_2 Backport Chelsio to 2.6.11 In-Reply-To: <20070117194949.30830.55013.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> Message-ID: <20070117195008.30830.48783.stgit@dell3.ogc.int> Backport Chelsio to 2.6.11 Signed-off-by: Steve Wise --- .../backport/2.6.11/include/linux/ethtool.h | 9 + .../backport/2.6.11/include/linux/genalloc.h | 42 +++++ .../backport/2.6.11/include/linux/interrupt.h | 17 ++ kernel_addons/backport/2.6.11/include/linux/mm.h | 8 + .../backport/2.6.11/include/linux/netdevice.h | 9 + .../backport/2.6.11/include/linux/random.h | 15 ++ .../backport/2.6.11/include/linux/skbuff.h | 2 kernel_addons/backport/2.6.11/include/linux/slab.h | 19 -- .../backport/2.6.11/include/linux/types.h | 2 .../backport/2.6.11/include/linux/workqueue.h | 8 + kernel_addons/backport/2.6.11/include/net/dst.h | 16 ++ .../backport/2.6.11/include/net/netevent.h | 33 ++++ .../backport/2.6.11/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.11/include/src/netevent.c | 71 ++++++++ .../backport/2.6.11/cxgb3_main_to_2_6_13.patch | 12 + .../backport/2.6.11/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.11/linux_stuff_to_2_6_17.patch | 24 +++ .../2.6.11/mthca_provider_3465_to_2_6_11.patch | 13 -- .../backport/2.6.11/t3_hw_to_2_6_13.patch | 13 ++ 19 files changed, 492 insertions(+), 31 deletions(-) diff --git a/kernel_addons/backport/2.6.11/include/linux/ethtool.h b/kernel_addons/backport/2.6.11/include/linux/ethtool.h new file mode 100644 index 0000000..d03127c --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/linux/ethtool.h @@ -0,0 +1,9 @@ +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 + +#include_next + +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#endif diff --git a/kernel_addons/backport/2.6.11/include/linux/genalloc.h b/kernel_addons/backport/2.6.11/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ 
b/kernel_addons/backport/2.6.11/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.11/include/linux/interrupt.h b/kernel_addons/backport/2.6.11/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t 
(*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.11/include/linux/mm.h b/kernel_addons/backport/2.6.11/include/linux/mm.h new file mode 100644 index 0000000..1a1cf11 --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/linux/mm.h @@ -0,0 +1,8 @@ +#ifndef BACKPORT_LINUX_MM_TO_2_6_11 +#define BACKPORT_LINUX_MM_TO_2_6_11 + +#include_next + +#define io_remap_pfn_range remap_pfn_range + +#endif diff --git a/kernel_addons/backport/2.6.11/include/linux/netdevice.h b/kernel_addons/backport/2.6.11/include/linux/netdevice.h index 5641019..2f12781 100644 --- a/kernel_addons/backport/2.6.11/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.11/include/linux/netdevice.h @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.11/include/linux/random.h b/kernel_addons/backport/2.6.11/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.11/include/linux/skbuff.h b/kernel_addons/backport/2.6.11/include/linux/skbuff.h index a4d9195..62bfb3b 100644 --- 
a/kernel_addons/backport/2.6.11/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.11/include/linux/skbuff.h @@ -4,6 +4,7 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW /** * skb_header_release - release reference to header @@ -17,5 +18,6 @@ static inline void skb_header_release(st { } +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.11/include/linux/slab.h b/kernel_addons/backport/2.6.11/include/linux/slab.h index bd9671d..9d82afa 100644 --- a/kernel_addons/backport/2.6.11/include/linux/slab.h +++ b/kernel_addons/backport/2.6.11/include/linux/slab.h @@ -22,12 +22,6 @@ static inline char *kstrdup(const char * return s; } -#endif -#include_next - -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 -#define BACKPORT_LINUX_STRING_TO_2_6_18 - static inline void *kmemdup(const void *src, size_t len, gfp_t gfp) { @@ -39,19 +33,10 @@ void *kmemdup(const void *src, size_t le return p; } -#endif -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 -#define BACKPORT_LINUX_STRING_TO_2_6_18 - static inline -void *kmemdup(const void *src, size_t len, gfp_t gfp) +void *kmalloc_node(size_t size, gfp_t flags, int nid) { - void *p; - - p = kmalloc(len, gfp); - if (p) - memcpy(p, src, len); - return p; + return kmalloc(size, flags); } #endif diff --git a/kernel_addons/backport/2.6.11/include/linux/types.h b/kernel_addons/backport/2.6.11/include/linux/types.h index c06977a..53c7a33 100644 --- a/kernel_addons/backport/2.6.11/include/linux/types.h +++ b/kernel_addons/backport/2.6.11/include/linux/types.h @@ -7,4 +7,6 @@ #ifdef __KERNEL__ typedef unsigned int gfp_t; #endif +#define BITS_PER_BYTE 8 + #endif diff --git a/kernel_addons/backport/2.6.11/include/linux/workqueue.h b/kernel_addons/backport/2.6.11/include/linux/workqueue.h index 330f47f..c054ed2 100644 --- a/kernel_addons/backport/2.6.11/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.11/include/linux/workqueue.h @@ -26,6 +26,12 @@ 
backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, + struct delayed_work *dwork) +{ + while (!cancel_delayed_work(&dwork->work)) + flush_workqueue(wq); +} #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,7 +39,7 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } diff --git a/kernel_addons/backport/2.6.11/include/net/dst.h b/kernel_addons/backport/2.6.11/include/net/dst.h new file mode 100644 index 0000000..ec5e7b4 --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/net/dst.h @@ -0,0 +1,16 @@ +#ifndef BACKPORT_NET_DST_2_6_11 +#define BACKPORT_NET_DST_2_6_11 + +#include_next + +static inline u32 dst_mtu(struct dst_entry *dst) +{ + u32 mtu = dst_metric(dst, RTAX_MTU); + /* + * Alexey put it here, so ask him about it :) + */ + barrier(); + return mtu; +} + +#endif diff --git a/kernel_addons/backport/2.6.11/include/net/netevent.h b/kernel_addons/backport/2.6.11/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int 
register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.11/include/src/genalloc.c b/kernel_addons/backport/2.6.11/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. 
+ */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. 
+ */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. 
+ */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.11/include/src/netevent.c b/kernel_addons/backport/2.6.11/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.11/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.11/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.11/cxgb3_main_to_2_6_13.patch new file mode 100644 index 0000000..e6781f3 --- /dev/null +++ b/kernel_patches/backport/2.6.11/cxgb3_main_to_2_6_13.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c +index dfa035a..414ea84 100755 +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth + .get_wol = get_wol, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +- .get_perm_addr = ethtool_op_get_perm_addr + }; + + static int in_range(int val, int lo, int hi) diff --git a/kernel_patches/backport/2.6.11/cxgb3_makefile_to_2_6_19.patch 
b/kernel_patches/backport/2.6.11/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.11/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.11/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.11/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ b/kernel_patches/backport/2.6.11/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o diff --git a/kernel_patches/backport/2.6.11/mthca_provider_3465_to_2_6_11.patch b/kernel_patches/backport/2.6.11/mthca_provider_3465_to_2_6_11.patch deleted file mode 100644 index 402c00a..0000000 --- a/kernel_patches/backport/2.6.11/mthca_provider_3465_to_2_6_11.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 
-=================================================================== ---- linux-kernel.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 15:17:58.000000000 +0200 -+++ linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 15:20:19.000000000 +0200 -@@ -359,7 +359,7 @@ static int mthca_mmap_uar(struct ib_ucon - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - -- if (io_remap_pfn_range(vma, vma->vm_start, -+ if (remap_pfn_range(vma, vma->vm_start, - to_mucontext(context)->uar.pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; diff --git a/kernel_patches/backport/2.6.11/t3_hw_to_2_6_13.patch b/kernel_patches/backport/2.6.11/t3_hw_to_2_6_13.patch new file mode 100644 index 0000000..611e9dc --- /dev/null +++ b/kernel_patches/backport/2.6.11/t3_hw_to_2_6_13.patch @@ -0,0 +1,13 @@ +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c +index 14ea6b9..f13f581 100755 +--- a/drivers/net/cxgb3/t3_hw.c ++++ b/drivers/net/cxgb3/t3_hw.c +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada + + memcpy(adapter->port[i]->dev_addr, hw_addr, + ETH_ALEN); +- memcpy(adapter->port[i]->perm_addr, hw_addr, +- ETH_ALEN); + init_link_config(&p->link_config, p->port_type->caps); + p->phy.ops->power_down(&p->phy, 1); + if (!(p->port_type->caps & SUPPORTED_IRQ)) From arkady at netapp.com Wed Jan 17 12:11:40 2007 From: arkady at netapp.com (Arkady Kanevsky) Date: Wed, 17 Jan 2007 15:11:40 -0500 Subject: [openib-general] SVN deprication Message-ID: <200701171511.41352.arkady@netapp.com> Jeff and Co, Is there a way to find out the date of a specific SVN revision #? I can no longer access svn: svn info -r 5400 https://openfabric.org/svn svn: PROPFIND request failed on '/svn' svn: PROPFIND of '/svn': could not connect to server (https://openfabric.org) Is the SVN server depricated for good? Do we have an SVN log somewhere in a git? If yes, how can I find the correlation between Linux version and SVN revision? 
Thanks, Arkady From mst at mellanox.co.il Wed Jan 17 12:30:27 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 22:30:27 +0200 Subject: [openib-general] [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 In-Reply-To: <1169050593.27530.0.camel@linux-q667.site> References: <1169050593.27530.0.camel@linux-q667.site> Message-ID: <20070117203027.GD4329@mellanox.co.il> > Quoting Steve Wise : > Subject: Re: [PATCH RFC 00/21] ofed_1_2 - Chelsio Backport to SLES9SP3 > > > BTW, Steve, I wouldn't start working on backports from SLES9. > > I'd start with 2.6.19 and go back to 2.6.11 over kernel.org versions, > > just making sure they build (we have this build environment on openfabrics.org, > > or it's easy to check all versions out from kernel.org git). > > In this way you get incrementally a host of data on what is and what isn't in > > which kernel version. > > > > Then adding support for a distro is just a matter of finding a closest > > kernel.org kernel and mostly removing stuff that distro backported from newer kernels. > > > > Michael, > > Is there something magic about stopping at 2.6.11? Yes. AFAIK IB subsystem was added to upstream kernel in 2.6.11. So our kernel.org support has grown historically from there, and we just added a couple of older distro kernels. -- MST From jsquyres at cisco.com Wed Jan 17 12:30:01 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 17 Jan 2007 15:30:01 -0500 Subject: [openib-general] SVN deprication In-Reply-To: <200701171511.41352.arkady@netapp.com> References: <200701171511.41352.arkady@netapp.com> Message-ID: <37B44B9F-2C9A-4B1A-AD6F-0177F4BCCCD9@cisco.com> SVN is still available, but it is at a new URL: https://svn.openfabrics.org/svn/openib. All the history and everything should be there; let me know if you have any problems. On Jan 17, 2007, at 3:11 PM, Arkady Kanevsky wrote: > Jeff and Co, > Is there a way to find out the date of a specific SVN revision #? 
> I can no longer access svn: > svn info -r 5400 https://openfabric.org/svn > svn: PROPFIND request failed on '/svn' > svn: PROPFIND of '/svn': could not connect to server (https:// > openfabric.org) > > Is the SVN server depricated for good? > Do we have an SVN log somewhere in a git? > If yes, how can I find the correlation between Linux version and > SVN revision? > Thanks, > Arkady > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mst at mellanox.co.il Wed Jan 17 12:32:15 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 17 Jan 2007 22:32:15 +0200 Subject: [openib-general] SDP performance In-Reply-To: <048f01c73a62$6333e5a0$0281a8c0@ebpc> References: <048f01c73a62$6333e5a0$0281a8c0@ebpc> Message-ID: <20070117203215.GE4329@mellanox.co.il> > Can any of the SDP developers confirm that SDP can be zero-copy both on > sending and receiving? If so, are there tunables that affect when to copy > and when not? Our SDP does not support zero-copy on either end at the moment. -- MST From Arkady.Kanevsky at netapp.com Wed Jan 17 12:37:24 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 17 Jan 2007 15:37:24 -0500 Subject: [openib-general] SVN deprication Message-ID: Thanks Jeff. This works. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. 
Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Jeff Squyres [mailto:jsquyres at cisco.com] > Sent: Wednesday, January 17, 2007 3:30 PM > To: Kanevsky, Arkady > Cc: openib-general at openib.org > Subject: Re: [openib-general] SVN deprication > > SVN is still available, but it is at a new URL: > > https://svn.openfabrics.org/svn/openib. > > All the history and everything should be there; let me know > if you have any problems. > > > On Jan 17, 2007, at 3:11 PM, Arkady Kanevsky wrote: > > > Jeff and Co, > > Is there a way to find out the date of a specific SVN revision #? > > I can no longer access svn: > > svn info -r 5400 https://openfabric.org/svn > > svn: PROPFIND request failed on '/svn' > > svn: PROPFIND of '/svn': could not connect to server (https:// > > openfabric.org) > > > > Is the SVN server depricated for good? > > Do we have an SVN log somewhere in a git? > > If yes, how can I find the correlation between Linux > version and SVN > > revision? > > Thanks, > > Arkady > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > > openib-general > > > -- > Jeff Squyres > Server Virtualization Business Unit > Cisco Systems > From mst at mellanox.co.il Wed Jan 17 12:49:54 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 17 Jan 2007 22:49:54 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <15ddcffd0701171027h39d089dfi15b4499b0fa34782@mail.gmail.com> References: <15ddcffd0701171027h39d089dfi15b4499b0fa34782@mail.gmail.com> Message-ID: <20070117204954.GF4329@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze > > On 1/17/07, Michael S. Tsirkin wrote: > > > Quoting Or Gerlitz : > > > > I understand that the change involves letting the rdma cm know the SID > > > when the consumer calls --rdma_resolve_route-- where today it get to > > > know the SID when the consumer calls --rdma_connect-- . So this is not > > > an internal RDMA CM change but rather also changes the API. > > > > Same for SRP as the api of ib_sa_path_rec_get (that is the structure it > > > gets as input) changes, the SRP code also changes. > > > > Any, can you send the mthca and rdmacm/rdmacm-consumers changes as > > > RFC/PATCH over the list before the actual code freeze??? > > > I didn't start on this code yet, but it does not look like a > > huge project, I hope to post code by next week. > > > To avoid major disruptions all over the stack, my preference for OFED 1.2 > > would be to add new API calls and a module option (off by default) for cma/srp > > to use them. > > the rdmacm api change is not such a big deal and if you want to change > it only for the kernel portion for the ofed 1.2 it makes sense to me. > I really don't think --adding-- a special api is the way to go. Doing > it in "end in mind" fashion, work on a patch, send it to the rdmacm > maintainer/list for RFC and so on. > > > For OFED 1.2, I only planned to implement this for SDP and SRP. > > I do not expect all this to be mergeable in 2.6.21 time frame, > > so maybe that's enough. 
> > SDP is coded over the RDMA CM and i say above my suggestion is not to > add a special API, so just do the same QoS patching you do to SDP to > iSER etc. Sounds too risky to me, this is technology preview code so I want to have all this stuff off by default but easily enabled by users who want to demo. After I post the rest of the code, if you like you'll be able to post an iser patch to add this stuff to iser as well. -- MST From rdreier at cisco.com Wed Jan 17 13:20:52 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 17 Jan 2007 13:20:52 -0800 Subject: [openib-general] Using GM + OpenIB in the same process at the same time. In-Reply-To: (Galen Shipman's message of "Wed, 17 Jan 2007 13:59:52 -0700") References: Message-ID: [adding openib-general CC] > I have a question about using GM + OpenIB at the same time, it seems > to be causing bad things to happen (process goes into state D) :-). > Here is the issue: > In Open MPI we allow striping of an MPI message across multiple > interconnects at once. In this case I am using GM and OpenIB. This is > using an RDMA pipeline protocol which attempts to overlap > registration and communication (RDMA Write). In the protocol the > target registers a chunk of the message and sends an RDMA Write > request to the origin, the origin then registers the corresponding > chunk of memory and initiates an RDMA Write. Upon completion of the > RDMA Write an RDMA FIN message is sent from the origin to the target. > The target is allowed to have 4 RDMA Write requests outstanding at > any time. > As an example, let's say that the user buffer extends from address 3 > through 12200. The target begins by registering let's say address 3 - > 8000 with OpenIB, under the covers the addresses are page aligned so > we actually register from 0 through 8191. An RDMA Write request is > sent to the origin, note that the origin will only RDMA Write into > addresses 3 - 8000. 
> The target then begins registering address 8001 through 12200 with > GM, again under the covers the addresses are page aligned so we > actually register from 4096 through 12287 and send an RDMA Write > request to the origin. Again note that the origin will only RDMA > Write into address 8001 through 12200. > > The problem is that when this occurs the process goes into D state > (uninterruptible sleep). After this occurs I am still able to use GM > and OpenIB individually and can even attempt to use them together > (with the result of the process again going into state D). Finding out where the process is sleeping would probably be useful. You can do "cat /proc/<pid>/wchan" to get a little info. Even better would be to do "echo t > /proc/sysrq-trigger" and send the complete kernel log messages that that produces (and also include the PID that is stuck in uninterruptible sleep). However I think it will probably be up to myricom to debug this in the end -- my ability to figure out what's happening is very limited without the GM sources, and I'm not that interested in debugging someone else's proprietary software anyway. - R. From parks at lanl.gov Wed Jan 17 13:40:21 2007 From: parks at lanl.gov (parks fields) Date: Wed, 17 Jan 2007 14:40:21 -0700 Subject: [openib-general] OFED1.1 and Fed core 6 install question Message-ID: <1169070021.2927.16.camel@rover.lanl.gov> When trying to install OFED1.1 on my completely updated AMD 64bit Fedora core 6 machine I can't get past the following error. ERROR: The sysfsutils-devel package is required to build libibverbs_devel RPM I have tried a custom install and not selecting libibverbs_devel but it tries to use it anyway. Even the basic install selects it. I have all these RPMs installed libsysfs-2.0.0-6 sysfsutils-2.1.0-1 sysfsutils-debuginfo-2.1.0 libsysfs-devel-2.0.0-6 But nowhere on RPMfind or the rest of the net do I find sysfsutils-devel for Fedora core 6 X86_64..... 
Even when I build sysfs from the tar ball I don't get a sysfsutils-devel Ideas ?? thanks parks -- parks fields HPC-5 From sashak at voltaire.com Wed Jan 17 14:02:08 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 00:02:08 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> Message-ID: <20070117220208.GH21043@sashak.voltaire.com> On 13:34 Tue 16 Jan , Jeff Squyres wrote: > FWIW, having git's for the MPI implementations was asked for on the > call yesterday (by Tziporet, IIRC?). The rationale, as I understood > it, was threefold: > > 1. Putting the MPI release in git provides a level of OFED-specific > history and version control. This was explicitly stated on the call > yesterday. Which history information we are expecting to see between bin-file-ver1 and bin-file-ver2, where files bin-file-ver* are never changed? > 2. MPI's have concrete "releases" to OFED just like all other ULP's, > especially if there is any OFED-specific packaging involved in the > MPI's release. This was not stated on the call, but it makes sense > to me. > > 3. Putting everything in git makes it nicely uniform for OFED to be > assembled. This was not stated on the call, and I'm sure it's not a > requirement, but it is a little nice to be uniform when assembling > OFED (my $0.02). > > 4. We used to put the MPI releases in SVN (tarball or SRPM) for prior > OFED release processes, Yes, and it was bad practice IMO. GIT and SVN are version tracking tools, mostly usable for sources and not for compilation results. Why one should install git if everything really needed is just to download file from the server? > so putting them in a git seems to parallel > that procedure. Just file hosting should be perfectly enough for the all above. I don't see any real reason to use git as non-versioned binary files storage. 
Sasha From rdreier at cisco.com Wed Jan 17 14:00:00 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 17 Jan 2007 14:00:00 -0800 Subject: [openib-general] [libibverbs/examples] [PATCH] Added checks to memory allocation failure when using asprintf in pingpong tests In-Reply-To: <1169026675.20182.1.camel@mtls05.yok.mtl.com> (Dotan Barak's message of "Wed, 17 Jan 2007 11:37:54 +0200") References: <1169026675.20182.1.camel@mtls05.yok.mtl.com> Message-ID: Thanks, applied. From jsquyres at cisco.com Wed Jan 17 14:04:48 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 17 Jan 2007 17:04:48 -0500 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <20070117220208.GH21043@sashak.voltaire.com> References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> <20070117220208.GH21043@sashak.voltaire.com> Message-ID: <1B76802D-8FD6-497A-BEF0-9842B55012F7@cisco.com> On Jan 17, 2007, at 5:02 PM, Sasha Khapyorsky wrote: >> 1. Putting the MPI release in git provides a level of OFED-specific >> history and version control. This was explicitly stated on the call >> yesterday. > > Which history information we are expecting to see between bin-file- > ver1 > and bin-file-ver2, where files bin-file-ver* are never changed? I think the point is when they *do* change. >> 2. MPI's have concrete "releases" to OFED just like all other ULP's, >> especially if there is any OFED-specific packaging involved in the >> MPI's release. This was not stated on the call, but it makes sense >> to me. >> >> 3. Putting everything in git makes it nicely uniform for OFED to be >> assembled. This was not stated on the call, and I'm sure it's not a >> requirement, but it is a little nice to be uniform when assembling >> OFED (my $0.02). >> >> 4. We used to put the MPI releases in SVN (tarball or SRPM) for prior >> OFED release processes, > > Yes, and it was bad practice IMO. 
GIT and SVN are version tracking > tools, > mostly usable for sources and not for compilation results. Why one > should install git if everything really needed is just to download > file > from the server? The SRPMs are not compilation results. Putting compilation results in a version tracking tool would be useless, I agree. >> so putting them in a git seems to parallel >> that procedure. > > Just file hosting should be perfectly enough for the all above. I > don't > see any real reason to use git as non-versioned binary files storage. I think the point was that you could then get a definitive set of files that were shipped in OFED version x.y -- you could accurately rebuild OFED regardless of what files are hosted on the other open source web sites. A perfect example is that the MVAPICH1 package in OFED is prepared by Mellanox, not OSU. So there was no web site to make that tarball and support files available from. Another example is that open source projects may decide to no longer host older versions of their software -- OFA may not be able to control that. The point here is that version control principles apply to binaries just as well as they apply to sources (indeed, the files we're talking about here are binary bundles of sources). Just my $0.02. 
:-) -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From hnguyen at linux.vnet.ibm.com Wed Jan 17 14:12:13 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Wed, 17 Jan 2007 23:12:13 +0100 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability Message-ID: <200701172312.14840.hnguyen@linux.vnet.ibm.com> Hello, here is a patch for ehca_uverbs.c with the following changes: - Rename mm_open/close() to ehca_mm_open/close() respectively - Refactor ehca_mmap() into sub-functions ehca_mmap_cq/qp(), which then call the new common sub-functions ehca_mmap_fw() and ehca_mmap_queue() to register firmware memory block and queue pages respectively Roland, please note that I applied the previous patches to your git tree for-2.6.21 before creating this patch. I also realized a compile issue with the patch from Michael T. in ehca_reqs.c regarding "return qp pointer in ib_wc". For this I'll send another patch. Thanks! 
Nam Signed-off-by Hoang-Nam Nguyen --- ehca_uverbs.c | 266 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 146 insertions(+), 120 deletions(-) diff -Nurp infiniband/drivers/infiniband/hw/ehca/ehca_uverbs.c infiniband_work/drivers/infiniband/hw/ehca/ehca_uverbs.c --- infiniband/drivers/infiniband/hw/ehca/ehca_uverbs.c 2007-01-17 21:39:01.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_uverbs.c 2007-01-17 21:17:00.000000000 +0100 @@ -68,7 +68,7 @@ int ehca_dealloc_ucontext(struct ib_ucon return 0; } -static void mm_open(struct vm_area_struct *vma) +static void ehca_mm_open(struct vm_area_struct *vma) { u32 *count = (u32*)vma->vm_private_data; if (!count) { @@ -84,7 +84,7 @@ static void mm_open(struct vm_area_struc vma->vm_start, vma->vm_end, *count); } -static void mm_close(struct vm_area_struct *vma) +static void ehca_mm_close(struct vm_area_struct *vma) { u32 *count = (u32*)vma->vm_private_data; if (!count) { @@ -98,26 +98,150 @@ static void mm_close(struct vm_area_stru } static struct vm_operations_struct vm_ops = { - .open = mm_open, - .close = mm_close, + .open = ehca_mm_open, + .close = ehca_mm_close, }; -static int ehca_mmap_qpages(struct vm_area_struct *vma, struct ipz_queue *queue) +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) { + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical); + /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, + vsize, vma->vm_page_prot); + if (unlikely(ret)) { + ehca_gen_err("remap_pfn_range() failed ret=%x", ret); + return -ENOMEM; + } + + vma->vm_private_data = mm_count; + 
(*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, + u32 *mm_count) +{ + int ret; u64 start, ofs; struct page *page; - int rc = 0; + + vma->vm_flags |= VM_RESERVED; start = vma->vm_start; for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); page = virt_to_page(virt_addr); - rc = vm_insert_page(vma, start, page); - if (unlikely(rc)) { - ehca_gen_err("vm_insert_page() failed rc=%x", rc); - return rc; + ret = vm_insert_page(vma, start, page); + if (unlikely(ret)) { + ehca_gen_err("vm_insert_page() failed rc=%x", ret); + return ret; } start += PAGE_SIZE; } + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); + ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_fw() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + case 2: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); + ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_queue() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + default: + ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", + rsrc_type, cq->cq_number); + return -EINVAL; + } + + return 0; +} + +static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); + ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); + if (unlikely(ret)) { + 
ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return -ENOMEM; + } + break; + + case 2: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + case 3: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + default: + ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", + rsrc_type, qp->ib_qp.qp_num); + return -EINVAL; + } + return 0; } @@ -129,7 +253,6 @@ int ehca_mmap(struct ib_ucontext *contex u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ u32 cur_pid = current->tgid; u32 ret; - u64 vsize, physical; unsigned long flags; struct ehca_cq *cq; struct ehca_qp *qp; @@ -155,52 +278,12 @@ int ehca_mmap(struct ib_ucontext *contex if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq); - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(cq->ib_cq.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = cq->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - ehca_dbg(cq->ib_cq.device, - "vsize=%lx physical=%lx", vsize, physical); - /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - 
"remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - vma->vm_private_data = &cq->mm_count_galpa; - cq->mm_count_galpa++; - break; - - case 2: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq); - vma->vm_flags |= VM_RESERVED; - ret = ehca_mmap_qpages(vma, &cq->ipz_queue); - if (unlikely(ret)) { - ehca_gen_err("ehca_mmap_qpages() failed rc=%x " - "cq_num=%x", ret, cq->cq_number); - return ret; - } - vma->vm_private_data = &cq->mm_count_queue; - cq->mm_count_queue++; - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_cq(vma, cq, rsrc_type); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_cq() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } break; @@ -224,67 +307,12 @@ int ehca_mmap(struct ib_ucontext *contex if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp); - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(qp->ib_qp.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = qp->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx", - vsize, physical); - /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - vma->vm_private_data = &qp->mm_count_galpa; - qp->mm_count_galpa++; - break; - - case 2: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp); - vma->vm_flags |= VM_RESERVED; - ret = ehca_mmap_qpages(vma, &qp->ipz_rqueue); - if (unlikely(ret)) { - ehca_gen_err("ehca_mmap_qpages(rq) failed " 
- "rc=%x qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - vma->vm_private_data = &qp->mm_count_rqueue; - qp->mm_count_rqueue++; - break; - - case 3: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp); - vma->vm_flags |= VM_RESERVED; - ret = ehca_mmap_qpages(vma, &qp->ipz_squeue); - if (unlikely(ret)) { - ehca_gen_err("ehca_mmap_qpages(sq) failed " - "rc=%x qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - vma->vm_private_data = &qp->mm_count_squeue; - qp->mm_count_squeue++; - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_qp(vma, qp, rsrc_type); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_qp() failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } break; @@ -293,7 +321,5 @@ int ehca_mmap(struct ib_ucontext *contex return -EINVAL; } - vma->vm_ops = &vm_ops; - return 0; } From ralph.campbell at qlogic.com Wed Jan 17 14:22:50 2007 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Wed, 17 Jan 2007 14:22:50 -0800 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 Message-ID: <1169072570.7052.46.camel@brick.pathscale.com> IB/core - ib_umad can cause address alignment fault In user_mad.c, the definition for struct ib_umad_packet includes struct ib_user_mad at an odd 32-bit offset. When ib_umad_write() tries to assign rmpp_mad->mad_hdr.tid, there is an alignment fault on architectures which have strict alignment for load/stores. This patch fixes the problem by changing the offset on which struct ib_user_mad is defined within struct ib_umad_packet. Thanks go to John W. Marland for finding this. 
Signed-off-by: Ralph Campbell diff -r b1128b48dc99 drivers/infiniband/core/user_mad.c --- a/drivers/infiniband/core/user_mad.c Fri Jan 12 20:00:03 2007 +0000 +++ b/drivers/infiniband/core/user_mad.c Wed Jan 17 14:09:37 2007 -0800 @@ -125,7 +125,7 @@ struct ib_umad_packet { struct ib_mad_send_buf *msg; struct ib_mad_recv_wc *recv_wc; struct list_head list; - int length; + long length; struct ib_user_mad mad; }; From mst at mellanox.co.il Wed Jan 17 14:22:55 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 00:22:55 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <45ADD8D0.8080301@dev.mellanox.co.il> References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> <45ADD8D0.8080301@dev.mellanox.co.il> Message-ID: <20070117222255.GB25305@mellanox.co.il> > > I don't really care one way or another; this was just my > > understanding of why it was requested. > > > > > > > > Jeff is correct - I requested this from the reasons above. I think all we need for OFED is just a *fixed* URL where OFED build script can download the OFED-specific SRPM for 1.2. If this is a problem for OSU it can be hosted at the openfabrics server. Correct? -- MST From jsquyres at cisco.com Wed Jan 17 14:53:34 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 17 Jan 2007 17:53:34 -0500 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <20070117222255.GB25305@mellanox.co.il> References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> <45ADD8D0.8080301@dev.mellanox.co.il> <20070117222255.GB25305@mellanox.co.il> Message-ID: This is such a trivial matter that it really isn't worth arguing about. :-) Tell us MPI guys how you want MPI releases published to OFED and we'll do it. On Jan 17, 2007, at 5:22 PM, Michael S. Tsirkin wrote: >>> I don't really care one way or another; this was just my >>> understanding of why it was requested. 
>>> >>> >>> >> >> Jeff is correct - I requested this from the reasons above. > > I think all we need for OFED is just a *fixed* URL where OFED > build script can download the OFED-specific SRPM for 1.2. > > If this is a problem for OSU it can be hosted at the openfabrics > server. > > Correct? > -- > MST -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From sashak at voltaire.com Wed Jan 17 15:10:36 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 01:10:36 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <20070117222255.GB25305@mellanox.co.il> References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> <45ADD8D0.8080301@dev.mellanox.co.il> <20070117222255.GB25305@mellanox.co.il> Message-ID: <20070117231036.GL21043@sashak.voltaire.com> On 00:22 Thu 18 Jan , Michael S. Tsirkin wrote: > > > I don't really care one way or another; this was just my > > > understanding of why it was requested. > > > > > > > > > > > > > Jeff is correct - I requested this from the reasons above. > > I think all we need for OFED is just a *fixed* URL where OFED > build script can download the OFED-specific SRPM for 1.2. > > If this is a problem for OSU it can be hosted at the openfabrics server. > > Correct? Seems reasonable for me. Sasha From sashak at voltaire.com Wed Jan 17 15:25:03 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 01:25:03 +0200 Subject: [openib-general] [openfabrics-ewg] Reminder: OFED 1.2 In-Reply-To: <1B76802D-8FD6-497A-BEF0-9842B55012F7@cisco.com> References: <20070116010419.GD16348@sashak.voltaire.com> <20070116083109.GS22940@mellanox.co.il> <20070117220208.GH21043@sashak.voltaire.com> <1B76802D-8FD6-497A-BEF0-9842B55012F7@cisco.com> Message-ID: <20070117232503.GM21043@sashak.voltaire.com> On 17:04 Wed 17 Jan , Jeff Squyres wrote: > On Jan 17, 2007, at 5:02 PM, Sasha Khapyorsky wrote: > > >>1. 
Putting the MPI release in git provides a level of OFED-specific > >>history and version control. This was explicitly stated on the call > >>yesterday. > > > >Which history information we are expecting to see between bin-file- > >ver1 > >and bin-file-ver2, where files bin-file-ver* are never changed? > > I think the point is when they *do* change. But when they do change we update the version and create new binary file - bin-file-ver2.1 . > >>2. MPI's have concrete "releases" to OFED just like all other ULP's, > >>especially if there is any OFED-specific packaging involved in the > >>MPI's release. This was not stated on the call, but it makes sense > >>to me. > >> > >>3. Putting everything in git makes it nicely uniform for OFED to be > >>assembled. This was not stated on the call, and I'm sure it's not a > >>requirement, but it is a little nice to be uniform when assembling > >>OFED (my $0.02). > >> > >>4. We used to put the MPI releases in SVN (tarball or SRPM) for prior > >>OFED release processes, > > > >Yes, and it was bad practice IMO. GIT and SVN are version tracking > >tools, > >mostly usable for sources and not for compilation results. Why one > >should install git if everything really needed is just to download > >file > >from the server? > > The SRPMs are not compilation results. Right, it is source packaging results - similar meaning. > Putting compilation results > in a version tracking tool would be useless, I agree. > > >>so putting them in a git seems to parallel > >>that procedure. > > > >Just file hosting should be perfectly enough for the all above. I > >don't > >see any real reason to use git as non-versioned binary files storage. > > I think the point was that you could then get a definitive set of > files that were shipped in OFED version x.y -- you could accurately > rebuild OFED regardless of what files are hosted on the other open > source web sites. This is tracked in fetch/build scripts, and it is under version control. 
External packages can be hosted (or just copied) on the OFA site. > A perfect example is that the MVAPICH1 package in > OFED is prepared by Mellanox, not OSU. So there was no web site to > make that tarball and support files available from. Another example > is that open source projects may decide to no longer host older > versions of their software -- OFA may not be able to control that. > > The point here is that version control principles apply to binaries > just as well as they apply to sources (indeed, the files we're > talking about here are binary bundles of sources). Only if we are going to change such files, but I guess in our case we are not - instead we will create new package files with new versions. Sasha From sashak at voltaire.com Wed Jan 17 16:01:20 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 02:01:20 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45AE3A4E.4050501@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> Message-ID: <20070118000120.GO21043@sashak.voltaire.com> Hi Yevgeny, On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > Hi Hal > > The following series of six patches implements QoS policy file parser: > > 1. QoS parser Lex file > 2. QoS parser Lex-generated c file > 3. QoS parser grammar (Yacc) file > 4. QoS parser Yacc-generated grammar c and h file > 5. QoS parser header file that defines parse tree data structures > 6. Changes in makefiles and configure.in file for compiling QoS parser files Is there any description of proposed format and functionality? Also what about using human readable formats?
Sasha From sashak at voltaire.com Wed Jan 17 16:42:24 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 02:42:24 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45A664CC.8050200@dev.mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> Message-ID: <20070118004224.GQ21043@sashak.voltaire.com> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: > Hi Sasha, > > Sasha Khapyorsky wrote: > > Hi Yevgeny, > > > > On 09:47 Wed 10 Jan , Yevgeny Kliteynik wrote: > >> Hi Sasha, > >> > >> Sasha Khapyorsky wrote: > >>> Hi Yevgeny, > >>> > >>> On 12:05 Tue 09 Jan , Yevgeny Kliteynik wrote: > >>>> Hi Sasha. > >>>> Thanks for spotting this issue - I missed it when I reviewed the patch. > >>>> Here's how things look in windows: > >>>> * openlog() and closelog() are redefined to nothing in windows config header > >>>> * cl_log_event() implementation on windows does all the work - gets handler > >>>> to event log, logs the message, and closes the handler. > >>>> It does *not* uses syslog(). > >>>> > >>>> To use syslog/sysopen/sysclose on windows, a user have to install some > >>>> package (e.g. "Interix") that provides various daemons (such as syslogd), > >>>> similar to Linux/Unix: > >>>> http://msdn2.microsoft.com/en-us/library/ms811897.aspx#ucmgch10_topic12 > >>>> http://www.interix.com/ > >>> And this is not done for WinOpenSM now. Is this would be useful option? I > >>> guess this could solve a lot of compatibility problems and simplify the > >>> porting dramatically, right? > >> I'll address this question to the windows guys. But I seriously doubt that > >> they would want to make OSM depend on some third-party product, whether it > >> is commercial or free. 
> > > > I doubt too :(. But ask them if it is not too hard. Interesting, how > > WinOSM development works? AFAIK it is open source and published on the > > net. Who is the maintainer? Is there any active mailing list? > > If the product is commercial, then the answer is no. If it is free, > then there might be some license issues that may cause problems. But > theoretically, if there would be some high-quality free product than > will not pose any legal problems distributing it, and if it will be > *really* useful (not just for logging functions), then it's possible. So what do you mean - "Interix" has (or may have) legal issues if it will be used with OpenSM? What about pure opensource - http://sourceware.org/pthreads-win32/? It is licensed under LGPL, I see on the net many positive reports about stability and usability. > As for the mailing list it's openib-windows at openib.org. You can access > it here: http://openib.org/mailman/listinfo/openib-windows > > >>>> So replacing cl_log_event() with syslog() would definitely cause a problem. > >>>> > >>>> A (pretty ugly) solution can be writing a wrapper function to cl_log_event() > >>>> on windows and redefining syslog() to use this wrapper. > >>> Like #define syslog() cl_log_event() ? Not so ugly (better than > >>> combination of openlog()/closelog() and "unrelated" cl_log_event()), > >>> and perfectly simple IMO. > >> "#define syslog() cl_log_event()" will not do the job, since these functions > >> have different definitios: > >> syslog(int priority, char *message, ...); > >> cl_log_event(char*, cl_log_type_t, char*, void*, uint32_t); > >> so there should be wrapper just for windows that will prepare the message > >> for cl_log_event() as a single string. > > > > Understood. > > > > And what the purpose of: > > > > #ifdef WIN32 > > OsmReportState(buffer); > > #endif /* WIN32 */ > > > > in osm_log() (right after cl_log_event()/syslog())? > > It reports SM states when SM is running as window service. 
Could this be moved from common code into syslog() wrapper? Sasha From sashak at voltaire.com Wed Jan 17 16:44:36 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 02:44:36 +0200 Subject: [openib-general] [PATCH TRIVIAL] opensm: make osm object local Message-ID: <20070118004436.GR21043@sashak.voltaire.com> This defines osm object as local variable. Signed-off-by: Sasha Khapyorsky --- osm/opensm/main.c | 12 +----------- 1 files changed, 1 insertions(+), 11 deletions(-) diff --git a/osm/opensm/main.c b/osm/opensm/main.c index ff517eb..919f71d 100644 --- a/osm/opensm/main.c +++ b/osm/opensm/main.c @@ -57,17 +57,6 @@ #include #include -/******************************************************************** - D E F I N E G L O B A L V A R I A B L E S -*********************************************************************/ -/* - This is the global opensm object. - One opensm object is required per subnet. - Future versions could support multiple subnets by - instantiating more than one opensm object. -*/ -osm_opensm_t osm; - volatile unsigned int osm_exit_flag = 0; static volatile unsigned int osm_hup_flag = 0; @@ -545,6 +534,7 @@ main( int argc, char* argv[] ) { + osm_opensm_t osm; osm_subn_opt_t opt; ib_net64_t sm_key = 0; ib_api_status_t status; -- 1.5.0.rc0.g2484-dirty From halr at voltaire.com Wed Jan 17 18:30:47 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 17 Jan 2007 21:30:47 -0500 Subject: [openib-general] [PATCH TRIVIAL] opensm: make osm object local In-Reply-To: <20070118004436.GR21043@sashak.voltaire.com> References: <20070118004436.GR21043@sashak.voltaire.com> Message-ID: <1169087430.31746.31244.camel@hal.voltaire.com> On Wed, 2007-01-17 at 19:44, Sasha Khapyorsky wrote: > This defines osm object as local variable. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. 
-- Hal From mshefty at ichips.intel.com Wed Jan 17 19:52:29 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 17 Jan 2007 19:52:29 -0800 Subject: [openib-general] [libibcm] [PATCH] Added checks to memory allocation failure when using asprintf In-Reply-To: <1168941094.12444.2.camel@mtls05.yok.mtl.com> References: <1168941094.12444.2.camel@mtls05.yok.mtl.com> Message-ID: <45AEEEFD.8010302@ichips.intel.com> Thanks - committed. From mst at mellanox.co.il Wed Jan 17 20:39:23 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 06:39:23 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <1169072570.7052.46.camel@brick.pathscale.com> References: <1169072570.7052.46.camel@brick.pathscale.com> Message-ID: <20070118043923.GC25305@mellanox.co.il> > Quoting Ralph Campbell : > Subject: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > IB/core - ib_umad can cause address alignment fault > > In user_mad.c, the definition for struct ib_umad_packet includes > struct ib_user_mad at an odd 32-bit offset. When ib_umad_write() > tries to assign rmpp_mad->mad_hdr.tid, there is an alignment fault on > architectures which have strict alignment for load/stores. > This patch fixes the problem by changing the offset on which > struct ib_user_mad is defined within struct ib_umad_packet. > > Thanks go to John W. Marland for finding this. > > Signed-off-by: Ralph Campbell > > diff -r b1128b48dc99 drivers/infiniband/core/user_mad.c > --- a/drivers/infiniband/core/user_mad.c Fri Jan 12 20:00:03 2007 +0000 > +++ b/drivers/infiniband/core/user_mad.c Wed Jan 17 14:09:37 2007 -0800 > @@ -125,7 +125,7 @@ struct ib_umad_packet { > struct ib_mad_send_buf *msg; > struct ib_mad_recv_wc *recv_wc; > struct list_head list; > - int length; > + long length; > struct ib_user_mad mad; > }; This does not make sense to me - do we have to replace all int fields with long now? 
Looks like a compiler or makefile bug in your setup - struct fields should be naturally aligned. -- MST From mst at mellanox.co.il Wed Jan 17 21:00:14 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 07:00:14 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070118004224.GQ21043@sashak.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118004224.GQ21043@sashak.voltaire.com> Message-ID: <20070118050014.GD25305@mellanox.co.il> > What about pure opensource - http://sourceware.org/pthreads-win32/? It > is licensed under LGPL, I see on the net many positive reports about > stability and usability. I used it to do a windows port of linux complib at some point and opensm seemed to work fine with it. What it was lacking at that point was support for 64 bit applications, and for some reason (which is still unclear to me) there was a strong desire to run opensm in 64 bit mode. Seems to have been fixed now, BTW. 
-- MST From eitan at sw053.yok.mtl.com Wed Jan 17 21:21:33 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Thu, 18 Jan 2007 07:21:33 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-18:normal completion Message-ID: <200701180521.l0I5LXaL006220@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Wed_Jan_17_13:32:47_2007 c2a39a ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From vlad at dev.mellanox.co.il Thu Jan 18 01:28:07 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 11:28:07 +0200 Subject: [openib-general] OpenIB Wiki pages updated In-Reply-To: References: Message-ID: <1169112487.23809.17.camel@swlab50.yok.mtl.com> On Wed, 2007-01-17 at 09:41 -0800, Woodruff, Robert J wrote: > Vladimir wrote, > >Hi, > >The following links were added to > https://wiki.openfabrics.org/tiki-index.php: > > >OFED 1.2 HowTo > >HOWTO Build OFA user package > >HOWTO Build OFA kernel package > >HOWTO add userspace package to ofa_user > > > >Regards, > >Vladimir > > How do I make it build the rdma_cm and rdma_ucm, there appears to be no 
> configure > option in the ./configure script of the daily builds even though the > code is in > the core directory. ( I tried the 2/17/2007 kernel > daily build). > Use --with-addr_trans-mod option. This option is set by default. Regards, Vladimir From vlad at dev.mellanox.co.il Thu Jan 18 01:32:07 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 11:32:07 +0200 Subject: [openib-general] madeye In-Reply-To: <45AE6161.40300@systemfabricworks.com> References: <45AE6161.40300@systemfabricworks.com> Message-ID: <1169112727.23809.21.camel@swlab50.yok.mtl.com> On Wed, 2007-01-17 at 11:48 -0600, Raleigh F Rinehart wrote: > I'm trying to use madeye in OFED 1.1 Release to do some debugging but it > does not seem to be present. I cracked open src tarball and all the > right bits seem to be there (Kconfig, makefile, src) but it doesn't seem > to get built and installed as part of the normal installation procedure > (running install.sh). Has anyone had any success at building, > installing and using madeye in a release version of OFED? > Run export OPENIB_PARAMS="--with-madeye-mod" before running install.sh or build.sh See OFED-1.1/docs/ofed.conf-example. Note: Remove already created RPMs from OFED-1.1/RPMS directory in order to rebuild RPM with madeye. Regards, Vladimir From vlad at dev.mellanox.co.il Thu Jan 18 01:39:46 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 11:39:46 +0200 Subject: [openib-general] OFED1.1 and Fed core 6 install question In-Reply-To: <1169070021.2927.16.camel@rover.lanl.gov> References: <1169070021.2927.16.camel@rover.lanl.gov> Message-ID: <1169113186.23809.25.camel@swlab50.yok.mtl.com> On Wed, 2007-01-17 at 14:40 -0700, parks fields wrote: > When trying to install OFED1.1 on my completely updated AMD 64bit > Fedora core 6 machine I can't get past the following error. 
> > ERROR: The sysfsutils-devel package is required to build > libibverbs_devel RPM > > I have tried a custom install and not selecting libibverbs_devel but it > to to use it anyway. Even the basic install selects it. > > I have all these RPMs installed > > libsysfs-2.0.0-6 > sysfsutils-2.1.0-1 > sysfsutils-debuginfo-2.1.0 > libsysfs-devel-2.0.0-6 > > > > But no where on RPMfind or the rest of the net do I find > sysfsutils-devel for Fedora core 6 X86_64..... > > Even when I build sysfs from the tar ball I don't get a sysfsutils-devel > > Ideas ?? > OFED-1.1 does not support Fedora Core 6. If you want a workaround then replace sysfsutils-devel by libsysfs-devel in the OFED-1.1/build_env.sh. Regards, Vladimir > > thanks > parks > > > > > > > From mst at mellanox.co.il Thu Jan 18 02:06:12 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 12:06:12 +0200 Subject: [openib-general] [PATCH] fix thinko in libibverbs (was Re: [libibverbs] [PATCH] Added checks to memory allocation failure when using asprintf) In-Reply-To: References: <1168941329.12444.6.camel@mtls05.yok.mtl.com> Message-ID: <20070118100612.GA26822@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [libibverbs] [PATCH] Added checks to memory allocation failure when using asprintf > > Thanks, applied to master and stable branches of libibverbs. The patch you applied seems different from what Dotan has posted, so at least the libibverbs 1.1 branch currently does not work at all: # ibv_devinfo libibverbs: Fatal: couldn't read uverbs ABI version. Here's a fix which makes it work for me: asprintf returns the number of bytes printed, so the way to check for failure is < 0. Signed-off-by: Michael S. 
Tsirkin --- diff --git a/src/device.c b/src/device.c index f7ef224..96f906c 100644 --- a/src/device.c +++ b/src/device.c @@ -111,7 +111,7 @@ struct ibv_context *ibv_open_device(struct ibv_device *device) int cmd_fd; struct ibv_context *context; - if (asprintf(&devpath, "/dev/infiniband/%s", device->dev_name)) + if (asprintf(&devpath, "/dev/infiniband/%s", device->dev_name) < 0) return NULL; /* diff --git a/src/sysfs.c b/src/sysfs.c index 392a675..85aee39 100644 --- a/src/sysfs.c +++ b/src/sysfs.c @@ -82,7 +82,7 @@ int ibv_read_sysfs_file(const char *dir, const char *file, int fd; int len; - if (asprintf(&path, "%s/%s", dir, file)) + if (asprintf(&path, "%s/%s", dir, file) < 0) return -1; fd = open(path, O_RDONLY); -- MST From cap at nsc.liu.se Thu Jan 18 02:20:14 2007 From: cap at nsc.liu.se (Peter Kjellstrom) Date: Thu, 18 Jan 2007 11:20:14 +0100 Subject: [openib-general] OFED1.1 and Fed core 6 install question In-Reply-To: <1169113186.23809.25.camel@swlab50.yok.mtl.com> References: <1169070021.2927.16.camel@rover.lanl.gov> <1169113186.23809.25.camel@swlab50.yok.mtl.com> Message-ID: <200701181120.17915.cap@nsc.liu.se> On Thursday 18 January 2007 10:39, Vladimir Sokolovsky wrote: > On Wed, 2007-01-17 at 14:40 -0700, parks fields wrote: ... > OFED-1.1 does not support Fedora Core 6. > If you want a workaround then replace sysfsutils-devel by libsysfs-devel > in the OFED-1.1/build_env.sh. Also, fwiw, the IB-stuff in an updated fc6 is probably newer than OFED-1.1. @fc6 # yum list "libib*" ... libibverbs.i386 1.0.4-1.fc6 extras ... /Peter > Regards, > Vladimir > > > thanks > > parks -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available URL: From ogerlitz at voltaire.com Thu Jan 18 02:27:00 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 18 Jan 2007 12:27:00 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <20070117204954.GF4329@mellanox.co.il> References: <15ddcffd0701171027h39d089dfi15b4499b0fa34782@mail.gmail.com> <20070117204954.GF4329@mellanox.co.il> Message-ID: <45AF4B74.4000906@voltaire.com> Michael S. Tsirkin wrote: > Sounds too risky to me, this is technology preview code so > I want to have all this stuff off by default but easily > enabled by users who want to demo. I really don't want us to go again through things like yours (MST, Jack) vs. Sean rdma_establish, ucma versions etc. Like it or not, as was defined by the founders, OFED is --not-- a framework for development and unless there is a very specific reason (*) its kernel/user content should be based on code that has --passed through this component maintainer-- As has been said on this list, let's not treat OFED as a framework to shovel in unreviewed code. If you feel that your mthca and rdmacm QoS changes should be under CONFIG_EXPERIMENTAL, for-mm etc, specify this when you send the patches for review. Bottom line, let's not hide behind obscure definitions like "technology preview" to escape from normal processes where there -is- an alternative, the point here is not to meet the code freeze deadline avoiding normal processes - let's use processes and extend the deadline for the QoS merge if needed. (*) So far, the only case where people felt it makes sense to merge out of tree code was the local-sa and it is done by this component maintainer. > After I post the rest of the code, if you like you'll be able to > post an iser patch to add this stuff to iser as well. this is irrelevant till we resolve the process. 
From sashak at voltaire.com Thu Jan 18 02:41:00 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 12:41:00 +0200 Subject: [openib-general] [PATCH] opensm: osm_console: don't poll invalid file descriptors Message-ID: <20070118104100.GC22422@sashak.voltaire.com> Don't poll invalid file descriptors (-1), when listening socket is disabled or connection is not accepted yet. Signed-off-by: Sasha Khapyorsky --- osm/opensm/osm_console.c | 7 ++++++- 1 files changed, 6 insertions(+), 1 deletions(-) diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c index 9610e21..c6566af 100644 --- a/osm/opensm/osm_console.c +++ b/osm/opensm/osm_console.c @@ -446,6 +446,8 @@ void osm_console(osm_opensm_t *p_osm) char *p_line; size_t len; ssize_t n; + struct pollfd *fds; + nfds_t nfds; pollfd[0].fd = p_osm->console.socket; pollfd[0].events = POLLIN|POLLOUT; @@ -455,7 +457,10 @@ void osm_console(osm_opensm_t *p_osm) pollfd[1].events = POLLIN|POLLOUT; pollfd[1].revents = 0; - if (poll(pollfd, pollfd[1].fd >= 0 ? 2 : 1, 10000) <= 0) + fds = p_osm->console.socket < 0 ? &pollfd[1] : pollfd; + nfds = p_osm->console.socket < 0 || pollfd[1].fd < 0 ? 1 : 2; + + if (poll(fds, nfds, 10000) <= 0) return; #ifdef ENABLE_OSM_CONSOLE_SOCKET -- 1.5.0.rc0.g2484-dirty From sashak at voltaire.com Thu Jan 18 02:42:23 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 12:42:23 +0200 Subject: [openib-general] [PATCH] opensm: osm_console: don't poll for POLLOUT events. In-Reply-To: <20070118104100.GC22422@sashak.voltaire.com> References: <20070118104100.GC22422@sashak.voltaire.com> Message-ID: <20070118104223.GD22422@sashak.voltaire.com> Then connection is accepted POLLOUT event is up most of the time (but not handled) and osm_console() just loops without doing something. 
Signed-off-by: Sasha Khapyorsky --- osm/opensm/osm_console.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c index c6566af..af3a0f8 100644 --- a/osm/opensm/osm_console.c +++ b/osm/opensm/osm_console.c @@ -450,11 +450,11 @@ void osm_console(osm_opensm_t *p_osm) nfds_t nfds; pollfd[0].fd = p_osm->console.socket; - pollfd[0].events = POLLIN|POLLOUT; + pollfd[0].events = POLLIN; pollfd[0].revents = 0; pollfd[1].fd = p_osm->console.in_fd; - pollfd[1].events = POLLIN|POLLOUT; + pollfd[1].events = POLLIN; pollfd[1].revents = 0; fds = p_osm->console.socket < 0 ? &pollfd[1] : pollfd; -- 1.5.0.rc0.g2484-dirty From pasquale.davide at gmail.com Thu Jan 18 03:19:15 2007 From: pasquale.davide at gmail.com (Davide Pasquale) Date: Thu, 18 Jan 2007 12:19:15 +0100 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: <20070112112201.GB2802@mellanox.co.il> References: <20070112112201.GB2802@mellanox.co.il> Message-ID: Starting opensm I see this error in /var/log/osm.log: OpenSM Rev:openib-2.0.5 OpenIB svn Exported revision Jan 18 12:11:39 628147 [95AA8160] -> osm_vendor_bind: Binding to port 0x18feffff8c7a8d Jan 18 12:11:39 629557 [95AA8160] -> osm_vendor_bind: Binding to port 0x18feffff8c7a8d Jan 18 12:11:39 630605 [41401960] -> SM port is down Jan 18 12:11:39 630693 [41401960] -> __osm_sm_state_mgr_signal_error: ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state IB_SMINFO_STATE_DISCOVERING Jan 18 12:11:49 631170 [41E02960] -> SM port is down Jan 18 12:11:49 631238 [41E02960] -> __osm_sm_state_mgr_signal_error: ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state IB_SMINFO_STATE_DISCOVERING and the SM port is always down. Thanks. On 1/12/07, Michael S. Tsirkin wrote: > > > Quoting Davide Pasquale : > > Subject: Re: OFED 1.1 build issue > > > > > > > > > > No idea. 
> > Try running a preprocessor on flint.cpp (add -E flag to g++ line) > > find the relevant line in the output and see why does it try > > to pull in bitvector. > > > > -- > > MST > > > > > > Ok! > > > > > > I have successfullly compiled OFED.1.1 on my platform after a new > installation! > > > > Now how can I use opensm and openib utilities ? > > Thanks. > > Try reading man opensm. > > > -- > MST > -------------- next part -------------- An HTML attachment was scrubbed... URL: From tziporet at mellanox.co.il Thu Jan 18 03:42:19 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Thu, 18 Jan 2007 13:42:19 +0200 Subject: [openib-general] Suggestion to remove NAPI with IPoIB from OFED 1.2 release Message-ID: <45AF5D1B.2000704@mellanox.co.il> I suggest that in OFED 1.2 we will not include the NAPI support The reasons are: * IBM interrupt handler change to support NAPI will not be ready on time * IPoIB UD with NAPI was not tested thoroughly * IPoIB CM mode was tested without NAPI and got to a good stability. 
If no one objects I will change the OFED 1.2 plan Thanks, Tziporet From halr at voltaire.com Thu Jan 18 04:34:43 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 18 Jan 2007 07:34:43 -0500 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: References: <20070112112201.GB2802@mellanox.co.il> Message-ID: <1169123080.31746.67663.camel@hal.voltaire.com> On Thu, 2007-01-18 at 06:19, Davide Pasquale wrote: > Starting opensm I see this error in /var/log/osm.log: > > OpenSM Rev:openib-2.0.5 OpenIB svn Exported revision > Jan 18 12:11:39 628147 [95AA8160] -> osm_vendor_bind: Binding to port > 0x18feffff8c7a8d > Jan 18 12:11:39 629557 [95AA8160] -> osm_vendor_bind: Binding to port > 0x18feffff8c7a8d > Jan 18 12:11:39 630605 [41401960] -> SM port is down > Jan 18 12:11:39 630693 [41401960] -> __osm_sm_state_mgr_signal_error: > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > IB_SMINFO_STATE_DISCOVERING > Jan 18 12:11:49 631170 [41E02960] -> SM port is down > Jan 18 12:11:49 631238 [41E02960] -> __osm_sm_state_mgr_signal_error: > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > IB_SMINFO_STATE_DISCOVERING > > and the SM port is always down. The error message is benign. Is the SM port plugged into any other IB device ? -- Hal > > Thanks. > > > On 1/12/07, Michael S. Tsirkin wrote: > > Quoting Davide Pasquale : > > Subject: Re: OFED 1.1 build issue > > > > > > > > > > No idea. > > Try running a preprocessor on flint.cpp (add -E flag > to g++ line) > > find the relevant line in the output and see why does > it try > > to pull in bitvector. > > > > -- > > MST > > > > > > Ok! > > > > > > I have successfullly compiled OFED.1.1 on my platform after > a new installation! > > > > Now how can I use opensm and openib utilities ? > > Thanks. > > Try reading man opensm. 
> > > -- > MST > > > > ______________________________________________________________________ > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From pasquale.davide at gmail.com Thu Jan 18 05:02:32 2007 From: pasquale.davide at gmail.com (Davide Pasquale) Date: Thu, 18 Jan 2007 14:02:32 +0100 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: <1169123080.31746.67663.camel@hal.voltaire.com> References: <20070112112201.GB2802@mellanox.co.il> <1169123080.31746.67663.camel@hal.voltaire.com> Message-ID: On 18 Jan 2007 07:34:43 -0500, Hal Rosenstock wrote: > > On Thu, 2007-01-18 at 06:19, Davide Pasquale wrote: > > Starting opensm I see this error in /var/log/osm.log: > > > > OpenSM Rev:openib-2.0.5 OpenIB svn Exported revision > > Jan 18 12:11:39 628147 [95AA8160] -> osm_vendor_bind: Binding to port > > 0x18feffff8c7a8d > > Jan 18 12:11:39 629557 [95AA8160] -> osm_vendor_bind: Binding to port > > 0x18feffff8c7a8d > > Jan 18 12:11:39 630605 [41401960] -> SM port is down > > Jan 18 12:11:39 630693 [41401960] -> __osm_sm_state_mgr_signal_error: > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > > IB_SMINFO_STATE_DISCOVERING > > Jan 18 12:11:49 631170 [41E02960] -> SM port is down > > Jan 18 12:11:49 631238 [41E02960] -> __osm_sm_state_mgr_signal_error: > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > > IB_SMINFO_STATE_DISCOVERING > > > > and the SM port is always down. > > The error message is benign. > > Is the SM port plugged into any other IB device ? > > -- Hal Hi Hal, we are using HP Blade System and each blade has an infiniband card onboard. The SM port is plugged in the Infiniband switch internal to the blade enclosure. Is this information helpful for you ? -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From halr at voltaire.com Thu Jan 18 05:19:34 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 18 Jan 2007 08:19:34 -0500 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: References: <20070112112201.GB2802@mellanox.co.il> <1169123080.31746.67663.camel@hal.voltaire.com> Message-ID: <1169126162.31746.70598.camel@hal.voltaire.com> On Thu, 2007-01-18 at 08:02, Davide Pasquale wrote: > > On 18 Jan 2007 07:34:43 -0500, Hal Rosenstock > wrote: > On Thu, 2007-01-18 at 06:19, Davide Pasquale wrote: > > Starting opensm I see this error in /var/log/osm.log: > > > > OpenSM Rev:openib-2.0.5 OpenIB svn Exported revision > > Jan 18 12:11:39 628147 [95AA8160] -> osm_vendor_bind: > Binding to port > > 0x18feffff8c7a8d > > Jan 18 12:11:39 629557 [95AA8160] -> osm_vendor_bind: > Binding to port > > 0x18feffff8c7a8d > > Jan 18 12:11:39 630605 [41401960] -> SM port is down > > Jan 18 12:11:39 630693 [41401960] -> > __osm_sm_state_mgr_signal_error: > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > > IB_SMINFO_STATE_DISCOVERING > > Jan 18 12:11:49 631170 [41E02960] -> SM port is down > > Jan 18 12:11:49 631238 [41E02960] -> > __osm_sm_state_mgr_signal_error: > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > > IB_SMINFO_STATE_DISCOVERING > > > > and the SM port is always down. > > The error message is benign. > > Is the SM port plugged into any other IB device ? > > -- Hal > > Hi Hal, > > we are using HP Blade System and each blade has an infiniband card > onboard. > The SM port is plugged in the Infiniband switch internal to the blade > enclosure. > Is this information helpful for you ? The port being down has nothing to do with SM operation. For some reason, there is no connectivity or negotiation between the blades and the switch. -- Hal > > > > > From mst at mellanox.co.il Thu Jan 18 05:30:36 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 18 Jan 2007 15:30:36 +0200 Subject: [openib-general] RFC: shorten wq names Message-ID: <20070118133036.GD30391@mellanox.co.il> Sean, older kernels didn't allow work queue names > 10 chars. Since local sa is OFED only anyway, could you make life easier for us with backports by renaming the workqueue from local_sa_wq to local_sa? I would like to note that including _wq in workqueue name just wastes memory since workqueues can't be confused with regular processes in ps output, anyway. So I'd like to propose we shorten wq names a bit: ib_addr_wq -> ib_addr rdma_cm_wq -> rdma_cm iw_cm_wq -> iw_cm ib_mcast_wq -> ib_mcast OK? -- MST From pasquale.davide at gmail.com Thu Jan 18 05:52:42 2007 From: pasquale.davide at gmail.com (Davide Pasquale) Date: Thu, 18 Jan 2007 14:52:42 +0100 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: <1169126162.31746.70598.camel@hal.voltaire.com> References: <20070112112201.GB2802@mellanox.co.il> <1169123080.31746.67663.camel@hal.voltaire.com> <1169126162.31746.70598.camel@hal.voltaire.com> Message-ID: On 18 Jan 2007 08:19:34 -0500, Hal Rosenstock wrote: > > On Thu, 2007-01-18 at 08:02, Davide Pasquale wrote: > > > > On 18 Jan 2007 07:34:43 -0500, Hal Rosenstock > > wrote: > > On Thu, 2007-01-18 at 06:19, Davide Pasquale wrote: > > > Starting opensm I see this error in /var/log/osm.log: > > > > > > OpenSM Rev:openib-2.0.5 OpenIB svn Exported revision > > > Jan 18 12:11:39 628147 [95AA8160] -> osm_vendor_bind: > > Binding to port > > > 0x18feffff8c7a8d > > > Jan 18 12:11:39 629557 [95AA8160] -> osm_vendor_bind: > > Binding to port > > > 0x18feffff8c7a8d > > > Jan 18 12:11:39 630605 [41401960] -> SM port is down > > > Jan 18 12:11:39 630693 [41401960] -> > > __osm_sm_state_mgr_signal_error: > > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > > > IB_SMINFO_STATE_DISCOVERING > > > Jan 18 12:11:49 631170 [41E02960] -> SM port is down > > > Jan 18 12:11:49 631238 [41E02960] -> > > 
__osm_sm_state_mgr_signal_error: > > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in state > > > IB_SMINFO_STATE_DISCOVERING > > > > > > and the SM port is always down. > > > > The error message is benign. > > > > Is the SM port plugged into any other IB device ? > > > > -- Hal > > > > Hi Hal, > > > > we are using HP Blade System and each blade has an infiniband card > > onboard. > > The SM port is plugged in the Infiniband switch internal to the blade > > enclosure. > > Is this information helpful for you ? > > The port being down has nothing to do with SM operation. For some > reason, there is no connectivity or negotiation between the blades and > the switch. > > -- Hal > > > > > > > > > > > > > Thanks! What can I look to in order to solve this problem ? Regards, Davide. -------------- next part -------------- An HTML attachment was scrubbed... URL: From halr at voltaire.com Thu Jan 18 06:01:45 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 18 Jan 2007 09:01:45 -0500 Subject: [openib-general] OFED 1.1 build issue In-Reply-To: References: <20070112112201.GB2802@mellanox.co.il> <1169123080.31746.67663.camel@hal.voltaire.com> <1169126162.31746.70598.camel@hal.voltaire.com> Message-ID: <1169128895.31746.73017.camel@hal.voltaire.com> On Thu, 2007-01-18 at 08:52, Davide Pasquale wrote: > On 18 Jan 2007 08:19:34 -0500, Hal Rosenstock > wrote: > On Thu, 2007-01-18 at 08:02, Davide Pasquale wrote: > > > > On 18 Jan 2007 07:34:43 -0500, Hal Rosenstock > > > wrote: > > On Thu, 2007-01-18 at 06:19, Davide Pasquale wrote: > > > Starting opensm I see this error in > /var/log/osm.log: > > > > > > OpenSM Rev:openib-2.0.5 OpenIB svn Exported > revision > > > Jan 18 12:11:39 628147 [95AA8160] -> > osm_vendor_bind: > > Binding to port > > > 0x18feffff8c7a8d > > > Jan 18 12:11:39 629557 [95AA8160] -> > osm_vendor_bind: > > Binding to port > > > 0x18feffff8c7a8d > > > Jan 18 12:11:39 630605 [41401960] -> SM port is > down > > > Jan 18 12:11:39 630693 [41401960] -> > > 
__osm_sm_state_mgr_signal_error: > > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in > state > > > IB_SMINFO_STATE_DISCOVERING > > > Jan 18 12:11:49 631170 [41E02960] -> SM port is > down > > > Jan 18 12:11:49 631238 [41E02960] -> > > __osm_sm_state_mgr_signal_error: > > > ERR 3207: Invalid signal OSM_SM_SIGNAL_DISCOVER in > state > > > IB_SMINFO_STATE_DISCOVERING > > > > > > and the SM port is always down. > > > > The error message is benign. > > > > Is the SM port plugged into any other IB device ? > > > > -- Hal > > > > Hi Hal, > > > > we are using HP Blade System and each blade has an > infiniband card > > onboard. > > The SM port is plugged in the Infiniband switch internal to > the blade > > enclosure. > > Is this information helpful for you ? > > The port being down has nothing to do with SM operation. For > some > reason, there is no connectivity or negotiation between the > blades and > the switch. > > -- Hal > > > > > > > > > > > > > > Thanks! > What can I look to in order to solve this problem ? I don't know the HP blade system so the only thing I can say to try is to unseat and reseat all the blades (HCAs and switch(es)) to see if this resolves the problem. If it doesn't, I have no clue. -- Hal > > Regards, > Davide. > > From ogerlitz at voltaire.com Thu Jan 18 06:10:46 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 18 Jan 2007 16:10:46 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <20070117135204.GJ13720@mellanox.co.il> References: <45ACD2F6.8010705@voltaire.com> <20070116133507.GC22940@mellanox.co.il> <45ACE53A.9010605@voltaire.com> <45AE1C10.70003@voltaire.com> <20070117135204.GJ13720@mellanox.co.il> Message-ID: <45AF7FE6.60200@voltaire.com> Michael S. Tsirkin wrote: >> However, since understanding this patch in detail is important to a peer >> member individual/company of the community (myself/Voltaire)fo/openib-general > > I really would like to help. What is it that you want to know? 
> Here's an explanation from an older mail. Does this help? > > Work around for neighbour destructor issue for kernels < 2.6.17: > keep a global list of all ipoib neighbours. Use it in destructor to > 1. Verify that this neighbour belongs to an ipoib device > 2. Check that the neighbour is the last one to use the destructor, > if so reset the destructor pointer OK, thanks for the info. The context here is the bonding support. We had an issue with distro (eg RH4 U3, SLES10) kernels that was not reproduced with upstream kernels and it seems to be related to the change you have pushed to 2.6.17. I will let you know if we need more clarifications. Or. From ogerlitz at voltaire.com Thu Jan 18 06:42:50 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 18 Jan 2007 16:42:50 +0200 Subject: [openib-general] multicast code/merge status In-Reply-To: <45AE77B9.1080508@ichips.intel.com> References: <000301c73035$223950d0$8698070a@amr.corp.intel.com> <45A52512.1040003@ichips.intel.com> <15ddcffd0701101047t1d3c8c31k71def243136e9784@mail.gmail.com> <45A5373C.2060306@ichips.intel.com> <45ACECD3.90106@voltaire.com> <45ACFED0.9050000@ichips.intel.com> <45AD0627.7050305@voltaire.com> <45AD0D71.9040705@ichips.intel.com> <45AE01D9.3000107@voltaire.com> <45AE61B3.3010002@ichips.intel.com> <15ddcffd0701171018i14069211v6d7ee653e32b3e6a@mail.gmail.com> <45AE77B9.1080508@ichips.intel.com> Message-ID: <45AF876A.5050501@voltaire.com> > This is fine, but it may change when the user needs to make this > choice. E.g. when creating the QP, versus joining the multicast group, > in order to support the valid options. The selection also needs to be > conveyed to the kernel somehow. At this point, maybe we just need to > start looking at specific implementations. Indeed. I will send a patch early next week. Or. From jwm at prairieinet.net Thu Jan 18 07:28:16 2007 From: jwm at prairieinet.net (John W. 
Marland) Date: Thu, 18 Jan 2007 09:28:16 -0600 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070118043923.GC25305@mellanox.co.il> References: <1169072570.7052.46.camel@brick.pathscale.com> <20070118043923.GC25305@mellanox.co.il> Message-ID: <45AF9210.4090401@prairieinet.net> Michael S. Tsirkin wrote: >>Quoting Ralph Campbell : >>Subject: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 >> >>IB/core - ib_umad can cause address alignment fault >> >>In user_mad.c, the definition for struct ib_umad_packet includes >>struct ib_user_mad at an odd 32-bit offset. When ib_umad_write() >>tries to assign rmpp_mad->mad_hdr.tid, there is an alignment fault on >>architectures which have strict alignment for load/stores. >>This patch fixes the problem by changing the offset on which >>struct ib_user_mad is defined within struct ib_umad_packet. >> >>Thanks go to John W. Marland for finding this. >> >>Signed-off-by: Ralph Campbell >> >>diff -r b1128b48dc99 drivers/infiniband/core/user_mad.c >>--- a/drivers/infiniband/core/user_mad.c Fri Jan 12 20:00:03 2007 +0000 >>+++ b/drivers/infiniband/core/user_mad.c Wed Jan 17 14:09:37 2007 -0800 >>@@ -125,7 +125,7 @@ struct ib_umad_packet { >> struct ib_mad_send_buf *msg; >> struct ib_mad_recv_wc *recv_wc; >> struct list_head list; >>- int length; >>+ long length; >> struct ib_user_mad mad; >> }; >> >> > >This does not make sense to me - do we have to replace all int fields with long >now? Looks like a compiler or makefile bug in your setup - struct fields should >be naturally aligned. > > > We should probably have given a more complete explanation. The unaligned access hits in two places, that I've tracked down so far. The one where it's easiest to see what's happening is in ib_umad_write. 
______________________________________________________________________________________ if (!ib_response_mad(packet->msg->mad)) { tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); ---> this line causes the access problem rmpp_mad->mad_hdr.tid = *tid; } ________________________________________________________________________________________ The rmpp_mad variable is an ib_rmpp_mad pointer that is initialized from the packet->mad.data early in the function. Because the ib_umad_packet structure has a as it's last element an ib_user_mad structure, not a pointer to one, but the structure. This means that the Data[0] declaration at the end of the ib_umad structure is forced onto a 4 byte boundary. A chunk-o-memory is allocated to encapsulate the ib_umad_packet with the ib_user_mad AND enough space for a user created rmpp header. So the incoming structure is copied into the Data[0] area - and tid ends up on a 4 byte boundary even thought it's an 8 byte wide variable. Nothing to do with the compiler - more like pilot error :-) --------- Other possible solutions would be to exchange the position of the tid with the 16 bit variable before it or the one after it. That would put it on a correct alignment, but what about when someone else attaches the ib_mad_hdr to some other random place? Another solution would be to make the ib_user_mad structure in the ib_umad_packet into a pointer, but that would mean another kzalloc. The best solution general rule is probably to make sure that if you are going to embed a structure in another, that it's embedded on an $(ARCH_SIZE) boundary. I could have used an int PADD; variable but since there was a length field I figured that no one would mind if a longer length was possible. ....JW John W. Marland System Fabric Works From mst at mellanox.co.il Thu Jan 18 07:26:26 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 18 Jan 2007 17:26:26 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <45AF7FE6.60200@voltaire.com> References: <45AF7FE6.60200@voltaire.com> Message-ID: <20070118152626.GA4298@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: OFED ipoib_8111_to_2_6_16.patch > > Michael S. Tsirkin wrote: > >> However, since understanding this patch in detail is important to a peer > >> member individual/company of the community (myself/Voltaire)fo/openib-general > > > > I really would like to help. What is it that you want to know? > > Here's an explanation from an older mail. Does this help? > > > > Work around for neighbour destructor issue for kernels < 2.6.17: > > keep a global list of all ipoib neighbours. Use it in destructor to > > 1. Verify that this neighbour belongs to an ipoib device > > 2. Check that the neighbour is the last one to use the destructor, > > if so reset the destructor pointer > > OK, thanks for the info. The context here is the bonding support. We had > an issue with distro (eg RH4 U3, SLES10) kernels that was not reproduced > with upstream kernels and it seems to be related to the change you have > pushed to 2.6.17. I will let you know if we need more clarifications. Was the issue triggered at ipoib module unload? -- MST From vlad at mellanox.co.il Thu Jan 18 08:12:17 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:12:17 +0200 Subject: [openib-general] [PATCH RFC 01/10] ofed_1_2 Chelsio backport to 2.6.19 In-Reply-To: <20070117194951.30830.98084.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117194951.30830.98084.stgit@dell3.ogc.int> Message-ID: <1169136740.23809.57.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:49 -0600, Steve Wise wrote: > Chelsio backport to 2.6.19 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.19/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.19/include/linux/workqueue.h | 9 + > .../backport/2.6.19/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.19/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.19/linux_genalloc_to_2_6_20.patch | 17 ++ > 5 files changed, 277 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.19/include/linux/genalloc.h b/kernel_addons/backport/2.6.19/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.19/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.19/include/linux/workqueue.h b/kernel_addons/backport/2.6.19/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.19/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.19/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > 
#define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.19/include/src/genalloc.c b/kernel_addons/backport/2.6.19/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.19/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include <linux/module.h> > +#include <linux/genalloc.h> > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. 
> + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. > + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. 
> + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. > + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + 
spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. > + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_patches/backport/2.6.19/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.19/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.19/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + 
obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.19/linux_genalloc_to_2_6_20.patch b/kernel_patches/backport/2.6.19/linux_genalloc_to_2_6_20.patch > new file mode 100644 > index 0000000..93fee2b > --- /dev/null > +++ b/kernel_patches/backport/2.6.19/linux_genalloc_to_2_6_20.patch > @@ -0,0 +1,17 @@ > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 163d991..2cd239f 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..96a48fe > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:12:47 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:12:47 +0200 Subject: [openib-general] [PATCH RFC 02/10] ofed_1_2 Chelsio backport to 2.6.18 In-Reply-To: <20070117194953.30830.97017.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117194953.30830.97017.stgit@dell3.ogc.int> Message-ID: <1169136767.23809.59.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:49 -0600, Steve Wise wrote: > Chelsio backport to 2.6.18 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.18/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.18/include/linux/interrupt.h | 17 ++ > .../backport/2.6.18/include/linux/netdevice.h | 9 + > .../backport/2.6.18/include/linux/random.h | 15 ++ > .../backport/2.6.18/include/linux/skbuff.h | 1 > .../backport/2.6.18/include/linux/workqueue.h | 9 + > .../backport/2.6.18/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.18/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.18/linux_genalloc_to_2_6_20.patch | 17 ++ > 9 files changed, 319 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.18/include/linux/genalloc.h b/kernel_addons/backport/2.6.18/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.18/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.18/include/linux/interrupt.h b/kernel_addons/backport/2.6.18/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.18/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.18/include/linux/netdevice.h b/kernel_addons/backport/2.6.18/include/linux/netdevice.h > new file mode 100644 > index 0000000..61a6deb > --- /dev/null > +++ b/kernel_addons/backport/2.6.18/include/linux/netdevice.h > @@ -0,0 +1,9 @@ > +#ifndef BACKPORT_LINUX_NETDEVICE_TO_2_6_18 > +#define BACKPORT_LINUX_NETDEVICE_TO_2_6_18 > 
+#include_next > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > +#endif > diff --git a/kernel_addons/backport/2.6.18/include/linux/random.h b/kernel_addons/backport/2.6.18/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.18/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.18/include/linux/skbuff.h b/kernel_addons/backport/2.6.18/include/linux/skbuff.h > index 4845283..ca5edc0 100644 > --- a/kernel_addons/backport/2.6.18/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.18/include/linux/skbuff.h > @@ -4,5 +4,6 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > > #endif > diff --git a/kernel_addons/backport/2.6.18/include/linux/workqueue.h b/kernel_addons/backport/2.6.18/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.18/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.18/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, 
&(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.18/include/src/genalloc.c b/kernel_addons/backport/2.6.18/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.18/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include <linux/module.h> > +#include <linux/genalloc.h> > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. 
> + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. > + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. 
> + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. > + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + 
spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. > + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_patches/backport/2.6.18/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.18/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.18/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + 
obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.18/linux_genalloc_to_2_6_20.patch b/kernel_patches/backport/2.6.18/linux_genalloc_to_2_6_20.patch > new file mode 100644 > index 0000000..93fee2b > --- /dev/null > +++ b/kernel_patches/backport/2.6.18/linux_genalloc_to_2_6_20.patch > @@ -0,0 +1,17 @@ > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 163d991..2cd239f 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..96a48fe > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:12:59 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:12:59 +0200 Subject: [openib-general] [PATCH RFC 03/10] ofed_1_2 Chelsio backport to 2.6.17 In-Reply-To: <20070117194955.30830.69761.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117194955.30830.69761.stgit@dell3.ogc.int> Message-ID: <1169136779.23809.61.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:49 -0600, Steve Wise wrote: > Chelsio backport to 2.6.17 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.17/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.17/include/linux/interrupt.h | 17 ++ > .../backport/2.6.17/include/linux/netdevice.h | 4 > .../backport/2.6.17/include/linux/random.h | 15 ++ > .../backport/2.6.17/include/linux/skbuff.h | 3 > .../backport/2.6.17/include/linux/workqueue.h | 9 + > .../backport/2.6.17/include/net/netevent.h | 33 ++++ > .../backport/2.6.17/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.17/include/src/netevent.c | 69 ++++++++ > .../backport/2.6.17/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.17/linux_stuff_to_2_6_17.patch | 24 +++ > 11 files changed, 425 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.17/include/linux/genalloc.h b/kernel_addons/backport/2.6.17/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.17/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.17/include/linux/interrupt.h b/kernel_addons/backport/2.6.17/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.17/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.17/include/linux/netdevice.h b/kernel_addons/backport/2.6.17/include/linux/netdevice.h > index 5641019..225eeda 100644 > --- a/kernel_addons/backport/2.6.17/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.17/include/linux/netdevice.h > @@ -15,4 +15,8 @@ static inline void netif_tx_unlock(struc > 
spin_unlock(&dev->xmit_lock); > } > > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.17/include/linux/random.h b/kernel_addons/backport/2.6.17/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.17/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.17/include/linux/skbuff.h b/kernel_addons/backport/2.6.17/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.17/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.17/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.17/include/linux/workqueue.h b/kernel_addons/backport/2.6.17/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.17/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.17/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - 
struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.17/include/net/netevent.h b/kernel_addons/backport/2.6.17/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.17/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.17/include/src/genalloc.c b/kernel_addons/backport/2.6.17/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.17/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. 
> + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.17/include/src/netevent.c b/kernel_addons/backport/2.6.17/include/src/netevent.c > new file mode 100644 > index 0000000..35d02c3 > --- /dev/null > +++ b/kernel_addons/backport/2.6.17/include/src/netevent.c > @@ -0,0 +1,69 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > + > +static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = atomic_notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * unregister_netevent_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_netevent_notifier(). The notifier is unlinked from the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all netevent notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return atomic_notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.17/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.17/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.17/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch 
b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:13:19 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:13:19 +0200 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to 2.6.15 In-Reply-To: <20070117195000.30830.84557.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195000.30830.84557.stgit@dell3.ogc.int> Message-ID: <1169136799.23809.65.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > Backport cxgb3 to 2.6.15 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.15/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.15/include/linux/interrupt.h | 17 ++ > .../backport/2.6.15/include/linux/netdevice.h | 9 + > .../backport/2.6.15/include/linux/random.h | 15 ++ > .../backport/2.6.15/include/linux/skbuff.h | 3 > .../backport/2.6.15/include/linux/types.h | 6 + > .../backport/2.6.15/include/linux/workqueue.h | 9 + > .../backport/2.6.15/include/net/netevent.h | 33 ++++ > .../backport/2.6.15/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.15/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.15/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.15/linux_stuff_to_2_6_17.patch | 24 +++ > 12 files changed, 438 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.15/include/linux/genalloc.h b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.15/include/linux/interrupt.h b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/netdevice.h b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > index 5641019..2f12781 100644 > --- a/kernel_addons/backport/2.6.15/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > 
spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/random.h b/kernel_addons/backport/2.6.15/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/skbuff.h b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.15/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/types.h b/kernel_addons/backport/2.6.15/include/linux/types.h > new file mode 100644 > index 0000000..86e334f > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/types.h > @@ -0,0 +1,6 @@ > +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 > +#define BACKPORT_LINUX_TYPES_TO_2_6_15 > +#include_next > + > +#define BITS_PER_BYTE 8 > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/workqueue.h b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.15/include/linux/workqueue.h > +++ 
b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.15/include/net/netevent.h b/kernel_addons/backport/2.6.15/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int 
call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/src/genalloc.c b/kernel_addons/backport/2.6.15/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.15/include/src/netevent.c b/kernel_addons/backport/2.6.15/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * unregister_netevent_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_netevent_notifier(). The notifier is unlinked from the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all netevent notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch 
b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:13:09 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:13:09 +0200 Subject: [openib-general] [PATCH RFC 04/10] ofed_1_2 Chelsio backport to 2.6.16 In-Reply-To: <20070117194958.30830.21707.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117194958.30830.21707.stgit@dell3.ogc.int> Message-ID: <1169136790.23809.63.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:49 -0600, Steve Wise wrote: > Chelsio backport to 2.6.16 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.16/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.16/include/linux/interrupt.h | 17 ++ > .../backport/2.6.16/include/linux/netdevice.h | 4 > .../backport/2.6.16/include/linux/random.h | 15 ++ > .../backport/2.6.16/include/linux/skbuff.h | 3 > .../backport/2.6.16/include/linux/workqueue.h | 9 + > .../backport/2.6.16/include/net/netevent.h | 33 ++++ > .../backport/2.6.16/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.16/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.16/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.16/linux_stuff_to_2_6_17.patch | 24 +++ > 11 files changed, 427 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.16/include/linux/genalloc.h b/kernel_addons/backport/2.6.16/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.16/include/linux/interrupt.h b/kernel_addons/backport/2.6.16/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next <linux/interrupt.h> > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.16/include/linux/netdevice.h b/kernel_addons/backport/2.6.16/include/linux/netdevice.h > index 5641019..225eeda 100644 > --- a/kernel_addons/backport/2.6.16/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.16/include/linux/netdevice.h > @@ -15,4 +15,8 @@ static inline void netif_tx_unlock(struc > 
spin_unlock(&dev->xmit_lock); > } > > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.16/include/linux/random.h b/kernel_addons/backport/2.6.16/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.16/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next <linux/random.h> > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.16/include/linux/skbuff.h b/kernel_addons/backport/2.6.16/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.16/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.16/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next <linux/skbuff.h> > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.16/include/linux/workqueue.h b/kernel_addons/backport/2.6.16/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.16/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.16/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - 
struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.16/include/net/netevent.h b/kernel_addons/backport/2.6.16/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.16/include/src/genalloc.c b/kernel_addons/backport/2.6.16/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.16/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. 
> + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.16/include/src/netevent.c b/kernel_addons/backport/2.6.16/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.16/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.16/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.16/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.16/linux_stuff_to_2_6_17.patch 
b/kernel_patches/backport/2.6.16/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.16/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:13:29 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:13:29 +0200 Subject: [openib-general] [PATCH RFC 06/10] ofed_1_2 Backport chelsio to 2.6.14 In-Reply-To: <20070117195002.30830.75306.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195002.30830.75306.stgit@dell3.ogc.int> Message-ID: <1169136809.23809.67.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > Backport chelsio to 2.6.14 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.14/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.14/include/linux/interrupt.h | 17 ++ > .../backport/2.6.14/include/linux/netdevice.h | 9 + > .../backport/2.6.14/include/linux/random.h | 15 ++ > .../backport/2.6.14/include/linux/skbuff.h | 3 > .../backport/2.6.14/include/linux/types.h | 6 + > .../backport/2.6.14/include/linux/workqueue.h | 9 + > .../backport/2.6.14/include/net/netevent.h | 33 ++++ > .../backport/2.6.14/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.14/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.14/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.14/linux_stuff_to_2_6_17.patch | 24 +++ > 12 files changed, 438 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.14/include/linux/genalloc.h b/kernel_addons/backport/2.6.14/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.14/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.14/include/linux/interrupt.h b/kernel_addons/backport/2.6.14/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.14/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next <linux/interrupt.h> > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.14/include/linux/netdevice.h b/kernel_addons/backport/2.6.14/include/linux/netdevice.h > index 5641019..2f12781 100644 > --- a/kernel_addons/backport/2.6.14/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.14/include/linux/netdevice.h > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > 
spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.14/include/linux/random.h b/kernel_addons/backport/2.6.14/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.14/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next <linux/random.h> > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.14/include/linux/skbuff.h b/kernel_addons/backport/2.6.14/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.14/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.14/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next <linux/skbuff.h> > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.14/include/linux/types.h b/kernel_addons/backport/2.6.14/include/linux/types.h > new file mode 100644 > index 0000000..86e334f > --- /dev/null > +++ b/kernel_addons/backport/2.6.14/include/linux/types.h > @@ -0,0 +1,6 @@ > +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 > +#define BACKPORT_LINUX_TYPES_TO_2_6_15 > +#include_next <linux/types.h> > + > +#define BITS_PER_BYTE 8 > +#endif > diff --git a/kernel_addons/backport/2.6.14/include/linux/workqueue.h b/kernel_addons/backport/2.6.14/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.14/include/linux/workqueue.h > +++ 
b/kernel_addons/backport/2.6.14/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.14/include/net/netevent.h b/kernel_addons/backport/2.6.14/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.14/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int 
call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.14/include/src/genalloc.c b/kernel_addons/backport/2.6.14/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.14/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.14/include/src/netevent.c b/kernel_addons/backport/2.6.14/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.14/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.14/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.14/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.14/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.14/linux_stuff_to_2_6_17.patch 
b/kernel_patches/backport/2.6.14/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.14/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:13:39 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:13:39 +0200 Subject: [openib-general] [PATCH RFC 07/10] ofed_1_2 Backport Chelsio to 2.6.13 In-Reply-To: <20070117195004.30830.52168.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195004.30830.52168.stgit@dell3.ogc.int> Message-ID: <1169136820.23809.69.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > Backport Chelsio to 2.6.13 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.13/include/linux/ethtool.h | 9 + > .../backport/2.6.13/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.13/include/linux/interrupt.h | 17 ++ > .../backport/2.6.13/include/linux/netdevice.h | 9 + > .../backport/2.6.13/include/linux/random.h | 15 ++ > .../backport/2.6.13/include/linux/skbuff.h | 3 > .../backport/2.6.13/include/linux/types.h | 2 > .../backport/2.6.13/include/linux/workqueue.h | 9 + > .../backport/2.6.13/include/net/netevent.h | 33 ++++ > .../backport/2.6.13/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.13/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.13/cxgb3_main_to_2_6_13.patch | 12 + > .../backport/2.6.13/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.13/linux_stuff_to_2_6_17.patch | 24 +++ > .../backport/2.6.13/t3_hw_to_2_6_13.patch | 13 ++ > 15 files changed, 468 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.13/include/linux/ethtool.h b/kernel_addons/backport/2.6.13/include/linux/ethtool.h > new file mode 100644 > index 0000000..d03127c > --- /dev/null > +++ b/kernel_addons/backport/2.6.13/include/linux/ethtool.h > @@ -0,0 +1,9 @@ > +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > + > +#include_next > + > +#define ADVERTISED_Pause (1 << 13) > +#define ADVERTISED_Asym_Pause (1 << 14) > + > +#endif > diff --git a/kernel_addons/backport/2.6.13/include/linux/genalloc.h b/kernel_addons/backport/2.6.13/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.13/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. 
> + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. > + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.13/include/linux/interrupt.h b/kernel_addons/backport/2.6.13/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.13/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return 
request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.13/include/linux/netdevice.h b/kernel_addons/backport/2.6.13/include/linux/netdevice.h > index 5641019..2f12781 100644 > --- a/kernel_addons/backport/2.6.13/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.13/include/linux/netdevice.h > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.13/include/linux/random.h b/kernel_addons/backport/2.6.13/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.13/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.13/include/linux/skbuff.h b/kernel_addons/backport/2.6.13/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.13/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.13/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.13/include/linux/types.h b/kernel_addons/backport/2.6.13/include/linux/types.h > 
index c06977a..53c7a33 100644 > --- a/kernel_addons/backport/2.6.13/include/linux/types.h > +++ b/kernel_addons/backport/2.6.13/include/linux/types.h > @@ -7,4 +7,6 @@ #ifdef __KERNEL__ > typedef unsigned int gfp_t; > #endif > > +#define BITS_PER_BYTE 8 > + > #endif > diff --git a/kernel_addons/backport/2.6.13/include/linux/workqueue.h b/kernel_addons/backport/2.6.13/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.13/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.13/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.13/include/net/netevent.h b/kernel_addons/backport/2.6.13/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.13/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > 
+ > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.13/include/src/genalloc.c b/kernel_addons/backport/2.6.13/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.13/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. 
> + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. > + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. 
> + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. > + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + 
spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. > + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.13/include/src/netevent.c b/kernel_addons/backport/2.6.13/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.13/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) 
any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. > + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). 
> + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.13/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.13/cxgb3_main_to_2_6_13.patch > new file mode 100644 > index 0000000..e6781f3 > --- /dev/null > +++ b/kernel_patches/backport/2.6.13/cxgb3_main_to_2_6_13.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > +index dfa035a..414ea84 100755 > +--- a/drivers/net/cxgb3/cxgb3_main.c > ++++ b/drivers/net/cxgb3/cxgb3_main.c > +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth > + .get_wol = get_wol, > + .get_tso = ethtool_op_get_tso, > + .set_tso = ethtool_op_set_tso, > +- .get_perm_addr = ethtool_op_get_perm_addr > + }; > + > + static int in_range(int val, int lo, int hi) > diff --git a/kernel_patches/backport/2.6.13/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.13/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.13/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.13/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.13/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.13/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git 
a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > diff --git a/kernel_patches/backport/2.6.13/t3_hw_to_2_6_13.patch b/kernel_patches/backport/2.6.13/t3_hw_to_2_6_13.patch > new file mode 100644 > index 0000000..611e9dc > --- /dev/null > +++ b/kernel_patches/backport/2.6.13/t3_hw_to_2_6_13.patch > @@ -0,0 +1,13 @@ > +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > +index 14ea6b9..f13f581 100755 > +--- a/drivers/net/cxgb3/t3_hw.c > ++++ b/drivers/net/cxgb3/t3_hw.c > +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada > + > + memcpy(adapter->port[i]->dev_addr, hw_addr, > + ETH_ALEN); > +- memcpy(adapter->port[i]->perm_addr, hw_addr, > +- ETH_ALEN); > + init_link_config(&p->link_config, p->port_type->caps); > + p->phy.ops->power_down(&p->phy, 1); > + if (!(p->port_type->caps & SUPPORTED_IRQ)) > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From mst at mellanox.co.il Thu Jan 18 08:14:31 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 18 Jan 2007 18:14:31 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <45AF9210.4090401@prairieinet.net> References: <1169072570.7052.46.camel@brick.pathscale.com> <20070118043923.GC25305@mellanox.co.il> <45AF9210.4090401@prairieinet.net> Message-ID: <20070118161430.GB4298@mellanox.co.il> > Quoting John W. Marland : > Subject: Re: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > Michael S. Tsirkin wrote: > > >>Quoting Ralph Campbell : > >>Subject: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > >> > >>IB/core - ib_umad can cause address alignment fault > >> > >>In user_mad.c, the definition for struct ib_umad_packet includes > >>struct ib_user_mad at an odd 32-bit offset. When ib_umad_write() > >>tries to assign rmpp_mad->mad_hdr.tid, there is an alignment fault on > >>architectures which have strict alignment for load/stores. > >>This patch fixes the problem by changing the offset on which > >>struct ib_user_mad is defined within struct ib_umad_packet. > >> > >>Thanks go to John W. Marland for finding this. > >> > >>Signed-off-by: Ralph Campbell > >> > >>diff -r b1128b48dc99 drivers/infiniband/core/user_mad.c > >>--- a/drivers/infiniband/core/user_mad.c Fri Jan 12 20:00:03 2007 +0000 > >>+++ b/drivers/infiniband/core/user_mad.c Wed Jan 17 14:09:37 2007 -0800 > >>@@ -125,7 +125,7 @@ struct ib_umad_packet { > >> struct ib_mad_send_buf *msg; > >> struct ib_mad_recv_wc *recv_wc; > >> struct list_head list; > >>- int length; > >>+ long length; > >> struct ib_user_mad mad; > >> }; > >> > >> > > > >This does not make sense to me - do we have to replace all int fields with long > >now? Looks like a compiler or makefile bug in your setup - struct fields should > >be naturally aligned. > > > > > > > We should probably have given a more complete explanation. The > unaligned access hits in two places, that I've tracked down so far. 
> The one where it's easiest to see what's happening is in ib_umad_write. > ______________________________________________________________________________________ > if (!ib_response_mad(packet->msg->mad)) { > tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; > *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | > (be64_to_cpup(tid) & 0xffffffff)); > > ---> this line causes the access problem > rmpp_mad->mad_hdr.tid = *tid; > } > ________________________________________________________________________________________ > The rmpp_mad variable is an ib_rmpp_mad pointer that is initialized > from the packet->mad.data early in the function. > The ib_umad_packet structure has as its last element an > ib_user_mad structure: not a pointer to one, but the structure itself. > This means that the Data[0] declaration at the end of the ib_umad > structure is forced onto a 4 byte boundary. So the issue is that we are casting char *data, which has no alignment guarantees, to a 64 bit number. We really must find a way to force 64 bit alignment for struct ib_user_mad all over. Would not something like the following simple trick work? struct ib_user_mad_hdr { ............. } __attribute__((aligned (8))); -- MST From vlad at mellanox.co.il Thu Jan 18 08:13:48 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:13:48 +0200 Subject: [openib-general] [PATCH RFC 08/10] ofed_1_2 Backport Chelsio to 2.6.12 In-Reply-To: <20070117195006.30830.96595.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195006.30830.96595.stgit@dell3.ogc.int> Message-ID: <1169136829.23809.71.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > Backport Chelsio to 2.6.12 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.12/include/linux/ethtool.h | 9 + > .../backport/2.6.12/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.12/include/linux/interrupt.h | 17 ++ > .../backport/2.6.12/include/linux/netdevice.h | 9 + > .../backport/2.6.12/include/linux/random.h | 15 ++ > .../backport/2.6.12/include/linux/skbuff.h | 3 > .../backport/2.6.12/include/linux/types.h | 2 > .../backport/2.6.12/include/linux/workqueue.h | 9 + > .../backport/2.6.12/include/net/netevent.h | 33 ++++ > .../backport/2.6.12/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.12/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.12/cxgb3_main_to_2_6_13.patch | 12 + > .../backport/2.6.12/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.12/linux_stuff_to_2_6_17.patch | 24 +++ > .../backport/2.6.12/t3_hw_to_2_6_13.patch | 13 ++ > 15 files changed, 468 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.12/include/linux/ethtool.h b/kernel_addons/backport/2.6.12/include/linux/ethtool.h > new file mode 100644 > index 0000000..d03127c > --- /dev/null > +++ b/kernel_addons/backport/2.6.12/include/linux/ethtool.h > @@ -0,0 +1,9 @@ > +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > + > +#include_next > + > +#define ADVERTISED_Pause (1 << 13) > +#define ADVERTISED_Asym_Pause (1 << 14) > + > +#endif > diff --git a/kernel_addons/backport/2.6.12/include/linux/genalloc.h b/kernel_addons/backport/2.6.12/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.12/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. 
> + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. > + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.12/include/linux/interrupt.h b/kernel_addons/backport/2.6.12/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.12/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return 
request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.12/include/linux/netdevice.h b/kernel_addons/backport/2.6.12/include/linux/netdevice.h > index 5641019..2f12781 100644 > --- a/kernel_addons/backport/2.6.12/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.12/include/linux/netdevice.h > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.12/include/linux/random.h b/kernel_addons/backport/2.6.12/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.12/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.12/include/linux/skbuff.h b/kernel_addons/backport/2.6.12/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.12/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.12/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.12/include/linux/types.h b/kernel_addons/backport/2.6.12/include/linux/types.h > 
index c06977a..53c7a33 100644 > --- a/kernel_addons/backport/2.6.12/include/linux/types.h > +++ b/kernel_addons/backport/2.6.12/include/linux/types.h > @@ -7,4 +7,6 @@ #ifdef __KERNEL__ > typedef unsigned int gfp_t; > #endif > > +#define BITS_PER_BYTE 8 > + > #endif > diff --git a/kernel_addons/backport/2.6.12/include/linux/workqueue.h b/kernel_addons/backport/2.6.12/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.12/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.12/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.12/include/net/netevent.h b/kernel_addons/backport/2.6.12/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.12/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > 
+ > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.12/include/src/genalloc.c b/kernel_addons/backport/2.6.12/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.12/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. 
> + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. > + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. 
> + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. > + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + 
spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. > + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.12/include/src/netevent.c b/kernel_addons/backport/2.6.12/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.12/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) 
any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. > + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). 
> + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.12/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.12/cxgb3_main_to_2_6_13.patch > new file mode 100644 > index 0000000..e6781f3 > --- /dev/null > +++ b/kernel_patches/backport/2.6.12/cxgb3_main_to_2_6_13.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > +index dfa035a..414ea84 100755 > +--- a/drivers/net/cxgb3/cxgb3_main.c > ++++ b/drivers/net/cxgb3/cxgb3_main.c > +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth > + .get_wol = get_wol, > + .get_tso = ethtool_op_get_tso, > + .set_tso = ethtool_op_set_tso, > +- .get_perm_addr = ethtool_op_get_perm_addr > + }; > + > + static int in_range(int val, int lo, int hi) > diff --git a/kernel_patches/backport/2.6.12/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.12/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.12/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.12/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.12/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.12/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git 
a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > diff --git a/kernel_patches/backport/2.6.12/t3_hw_to_2_6_13.patch b/kernel_patches/backport/2.6.12/t3_hw_to_2_6_13.patch > new file mode 100644 > index 0000000..611e9dc > --- /dev/null > +++ b/kernel_patches/backport/2.6.12/t3_hw_to_2_6_13.patch > @@ -0,0 +1,13 @@ > +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > +index 14ea6b9..f13f581 100755 > +--- a/drivers/net/cxgb3/t3_hw.c > ++++ b/drivers/net/cxgb3/t3_hw.c > +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada > + > + memcpy(adapter->port[i]->dev_addr, hw_addr, > + ETH_ALEN); > +- memcpy(adapter->port[i]->perm_addr, hw_addr, > +- ETH_ALEN); > + init_link_config(&p->link_config, p->port_type->caps); > + p->phy.ops->power_down(&p->phy, 1); > + if (!(p->port_type->caps & SUPPORTED_IRQ)) > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:14:02 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 
Jan 2007 18:14:02 +0200 Subject: [openib-general] [PATCH RFC 09/10] ofed_1_2 Backport Chelsio to 2.6.11 In-Reply-To: <20070117195008.30830.48783.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195008.30830.48783.stgit@dell3.ogc.int> Message-ID: <1169136842.23809.73.camel@swlab50.yok.mtl.com> Applied. Regards, Vladimir On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > Backport Chelsio to 2.6.11 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.11/include/linux/ethtool.h | 9 + > .../backport/2.6.11/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.11/include/linux/interrupt.h | 17 ++ > kernel_addons/backport/2.6.11/include/linux/mm.h | 8 + > .../backport/2.6.11/include/linux/netdevice.h | 9 + > .../backport/2.6.11/include/linux/random.h | 15 ++ > .../backport/2.6.11/include/linux/skbuff.h | 2 > kernel_addons/backport/2.6.11/include/linux/slab.h | 19 -- > .../backport/2.6.11/include/linux/types.h | 2 > .../backport/2.6.11/include/linux/workqueue.h | 8 + > kernel_addons/backport/2.6.11/include/net/dst.h | 16 ++ > .../backport/2.6.11/include/net/netevent.h | 33 ++++ > .../backport/2.6.11/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.11/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.11/cxgb3_main_to_2_6_13.patch | 12 + > .../backport/2.6.11/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.11/linux_stuff_to_2_6_17.patch | 24 +++ > .../2.6.11/mthca_provider_3465_to_2_6_11.patch | 13 -- > .../backport/2.6.11/t3_hw_to_2_6_13.patch | 13 ++ > 19 files changed, 492 insertions(+), 31 deletions(-) > > diff --git a/kernel_addons/backport/2.6.11/include/linux/ethtool.h b/kernel_addons/backport/2.6.11/include/linux/ethtool.h > new file mode 100644 > index 0000000..d03127c > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/linux/ethtool.h > @@ -0,0 +1,9 @@ > +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > + > +#include_next > + > 
+#define ADVERTISED_Pause (1 << 13) > +#define ADVERTISED_Asym_Pause (1 << 14) > + > +#endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/genalloc.h b/kernel_addons/backport/2.6.11/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.11/include/linux/interrupt.h b/kernel_addons/backport/2.6.11/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/mm.h b/kernel_addons/backport/2.6.11/include/linux/mm.h > new file mode 100644 > index 0000000..1a1cf11 > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/linux/mm.h > @@ -0,0 +1,8 @@ > +#ifndef BACKPORT_LINUX_MM_TO_2_6_11 > +#define BACKPORT_LINUX_MM_TO_2_6_11 > + > +#include_next > + > +#define 
io_remap_pfn_range remap_pfn_range > + > +#endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/netdevice.h b/kernel_addons/backport/2.6.11/include/linux/netdevice.h > index 5641019..2f12781 100644 > --- a/kernel_addons/backport/2.6.11/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.11/include/linux/netdevice.h > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/random.h b/kernel_addons/backport/2.6.11/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/skbuff.h b/kernel_addons/backport/2.6.11/include/linux/skbuff.h > index a4d9195..62bfb3b 100644 > --- a/kernel_addons/backport/2.6.11/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.11/include/linux/skbuff.h > @@ -4,6 +4,7 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > > /** > * skb_header_release - release reference to header > @@ -17,5 +18,6 @@ static inline void skb_header_release(st > { > } > > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/slab.h b/kernel_addons/backport/2.6.11/include/linux/slab.h > 
index bd9671d..9d82afa 100644 > --- a/kernel_addons/backport/2.6.11/include/linux/slab.h > +++ b/kernel_addons/backport/2.6.11/include/linux/slab.h > @@ -22,12 +22,6 @@ static inline char *kstrdup(const char * > return s; > } > > -#endif > -#include_next > - > -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > -#define BACKPORT_LINUX_STRING_TO_2_6_18 > - > static inline > void *kmemdup(const void *src, size_t len, gfp_t gfp) > { > @@ -39,19 +33,10 @@ void *kmemdup(const void *src, size_t le > return p; > } > > -#endif > -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > -#define BACKPORT_LINUX_STRING_TO_2_6_18 > - > static inline > -void *kmemdup(const void *src, size_t len, gfp_t gfp) > +void *kmalloc_node(size_t size, gfp_t flags, int nid) > { > - void *p; > - > - p = kmalloc(len, gfp); > - if (p) > - memcpy(p, src, len); > - return p; > + return kmalloc(size, flags); > } > > #endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/types.h b/kernel_addons/backport/2.6.11/include/linux/types.h > index c06977a..53c7a33 100644 > --- a/kernel_addons/backport/2.6.11/include/linux/types.h > +++ b/kernel_addons/backport/2.6.11/include/linux/types.h > @@ -7,4 +7,6 @@ #ifdef __KERNEL__ > typedef unsigned int gfp_t; > #endif > > +#define BITS_PER_BYTE 8 > + > #endif > diff --git a/kernel_addons/backport/2.6.11/include/linux/workqueue.h b/kernel_addons/backport/2.6.11/include/linux/workqueue.h > index 330f47f..c054ed2 100644 > --- a/kernel_addons/backport/2.6.11/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.11/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, > + struct delayed_work *dwork) > +{ > + while (!cancel_delayed_work(&dwork->work)) > + flush_workqueue(wq); > +} > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,7 +39,7 @@ #define 
INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > diff --git a/kernel_addons/backport/2.6.11/include/net/dst.h b/kernel_addons/backport/2.6.11/include/net/dst.h > new file mode 100644 > index 0000000..ec5e7b4 > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/net/dst.h > @@ -0,0 +1,16 @@ > +#ifndef BACKPORT_NET_DST_2_6_11 > +#define BACKPORT_NET_DST_2_6_11 > + > +#include_next > + > +static inline u32 dst_mtu(struct dst_entry *dst) > +{ > + u32 mtu = dst_metric(dst, RTAX_MTU); > + /* > + * Alexey put it here, so ask him about it :) > + */ > + barrier(); > + return mtu; > +} > + > +#endif > diff --git a/kernel_addons/backport/2.6.11/include/net/netevent.h b/kernel_addons/backport/2.6.11/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git 
a/kernel_addons/backport/2.6.11/include/src/genalloc.c b/kernel_addons/backport/2.6.11/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.11/include/src/netevent.c b/kernel_addons/backport/2.6.11/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.11/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.11/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.11/cxgb3_main_to_2_6_13.patch > new file mode 100644 > index 0000000..e6781f3 > --- /dev/null > +++ b/kernel_patches/backport/2.6.11/cxgb3_main_to_2_6_13.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > +index dfa035a..414ea84 100755 > +--- a/drivers/net/cxgb3/cxgb3_main.c > ++++ b/drivers/net/cxgb3/cxgb3_main.c > +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth > + .get_wol = get_wol, > + .get_tso = ethtool_op_get_tso, > + .set_tso = ethtool_op_set_tso, > +- .get_perm_addr = ethtool_op_get_perm_addr > + }; > + > + static int in_range(int val, int lo, int hi) 
> diff --git a/kernel_patches/backport/2.6.11/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.11/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.11/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.11/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.11/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.11/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > diff --git a/kernel_patches/backport/2.6.11/mthca_provider_3465_to_2_6_11.patch b/kernel_patches/backport/2.6.11/mthca_provider_3465_to_2_6_11.patch > deleted file mode 100644 > index 402c00a..0000000 > --- 
a/kernel_patches/backport/2.6.11/mthca_provider_3465_to_2_6_11.patch > +++ /dev/null > @@ -1,13 +0,0 @@ > -Index: linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c > -=================================================================== > ---- linux-kernel.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 15:17:58.000000000 +0200 > -+++ linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 15:20:19.000000000 +0200 > -@@ -359,7 +359,7 @@ static int mthca_mmap_uar(struct ib_ucon > - > - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); > - > -- if (io_remap_pfn_range(vma, vma->vm_start, > -+ if (remap_pfn_range(vma, vma->vm_start, > - to_mucontext(context)->uar.pfn, > - PAGE_SIZE, vma->vm_page_prot)) > - return -EAGAIN; > diff --git a/kernel_patches/backport/2.6.11/t3_hw_to_2_6_13.patch b/kernel_patches/backport/2.6.11/t3_hw_to_2_6_13.patch > new file mode 100644 > index 0000000..611e9dc > --- /dev/null > +++ b/kernel_patches/backport/2.6.11/t3_hw_to_2_6_13.patch > @@ -0,0 +1,13 @@ > +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > +index 14ea6b9..f13f581 100755 > +--- a/drivers/net/cxgb3/t3_hw.c > ++++ b/drivers/net/cxgb3/t3_hw.c > +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada > + > + memcpy(adapter->port[i]->dev_addr, hw_addr, > + ETH_ALEN); > +- memcpy(adapter->port[i]->perm_addr, hw_addr, > +- ETH_ALEN); > + init_link_config(&p->link_config, p->port_type->caps); > + p->phy.ops->power_down(&p->phy, 1); > + if (!(p->port_type->caps & SUPPORTED_IRQ)) > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:14:14 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:14:14 +0200 Subject: [openib-general] [PATCH RFC 10/10] ofed_1_2 
Backport Chelsio to sles9sp3 In-Reply-To: <20070117195010.30830.98048.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195010.30830.98048.stgit@dell3.ogc.int> Message-ID: <1169136854.23809.75.camel@swlab50.yok.mtl.com> Applied. Regards, Vladimir On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > Backport Chelsio to sles9sp3 > > Signed-off-by: Steve Wise > --- > > .../2.6.5_sles9_sp3/include/linux/ethtool.h | 9 + > .../2.6.5_sles9_sp3/include/linux/genalloc.h | 42 +++++ > .../2.6.5_sles9_sp3/include/linux/interrupt.h | 17 ++ > .../backport/2.6.5_sles9_sp3/include/linux/kfifo.h | 157 ++++++++++++++++++ > .../backport/2.6.5_sles9_sp3/include/linux/mii.h | 18 ++ > .../backport/2.6.5_sles9_sp3/include/linux/mm.h | 20 ++ > .../2.6.5_sles9_sp3/include/linux/netdevice.h | 13 ++ > .../backport/2.6.5_sles9_sp3/include/linux/pci.h | 2 > .../2.6.5_sles9_sp3/include/linux/random.h | 15 ++ > .../2.6.5_sles9_sp3/include/linux/skbuff.h | 3 > .../backport/2.6.5_sles9_sp3/include/linux/slab.h | 19 -- > .../2.6.5_sles9_sp3/include/linux/spinlock.h | 8 + > .../backport/2.6.5_sles9_sp3/include/linux/types.h | 2 > .../2.6.5_sles9_sp3/include/linux/workqueue.h | 8 + > .../backport/2.6.5_sles9_sp3/include/net/dst.h | 17 ++ > .../2.6.5_sles9_sp3/include/net/neighbour.h | 7 + > .../2.6.5_sles9_sp3/include/net/netevent.h | 33 ++++ > .../2.6.5_sles9_sp3/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.5_sles9_sp3/include/src/kfifo.c | 196 +++++++++++++++++++++++ > .../2.6.5_sles9_sp3/include/src/netevent.c | 71 ++++++++ > .../2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch | 12 + > .../2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch | 12 + > .../2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch | 35 ++++ > .../linux_stream_idr_to_2_6_5-7_244.patch | 25 --- > .../linux_stuff_to_2_6_5-7_244.patch | 46 +++++ > .../mthca_provider_3465_to_2_6_9.patch | 15 -- > .../2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch | 43 +++++ > 27 files changed, 985 
insertions(+), 58 deletions(-) > > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h > new file mode 100644 > index 0000000..d03127c > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/ethtool.h > @@ -0,0 +1,9 @@ > +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > + > +#include_next <linux/ethtool.h> > + > +#define ADVERTISED_Pause (1 << 13) > +#define ADVERTISED_Asym_Pause (1 << 14) > + > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/interrupt.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h > new file mode 100644 > index 0000000..48eccd8 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/kfifo.h > @@ -0,0 +1,157 @@ > +/* > + * A simple kernel FIFO implementation. 
> + * > + * Copyright (C) 2004 Stelian Pop > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > + * > + */ > +#ifndef _LINUX_KFIFO_H > +#define _LINUX_KFIFO_H > + > +#ifdef __KERNEL__ > + > +#include > +#include > + > +struct kfifo { > + unsigned char *buffer; /* the buffer holding the data */ > + unsigned int size; /* the size of the allocated buffer */ > + unsigned int in; /* data is added at offset (in % size) */ > + unsigned int out; /* data is extracted from off. (out % size) */ > + spinlock_t *lock; /* protects concurrent modifications */ > +}; > + > +extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, > + gfp_t gfp_mask, spinlock_t *lock); > +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, > + spinlock_t *lock); > +extern void kfifo_free(struct kfifo *fifo); > +extern unsigned int __kfifo_put(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len); > +extern unsigned int __kfifo_get(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len); > + > +/** > + * __kfifo_reset - removes the entire FIFO contents, no locking version > + * @fifo: the fifo to be emptied. 
> + */ > +static inline void __kfifo_reset(struct kfifo *fifo) > +{ > + fifo->in = fifo->out = 0; > +} > + > +/** > + * kfifo_reset - removes the entire FIFO contents > + * @fifo: the fifo to be emptied. > + */ > +static inline void kfifo_reset(struct kfifo *fifo) > +{ > + unsigned long flags; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + __kfifo_reset(fifo); > + > + spin_unlock_irqrestore(fifo->lock, flags); > +} > + > +/** > + * kfifo_put - puts some data into the FIFO > + * @fifo: the fifo to be used. > + * @buffer: the data to be added. > + * @len: the length of the data to be added. > + * > + * This function copies at most 'len' bytes from the 'buffer' into > + * the FIFO depending on the free space, and returns the number of > + * bytes copied. > + */ > +static inline unsigned int kfifo_put(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned long flags; > + unsigned int ret; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + ret = __kfifo_put(fifo, buffer, len); > + > + spin_unlock_irqrestore(fifo->lock, flags); > + > + return ret; > +} > + > +/** > + * kfifo_get - gets some data from the FIFO > + * @fifo: the fifo to be used. > + * @buffer: where the data must be copied. > + * @len: the size of the destination buffer. > + * > + * This function copies at most 'len' bytes from the FIFO into the > + * 'buffer' and returns the number of copied bytes. 
> + */ > +static inline unsigned int kfifo_get(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned long flags; > + unsigned int ret; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + ret = __kfifo_get(fifo, buffer, len); > + > + /* > + * optimization: if the FIFO is empty, set the indices to 0 > + * so we don't wrap the next time > + */ > + if (fifo->in == fifo->out) > + fifo->in = fifo->out = 0; > + > + spin_unlock_irqrestore(fifo->lock, flags); > + > + return ret; > +} > + > +/** > + * __kfifo_len - returns the number of bytes available in the FIFO, no locking version > + * @fifo: the fifo to be used. > + */ > +static inline unsigned int __kfifo_len(struct kfifo *fifo) > +{ > + return fifo->in - fifo->out; > +} > + > +/** > + * kfifo_len - returns the number of bytes available in the FIFO > + * @fifo: the fifo to be used. > + */ > +static inline unsigned int kfifo_len(struct kfifo *fifo) > +{ > + unsigned long flags; > + unsigned int ret; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + ret = __kfifo_len(fifo); > + > + spin_unlock_irqrestore(fifo->lock, flags); > + > + return ret; > +} > + > +#else > +#warning "don't include kernel headers in userspace" > +#endif /* __KERNEL__ */ > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h > new file mode 100644 > index 0000000..3ba8e73 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mii.h > @@ -0,0 +1,18 @@ > +#ifndef BACKPORT_LINUX_MII_TO_SLES9SP3 > +#define BACKPORT_LINUX_MII_TO_SLES9SP3 > + > +#include_next > + > +#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ > +#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ > +#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymetric pause */ > +#define MII_CTRL1000 0x09 /* 1000BASE-T control */ > +#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ > +#define ADVERTISE_1000HALF 0x0100 
/* Advertise 1000BASE-T half duplex */ > + > +static inline struct mii_ioctl_data *if_mii(struct ifreq *rq) > +{ > + return (struct mii_ioctl_data *) &rq->ifr_ifru; > +} > + > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mm.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mm.h > new file mode 100644 > index 0000000..77ee6fc > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/mm.h > @@ -0,0 +1,20 @@ > +#ifndef BACKPORT_LINUX_MM_TO_SLES9SP3 > +#define BACKPORT_LINUX_MM_TO_SLES9SP3 > + > +#include_next <linux/mm.h> > + > +static inline int > +remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, > + unsigned long pfn, unsigned long size, pgprot_t prot) > +{ > + return remap_page_range(vma, addr, pfn << PAGE_SHIFT, size, prot); > +} > + > +static inline int > +io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, > + unsigned long pfn, unsigned long size, pgprot_t prot) > +{ > + return io_remap_page_range(vma, addr, pfn << PAGE_SHIFT, size, prot); > +} > + > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h > index 5641019..2e18642 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/netdevice.h > @@ -15,4 +15,17 @@ static inline void netif_tx_unlock(struc > spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > +#define NETDEV_TX_OK 0 /* driver took care of packet */ > +#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ > +#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ > + > #endif > diff --git 
a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h > index b43b19c..beb954b 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/pci.h > @@ -6,6 +6,8 @@ #include_next > #define PCI_EXP_DEVCTL 8 /* Device Control */ > #define PCI_EXP_LNKCTL 16 /* Link Control */ > #define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ > +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ > +#define PCI_EXP_LNKSTA 18 /* Link Status */ > > struct msix_entry { > u16 vector; /* kernel uses to write allocated vector */ > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/random.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h > index cc56236..0d91d86 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/skbuff.h > @@ -4,6 +4,7 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > > /** > * skb_header_release - release reference to header > @@ -41,4 +42,6 @@ static inline int skb_can_coalesce(struc > return 0; > } > > +#define gso_size tso_size > + > #endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h 
b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h > index 0540cc6..c8285ac 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/slab.h > @@ -12,12 +12,6 @@ static inline void *kzalloc(size_t size, > return ret; > } > > -#endif > -#include_next > - > -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > -#define BACKPORT_LINUX_STRING_TO_2_6_18 > - > static inline > void *kmemdup(const void *src, size_t len, gfp_t gfp) > { > @@ -29,19 +23,10 @@ void *kmemdup(const void *src, size_t le > return p; > } > > -#endif > -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > -#define BACKPORT_LINUX_STRING_TO_2_6_18 > - > static inline > -void *kmemdup(const void *src, size_t len, gfp_t gfp) > +void *kmalloc_node(size_t size, gfp_t flags, int nid) > { > - void *p; > - > - p = kmalloc(len, gfp); > - if (p) > - memcpy(p, src, len); > - return p; > + return kmalloc(size, flags); > } > > #endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h > index 4644d50..00506f4 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/spinlock.h > @@ -3,6 +3,7 @@ #define BACKPORT_LINUX_SPINLOCK_H > > #include_next > #define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED > +#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED > > #define spin_trylock_irqsave(lock, flags) \ > ({ \ > @@ -13,4 +14,11 @@ ({ \ > > #define spin_lock_nested(lock, subclass) spin_lock(lock) > > +#define spin_trylock_irq(lock) \ > +({ \ > + local_irq_disable(); \ > + spin_trylock(lock) ? 
\ > + 1 : ({ local_irq_enable(); 0; }); \ > +}) > + > #endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h > index c06977a..53c7a33 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/types.h > @@ -7,4 +7,6 @@ #ifdef __KERNEL__ > typedef unsigned int gfp_t; > #endif > > +#define BITS_PER_BYTE 8 > + > #endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h > index 330f47f..c054ed2 100644 > --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, > + struct delayed_work *dwork) > +{ > + while (!cancel_delayed_work(&dwork->work)) > + flush_workqueue(wq); > +} > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,7 +39,7 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h > new file mode 100644 > index 0000000..69cca51 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/dst.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_NET_DST_2_6_11 > +#define BACKPORT_NET_DST_2_6_11 > + > +#include > +#include_next > + > +static inline u32 
dst_mtu(struct dst_entry *dst) > +{ > + u32 mtu = dst_metric(dst, RTAX_MTU); > + /* > + * Alexey put it here, so ask him about it :) > + */ > + barrier(); > + return mtu; > +} > + > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h > new file mode 100644 > index 0000000..573320d > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/neighbour.h > @@ -0,0 +1,7 @@ > +#ifndef BACKPORT_LINUX_NEIGHBOUR_TO_SLES9SP3 > +#define BACKPORT_LINUX_NEIGHBOUR_TO_SLES9SP3 > + > +#include > +#include_next > + > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * 
Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c > new file mode 100644 > index 0000000..5d1d907 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/kfifo.c > @@ -0,0 +1,196 @@ > +/* > + * A simple kernel FIFO implementation. > + * > + * Copyright (C) 2004 Stelian Pop > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
> + * > + */ > + > +#include > +#include > +#include > +#include > +#include > + > +/** > + * kfifo_init - allocates a new FIFO using a preallocated buffer > + * @buffer: the preallocated buffer to be used. > + * @size: the size of the internal buffer, this have to be a power of 2. > + * @gfp_mask: get_free_pages mask, passed to kmalloc() > + * @lock: the lock to be used to protect the fifo buffer > + * > + * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the > + * struct kfifo with kfree(). > + */ > +struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, > + gfp_t gfp_mask, spinlock_t *lock) > +{ > + struct kfifo *fifo; > + > + /* size must be a power of 2 */ > + BUG_ON(size & (size - 1)); > + > + fifo = kmalloc(sizeof(struct kfifo), gfp_mask); > + if (!fifo) > + return ERR_PTR(-ENOMEM); > + > + fifo->buffer = buffer; > + fifo->size = size; > + fifo->in = fifo->out = 0; > + fifo->lock = lock; > + > + return fifo; > +} > +EXPORT_SYMBOL(kfifo_init); > + > +/** > + * kfifo_alloc - allocates a new FIFO and its internal buffer > + * @size: the size of the internal buffer to be allocated. > + * @gfp_mask: get_free_pages mask, passed to kmalloc() > + * @lock: the lock to be used to protect the fifo buffer > + * > + * The size will be rounded-up to a power of 2. > + */ > +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) > +{ > + unsigned char *buffer; > + struct kfifo *ret; > + > + /* > + * round up to the next power of 2, since our 'let the indices > + * wrap' tachnique works only in this case. 
> + */ > + if (size & (size - 1)) { > + BUG_ON(size > 0x80000000); > + size = roundup_pow_of_two(size); > + } > + > + buffer = kmalloc(size, gfp_mask); > + if (!buffer) > + return ERR_PTR(-ENOMEM); > + > + ret = kfifo_init(buffer, size, gfp_mask, lock); > + > + if (IS_ERR(ret)) > + kfree(buffer); > + > + return ret; > +} > +EXPORT_SYMBOL(kfifo_alloc); > + > +/** > + * kfifo_free - frees the FIFO > + * @fifo: the fifo to be freed. > + */ > +void kfifo_free(struct kfifo *fifo) > +{ > + kfree(fifo->buffer); > + kfree(fifo); > +} > +EXPORT_SYMBOL(kfifo_free); > + > +/** > + * __kfifo_put - puts some data into the FIFO, no locking version > + * @fifo: the fifo to be used. > + * @buffer: the data to be added. > + * @len: the length of the data to be added. > + * > + * This function copies at most 'len' bytes from the 'buffer' into > + * the FIFO depending on the free space, and returns the number of > + * bytes copied. > + * > + * Note that with only one concurrent reader and one concurrent > + * writer, you don't need extra locking to use these functions. > + */ > +unsigned int __kfifo_put(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned int l; > + > + len = min(len, fifo->size - fifo->in + fifo->out); > + > + /* > + * Ensure that we sample the fifo->out index -before- we > + * start putting bytes into the kfifo. > + */ > + > + smp_mb(); > + > + /* first put the data starting from fifo->in to buffer end */ > + l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); > + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); > + > + /* then put the rest (if any) at the beginning of the buffer */ > + memcpy(fifo->buffer, buffer + l, len - l); > + > + /* > + * Ensure that we add the bytes to the kfifo -before- > + * we update the fifo->in index. 
> + */ > + > + smp_wmb(); > + > + fifo->in += len; > + > + return len; > +} > +EXPORT_SYMBOL(__kfifo_put); > + > +/** > + * __kfifo_get - gets some data from the FIFO, no locking version > + * @fifo: the fifo to be used. > + * @buffer: where the data must be copied. > + * @len: the size of the destination buffer. > + * > + * This function copies at most 'len' bytes from the FIFO into the > + * 'buffer' and returns the number of copied bytes. > + * > + * Note that with only one concurrent reader and one concurrent > + * writer, you don't need extra locking to use these functions. > + */ > +unsigned int __kfifo_get(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned int l; > + > + len = min(len, fifo->in - fifo->out); > + > + /* > + * Ensure that we sample the fifo->in index -before- we > + * start removing bytes from the kfifo. > + */ > + > + smp_rmb(); > + > + /* first get the data from fifo->out until the end of the buffer */ > + l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); > + memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); > + > + /* then get the rest (if any) from the beginning of the buffer */ > + memcpy(buffer + l, fifo->buffer, len - l); > + > + /* > + * Ensure that we remove the bytes from the kfifo -before- > + * we update the fifo->out index. 
> + */ > + > + smp_mb(); > + > + fifo->out += len; > + > + return len; > +} > +EXPORT_SYMBOL(__kfifo_get); > diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. > + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. 
> + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch > new file mode 100644 > index 0000000..e6781f3 > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_main_to_2_6_13.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > +index dfa035a..414ea84 100755 > +--- a/drivers/net/cxgb3/cxgb3_main.c > ++++ b/drivers/net/cxgb3/cxgb3_main.c > +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth > + .get_wol = get_wol, > + .get_tso = ethtool_op_get_tso, > + .set_tso = ethtool_op_set_tso, > +- .get_perm_addr = ethtool_op_get_perm_addr > + }; > + > + static int in_range(int val, int lo, int hi) > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ 
b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..af468f7 > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/iwch_cm_to_2_6_5-7_244.patch > @@ -0,0 +1,35 @@ > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c > +index 3237fc8..2a38953 100644 > +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c > +@@ -234,7 +234,7 @@ static void *alloc_ep(int size, gfp_t gf > + epc = kmalloc(size, gfp); > + if (epc) { > + memset(epc, 0, size); > +- kref_init(&epc->kref); > ++ kref_init(&epc->kref, __free_ep); > + spin_lock_init(&epc->lock); > + init_waitqueue_head(&epc->waitq); > + } > +@@ -338,7 +338,7 @@ static struct rtable *find_route(struct > + } > + }; > + > +- if (ip_route_output_flow(&rt, &fl, NULL, 0)) > ++ if (ip_route_output_key(&rt, &fl)) > + return NULL; > + return rt; > + } > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h > +index 893f9d0..e54e202 100644 > +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h > +@@ -57,7 +57,7 @@ #define MPA_FLAGS_MASK 0xE0 > + #define put_ep(ep) { \ > + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \ > + ep, atomic_read(&((ep)->kref.refcount))); \ > +- kref_put(&((ep)->kref), __free_ep); \ > ++ kref_put(&((ep)->kref)); \ > + } > + > + #define get_ep(ep) { \ > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch > deleted file mode 100644 > index 74d8403..0000000 > --- 
a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stream_idr_to_2_6_5-7_244.patch > +++ /dev/null > @@ -1,25 +0,0 @@ > -diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > -index 163d991..2cd239f 100644 > ---- a/drivers/infiniband/core/Makefile > -+++ b/drivers/infiniband/core/Makefile > -@@ -26,3 +26,6 @@ ib_ucm-y := ucm.o > - > - ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > - uverbs_marshall.o > -+ > -+obj-$(CONFIG_INFINIBAND) += stream.o > -+ib_core-y += stream.o ib_idr.o > -diff --git a/drivers/infiniband/core/stream.c b/drivers/infiniband/core/stream.c > -new file mode 100644 > -index 0000000..96a48fe > ---- /dev/null > -+++ b/drivers/infiniband/core/stream.c > -@@ -0,0 +1 @@ > -+#include "src/stream.c" > -diff --git a/drivers/infiniband/core/ib_idr.c b/drivers/infiniband/core/ib_idr.c > -new file mode 100644 > -index 0000000..58cf933 > ---- /dev/null > -+++ b/drivers/infiniband/core/ib_idr.c > -@@ -0,0 +1 @@ > -+#include "src/ib_idr.c" > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..8733e1a > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/linux_stuff_to_2_6_5-7_244.patch > @@ -0,0 +1,46 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/stream.c b/drivers/infiniband/core/stream.c > +new file mode 100644 > +index 0000000..96a48fe > +--- /dev/null > ++++ 
b/drivers/infiniband/core/stream.c > +@@ -0,0 +1 @@ > ++#include "src/stream.c" > +diff --git a/drivers/infiniband/core/ib_idr.c b/drivers/infiniband/core/ib_idr.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/ib_idr.c > +@@ -0,0 +1 @@ > ++#include "src/ib_idr.c" > +diff --git a/drivers/infiniband/core/kfifo.c b/drivers/infiniband/core/kfifo.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/kfifo.c > +@@ -0,0 +1 @@ > ++#include "src/kfifo.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,6 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++obj-$(CONFIG_INFINIBAND) += stream.o > ++ib_core-y += stream.o ib_idr.o genalloc.o netevent.o kfifo.o > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/mthca_provider_3465_to_2_6_9.patch b/kernel_patches/backport/2.6.5_sles9_sp3/mthca_provider_3465_to_2_6_9.patch > deleted file mode 100644 > index a3febff..0000000 > --- a/kernel_patches/backport/2.6.5_sles9_sp3/mthca_provider_3465_to_2_6_9.patch > +++ /dev/null > @@ -1,15 +0,0 @@ > -Index: linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c > -=================================================================== > ---- linux-kernel.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:01:20.000000000 +0200 > -+++ linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:03:14.000000000 +0200 > -@@ -359,8 +359,8 @@ static int mthca_mmap_uar(struct ib_ucon > - > - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); > - > -- if (io_remap_pfn_range(vma, vma->vm_start, > -- to_mucontext(context)->uar.pfn, > -+ if (remap_page_range(vma, vma->vm_start, > -+ (to_mucontext(context)->uar.pfn) << PAGE_SHIFT, > - 
PAGE_SIZE, vma->vm_page_prot)) > - return -EAGAIN; > - > diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..a667be0 > --- /dev/null > +++ b/kernel_patches/backport/2.6.5_sles9_sp3/t3_hw_to_2_6_5-7_244.patch > @@ -0,0 +1,43 @@ > +diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h > +index 463ca32..b931fd2 100755 > +--- a/drivers/net/cxgb3/adapter.h > ++++ b/drivers/net/cxgb3/adapter.h > +@@ -179,6 +179,7 @@ struct adapter { > + struct list_head adapter_list; > + void __iomem *regs; > + struct pci_dev *pdev; > ++ u32 saved_pci_state[16]; > + unsigned long registered_device_map; > + unsigned long open_device_map; > + unsigned long flags; > +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > +index 14ea6b9..f13f581 100755 > +--- a/drivers/net/cxgb3/t3_hw.c > ++++ b/drivers/net/cxgb3/t3_hw.c > +@@ -3250,7 +3250,7 @@ int t3_reset_adapter(struct adapter *ada > + uint16_t devid = 0; > + > + if (is_pcie(adapter)) > +- pci_save_state(adapter->pdev); > ++ pci_save_state(adapter->pdev, adapter->saved_pci_state); > + t3_write_reg(adapter, A_PL_RST, F_CRSTWRM | F_CRSTWRMMODE); > + > + /* > +@@ -3268,7 +3268,7 @@ int t3_reset_adapter(struct adapter *ada > + return -1; > + > + if (is_pcie(adapter)) > +- pci_restore_state(adapter->pdev); > ++ pci_restore_state(adapter->pdev, adapter->saved_pci_state); > + return 0; > + } > + > +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada > + > + memcpy(adapter->port[i]->dev_addr, hw_addr, > + ETH_ALEN); > +- memcpy(adapter->port[i]->perm_addr, hw_addr, > +- ETH_ALEN); > + init_link_config(&p->link_config, p->port_type->caps); > + p->phy.ops->power_down(&p->phy, 1); > + if (!(p->port_type->caps & SUPPORTED_IRQ)) > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > 
http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Thu Jan 18 08:31:11 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 10:31:11 -0600 Subject: [openib-general] [PATCH ] ofed_1_2 Backport Chelsio to RHEL4U4 Message-ID: <20070118163111.18825.58472.stgit@dell3.ogc.int> Backport Chelsio to RHEL4U4 Signed-off-by: Steve Wise --- .../backport/2.6.9_U4/include/linux/ethtool.h | 9 + .../backport/2.6.9_U4/include/linux/genalloc.h | 42 +++++ .../backport/2.6.9_U4/include/linux/interrupt.h | 17 ++ .../backport/2.6.9_U4/include/linux/kfifo.h | 157 ++++++++++++++++++ kernel_addons/backport/2.6.9_U4/include/linux/mm.h | 20 ++ .../backport/2.6.9_U4/include/linux/netdevice.h | 9 + .../backport/2.6.9_U4/include/linux/pci.h | 1 .../backport/2.6.9_U4/include/linux/random.h | 15 ++ .../backport/2.6.9_U4/include/linux/skbuff.h | 4 .../backport/2.6.9_U4/include/linux/slab.h | 19 -- .../backport/2.6.9_U4/include/linux/types.h | 8 + .../backport/2.6.9_U4/include/linux/workqueue.h | 8 + kernel_addons/backport/2.6.9_U4/include/net/dst.h | 16 ++ .../backport/2.6.9_U4/include/net/netevent.h | 33 ++++ .../backport/2.6.9_U4/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.9_U4/include/src/kfifo.c | 196 +++++++++++++++++++++++ .../backport/2.6.9_U4/include/src/netevent.c | 71 ++++++++ .../backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch | 12 + .../2.6.9_U4/cxgb3_makefile_to_2_6_19.patch | 12 + .../backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch | 13 ++ .../backport/2.6.9_U4/linux_stuff_to_2_6_17.patch | 31 ++++ .../2.6.9_U4/mthca_provider_3465_to_2_6_9.patch | 15 -- .../backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch | 43 +++++ 23 files changed, 917 insertions(+), 32 deletions(-) diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/ethtool.h b/kernel_addons/backport/2.6.9_U4/include/linux/ethtool.h new file mode 100644 index 
0000000..d03127c --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/linux/ethtool.h @@ -0,0 +1,9 @@ +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 + +#include_next + +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/genalloc.h b/kernel_addons/backport/2.6.9_U4/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/interrupt.h b/kernel_addons/backport/2.6.9_U4/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/kfifo.h b/kernel_addons/backport/2.6.9_U4/include/linux/kfifo.h new file mode 100644 index 0000000..48eccd8 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/linux/kfifo.h @@ -0,0 +1,157 @@ +/* + * A simple kernel FIFO implementation. 
+ * + * Copyright (C) 2004 Stelian Pop + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ +#ifndef _LINUX_KFIFO_H +#define _LINUX_KFIFO_H + +#ifdef __KERNEL__ + +#include +#include + +struct kfifo { + unsigned char *buffer; /* the buffer holding the data */ + unsigned int size; /* the size of the allocated buffer */ + unsigned int in; /* data is added at offset (in % size) */ + unsigned int out; /* data is extracted from off. (out % size) */ + spinlock_t *lock; /* protects concurrent modifications */ +}; + +extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock); +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, + spinlock_t *lock); +extern void kfifo_free(struct kfifo *fifo); +extern unsigned int __kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len); +extern unsigned int __kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len); + +/** + * __kfifo_reset - removes the entire FIFO contents, no locking version + * @fifo: the fifo to be emptied. + */ +static inline void __kfifo_reset(struct kfifo *fifo) +{ + fifo->in = fifo->out = 0; +} + +/** + * kfifo_reset - removes the entire FIFO contents + * @fifo: the fifo to be emptied. 
+ */ +static inline void kfifo_reset(struct kfifo *fifo) +{ + unsigned long flags; + + spin_lock_irqsave(fifo->lock, flags); + + __kfifo_reset(fifo); + + spin_unlock_irqrestore(fifo->lock, flags); +} + +/** + * kfifo_put - puts some data into the FIFO + * @fifo: the fifo to be used. + * @buffer: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most 'len' bytes from the 'buffer' into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + */ +static inline unsigned int kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_put(fifo, buffer, len); + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +/** + * kfifo_get - gets some data from the FIFO + * @fifo: the fifo to be used. + * @buffer: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most 'len' bytes from the FIFO into the + * 'buffer' and returns the number of copied bytes. + */ +static inline unsigned int kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_get(fifo, buffer, len); + + /* + * optimization: if the FIFO is empty, set the indices to 0 + * so we don't wrap the next time + */ + if (fifo->in == fifo->out) + fifo->in = fifo->out = 0; + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +/** + * __kfifo_len - returns the number of bytes available in the FIFO, no locking version + * @fifo: the fifo to be used. + */ +static inline unsigned int __kfifo_len(struct kfifo *fifo) +{ + return fifo->in - fifo->out; +} + +/** + * kfifo_len - returns the number of bytes available in the FIFO + * @fifo: the fifo to be used. 
+ */ +static inline unsigned int kfifo_len(struct kfifo *fifo) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(fifo->lock, flags); + + ret = __kfifo_len(fifo); + + spin_unlock_irqrestore(fifo->lock, flags); + + return ret; +} + +#else +#warning "don't include kernel headers in userspace" +#endif /* __KERNEL__ */ +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/mm.h b/kernel_addons/backport/2.6.9_U4/include/linux/mm.h new file mode 100644 index 0000000..77ee6fc --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/linux/mm.h @@ -0,0 +1,20 @@ +#ifndef BACKPORT_LINUX_MM_TO_SLES9SP3 +#define BACKPORT_LINUX_MM_TO_SLES9SP3 + +#include_next + +static inline int +remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + return remap_page_range(vma, addr, pfn << PAGE_SHIFT, size, prot); +} + +static inline int +io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + return io_remap_page_range(vma, addr, pfn << PAGE_SHIFT, size, prot); +} + +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h b/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h index 5641019..2f12781 100644 --- a/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +static inline int __netif_rx_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/pci.h b/kernel_addons/backport/2.6.9_U4/include/linux/pci.h index bd47cd4..6e0df62 100644 --- a/kernel_addons/backport/2.6.9_U4/include/linux/pci.h +++ 
b/kernel_addons/backport/2.6.9_U4/include/linux/pci.h @@ -4,5 +4,6 @@ #define LINUX_PCI_BACKPORT_TO_2_6_9_H #include_next #define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ #endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/random.h b/kernel_addons/backport/2.6.9_U4/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h b/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h index 94b968a..62bfb3b 100644 --- a/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h @@ -3,7 +3,8 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next -#define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW /** * skb_header_release - release reference to header @@ -17,5 +18,6 @@ static inline void skb_header_release(st { } +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/slab.h b/kernel_addons/backport/2.6.9_U4/include/linux/slab.h index 46ac6e5..cbf6311 100644 --- a/kernel_addons/backport/2.6.9_U4/include/linux/slab.h +++ b/kernel_addons/backport/2.6.9_U4/include/linux/slab.h @@ -1,10 +1,8 @@ -#include_next +#ifndef LINUX_SLAB_BACKPORT_H +#define LINUX_SLAB_BACKPORT_H #include_next -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 -#define BACKPORT_LINUX_STRING_TO_2_6_18 - static inline void *kmemdup(const void *src, size_t len, gfp_t gfp) { @@ -16,19 +14,10 @@ void *kmemdup(const void *src, size_t le return p; } -#endif -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 
-#define BACKPORT_LINUX_STRING_TO_2_6_18 - static inline -void *kmemdup(const void *src, size_t len, gfp_t gfp) +void *kmalloc_node(size_t size, gfp_t flags, int nid) { - void *p; - - p = kmalloc(len, gfp); - if (p) - memcpy(p, src, len); - return p; + return kmalloc(size, flags); } #endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/types.h b/kernel_addons/backport/2.6.9_U4/include/linux/types.h new file mode 100644 index 0000000..0c19a43 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/linux/types.h @@ -0,0 +1,8 @@ +#ifndef LINUX_TYPES_BACKPORT_H +#define LINUX_TYPES_BACKPORT_H + +#include_next + +#define BITS_PER_BYTE 8 + +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h b/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h index 330f47f..c054ed2 100644 --- a/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, + struct delayed_work *dwork) +{ + while (!cancel_delayed_work(&dwork->work)) + flush_workqueue(wq); +} #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,7 +39,7 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } diff --git a/kernel_addons/backport/2.6.9_U4/include/net/dst.h b/kernel_addons/backport/2.6.9_U4/include/net/dst.h new file mode 100644 index 0000000..ec5e7b4 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/net/dst.h @@ -0,0 +1,16 @@ +#ifndef BACKPORT_NET_DST_2_6_11 +#define BACKPORT_NET_DST_2_6_11 + 
+#include_next + +static inline u32 dst_mtu(struct dst_entry *dst) +{ + u32 mtu = dst_metric(dst, RTAX_MTU); + /* + * Alexey put it here, so ask him about it :) + */ + barrier(); + return mtu; +} + +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/net/netevent.h b/kernel_addons/backport/2.6.9_U4/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.9_U4/include/src/genalloc.c b/kernel_addons/backport/2.6.9_U4/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.9_U4/include/src/kfifo.c b/kernel_addons/backport/2.6.9_U4/include/src/kfifo.c new file mode 100644 index 0000000..5d1d907 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/src/kfifo.c @@ -0,0 +1,196 @@ +/* + * A simple kernel FIFO implementation. + * + * Copyright (C) 2004 Stelian Pop + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include +#include +#include +#include +#include + +/** + * kfifo_init - allocates a new FIFO using a preallocated buffer + * @buffer: the preallocated buffer to be used. + * @size: the size of the internal buffer, this have to be a power of 2. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the + * struct kfifo with kfree(). + */ +struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock) +{ + struct kfifo *fifo; + + /* size must be a power of 2 */ + BUG_ON(size & (size - 1)); + + fifo = kmalloc(sizeof(struct kfifo), gfp_mask); + if (!fifo) + return ERR_PTR(-ENOMEM); + + fifo->buffer = buffer; + fifo->size = size; + fifo->in = fifo->out = 0; + fifo->lock = lock; + + return fifo; +} +EXPORT_SYMBOL(kfifo_init); + +/** + * kfifo_alloc - allocates a new FIFO and its internal buffer + * @size: the size of the internal buffer to be allocated. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * The size will be rounded-up to a power of 2. + */ +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) +{ + unsigned char *buffer; + struct kfifo *ret; + + /* + * round up to the next power of 2, since our 'let the indices + * wrap' tachnique works only in this case. 
+ */ + if (size & (size - 1)) { + BUG_ON(size > 0x80000000); + size = roundup_pow_of_two(size); + } + + buffer = kmalloc(size, gfp_mask); + if (!buffer) + return ERR_PTR(-ENOMEM); + + ret = kfifo_init(buffer, size, gfp_mask, lock); + + if (IS_ERR(ret)) + kfree(buffer); + + return ret; +} +EXPORT_SYMBOL(kfifo_alloc); + +/** + * kfifo_free - frees the FIFO + * @fifo: the fifo to be freed. + */ +void kfifo_free(struct kfifo *fifo) +{ + kfree(fifo->buffer); + kfree(fifo); +} +EXPORT_SYMBOL(kfifo_free); + +/** + * __kfifo_put - puts some data into the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most 'len' bytes from the 'buffer' into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int __kfifo_put(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->size - fifo->in + fifo->out); + + /* + * Ensure that we sample the fifo->out index -before- we + * start putting bytes into the kfifo. + */ + + smp_mb(); + + /* first put the data starting from fifo->in to buffer end */ + l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); + + /* then put the rest (if any) at the beginning of the buffer */ + memcpy(fifo->buffer, buffer + l, len - l); + + /* + * Ensure that we add the bytes to the kfifo -before- + * we update the fifo->in index. + */ + + smp_wmb(); + + fifo->in += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_put); + +/** + * __kfifo_get - gets some data from the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: where the data must be copied. + * @len: the size of the destination buffer. 
+ * + * This function copies at most 'len' bytes from the FIFO into the + * 'buffer' and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int __kfifo_get(struct kfifo *fifo, + unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->in - fifo->out); + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. + */ + + smp_rmb(); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); + memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + + /* then get the rest (if any) from the beginning of the buffer */ + memcpy(buffer + l, fifo->buffer, len - l); + + /* + * Ensure that we remove the bytes from the kfifo -before- + * we update the fifo->out index. + */ + + smp_mb(); + + fifo->out += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_get); diff --git a/kernel_addons/backport/2.6.9_U4/include/src/netevent.c b/kernel_addons/backport/2.6.9_U4/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.9_U4/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. 
+ * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). 
+ */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch new file mode 100644 index 0000000..e6781f3 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c +index dfa035a..414ea84 100755 +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth + .get_wol = get_wol, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +- .get_perm_addr = ethtool_op_get_perm_addr + }; + + static int in_range(int val, int lo, int hi) diff --git a/kernel_patches/backport/2.6.9_U4/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.9_U4/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U4/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch b/kernel_patches/backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch new file mode 100644 index 0000000..f8f51d6 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch @@ -0,0 +1,13 @@ +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c +index 3237fc8..2a38953 100644 +--- 
a/drivers/infiniband/hw/cxgb3/iwch_cm.c ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c +@@ -338,7 +338,7 @@ static struct rtable *find_route(struct + } + }; + +- if (ip_route_output_flow(&rt, &fl, NULL, 0)) ++ if (ip_route_output_key(&rt, &fl)) + return NULL; + return rt; + } diff --git a/kernel_patches/backport/2.6.9_U4/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.9_U4/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..e84b964 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U4/linux_stuff_to_2_6_17.patch @@ -0,0 +1,31 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/kfifo.c b/drivers/infiniband/core/kfifo.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/kfifo.c +@@ -0,0 +1 @@ ++#include "src/kfifo.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o kfifo.o diff --git a/kernel_patches/backport/2.6.9_U4/mthca_provider_3465_to_2_6_9.patch b/kernel_patches/backport/2.6.9_U4/mthca_provider_3465_to_2_6_9.patch deleted file mode 100644 index a3febff..0000000 --- a/kernel_patches/backport/2.6.9_U4/mthca_provider_3465_to_2_6_9.patch +++ /dev/null @@ -1,15 +0,0 @@ -Index: linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 
-=================================================================== ---- linux-kernel.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:01:20.000000000 +0200 -+++ linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:03:14.000000000 +0200 -@@ -359,8 +359,8 @@ static int mthca_mmap_uar(struct ib_ucon - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - -- if (io_remap_pfn_range(vma, vma->vm_start, -- to_mucontext(context)->uar.pfn, -+ if (remap_page_range(vma, vma->vm_start, -+ (to_mucontext(context)->uar.pfn) << PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - diff --git a/kernel_patches/backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch new file mode 100644 index 0000000..a667be0 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch @@ -0,0 +1,43 @@ +diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h +index 463ca32..b931fd2 100755 +--- a/drivers/net/cxgb3/adapter.h ++++ b/drivers/net/cxgb3/adapter.h +@@ -179,6 +179,7 @@ struct adapter { + struct list_head adapter_list; + void __iomem *regs; + struct pci_dev *pdev; ++ u32 saved_pci_state[16]; + unsigned long registered_device_map; + unsigned long open_device_map; + unsigned long flags; +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c +index 14ea6b9..f13f581 100755 +--- a/drivers/net/cxgb3/t3_hw.c ++++ b/drivers/net/cxgb3/t3_hw.c +@@ -3250,7 +3250,7 @@ int t3_reset_adapter(struct adapter *ada + uint16_t devid = 0; + + if (is_pcie(adapter)) +- pci_save_state(adapter->pdev); ++ pci_save_state(adapter->pdev, adapter->saved_pci_state); + t3_write_reg(adapter, A_PL_RST, F_CRSTWRM | F_CRSTWRMMODE); + + /* +@@ -3268,7 +3268,7 @@ int t3_reset_adapter(struct adapter *ada + return -1; + + if (is_pcie(adapter)) +- pci_restore_state(adapter->pdev); ++ pci_restore_state(adapter->pdev, adapter->saved_pci_state); + return 0; + } + +@@ -3357,8 +3357,6 @@ 
int __devinit t3_prep_adapter(struct ada + + memcpy(adapter->port[i]->dev_addr, hw_addr, + ETH_ALEN); +- memcpy(adapter->port[i]->perm_addr, hw_addr, +- ETH_ALEN); + init_link_config(&p->link_config, p->port_type->caps); + p->phy.ops->power_down(&p->phy, 1); + if (!(p->port_type->caps & SUPPORTED_IRQ)) From swise at opengridcomputing.com Thu Jan 18 08:31:41 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 10:31:41 -0600 Subject: [openib-general] [PATCH ] ofed_1_2 Backport Chelsio to SLES10 Message-ID: <20070118163140.18847.95651.stgit@dell3.ogc.int> Backport Chelsio to SLES10 Signed-off-by: Steve Wise --- .../2.6.16_sles10/include/linux/genalloc.h | 42 +++++ .../2.6.16_sles10/include/linux/interrupt.h | 17 ++ .../2.6.16_sles10/include/linux/netdevice.h | 4 .../backport/2.6.16_sles10/include/linux/random.h | 15 ++ .../backport/2.6.16_sles10/include/linux/skbuff.h | 3 .../2.6.16_sles10/include/linux/workqueue.h | 9 + .../backport/2.6.16_sles10/include/net/netevent.h | 33 ++++ .../backport/2.6.16_sles10/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../backport/2.6.16_sles10/include/src/netevent.c | 71 ++++++++ .../2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch | 12 + .../2.6.16_sles10/linux_stuff_to_2_6_17.patch | 24 +++ 11 files changed, 427 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/genalloc.h b/kernel_addons/backport/2.6.16_sles10/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. + */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/interrupt.h b/kernel_addons/backport/2.6.16_sles10/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h b/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h index 
5641019..225eeda 100644 --- a/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h @@ -15,4 +15,8 @@ static inline void netif_tx_unlock(struc spin_unlock(&dev->xmit_lock); } +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + #endif diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/random.h b/kernel_addons/backport/2.6.16_sles10/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h b/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h index 4845283..70bf011 100644 --- a/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size #endif diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h b/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK 
#define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.16_sles10/include/net/netevent.h b/kernel_addons/backport/2.6.16_sles10/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.16_sles10/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.16_sles10/include/src/genalloc.c b/kernel_addons/backport/2.6.16_sles10/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.16_sles10/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular 
kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. 
+ */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. 
+ */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. 
+ */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c b/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * unregister_netevent_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_netevent_notifier(). The notifier is unlinked from the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all netevent notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.16_sles10/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.16_sles10/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++
b/kernel_patches/backport/2.6.16_sles10/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o From vlad at mellanox.co.il Thu Jan 18 08:38:21 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:38:21 +0200 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to 2.6.15 In-Reply-To: <20070117195000.30830.84557.stgit@dell3.ogc.int> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195000.30830.84557.stgit@dell3.ogc.int> Message-ID: <1169138302.23809.81.camel@swlab50.yok.mtl.com> This patch breaks compilation on openfabrics server (Ubuntu: 2.6.15-23-server) gcc -m32 -Wp,-MD,/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.addr.o.d -nostdinc -isystem /usr/lib/gcc/i486-linux-gnu/4.0.3/include -D__KERNEL__ \ -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/ \ -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include \ -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include \ -Iinclude \ \ -include include/linux/autoconf.h \ -include /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include/linux/autoconf.h \ -Wall -Wundef 
-Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -pipe -msoft-float -mpreferred-stack-boundary=2 -fno-unit-at-a-time -march=i686 -Iinclude/asm-i386/mach-default -Wdeclaration-after-statement -Wno-pointer-sign -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/ulp/ipoib -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/debug -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/hw/cxgb3/core -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/net/cxgb3 -DMODULE -DKBUILD_BASENAME=addr -DKBUILD_MODNAME=ib_addr -c -o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.tmp_addr.o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c In file included from include/linux/inetdevice.h:7, from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/inetdevice.h:4, from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:32: /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/netdevice.h:19: error: redefinition of '__netif_rx_schedule_prep' include/linux/netdevice.h:807: error: previous definition of '__netif_rx_schedule_prep' was here /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:61: warning: initialization from incompatible pointer type make[3]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.o] Error 1 make[2]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband] Error 2 make[1]: Leaving directory `/usr/src/linux-headers-2.6.15-23-server' make: *** [kernel] Error 
2 Regards, Vladimir On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > Backport cxgb3 to 2.6.15 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.15/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.15/include/linux/interrupt.h | 17 ++ > .../backport/2.6.15/include/linux/netdevice.h | 9 + > .../backport/2.6.15/include/linux/random.h | 15 ++ > .../backport/2.6.15/include/linux/skbuff.h | 3 > .../backport/2.6.15/include/linux/types.h | 6 + > .../backport/2.6.15/include/linux/workqueue.h | 9 + > .../backport/2.6.15/include/net/netevent.h | 33 ++++ > .../backport/2.6.15/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.15/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.15/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.15/linux_stuff_to_2_6_17.patch | 24 +++ > 12 files changed, 438 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.15/include/linux/genalloc.h b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.15/include/linux/interrupt.h b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/netdevice.h b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > index 5641019..2f12781 100644 > --- a/kernel_addons/backport/2.6.15/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > 
spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/random.h b/kernel_addons/backport/2.6.15/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/skbuff.h b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.15/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/types.h b/kernel_addons/backport/2.6.15/include/linux/types.h > new file mode 100644 > index 0000000..86e334f > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/linux/types.h > @@ -0,0 +1,6 @@ > +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 > +#define BACKPORT_LINUX_TYPES_TO_2_6_15 > +#include_next > + > +#define BITS_PER_BYTE 8 > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/linux/workqueue.h b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.15/include/linux/workqueue.h > +++ 
b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.15/include/net/netevent.h b/kernel_addons/backport/2.6.15/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int 
call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.15/include/src/genalloc.c b/kernel_addons/backport/2.6.15/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.15/include/src/netevent.c b/kernel_addons/backport/2.6.15/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch 
b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sean.hefty at intel.com Thu Jan 18 08:51:00 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 18 Jan 2007 08:51:00 -0800 Subject: [openib-general] shorten wq names In-Reply-To: <20070118133036.GD30391@mellanox.co.il> Message-ID: <001f01c73b20$d5f5da80$56d9180a@amr.corp.intel.com> >Sean, older kernels didn't allow work queue names > 10 chars. >Since local sa is OFED only anyway, could you make life easier >for backports us by renaming the workqueue from local_sa_wq to local_sa? I'll fix this today. >ib_addr_wq -> ib_addr >rdma_cm_wq -> rdma_cm >iw_cm_wq -> iw_cm >ib_mcast_wq -> ib_mcast This is fine by me. 
- Sean From swise at opengridcomputing.com Thu Jan 18 08:54:52 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 10:54:52 -0600 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to 2.6.15 In-Reply-To: <1169138302.23809.81.camel@swlab50.yok.mtl.com> References: <20070117194949.30830.55013.stgit@dell3.ogc.int> <20070117195000.30830.84557.stgit@dell3.ogc.int> <1169138302.23809.81.camel@swlab50.yok.mtl.com> Message-ID: <1169139292.15460.4.camel@stevo-desktop> Michael/Vlad, This is not one of the kernels that has a set of backport files in kernel_addons. So it's pulling in the kernel.org 2.6.15 backport files. Apparently Ubuntu has back-ported some stuff into their 2.6.15-23-server kernel. How shall we proceed? This isn't one of the ofed-1.2 supported distros or kernels, yet you're building against it. I'm willing to create a new directory kernel_addons/backport/2.6.15_Ubuntu for this and do the backport if it is the correct thing to do. Steve. On Thu, 2007-01-18 at 18:38 +0200, Vladimir Sokolovsky wrote: > This patch breaks compilation on openfabrics server (Ubuntu: 2.6.15-23-server) > > gcc -m32 -Wp,-MD,/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.addr.o.d -nostdinc -isystem /usr/lib/gcc/i486-linux-gnu/4.0.3/include -D__KERNEL__ \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/ \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include \ > -Iinclude \ > \ > -include include/linux/autoconf.h \ > -include /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include/linux/autoconf.h \ > -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -pipe -msoft-float -mpreferred-stack-boundary=2 -fno-unit-at-a-time -march=i686 -Iinclude/asm-i386/mach-default -Wdeclaration-after-statement -Wno-pointer-sign
-I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/ulp/ipoib -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/debug -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/hw/cxgb3/core -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/net/cxgb3 -DMODULE -DKBUILD_BASENAME=addr -DKBUILD_MODNAME=ib_addr -c -o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.tmp_addr.o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr. c > In file included from include/linux/inetdevice.h:7, > from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/inetdevice.h:4, > from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:32: > /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/netdevice.h:19: error: redefinition of '__netif_rx_schedule_prep' > include/linux/netdevice.h:807: error: previous definition of '__netif_rx_schedule_prep' was here > /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:61: warning: initialization from incompatible pointer type > make[3]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.o] Error 1 > make[2]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core] Error 2 > make[1]: *** [_module_/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband] Error 2 > make[1]: Leaving directory `/usr/src/linux-headers-2.6.15-23-server' > make: *** [kernel] Error 2 > > Regards, > Vladimir > > > On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > > Backport cxgb3 to 2.6.15 > > > > Signed-off-by: Steve Wise > > --- > > > > .../backport/2.6.15/include/linux/genalloc.h | 42 +++++ > > 
.../backport/2.6.15/include/linux/interrupt.h | 17 ++ > > .../backport/2.6.15/include/linux/netdevice.h | 9 + > > .../backport/2.6.15/include/linux/random.h | 15 ++ > > .../backport/2.6.15/include/linux/skbuff.h | 3 > > .../backport/2.6.15/include/linux/types.h | 6 + > > .../backport/2.6.15/include/linux/workqueue.h | 9 + > > .../backport/2.6.15/include/net/netevent.h | 33 ++++ > > .../backport/2.6.15/include/src/genalloc.c | 198 +++++++++++++++++++++++ > > .../backport/2.6.15/include/src/netevent.c | 71 ++++++++ > > .../backport/2.6.15/cxgb3_makefile_to_2_6_19.patch | 12 + > > .../backport/2.6.15/linux_stuff_to_2_6_17.patch | 24 +++ > > 12 files changed, 438 insertions(+), 1 deletions(-) > > > > diff --git a/kernel_addons/backport/2.6.15/include/linux/genalloc.h b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > > new file mode 100644 > > index 0000000..3c23c68 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > > @@ -0,0 +1,42 @@ > > +/* > > + * Basic general purpose allocator for managing special purpose memory > > + * not managed by the regular kmalloc/kfree interface. > > + * Uses for this includes on-device special memory, uncached memory > > + * etc. > > + * > > + * This source code is licensed under the GNU General Public License, > > + * Version 2. See the file COPYING for more details. > > + */ > > + > > + > > +/* > > + * General purpose special memory pool descriptor. > > + */ > > +struct gen_pool { > > + rwlock_t lock; > > + struct list_head chunks; /* list of chunks in this pool */ > > + int min_alloc_order; /* minimum allocation order */ > > +}; > > + > > +/* > > + * General purpose special memory pool chunk descriptor. 
> > + */ > > +struct gen_pool_chunk { > > + spinlock_t lock; > > + struct list_head next_chunk; /* next chunk in pool */ > > + unsigned long start_addr; /* starting address of memory chunk */ > > + unsigned long end_addr; /* ending address of memory chunk */ > > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > > +}; > > + > > +extern struct gen_pool *ib_gen_pool_create(int, int); > > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > > +extern void ib_gen_pool_destroy(struct gen_pool *); > > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > > + > > +#define gen_pool_create ib_gen_pool_create > > +#define gen_pool_add ib_gen_pool_add > > +#define gen_pool_destroy ib_gen_pool_destroy > > +#define gen_pool_alloc ib_gen_pool_alloc > > +#define gen_pool_free ib_gen_pool_free > > diff --git a/kernel_addons/backport/2.6.15/include/linux/interrupt.h b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > > new file mode 100644 > > index 0000000..66e66a9 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > > @@ -0,0 +1,17 @@ > > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > > +#include_next > > + > > +static inline int > > +backport_request_irq(unsigned int irq, > > + irqreturn_t (*handler)(int, void *), > > + unsigned long flags, const char *dev_name, void *dev_id) > > +{ > > + return request_irq(irq, > > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > > + flags, dev_name, dev_id); > > +} > > + > > +#define request_irq backport_request_irq > > + > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/netdevice.h b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > index 5641019..2f12781 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > +++ 
b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > > spin_unlock(&dev->xmit_lock); > > } > > > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > > +{ > > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > > +} > > + > > +#undef SET_ETHTOOL_OPS > > +#define SET_ETHTOOL_OPS(netdev, ops) \ > > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > > + > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/random.h b/kernel_addons/backport/2.6.15/include/linux/random.h > > new file mode 100644 > > index 0000000..2ea2e1f > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/random.h > > @@ -0,0 +1,15 @@ > > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > > +#include_next > > + > > +static inline u32 backport_random32(void) > > +{ > > + u32 v; > > + > > + get_random_bytes(&v, sizeof(u32)); > > + return v; > > +} > > + > > +#define random32 backport_random32 > > + > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/skbuff.h b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > index 4845283..70bf011 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > +++ b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > > #include_next > > > > #define CHECKSUM_PARTIAL CHECKSUM_HW > > +#define CHECKSUM_COMPLETE CHECKSUM_HW > > + > > +#define gso_size tso_size > > > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/types.h b/kernel_addons/backport/2.6.15/include/linux/types.h > > new file mode 100644 > > index 0000000..86e334f > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/types.h > > @@ -0,0 +1,6 @@ > > +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 > > +#define BACKPORT_LINUX_TYPES_TO_2_6_15 > > +#include_next > > + > > +#define BITS_PER_BYTE 8 > > +#endif > > 
diff --git a/kernel_addons/backport/2.6.15/include/linux/workqueue.h b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > index 330f47f..cc8b2cd 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > +++ b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > > return cancel_delayed_work(&work->work); > > } > > > > +static inline void > > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > > +{ > > + cancel_rearming_delayed_workqueue(wq, &work->work); > > +} > > + > > > > #undef INIT_WORK > > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > > > #undef DECLARE_WORK > > #define DECLARE_WORK(n, f) \ > > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > > #define DECLARE_DELAYED_WORK(n, f) \ > > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > > > #define queue_delayed_work backport_queue_delayed_work > > #define cancel_delayed_work backport_cancel_delayed_work > > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/net/netevent.h b/kernel_addons/backport/2.6.15/include/net/netevent.h > > new file mode 100644 > > index 0000000..e5d2162 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/net/netevent.h > > @@ -0,0 +1,33 @@ > > +#ifndef _NET_EVENT_H > > +#define _NET_EVENT_H > > + > > +/* > > + * Generic netevent notifiers > > + * > > + * Authors: > > + * Tom Tucker > > + * Steve Wise > > + * > > + * Changes: > > + */ > > +#ifdef __KERNEL__ > > + > > +#include > > + > > +struct netevent_redirect { > > + struct dst_entry *old; > > + struct dst_entry *new; > > +}; > > + > > +enum netevent_notif_type { > > + 
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > > +}; > > + > > +extern int register_netevent_notifier(struct notifier_block *nb); > > +extern int unregister_netevent_notifier(struct notifier_block *nb); > > +extern int call_netevent_notifiers(unsigned long val, void *v); > > + > > +#endif > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/src/genalloc.c b/kernel_addons/backport/2.6.15/include/src/genalloc.c > > new file mode 100644 > > index 0000000..75ae68c > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/src/genalloc.c > > @@ -0,0 +1,198 @@ > > +/* > > + * Basic general purpose allocator for managing special purpose memory > > + * not managed by the regular kmalloc/kfree interface. > > + * Uses for this includes on-device special memory, uncached memory > > + * etc. > > + * > > + * Copyright 2005 (C) Jes Sorensen > > + * > > + * This source code is licensed under the GNU General Public License, > > + * Version 2. See the file COPYING for more details. > > + */ > > + > > +#include > > +#include > > + > > + > > +/** > > + * gen_pool_create - create a new special memory pool > > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > > + * @nid: node id of the node the pool structure should be allocated on, or -1 > > + * > > + * Create a new special memory pool that can be used to manage special purpose > > + * memory not managed by the regular kmalloc/kfree interface. 
> > + */ > > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > > +{ > > + struct gen_pool *pool; > > + > > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > > + if (pool != NULL) { > > + rwlock_init(&pool->lock); > > + INIT_LIST_HEAD(&pool->chunks); > > + pool->min_alloc_order = min_alloc_order; > > + } > > + return pool; > > +} > > +EXPORT_SYMBOL(gen_pool_create); > > + > > +/** > > + * gen_pool_add - add a new chunk of special memory to the pool > > + * @pool: pool to add new memory chunk to > > + * @addr: starting address of memory chunk to add to pool > > + * @size: size in bytes of the memory chunk to add to pool > > + * @nid: node id of the node the chunk structure and bitmap should be > > + * allocated on, or -1 > > + * > > + * Add a new chunk of special memory to the specified pool. > > + */ > > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > > + int nid) > > +{ > > + struct gen_pool_chunk *chunk; > > + int nbits = size >> pool->min_alloc_order; > > + int nbytes = sizeof(struct gen_pool_chunk) + > > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > > + > > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > > + if (unlikely(chunk == NULL)) > > + return -1; > > + > > + memset(chunk, 0, nbytes); > > + spin_lock_init(&chunk->lock); > > + chunk->start_addr = addr; > > + chunk->end_addr = addr + size; > > + > > + write_lock(&pool->lock); > > + list_add(&chunk->next_chunk, &pool->chunks); > > + write_unlock(&pool->lock); > > + > > + return 0; > > +} > > +EXPORT_SYMBOL(gen_pool_add); > > + > > +/** > > + * gen_pool_destroy - destroy a special memory pool > > + * @pool: pool to destroy > > + * > > + * Destroy the specified special memory pool. Verifies that there are no > > + * outstanding allocations. 
> > + */ > > +void gen_pool_destroy(struct gen_pool *pool) > > +{ > > + struct list_head *_chunk, *_next_chunk; > > + struct gen_pool_chunk *chunk; > > + int order = pool->min_alloc_order; > > + int bit, end_bit; > > + > > + > > + write_lock(&pool->lock); > > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + list_del(&chunk->next_chunk); > > + > > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > > + bit = find_next_bit(chunk->bits, end_bit, 0); > > + BUG_ON(bit < end_bit); > > + > > + kfree(chunk); > > + } > > + kfree(pool); > > + return; > > +} > > +EXPORT_SYMBOL(gen_pool_destroy); > > + > > +/** > > + * gen_pool_alloc - allocate special memory from the pool > > + * @pool: pool to allocate from > > + * @size: number of bytes to allocate from the pool > > + * > > + * Allocate the requested number of bytes from the specified pool. > > + * Uses a first-fit algorithm. > > + */ > > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > > +{ > > + struct list_head *_chunk; > > + struct gen_pool_chunk *chunk; > > + unsigned long addr, flags; > > + int order = pool->min_alloc_order; > > + int nbits, bit, start_bit, end_bit; > > + > > + if (size == 0) > > + return 0; > > + > > + nbits = (size + (1UL << order) - 1) >> order; > > + > > + read_lock(&pool->lock); > > + list_for_each(_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + > > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > > + end_bit -= nbits + 1; > > + > > + spin_lock_irqsave(&chunk->lock, flags); > > + bit = -1; > > + while (bit + 1 < end_bit) { > > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > > + if (bit >= end_bit) > > + break; > > + > > + start_bit = bit; > > + if (nbits > 1) { > > + bit = find_next_bit(chunk->bits, bit + nbits, > > + bit + 1); > > + if (bit - start_bit < nbits) > > + continue; > > + } > > + > > + addr = 
chunk->start_addr + > > + ((unsigned long)start_bit << order); > > + while (nbits--) > > + __set_bit(start_bit++, &chunk->bits); > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + read_unlock(&pool->lock); > > + return addr; > > + } > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + } > > + read_unlock(&pool->lock); > > + return 0; > > +} > > +EXPORT_SYMBOL(gen_pool_alloc); > > + > > +/** > > + * gen_pool_free - free allocated special memory back to the pool > > + * @pool: pool to free to > > + * @addr: starting address of memory to free back to pool > > + * @size: size in bytes of memory to free > > + * > > + * Free previously allocated special memory back to the specified pool. > > + */ > > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > > +{ > > + struct list_head *_chunk; > > + struct gen_pool_chunk *chunk; > > + unsigned long flags; > > + int order = pool->min_alloc_order; > > + int bit, nbits; > > + > > + nbits = (size + (1UL << order) - 1) >> order; > > + > > + read_lock(&pool->lock); > > + list_for_each(_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + > > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > > + BUG_ON(addr + size > chunk->end_addr); > > + spin_lock_irqsave(&chunk->lock, flags); > > + bit = (addr - chunk->start_addr) >> order; > > + while (nbits--) > > + __clear_bit(bit++, &chunk->bits); > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + break; > > + } > > + } > > + BUG_ON(nbits > 0); > > + read_unlock(&pool->lock); > > +} > > +EXPORT_SYMBOL(gen_pool_free); > > diff --git a/kernel_addons/backport/2.6.15/include/src/netevent.c b/kernel_addons/backport/2.6.15/include/src/netevent.c > > new file mode 100644 > > index 0000000..5ffadd1 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/src/netevent.c > > @@ -0,0 +1,71 @@ > > +/* > > + * Network event notifiers > > + * > > + * Authors: > > + * Tom Tucker > > + * Steve Wise > 
> + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public License > > + * as published by the Free Software Foundation; either version > > + * 2 of the License, or (at your option) any later version. > > + * > > + * Fixes: > > + */ > > + > > +#include > > +#include > > +#include > > +#include > > + > > +static struct notifier_block *netevent_notif_chain; > > + > > +/** > > + * register_netevent_notifier - register a netevent notifier block > > + * @nb: notifier > > + * > > + * Register a notifier to be called when a netevent occurs. > > + * The notifier passed is linked into the kernel structures and must > > + * not be reused until it has been unregistered. A negative errno code > > + * is returned on a failure. > > + */ > > +int register_netevent_notifier(struct notifier_block *nb) > > +{ > > + int err; > > + > > + err = notifier_chain_register(&netevent_notif_chain, nb); > > + return err; > > +} > > + > > +/** > > + * netevent_unregister_notifier - unregister a netevent notifier block > > + * @nb: notifier > > + * > > + * Unregister a notifier previously registered by > > + * register_neigh_notifier(). The notifier is unlinked into the > > + * kernel structures and may then be reused. A negative errno code > > + * is returned on a failure. > > + */ > > + > > +int unregister_netevent_notifier(struct notifier_block *nb) > > +{ > > + return notifier_chain_unregister(&netevent_notif_chain, nb); > > +} > > + > > +/** > > + * call_netevent_notifiers - call all netevent notifier blocks > > + * @val: value passed unmodified to notifier function > > + * @v: pointer passed unmodified to notifier function > > + * > > + * Call all neighbour notifier blocks. Parameters and return value > > + * are as for notifier_call_chain(). 
> > + */ > > + > > +int call_netevent_notifiers(unsigned long val, void *v) > > +{ > > + return notifier_call_chain(&netevent_notif_chain, val, v); > > +} > > + > > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > > diff --git a/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > > new file mode 100644 > > index 0000000..ad7e7f4 > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > > @@ -0,0 +1,12 @@ > > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > > +index 3434679..bb008b6 100755 > > +--- a/drivers/net/cxgb3/Makefile > > ++++ b/drivers/net/cxgb3/Makefile > > +@@ -1,6 +1,7 @@ > > + # > > + # Chelsio T3 driver > > + # > > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > > + > > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > > + > > diff --git a/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > > new file mode 100644 > > index 0000000..eb2285f > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > > @@ -0,0 +1,24 @@ > > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > > +new file mode 100644 > > +index 0000000..58cf933 > > +--- /dev/null > > ++++ b/drivers/infiniband/core/genalloc.c > > +@@ -0,0 +1 @@ > > ++#include "src/genalloc.c" > > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > > +new file mode 100644 > > +index 0000000..58cf933 > > +--- /dev/null > > ++++ b/drivers/infiniband/core/netevent.c > > +@@ -0,0 +1 @@ > > ++#include "src/netevent.c" > > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > > +index 50fb1cd..456bfd0 100644 > > +--- a/drivers/infiniband/core/Makefile > > ++++ 
b/drivers/infiniband/core/Makefile > > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > > + > > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > > + uverbs_marshall.o > > ++ > > ++ib_core-y += genalloc.o netevent.o > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:53:32 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:53:32 +0200 Subject: [openib-general] [PATCH ] ofed_1_2 Backport Chelsio to RHEL4U4 In-Reply-To: <20070118163111.18825.58472.stgit@dell3.ogc.int> References: <20070118163111.18825.58472.stgit@dell3.ogc.int> Message-ID: <1169139214.23809.83.camel@swlab50.yok.mtl.com> Applied. Regards, Vladimir On Thu, 2007-01-18 at 10:31 -0600, Steve Wise wrote: > Backport Chelsio to RHEL4U4 > > Signed-off-by: Steve Wise > --- > > .../backport/2.6.9_U4/include/linux/ethtool.h | 9 + > .../backport/2.6.9_U4/include/linux/genalloc.h | 42 +++++ > .../backport/2.6.9_U4/include/linux/interrupt.h | 17 ++ > .../backport/2.6.9_U4/include/linux/kfifo.h | 157 ++++++++++++++++++ > kernel_addons/backport/2.6.9_U4/include/linux/mm.h | 20 ++ > .../backport/2.6.9_U4/include/linux/netdevice.h | 9 + > .../backport/2.6.9_U4/include/linux/pci.h | 1 > .../backport/2.6.9_U4/include/linux/random.h | 15 ++ > .../backport/2.6.9_U4/include/linux/skbuff.h | 4 > .../backport/2.6.9_U4/include/linux/slab.h | 19 -- > .../backport/2.6.9_U4/include/linux/types.h | 8 + > .../backport/2.6.9_U4/include/linux/workqueue.h | 8 + > kernel_addons/backport/2.6.9_U4/include/net/dst.h | 16 ++ > .../backport/2.6.9_U4/include/net/netevent.h | 33 ++++ > .../backport/2.6.9_U4/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.9_U4/include/src/kfifo.c | 196 +++++++++++++++++++++++ > 
.../backport/2.6.9_U4/include/src/netevent.c | 71 ++++++++ > .../backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch | 12 + > .../2.6.9_U4/cxgb3_makefile_to_2_6_19.patch | 12 + > .../backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch | 13 ++ > .../backport/2.6.9_U4/linux_stuff_to_2_6_17.patch | 31 ++++ > .../2.6.9_U4/mthca_provider_3465_to_2_6_9.patch | 15 -- > .../backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch | 43 +++++ > 23 files changed, 917 insertions(+), 32 deletions(-) > > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/ethtool.h b/kernel_addons/backport/2.6.9_U4/include/linux/ethtool.h > new file mode 100644 > index 0000000..d03127c > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/ethtool.h > @@ -0,0 +1,9 @@ > +#ifndef BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > +#define BACKPORT_LINUX_ETHTOOL_TO_2_6_13 > + > +#include_next > + > +#define ADVERTISED_Pause (1 << 13) > +#define ADVERTISED_Asym_Pause (1 << 14) > + > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/genalloc.h b/kernel_addons/backport/2.6.9_U4/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/interrupt.h b/kernel_addons/backport/2.6.9_U4/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/kfifo.h b/kernel_addons/backport/2.6.9_U4/include/linux/kfifo.h > new file mode 100644 > index 0000000..48eccd8 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/kfifo.h > @@ -0,0 +1,157 @@ > +/* > + * A simple kernel FIFO implementation. 
> + * > + * Copyright (C) 2004 Stelian Pop > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > + * > + */ > +#ifndef _LINUX_KFIFO_H > +#define _LINUX_KFIFO_H > + > +#ifdef __KERNEL__ > + > +#include > +#include > + > +struct kfifo { > + unsigned char *buffer; /* the buffer holding the data */ > + unsigned int size; /* the size of the allocated buffer */ > + unsigned int in; /* data is added at offset (in % size) */ > + unsigned int out; /* data is extracted from off. (out % size) */ > + spinlock_t *lock; /* protects concurrent modifications */ > +}; > + > +extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, > + gfp_t gfp_mask, spinlock_t *lock); > +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, > + spinlock_t *lock); > +extern void kfifo_free(struct kfifo *fifo); > +extern unsigned int __kfifo_put(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len); > +extern unsigned int __kfifo_get(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len); > + > +/** > + * __kfifo_reset - removes the entire FIFO contents, no locking version > + * @fifo: the fifo to be emptied. 
> + */ > +static inline void __kfifo_reset(struct kfifo *fifo) > +{ > + fifo->in = fifo->out = 0; > +} > + > +/** > + * kfifo_reset - removes the entire FIFO contents > + * @fifo: the fifo to be emptied. > + */ > +static inline void kfifo_reset(struct kfifo *fifo) > +{ > + unsigned long flags; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + __kfifo_reset(fifo); > + > + spin_unlock_irqrestore(fifo->lock, flags); > +} > + > +/** > + * kfifo_put - puts some data into the FIFO > + * @fifo: the fifo to be used. > + * @buffer: the data to be added. > + * @len: the length of the data to be added. > + * > + * This function copies at most 'len' bytes from the 'buffer' into > + * the FIFO depending on the free space, and returns the number of > + * bytes copied. > + */ > +static inline unsigned int kfifo_put(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned long flags; > + unsigned int ret; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + ret = __kfifo_put(fifo, buffer, len); > + > + spin_unlock_irqrestore(fifo->lock, flags); > + > + return ret; > +} > + > +/** > + * kfifo_get - gets some data from the FIFO > + * @fifo: the fifo to be used. > + * @buffer: where the data must be copied. > + * @len: the size of the destination buffer. > + * > + * This function copies at most 'len' bytes from the FIFO into the > + * 'buffer' and returns the number of copied bytes. 
> + */ > +static inline unsigned int kfifo_get(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned long flags; > + unsigned int ret; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + ret = __kfifo_get(fifo, buffer, len); > + > + /* > + * optimization: if the FIFO is empty, set the indices to 0 > + * so we don't wrap the next time > + */ > + if (fifo->in == fifo->out) > + fifo->in = fifo->out = 0; > + > + spin_unlock_irqrestore(fifo->lock, flags); > + > + return ret; > +} > + > +/** > + * __kfifo_len - returns the number of bytes available in the FIFO, no locking version > + * @fifo: the fifo to be used. > + */ > +static inline unsigned int __kfifo_len(struct kfifo *fifo) > +{ > + return fifo->in - fifo->out; > +} > + > +/** > + * kfifo_len - returns the number of bytes available in the FIFO > + * @fifo: the fifo to be used. > + */ > +static inline unsigned int kfifo_len(struct kfifo *fifo) > +{ > + unsigned long flags; > + unsigned int ret; > + > + spin_lock_irqsave(fifo->lock, flags); > + > + ret = __kfifo_len(fifo); > + > + spin_unlock_irqrestore(fifo->lock, flags); > + > + return ret; > +} > + > +#else > +#warning "don't include kernel headers in userspace" > +#endif /* __KERNEL__ */ > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/mm.h b/kernel_addons/backport/2.6.9_U4/include/linux/mm.h > new file mode 100644 > index 0000000..77ee6fc > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/mm.h > @@ -0,0 +1,20 @@ > +#ifndef BACKPORT_LINUX_MM_TO_SLES9SP3 > +#define BACKPORT_LINUX_MM_TO_SLES9SP3 > + > +#include_next > + > +static inline int > +remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, > + unsigned long pfn, unsigned long size, pgprot_t prot) > +{ > + return remap_page_range(vma, addr, pfn << PAGE_SHIFT, size, prot); > +} > + > +static inline int > +io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, > + unsigned long pfn, unsigned long size, pgprot_t prot) 
> +{ > + return io_remap_page_range(vma, addr, pfn << PAGE_SHIFT, size, prot); > +} > + > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h b/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h > index 5641019..2f12781 100644 > --- a/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/netdevice.h > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > spin_unlock(&dev->xmit_lock); > } > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > +{ > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/pci.h b/kernel_addons/backport/2.6.9_U4/include/linux/pci.h > index bd47cd4..6e0df62 100644 > --- a/kernel_addons/backport/2.6.9_U4/include/linux/pci.h > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/pci.h > @@ -4,5 +4,6 @@ #define LINUX_PCI_BACKPORT_TO_2_6_9_H > #include_next > > #define PCI_EXP_LNKCTL 16 /* Link Control */ > +#define PCI_EXP_LNKSTA 18 /* Link Status */ > > #endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/random.h b/kernel_addons/backport/2.6.9_U4/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h b/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h > index 94b968a..62bfb3b 100644 > --- 
a/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/skbuff.h > @@ -3,7 +3,8 @@ #define LINUX_SKBUFF_H_BACKPORT > > #include_next > > -#define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > > /** > * skb_header_release - release reference to header > @@ -17,5 +18,6 @@ static inline void skb_header_release(st > { > } > > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/slab.h b/kernel_addons/backport/2.6.9_U4/include/linux/slab.h > index 46ac6e5..cbf6311 100644 > --- a/kernel_addons/backport/2.6.9_U4/include/linux/slab.h > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/slab.h > @@ -1,10 +1,8 @@ > -#include_next > +#ifndef LINUX_SLAB_BACKPORT_H > +#define LINUX_SLAB_BACKPORT_H > > #include_next > > -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > -#define BACKPORT_LINUX_STRING_TO_2_6_18 > - > static inline > void *kmemdup(const void *src, size_t len, gfp_t gfp) > { > @@ -16,19 +14,10 @@ void *kmemdup(const void *src, size_t le > return p; > } > > -#endif > -#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > -#define BACKPORT_LINUX_STRING_TO_2_6_18 > - > static inline > -void *kmemdup(const void *src, size_t len, gfp_t gfp) > +void *kmalloc_node(size_t size, gfp_t flags, int nid) > { > - void *p; > - > - p = kmalloc(len, gfp); > - if (p) > - memcpy(p, src, len); > - return p; > + return kmalloc(size, flags); > } > > #endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/types.h b/kernel_addons/backport/2.6.9_U4/include/linux/types.h > new file mode 100644 > index 0000000..0c19a43 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/types.h > @@ -0,0 +1,8 @@ > +#ifndef LINUX_TYPES_BACKPORT_H > +#define LINUX_TYPES_BACKPORT_H > + > +#include_next > + > +#define BITS_PER_BYTE 8 > + > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h 
b/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h > index 330f47f..c054ed2 100644 > --- a/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.9_U4/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, > + struct delayed_work *dwork) > +{ > + while (!cancel_delayed_work(&dwork->work)) > + flush_workqueue(wq); > +} > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ -33,7 +39,7 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > diff --git a/kernel_addons/backport/2.6.9_U4/include/net/dst.h b/kernel_addons/backport/2.6.9_U4/include/net/dst.h > new file mode 100644 > index 0000000..ec5e7b4 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/net/dst.h > @@ -0,0 +1,16 @@ > +#ifndef BACKPORT_NET_DST_2_6_11 > +#define BACKPORT_NET_DST_2_6_11 > + > +#include_next > + > +static inline u32 dst_mtu(struct dst_entry *dst) > +{ > + u32 mtu = dst_metric(dst, RTAX_MTU); > + /* > + * Alexey put it here, so ask him about it :) > + */ > + barrier(); > + return mtu; > +} > + > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/net/netevent.h b/kernel_addons/backport/2.6.9_U4/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + 
*/ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.9_U4/include/src/genalloc.c b/kernel_addons/backport/2.6.9_U4/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. 
> + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. > + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. 
> + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. > + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + 
spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. > + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.9_U4/include/src/kfifo.c b/kernel_addons/backport/2.6.9_U4/include/src/kfifo.c > new file mode 100644 > index 0000000..5d1d907 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/src/kfifo.c > @@ -0,0 +1,196 @@ > +/* > + * A simple kernel FIFO implementation. 
> + * > + * Copyright (C) 2004 Stelian Pop > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > + * > + */ > + > +#include > +#include > +#include > +#include > +#include > + > +/** > + * kfifo_init - allocates a new FIFO using a preallocated buffer > + * @buffer: the preallocated buffer to be used. > + * @size: the size of the internal buffer, this have to be a power of 2. > + * @gfp_mask: get_free_pages mask, passed to kmalloc() > + * @lock: the lock to be used to protect the fifo buffer > + * > + * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the > + * struct kfifo with kfree(). > + */ > +struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, > + gfp_t gfp_mask, spinlock_t *lock) > +{ > + struct kfifo *fifo; > + > + /* size must be a power of 2 */ > + BUG_ON(size & (size - 1)); > + > + fifo = kmalloc(sizeof(struct kfifo), gfp_mask); > + if (!fifo) > + return ERR_PTR(-ENOMEM); > + > + fifo->buffer = buffer; > + fifo->size = size; > + fifo->in = fifo->out = 0; > + fifo->lock = lock; > + > + return fifo; > +} > +EXPORT_SYMBOL(kfifo_init); > + > +/** > + * kfifo_alloc - allocates a new FIFO and its internal buffer > + * @size: the size of the internal buffer to be allocated. 
> + * @gfp_mask: get_free_pages mask, passed to kmalloc() > + * @lock: the lock to be used to protect the fifo buffer > + * > + * The size will be rounded-up to a power of 2. > + */ > +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) > +{ > + unsigned char *buffer; > + struct kfifo *ret; > + > + /* > + * round up to the next power of 2, since our 'let the indices > + * wrap' tachnique works only in this case. > + */ > + if (size & (size - 1)) { > + BUG_ON(size > 0x80000000); > + size = roundup_pow_of_two(size); > + } > + > + buffer = kmalloc(size, gfp_mask); > + if (!buffer) > + return ERR_PTR(-ENOMEM); > + > + ret = kfifo_init(buffer, size, gfp_mask, lock); > + > + if (IS_ERR(ret)) > + kfree(buffer); > + > + return ret; > +} > +EXPORT_SYMBOL(kfifo_alloc); > + > +/** > + * kfifo_free - frees the FIFO > + * @fifo: the fifo to be freed. > + */ > +void kfifo_free(struct kfifo *fifo) > +{ > + kfree(fifo->buffer); > + kfree(fifo); > +} > +EXPORT_SYMBOL(kfifo_free); > + > +/** > + * __kfifo_put - puts some data into the FIFO, no locking version > + * @fifo: the fifo to be used. > + * @buffer: the data to be added. > + * @len: the length of the data to be added. > + * > + * This function copies at most 'len' bytes from the 'buffer' into > + * the FIFO depending on the free space, and returns the number of > + * bytes copied. > + * > + * Note that with only one concurrent reader and one concurrent > + * writer, you don't need extra locking to use these functions. > + */ > +unsigned int __kfifo_put(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned int l; > + > + len = min(len, fifo->size - fifo->in + fifo->out); > + > + /* > + * Ensure that we sample the fifo->out index -before- we > + * start putting bytes into the kfifo. 
> + */ > + > + smp_mb(); > + > + /* first put the data starting from fifo->in to buffer end */ > + l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); > + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); > + > + /* then put the rest (if any) at the beginning of the buffer */ > + memcpy(fifo->buffer, buffer + l, len - l); > + > + /* > + * Ensure that we add the bytes to the kfifo -before- > + * we update the fifo->in index. > + */ > + > + smp_wmb(); > + > + fifo->in += len; > + > + return len; > +} > +EXPORT_SYMBOL(__kfifo_put); > + > +/** > + * __kfifo_get - gets some data from the FIFO, no locking version > + * @fifo: the fifo to be used. > + * @buffer: where the data must be copied. > + * @len: the size of the destination buffer. > + * > + * This function copies at most 'len' bytes from the FIFO into the > + * 'buffer' and returns the number of copied bytes. > + * > + * Note that with only one concurrent reader and one concurrent > + * writer, you don't need extra locking to use these functions. > + */ > +unsigned int __kfifo_get(struct kfifo *fifo, > + unsigned char *buffer, unsigned int len) > +{ > + unsigned int l; > + > + len = min(len, fifo->in - fifo->out); > + > + /* > + * Ensure that we sample the fifo->in index -before- we > + * start removing bytes from the kfifo. > + */ > + > + smp_rmb(); > + > + /* first get the data from fifo->out until the end of the buffer */ > + l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); > + memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); > + > + /* then get the rest (if any) from the beginning of the buffer */ > + memcpy(buffer + l, fifo->buffer, len - l); > + > + /* > + * Ensure that we remove the bytes from the kfifo -before- > + * we update the fifo->out index. 
> + */ > + > + smp_mb(); > + > + fifo->out += len; > + > + return len; > +} > +EXPORT_SYMBOL(__kfifo_get); > diff --git a/kernel_addons/backport/2.6.9_U4/include/src/netevent.c b/kernel_addons/backport/2.6.9_U4/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.9_U4/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. > + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. 
> + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch b/kernel_patches/backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch > new file mode 100644 > index 0000000..e6781f3 > --- /dev/null > +++ b/kernel_patches/backport/2.6.9_U4/cxgb3_main_to_2_6_13.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c > +index dfa035a..414ea84 100755 > +--- a/drivers/net/cxgb3/cxgb3_main.c > ++++ b/drivers/net/cxgb3/cxgb3_main.c > +@@ -1526,7 +1526,6 @@ static const struct ethtool_ops cxgb_eth > + .get_wol = get_wol, > + .get_tso = ethtool_op_get_tso, > + .set_tso = ethtool_op_set_tso, > +- .get_perm_addr = ethtool_op_get_perm_addr > + }; > + > + static int in_range(int val, int lo, int hi) > diff --git a/kernel_patches/backport/2.6.9_U4/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.9_U4/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.9_U4/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + 
# > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch b/kernel_patches/backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch > new file mode 100644 > index 0000000..f8f51d6 > --- /dev/null > +++ b/kernel_patches/backport/2.6.9_U4/iwch_cm_to_2_6_9_U4.patch > @@ -0,0 +1,13 @@ > +diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c > +index 3237fc8..2a38953 100644 > +--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c > ++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c > +@@ -338,7 +338,7 @@ static struct rtable *find_route(struct > + } > + }; > + > +- if (ip_route_output_flow(&rt, &fl, NULL, 0)) > ++ if (ip_route_output_key(&rt, &fl)) > + return NULL; > + return rt; > + } > diff --git a/kernel_patches/backport/2.6.9_U4/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.9_U4/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..e84b964 > --- /dev/null > +++ b/kernel_patches/backport/2.6.9_U4/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,31 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/kfifo.c b/drivers/infiniband/core/kfifo.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/kfifo.c > +@@ -0,0 +1 @@ > ++#include "src/kfifo.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- 
a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o kfifo.o > diff --git a/kernel_patches/backport/2.6.9_U4/mthca_provider_3465_to_2_6_9.patch b/kernel_patches/backport/2.6.9_U4/mthca_provider_3465_to_2_6_9.patch > deleted file mode 100644 > index a3febff..0000000 > --- a/kernel_patches/backport/2.6.9_U4/mthca_provider_3465_to_2_6_9.patch > +++ /dev/null > @@ -1,15 +0,0 @@ > -Index: linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c > -=================================================================== > ---- linux-kernel.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:01:20.000000000 +0200 > -+++ linux-kernel/drivers/infiniband/hw/mthca/mthca_provider.c 2005-11-24 14:03:14.000000000 +0200 > -@@ -359,8 +359,8 @@ static int mthca_mmap_uar(struct ib_ucon > - > - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); > - > -- if (io_remap_pfn_range(vma, vma->vm_start, > -- to_mucontext(context)->uar.pfn, > -+ if (remap_page_range(vma, vma->vm_start, > -+ (to_mucontext(context)->uar.pfn) << PAGE_SHIFT, > - PAGE_SIZE, vma->vm_page_prot)) > - return -EAGAIN; > - > diff --git a/kernel_patches/backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch b/kernel_patches/backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch > new file mode 100644 > index 0000000..a667be0 > --- /dev/null > +++ b/kernel_patches/backport/2.6.9_U4/t3_hw_to_2_6_5-7_244.patch > @@ -0,0 +1,43 @@ > +diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h > +index 463ca32..b931fd2 100755 > +--- a/drivers/net/cxgb3/adapter.h > ++++ b/drivers/net/cxgb3/adapter.h > +@@ -179,6 +179,7 @@ struct adapter { > + struct list_head adapter_list; > + void __iomem *regs; > + struct pci_dev *pdev; > ++ u32 saved_pci_state[16]; > + unsigned long registered_device_map; > + unsigned long open_device_map; > + 
unsigned long flags; > +diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c > +index 14ea6b9..f13f581 100755 > +--- a/drivers/net/cxgb3/t3_hw.c > ++++ b/drivers/net/cxgb3/t3_hw.c > +@@ -3250,7 +3250,7 @@ int t3_reset_adapter(struct adapter *ada > + uint16_t devid = 0; > + > + if (is_pcie(adapter)) > +- pci_save_state(adapter->pdev); > ++ pci_save_state(adapter->pdev, adapter->saved_pci_state); > + t3_write_reg(adapter, A_PL_RST, F_CRSTWRM | F_CRSTWRMMODE); > + > + /* > +@@ -3268,7 +3268,7 @@ int t3_reset_adapter(struct adapter *ada > + return -1; > + > + if (is_pcie(adapter)) > +- pci_restore_state(adapter->pdev); > ++ pci_restore_state(adapter->pdev, adapter->saved_pci_state); > + return 0; > + } > + > +@@ -3357,8 +3357,6 @@ int __devinit t3_prep_adapter(struct ada > + > + memcpy(adapter->port[i]->dev_addr, hw_addr, > + ETH_ALEN); > +- memcpy(adapter->port[i]->perm_addr, hw_addr, > +- ETH_ALEN); > + init_link_config(&p->link_config, p->port_type->caps); > + p->phy.ops->power_down(&p->phy, 1); > + if (!(p->port_type->caps & SUPPORTED_IRQ)) > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at mellanox.co.il Thu Jan 18 08:53:49 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 18 Jan 2007 18:53:49 +0200 Subject: [openib-general] [PATCH ] ofed_1_2 Backport Chelsio to SLES10 In-Reply-To: <20070118163140.18847.95651.stgit@dell3.ogc.int> References: <20070118163140.18847.95651.stgit@dell3.ogc.int> Message-ID: <1169139229.23809.85.camel@swlab50.yok.mtl.com> Applied. 
Regards, Vladimir On Thu, 2007-01-18 at 10:31 -0600, Steve Wise wrote: > Backport Chelsio to SLES10 > > Signed-off-by: Steve Wise > --- > > .../2.6.16_sles10/include/linux/genalloc.h | 42 +++++ > .../2.6.16_sles10/include/linux/interrupt.h | 17 ++ > .../2.6.16_sles10/include/linux/netdevice.h | 4 > .../backport/2.6.16_sles10/include/linux/random.h | 15 ++ > .../backport/2.6.16_sles10/include/linux/skbuff.h | 3 > .../2.6.16_sles10/include/linux/workqueue.h | 9 + > .../backport/2.6.16_sles10/include/net/netevent.h | 33 ++++ > .../backport/2.6.16_sles10/include/src/genalloc.c | 198 +++++++++++++++++++++++ > .../backport/2.6.16_sles10/include/src/netevent.c | 71 ++++++++ > .../2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch | 12 + > .../2.6.16_sles10/linux_stuff_to_2_6_17.patch | 24 +++ > 11 files changed, 427 insertions(+), 1 deletions(-) > > diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/genalloc.h b/kernel_addons/backport/2.6.16_sles10/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/interrupt.h b/kernel_addons/backport/2.6.16_sles10/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h b/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h > index 5641019..225eeda 100644 > --- a/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h > +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/netdevice.h > @@ -15,4 +15,8 @@ static 
inline void netif_tx_unlock(struc > spin_unlock(&dev->xmit_lock); > } > > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > #endif > diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/random.h b/kernel_addons/backport/2.6.16_sles10/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h b/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h > index 4845283..70bf011 100644 > --- a/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h > +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/skbuff.h > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > #include_next > > #define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > > #endif > diff --git a/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h b/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h > index 330f47f..cc8b2cd 100644 > --- a/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h > +++ b/kernel_addons/backport/2.6.16_sles10/include/linux/workqueue.h > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > return cancel_delayed_work(&work->work); > } > > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > > #undef INIT_WORK > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > @@ 
-33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > #undef DECLARE_WORK > #define DECLARE_WORK(n, f) \ > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > #define DECLARE_DELAYED_WORK(n, f) \ > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > #define queue_delayed_work backport_queue_delayed_work > #define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > #endif > diff --git a/kernel_addons/backport/2.6.16_sles10/include/net/netevent.h b/kernel_addons/backport/2.6.16_sles10/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16_sles10/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > +extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.16_sles10/include/src/genalloc.c b/kernel_addons/backport/2.6.16_sles10/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.16_sles10/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special 
purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c b/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * netevent_unregister_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_neigh_notifier(). The notifier is unlinked into the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all neighbour notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ b/kernel_patches/backport/2.6.16_sles10/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.16_sles10/linux_stuff_to_2_6_17.patch 
b/kernel_patches/backport/2.6.16_sles10/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.16_sles10/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From mst at mellanox.co.il Thu Jan 18 09:32:12 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 19:32:12 +0200 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to2.6.15 In-Reply-To: <1169139292.15460.4.camel@stevo-desktop> References: <1169139292.15460.4.camel@stevo-desktop> Message-ID: <20070118173212.GC9890@mellanox.co.il> Yes, this makes sense. We probably want to test for something like 2.6.15-*-* to detect ubuntu. But, no mixed-case please. And maybe we want to name it 2.6.15_ubuntu606 just so it's clear which ubuntu version it is.
Quoting Steve Wise : Subject: Re: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to2.6.15 Michael/Vlad, This is not one of the kernels that has a set of backport files in kernel_addons. So it's pulling in the kernel.org 2.6.15 backport files. Apparently Ubuntu has back-ported some stuff into their 2.6.15-23-server kernel. How shall we proceed? This isn't one of the ofed-1.2 supported distros or kernels, yet you're building against it. I'm willing to create a new directory kernel_addons/backport/2.6.15_Ubuntu for this and do the backport if it is the correct thing to do. Steve. On Thu, 2007-01-18 at 18:38 +0200, Vladimir Sokolovsky wrote: > This patch breaks compilation on openfabrics server (Ubuntu: 2.6.15-23-server) > > gcc -m32 -Wp,-MD,/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.addr.o.d -nostdinc -isystem /usr/lib/gcc/i486-linux-gnu/4.0.3/include -D__KERNEL__ \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/ \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include \ > -Iinclude \ > \ > -include include/linux/autoconf.h \ > -include /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include/linux/autoconf.h \ > -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -pipe -msoft-float -mpreferred-stack-boundary=2 -fno-unit-at-a-time -march=i686 -Iinclude/asm-i386/mach-default -Wdeclaration-after-statement -Wno-pointer-sign -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/ulp/ipoib -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/debug -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/hw/cxgb3/core
-I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/net/cxgb3 -DMODULE -DKBUILD_BASENAME=addr -DKBUILD_MODNAME=ib_addr -c -o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.tmp_addr.o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr. c > In file included from include/linux/inetdevice.h:7, > from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/inetdevice.h:4, > from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:32: > /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/netdevice.h:19: error: redefinition of '__netif_rx_schedule_prep' > include/linux/netdevice.h:807: error: previous definition of '__netif_rx_schedule_prep' was here > /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:61: warning: initialization from incompatible pointer type > make[3]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.o] Error 1 > make[2]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core] Error 2 > make[1]: *** [_module_/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband] Error 2 > make[1]: Leaving directory `/usr/src/linux-headers-2.6.15-23-server' > make: *** [kernel] Error 2 > > Regards, > Vladimir > > > On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > > Backport cxgb3 to 2.6.15 > > > > Signed-off-by: Steve Wise > > --- > > > > .../backport/2.6.15/include/linux/genalloc.h | 42 +++++ > > .../backport/2.6.15/include/linux/interrupt.h | 17 ++ > > .../backport/2.6.15/include/linux/netdevice.h | 9 + > > .../backport/2.6.15/include/linux/random.h | 15 ++ > > .../backport/2.6.15/include/linux/skbuff.h | 3 > > .../backport/2.6.15/include/linux/types.h | 6 + > > .../backport/2.6.15/include/linux/workqueue.h | 9 + > > .../backport/2.6.15/include/net/netevent.h | 33 ++++ > > 
.../backport/2.6.15/include/src/genalloc.c | 198 +++++++++++++++++++++++ > > .../backport/2.6.15/include/src/netevent.c | 71 ++++++++ > > .../backport/2.6.15/cxgb3_makefile_to_2_6_19.patch | 12 + > > .../backport/2.6.15/linux_stuff_to_2_6_17.patch | 24 +++ > > 12 files changed, 438 insertions(+), 1 deletions(-) > > > > diff --git a/kernel_addons/backport/2.6.15/include/linux/genalloc.h b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > > new file mode 100644 > > index 0000000..3c23c68 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > > @@ -0,0 +1,42 @@ > > +/* > > + * Basic general purpose allocator for managing special purpose memory > > + * not managed by the regular kmalloc/kfree interface. > > + * Uses for this includes on-device special memory, uncached memory > > + * etc. > > + * > > + * This source code is licensed under the GNU General Public License, > > + * Version 2. See the file COPYING for more details. > > + */ > > + > > + > > +/* > > + * General purpose special memory pool descriptor. > > + */ > > +struct gen_pool { > > + rwlock_t lock; > > + struct list_head chunks; /* list of chunks in this pool */ > > + int min_alloc_order; /* minimum allocation order */ > > +}; > > + > > +/* > > + * General purpose special memory pool chunk descriptor. 
> > + */ > > +struct gen_pool_chunk { > > + spinlock_t lock; > > + struct list_head next_chunk; /* next chunk in pool */ > > + unsigned long start_addr; /* starting address of memory chunk */ > > + unsigned long end_addr; /* ending address of memory chunk */ > > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > > +}; > > + > > +extern struct gen_pool *ib_gen_pool_create(int, int); > > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > > +extern void ib_gen_pool_destroy(struct gen_pool *); > > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > > + > > +#define gen_pool_create ib_gen_pool_create > > +#define gen_pool_add ib_gen_pool_add > > +#define gen_pool_destroy ib_gen_pool_destroy > > +#define gen_pool_alloc ib_gen_pool_alloc > > +#define gen_pool_free ib_gen_pool_free > > diff --git a/kernel_addons/backport/2.6.15/include/linux/interrupt.h b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > > new file mode 100644 > > index 0000000..66e66a9 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > > @@ -0,0 +1,17 @@ > > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > > +#include_next > > + > > +static inline int > > +backport_request_irq(unsigned int irq, > > + irqreturn_t (*handler)(int, void *), > > + unsigned long flags, const char *dev_name, void *dev_id) > > +{ > > + return request_irq(irq, > > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > > + flags, dev_name, dev_id); > > +} > > + > > +#define request_irq backport_request_irq > > + > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/netdevice.h b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > index 5641019..2f12781 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > +++ 
b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > > spin_unlock(&dev->xmit_lock); > > } > > > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > > +{ > > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > > +} > > + > > +#undef SET_ETHTOOL_OPS > > +#define SET_ETHTOOL_OPS(netdev, ops) \ > > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > > + > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/random.h b/kernel_addons/backport/2.6.15/include/linux/random.h > > new file mode 100644 > > index 0000000..2ea2e1f > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/random.h > > @@ -0,0 +1,15 @@ > > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > > +#include_next > > + > > +static inline u32 backport_random32(void) > > +{ > > + u32 v; > > + > > + get_random_bytes(&v, sizeof(u32)); > > + return v; > > +} > > + > > +#define random32 backport_random32 > > + > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/skbuff.h b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > index 4845283..70bf011 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > +++ b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > > #include_next > > > > #define CHECKSUM_PARTIAL CHECKSUM_HW > > +#define CHECKSUM_COMPLETE CHECKSUM_HW > > + > > +#define gso_size tso_size > > > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/types.h b/kernel_addons/backport/2.6.15/include/linux/types.h > > new file mode 100644 > > index 0000000..86e334f > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/types.h > > @@ -0,0 +1,6 @@ > > +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 > > +#define BACKPORT_LINUX_TYPES_TO_2_6_15 > > +#include_next > > + > > +#define BITS_PER_BYTE 8 > > +#endif > > 
diff --git a/kernel_addons/backport/2.6.15/include/linux/workqueue.h b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > index 330f47f..cc8b2cd 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > +++ b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > > return cancel_delayed_work(&work->work); > > } > > > > +static inline void > > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > > +{ > > + cancel_rearming_delayed_workqueue(wq, &work->work); > > +} > > + > > > > #undef INIT_WORK > > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > > > #undef DECLARE_WORK > > #define DECLARE_WORK(n, f) \ > > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > > #define DECLARE_DELAYED_WORK(n, f) \ > > struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > > > #define queue_delayed_work backport_queue_delayed_work > > #define cancel_delayed_work backport_cancel_delayed_work > > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/net/netevent.h b/kernel_addons/backport/2.6.15/include/net/netevent.h > > new file mode 100644 > > index 0000000..e5d2162 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/net/netevent.h > > @@ -0,0 +1,33 @@ > > +#ifndef _NET_EVENT_H > > +#define _NET_EVENT_H > > + > > +/* > > + * Generic netevent notifiers > > + * > > + * Authors: > > + * Tom Tucker > > + * Steve Wise > > + * > > + * Changes: > > + */ > > +#ifdef __KERNEL__ > > + > > +#include > > + > > +struct netevent_redirect { > > + struct dst_entry *old; > > + struct dst_entry *new; > > +}; > > + > > +enum netevent_notif_type { > > + 
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > > +}; > > + > > +extern int register_netevent_notifier(struct notifier_block *nb); > > +extern int unregister_netevent_notifier(struct notifier_block *nb); > > +extern int call_netevent_notifiers(unsigned long val, void *v); > > + > > +#endif > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/src/genalloc.c b/kernel_addons/backport/2.6.15/include/src/genalloc.c > > new file mode 100644 > > index 0000000..75ae68c > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/src/genalloc.c > > @@ -0,0 +1,198 @@ > > +/* > > + * Basic general purpose allocator for managing special purpose memory > > + * not managed by the regular kmalloc/kfree interface. > > + * Uses for this includes on-device special memory, uncached memory > > + * etc. > > + * > > + * Copyright 2005 (C) Jes Sorensen > > + * > > + * This source code is licensed under the GNU General Public License, > > + * Version 2. See the file COPYING for more details. > > + */ > > + > > +#include > > +#include > > + > > + > > +/** > > + * gen_pool_create - create a new special memory pool > > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > > + * @nid: node id of the node the pool structure should be allocated on, or -1 > > + * > > + * Create a new special memory pool that can be used to manage special purpose > > + * memory not managed by the regular kmalloc/kfree interface. 
> > + */ > > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > > +{ > > + struct gen_pool *pool; > > + > > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > > + if (pool != NULL) { > > + rwlock_init(&pool->lock); > > + INIT_LIST_HEAD(&pool->chunks); > > + pool->min_alloc_order = min_alloc_order; > > + } > > + return pool; > > +} > > +EXPORT_SYMBOL(gen_pool_create); > > + > > +/** > > + * gen_pool_add - add a new chunk of special memory to the pool > > + * @pool: pool to add new memory chunk to > > + * @addr: starting address of memory chunk to add to pool > > + * @size: size in bytes of the memory chunk to add to pool > > + * @nid: node id of the node the chunk structure and bitmap should be > > + * allocated on, or -1 > > + * > > + * Add a new chunk of special memory to the specified pool. > > + */ > > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > > + int nid) > > +{ > > + struct gen_pool_chunk *chunk; > > + int nbits = size >> pool->min_alloc_order; > > + int nbytes = sizeof(struct gen_pool_chunk) + > > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > > + > > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > > + if (unlikely(chunk == NULL)) > > + return -1; > > + > > + memset(chunk, 0, nbytes); > > + spin_lock_init(&chunk->lock); > > + chunk->start_addr = addr; > > + chunk->end_addr = addr + size; > > + > > + write_lock(&pool->lock); > > + list_add(&chunk->next_chunk, &pool->chunks); > > + write_unlock(&pool->lock); > > + > > + return 0; > > +} > > +EXPORT_SYMBOL(gen_pool_add); > > + > > +/** > > + * gen_pool_destroy - destroy a special memory pool > > + * @pool: pool to destroy > > + * > > + * Destroy the specified special memory pool. Verifies that there are no > > + * outstanding allocations. 
> > + */ > > +void gen_pool_destroy(struct gen_pool *pool) > > +{ > > + struct list_head *_chunk, *_next_chunk; > > + struct gen_pool_chunk *chunk; > > + int order = pool->min_alloc_order; > > + int bit, end_bit; > > + > > + > > + write_lock(&pool->lock); > > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + list_del(&chunk->next_chunk); > > + > > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > > + bit = find_next_bit(chunk->bits, end_bit, 0); > > + BUG_ON(bit < end_bit); > > + > > + kfree(chunk); > > + } > > + kfree(pool); > > + return; > > +} > > +EXPORT_SYMBOL(gen_pool_destroy); > > + > > +/** > > + * gen_pool_alloc - allocate special memory from the pool > > + * @pool: pool to allocate from > > + * @size: number of bytes to allocate from the pool > > + * > > + * Allocate the requested number of bytes from the specified pool. > > + * Uses a first-fit algorithm. > > + */ > > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > > +{ > > + struct list_head *_chunk; > > + struct gen_pool_chunk *chunk; > > + unsigned long addr, flags; > > + int order = pool->min_alloc_order; > > + int nbits, bit, start_bit, end_bit; > > + > > + if (size == 0) > > + return 0; > > + > > + nbits = (size + (1UL << order) - 1) >> order; > > + > > + read_lock(&pool->lock); > > + list_for_each(_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + > > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > > + end_bit -= nbits + 1; > > + > > + spin_lock_irqsave(&chunk->lock, flags); > > + bit = -1; > > + while (bit + 1 < end_bit) { > > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > > + if (bit >= end_bit) > > + break; > > + > > + start_bit = bit; > > + if (nbits > 1) { > > + bit = find_next_bit(chunk->bits, bit + nbits, > > + bit + 1); > > + if (bit - start_bit < nbits) > > + continue; > > + } > > + > > + addr = 
chunk->start_addr + > > + ((unsigned long)start_bit << order); > > + while (nbits--) > > + __set_bit(start_bit++, &chunk->bits); > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + read_unlock(&pool->lock); > > + return addr; > > + } > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + } > > + read_unlock(&pool->lock); > > + return 0; > > +} > > +EXPORT_SYMBOL(gen_pool_alloc); > > + > > +/** > > + * gen_pool_free - free allocated special memory back to the pool > > + * @pool: pool to free to > > + * @addr: starting address of memory to free back to pool > > + * @size: size in bytes of memory to free > > + * > > + * Free previously allocated special memory back to the specified pool. > > + */ > > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > > +{ > > + struct list_head *_chunk; > > + struct gen_pool_chunk *chunk; > > + unsigned long flags; > > + int order = pool->min_alloc_order; > > + int bit, nbits; > > + > > + nbits = (size + (1UL << order) - 1) >> order; > > + > > + read_lock(&pool->lock); > > + list_for_each(_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + > > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > > + BUG_ON(addr + size > chunk->end_addr); > > + spin_lock_irqsave(&chunk->lock, flags); > > + bit = (addr - chunk->start_addr) >> order; > > + while (nbits--) > > + __clear_bit(bit++, &chunk->bits); > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + break; > > + } > > + } > > + BUG_ON(nbits > 0); > > + read_unlock(&pool->lock); > > +} > > +EXPORT_SYMBOL(gen_pool_free); > > diff --git a/kernel_addons/backport/2.6.15/include/src/netevent.c b/kernel_addons/backport/2.6.15/include/src/netevent.c > > new file mode 100644 > > index 0000000..5ffadd1 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/src/netevent.c > > @@ -0,0 +1,71 @@ > > +/* > > + * Network event notifiers > > + * > > + * Authors: > > + * Tom Tucker > > + * Steve Wise > 
> + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public License > > + * as published by the Free Software Foundation; either version > > + * 2 of the License, or (at your option) any later version. > > + * > > + * Fixes: > > + */ > > + > > +#include > > +#include > > +#include > > +#include > > + > > +static struct notifier_block *netevent_notif_chain; > > + > > +/** > > + * register_netevent_notifier - register a netevent notifier block > > + * @nb: notifier > > + * > > + * Register a notifier to be called when a netevent occurs. > > + * The notifier passed is linked into the kernel structures and must > > + * not be reused until it has been unregistered. A negative errno code > > + * is returned on a failure. > > + */ > > +int register_netevent_notifier(struct notifier_block *nb) > > +{ > > + int err; > > + > > + err = notifier_chain_register(&netevent_notif_chain, nb); > > + return err; > > +} > > + > > +/** > > + * netevent_unregister_notifier - unregister a netevent notifier block > > + * @nb: notifier > > + * > > + * Unregister a notifier previously registered by > > + * register_neigh_notifier(). The notifier is unlinked into the > > + * kernel structures and may then be reused. A negative errno code > > + * is returned on a failure. > > + */ > > + > > +int unregister_netevent_notifier(struct notifier_block *nb) > > +{ > > + return notifier_chain_unregister(&netevent_notif_chain, nb); > > +} > > + > > +/** > > + * call_netevent_notifiers - call all netevent notifier blocks > > + * @val: value passed unmodified to notifier function > > + * @v: pointer passed unmodified to notifier function > > + * > > + * Call all neighbour notifier blocks. Parameters and return value > > + * are as for notifier_call_chain(). 
> > + */ > > + > > +int call_netevent_notifiers(unsigned long val, void *v) > > +{ > > + return notifier_call_chain(&netevent_notif_chain, val, v); > > +} > > + > > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > > diff --git a/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > > new file mode 100644 > > index 0000000..ad7e7f4 > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > > @@ -0,0 +1,12 @@ > > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > > +index 3434679..bb008b6 100755 > > +--- a/drivers/net/cxgb3/Makefile > > ++++ b/drivers/net/cxgb3/Makefile > > +@@ -1,6 +1,7 @@ > > + # > > + # Chelsio T3 driver > > + # > > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > > + > > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > > + > > diff --git a/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > > new file mode 100644 > > index 0000000..eb2285f > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > > @@ -0,0 +1,24 @@ > > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > > +new file mode 100644 > > +index 0000000..58cf933 > > +--- /dev/null > > ++++ b/drivers/infiniband/core/genalloc.c > > +@@ -0,0 +1 @@ > > ++#include "src/genalloc.c" > > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > > +new file mode 100644 > > +index 0000000..58cf933 > > +--- /dev/null > > ++++ b/drivers/infiniband/core/netevent.c > > +@@ -0,0 +1 @@ > > ++#include "src/netevent.c" > > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > > +index 50fb1cd..456bfd0 100644 > > +--- a/drivers/infiniband/core/Makefile > > ++++ 
b/drivers/infiniband/core/Makefile > > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > > + > > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > > + uverbs_marshall.o > > ++ > > ++ib_core-y += genalloc.o netevent.o > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From mst at mellanox.co.il Thu Jan 18 09:33:27 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 19:33:27 +0200 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to2.6.15 In-Reply-To: <20070118173212.GC9890@mellanox.co.il> References: <1169139292.15460.4.camel@stevo-desktop> <20070118173212.GC9890@mellanox.co.il> Message-ID: <20070118173327.GD9890@mellanox.co.il> BTW, since creating a new backport involves copying a lot of files, it's probably better for you to do it in git and we can pull it. Quoting Michael S. Tsirkin : Subject: Re: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to2.6.15 Yes, this makes sense. We probably want to test for something like 2.6.15-*-* to detect ubuntu. But, no mixed-case please. And maybe we want to name it 2.6.15_ubuntu606 just so it's clear which ubuntu version it is. Quoting Steve Wise : Subject: Re: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to2.6.15 Michael/Vlad, This is not one of the kernels that has a set of backport files in kernel_addons. So it's pulling in the kernel.org 2.6.15 backport files. Apparently Ubuntu has back-ported some stuff into their 2.6.15-23-server kernel. How shall we proceed? This isn't one of the ofed-1.2 supported distros or kernels, yet you're building against it. I'm willing to create a new directory kernel_addons/backport/2.6.15_Ubuntu for this and do the backport if it is the correct thing to do. Steve.
On Thu, 2007-01-18 at 18:38 +0200, Vladimir Sokolovsky wrote: > This patch breaks compilation on openfabrics server (Ubuntu: 2.6.15-23-server) > > gcc -m32 -Wp,-MD,/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.addr.o.d -nostdinc -isystem /usr/lib/gcc/i486-linux-gnu/4.0.3/include -D__KERNEL__ \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/ \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include \ > -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include \ > -Iinclude \ > \ > -include include/linux/autoconf.h \ > -include /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include/linux/autoconf.h \ > -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -pipe -msoft-float -mpreferred-stack-boundary=2 -fno-unit-at-a-time -march=i686 -Iinclude/asm-i386/mach-default -Wdeclaration-after-statement -Wno-pointer-sign -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/include -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/ulp/ipoib -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/debug -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/hw/cxgb3/core -I/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/net/cxgb3 -DMODULE -DKBUILD_BASENAME=addr -DKBUILD_MODNAME=ib_addr -c -o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/.tmp_addr.o /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr. 
c > In file included from include/linux/inetdevice.h:7, > from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/inetdevice.h:4, > from /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:32: > /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/kernel_addons/backport/2.6.15/include/linux/netdevice.h:19: error: redefinition of '__netif_rx_schedule_prep' > include/linux/netdevice.h:807: error: previous definition of '__netif_rx_schedule_prep' was here > /home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.c:61: warning: initialization from incompatible pointer type > make[3]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core/addr.o] Error 1 > make[2]: *** [/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband/core] Error 2 > make[1]: *** [_module_/home/vlad/tmp/ofa_1_2_kernel-20070118-0829_check/drivers/infiniband] Error 2 > make[1]: Leaving directory `/usr/src/linux-headers-2.6.15-23-server' > make: *** [kernel] Error 2 > > Regards, > Vladimir > > > On Wed, 2007-01-17 at 13:50 -0600, Steve Wise wrote: > > Backport cxgb3 to 2.6.15 > > > > Signed-off-by: Steve Wise > > --- > > > > .../backport/2.6.15/include/linux/genalloc.h | 42 +++++ > > .../backport/2.6.15/include/linux/interrupt.h | 17 ++ > > .../backport/2.6.15/include/linux/netdevice.h | 9 + > > .../backport/2.6.15/include/linux/random.h | 15 ++ > > .../backport/2.6.15/include/linux/skbuff.h | 3 > > .../backport/2.6.15/include/linux/types.h | 6 + > > .../backport/2.6.15/include/linux/workqueue.h | 9 + > > .../backport/2.6.15/include/net/netevent.h | 33 ++++ > > .../backport/2.6.15/include/src/genalloc.c | 198 +++++++++++++++++++++++ > > .../backport/2.6.15/include/src/netevent.c | 71 ++++++++ > > .../backport/2.6.15/cxgb3_makefile_to_2_6_19.patch | 12 + > > .../backport/2.6.15/linux_stuff_to_2_6_17.patch | 24 +++ > > 12 files changed, 438 insertions(+), 1 deletions(-) > > 
> > diff --git a/kernel_addons/backport/2.6.15/include/linux/genalloc.h b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > > new file mode 100644 > > index 0000000..3c23c68 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/genalloc.h > > @@ -0,0 +1,42 @@ > > +/* > > + * Basic general purpose allocator for managing special purpose memory > > + * not managed by the regular kmalloc/kfree interface. > > + * Uses for this includes on-device special memory, uncached memory > > + * etc. > > + * > > + * This source code is licensed under the GNU General Public License, > > + * Version 2. See the file COPYING for more details. > > + */ > > + > > + > > +/* > > + * General purpose special memory pool descriptor. > > + */ > > +struct gen_pool { > > + rwlock_t lock; > > + struct list_head chunks; /* list of chunks in this pool */ > > + int min_alloc_order; /* minimum allocation order */ > > +}; > > + > > +/* > > + * General purpose special memory pool chunk descriptor. > > + */ > > +struct gen_pool_chunk { > > + spinlock_t lock; > > + struct list_head next_chunk; /* next chunk in pool */ > > + unsigned long start_addr; /* starting address of memory chunk */ > > + unsigned long end_addr; /* ending address of memory chunk */ > > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > > +}; > > + > > +extern struct gen_pool *ib_gen_pool_create(int, int); > > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > > +extern void ib_gen_pool_destroy(struct gen_pool *); > > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > > + > > +#define gen_pool_create ib_gen_pool_create > > +#define gen_pool_add ib_gen_pool_add > > +#define gen_pool_destroy ib_gen_pool_destroy > > +#define gen_pool_alloc ib_gen_pool_alloc > > +#define gen_pool_free ib_gen_pool_free > > diff --git a/kernel_addons/backport/2.6.15/include/linux/interrupt.h 
b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > > new file mode 100644 > > index 0000000..66e66a9 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/interrupt.h > > @@ -0,0 +1,17 @@ > > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > > +#include_next > > + > > +static inline int > > +backport_request_irq(unsigned int irq, > > + irqreturn_t (*handler)(int, void *), > > + unsigned long flags, const char *dev_name, void *dev_id) > > +{ > > + return request_irq(irq, > > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > > + flags, dev_name, dev_id); > > +} > > + > > +#define request_irq backport_request_irq > > + > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/netdevice.h b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > index 5641019..2f12781 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > +++ b/kernel_addons/backport/2.6.15/include/linux/netdevice.h > > @@ -15,4 +15,13 @@ static inline void netif_tx_unlock(struc > > spin_unlock(&dev->xmit_lock); > > } > > > > +static inline int __netif_rx_schedule_prep(struct net_device *dev) > > +{ > > + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); > > +} > > + > > +#undef SET_ETHTOOL_OPS > > +#define SET_ETHTOOL_OPS(netdev, ops) \ > > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > > + > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/random.h b/kernel_addons/backport/2.6.15/include/linux/random.h > > new file mode 100644 > > index 0000000..2ea2e1f > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/random.h > > @@ -0,0 +1,15 @@ > > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > > +#include_next > > + > > +static inline u32 backport_random32(void) > > +{ > > + u32 v; > > + > > + get_random_bytes(&v, sizeof(u32)); > > + return v; > > +} > > + > > +#define random32 
backport_random32 > > + > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/skbuff.h b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > index 4845283..70bf011 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > +++ b/kernel_addons/backport/2.6.15/include/linux/skbuff.h > > @@ -4,5 +4,8 @@ #define LINUX_SKBUFF_H_BACKPORT > > #include_next > > > > #define CHECKSUM_PARTIAL CHECKSUM_HW > > +#define CHECKSUM_COMPLETE CHECKSUM_HW > > + > > +#define gso_size tso_size > > > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/types.h b/kernel_addons/backport/2.6.15/include/linux/types.h > > new file mode 100644 > > index 0000000..86e334f > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/linux/types.h > > @@ -0,0 +1,6 @@ > > +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 > > +#define BACKPORT_LINUX_TYPES_TO_2_6_15 > > +#include_next > > + > > +#define BITS_PER_BYTE 8 > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/linux/workqueue.h b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > index 330f47f..cc8b2cd 100644 > > --- a/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > +++ b/kernel_addons/backport/2.6.15/include/linux/workqueue.h > > @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela > > return cancel_delayed_work(&work->work); > > } > > > > +static inline void > > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > > +{ > > + cancel_rearming_delayed_workqueue(wq, &work->work); > > +} > > + > > > > #undef INIT_WORK > > #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > > @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) > > > > #undef DECLARE_WORK > > #define DECLARE_WORK(n, f) \ > > - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) > > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > > #define DECLARE_DELAYED_WORK(n, f) \ > > struct 
delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > > > > #define queue_delayed_work backport_queue_delayed_work > > #define cancel_delayed_work backport_cancel_delayed_work > > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > > > > #endif > > diff --git a/kernel_addons/backport/2.6.15/include/net/netevent.h b/kernel_addons/backport/2.6.15/include/net/netevent.h > > new file mode 100644 > > index 0000000..e5d2162 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/net/netevent.h > > @@ -0,0 +1,33 @@ > > +#ifndef _NET_EVENT_H > > +#define _NET_EVENT_H > > + > > +/* > > + * Generic netevent notifiers > > + * > > + * Authors: > > + * Tom Tucker > > + * Steve Wise > > + * > > + * Changes: > > + */ > > +#ifdef __KERNEL__ > > + > > +#include > > + > > +struct netevent_redirect { > > + struct dst_entry *old; > > + struct dst_entry *new; > > +}; > > + > > +enum netevent_notif_type { > > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > > +}; > > + > > +extern int register_netevent_notifier(struct notifier_block *nb); > > +extern int unregister_netevent_notifier(struct notifier_block *nb); > > +extern int call_netevent_notifiers(unsigned long val, void *v); > > + > > +#endif > > +#endif > > diff --git a/kernel_addons/backport/2.6.15/include/src/genalloc.c b/kernel_addons/backport/2.6.15/include/src/genalloc.c > > new file mode 100644 > > index 0000000..75ae68c > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/src/genalloc.c > > @@ -0,0 +1,198 @@ > > +/* > > + * Basic general purpose allocator for managing special purpose memory > > + * not managed by the regular kmalloc/kfree interface. > > + * Uses for this includes on-device special memory, uncached memory > > + * etc. 
> > + * > > + * Copyright 2005 (C) Jes Sorensen > > + * > > + * This source code is licensed under the GNU General Public License, > > + * Version 2. See the file COPYING for more details. > > + */ > > + > > +#include > > +#include > > + > > + > > +/** > > + * gen_pool_create - create a new special memory pool > > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > > + * @nid: node id of the node the pool structure should be allocated on, or -1 > > + * > > + * Create a new special memory pool that can be used to manage special purpose > > + * memory not managed by the regular kmalloc/kfree interface. > > + */ > > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > > +{ > > + struct gen_pool *pool; > > + > > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > > + if (pool != NULL) { > > + rwlock_init(&pool->lock); > > + INIT_LIST_HEAD(&pool->chunks); > > + pool->min_alloc_order = min_alloc_order; > > + } > > + return pool; > > +} > > +EXPORT_SYMBOL(gen_pool_create); > > + > > +/** > > + * gen_pool_add - add a new chunk of special memory to the pool > > + * @pool: pool to add new memory chunk to > > + * @addr: starting address of memory chunk to add to pool > > + * @size: size in bytes of the memory chunk to add to pool > > + * @nid: node id of the node the chunk structure and bitmap should be > > + * allocated on, or -1 > > + * > > + * Add a new chunk of special memory to the specified pool. 
> > + */ > > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > > + int nid) > > +{ > > + struct gen_pool_chunk *chunk; > > + int nbits = size >> pool->min_alloc_order; > > + int nbytes = sizeof(struct gen_pool_chunk) + > > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > > + > > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > > + if (unlikely(chunk == NULL)) > > + return -1; > > + > > + memset(chunk, 0, nbytes); > > + spin_lock_init(&chunk->lock); > > + chunk->start_addr = addr; > > + chunk->end_addr = addr + size; > > + > > + write_lock(&pool->lock); > > + list_add(&chunk->next_chunk, &pool->chunks); > > + write_unlock(&pool->lock); > > + > > + return 0; > > +} > > +EXPORT_SYMBOL(gen_pool_add); > > + > > +/** > > + * gen_pool_destroy - destroy a special memory pool > > + * @pool: pool to destroy > > + * > > + * Destroy the specified special memory pool. Verifies that there are no > > + * outstanding allocations. > > + */ > > +void gen_pool_destroy(struct gen_pool *pool) > > +{ > > + struct list_head *_chunk, *_next_chunk; > > + struct gen_pool_chunk *chunk; > > + int order = pool->min_alloc_order; > > + int bit, end_bit; > > + > > + > > + write_lock(&pool->lock); > > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + list_del(&chunk->next_chunk); > > + > > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > > + bit = find_next_bit(chunk->bits, end_bit, 0); > > + BUG_ON(bit < end_bit); > > + > > + kfree(chunk); > > + } > > + kfree(pool); > > + return; > > +} > > +EXPORT_SYMBOL(gen_pool_destroy); > > + > > +/** > > + * gen_pool_alloc - allocate special memory from the pool > > + * @pool: pool to allocate from > > + * @size: number of bytes to allocate from the pool > > + * > > + * Allocate the requested number of bytes from the specified pool. > > + * Uses a first-fit algorithm. 
> > + */ > > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > > +{ > > + struct list_head *_chunk; > > + struct gen_pool_chunk *chunk; > > + unsigned long addr, flags; > > + int order = pool->min_alloc_order; > > + int nbits, bit, start_bit, end_bit; > > + > > + if (size == 0) > > + return 0; > > + > > + nbits = (size + (1UL << order) - 1) >> order; > > + > > + read_lock(&pool->lock); > > + list_for_each(_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + > > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > > + end_bit -= nbits + 1; > > + > > + spin_lock_irqsave(&chunk->lock, flags); > > + bit = -1; > > + while (bit + 1 < end_bit) { > > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > > + if (bit >= end_bit) > > + break; > > + > > + start_bit = bit; > > + if (nbits > 1) { > > + bit = find_next_bit(chunk->bits, bit + nbits, > > + bit + 1); > > + if (bit - start_bit < nbits) > > + continue; > > + } > > + > > + addr = chunk->start_addr + > > + ((unsigned long)start_bit << order); > > + while (nbits--) > > + __set_bit(start_bit++, &chunk->bits); > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + read_unlock(&pool->lock); > > + return addr; > > + } > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + } > > + read_unlock(&pool->lock); > > + return 0; > > +} > > +EXPORT_SYMBOL(gen_pool_alloc); > > + > > +/** > > + * gen_pool_free - free allocated special memory back to the pool > > + * @pool: pool to free to > > + * @addr: starting address of memory to free back to pool > > + * @size: size in bytes of memory to free > > + * > > + * Free previously allocated special memory back to the specified pool. 
> > + */ > > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > > +{ > > + struct list_head *_chunk; > > + struct gen_pool_chunk *chunk; > > + unsigned long flags; > > + int order = pool->min_alloc_order; > > + int bit, nbits; > > + > > + nbits = (size + (1UL << order) - 1) >> order; > > + > > + read_lock(&pool->lock); > > + list_for_each(_chunk, &pool->chunks) { > > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > > + > > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > > + BUG_ON(addr + size > chunk->end_addr); > > + spin_lock_irqsave(&chunk->lock, flags); > > + bit = (addr - chunk->start_addr) >> order; > > + while (nbits--) > > + __clear_bit(bit++, &chunk->bits); > > + spin_unlock_irqrestore(&chunk->lock, flags); > > + break; > > + } > > + } > > + BUG_ON(nbits > 0); > > + read_unlock(&pool->lock); > > +} > > +EXPORT_SYMBOL(gen_pool_free); > > diff --git a/kernel_addons/backport/2.6.15/include/src/netevent.c b/kernel_addons/backport/2.6.15/include/src/netevent.c > > new file mode 100644 > > index 0000000..5ffadd1 > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.15/include/src/netevent.c > > @@ -0,0 +1,71 @@ > > +/* > > + * Network event notifiers > > + * > > + * Authors: > > + * Tom Tucker > > + * Steve Wise > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public License > > + * as published by the Free Software Foundation; either version > > + * 2 of the License, or (at your option) any later version. > > + * > > + * Fixes: > > + */ > > + > > +#include > > +#include > > +#include > > +#include > > + > > +static struct notifier_block *netevent_notif_chain; > > + > > +/** > > + * register_netevent_notifier - register a netevent notifier block > > + * @nb: notifier > > + * > > + * Register a notifier to be called when a netevent occurs. 
> > + * The notifier passed is linked into the kernel structures and must > > + * not be reused until it has been unregistered. A negative errno code > > + * is returned on a failure. > > + */ > > +int register_netevent_notifier(struct notifier_block *nb) > > +{ > > + int err; > > + > > + err = notifier_chain_register(&netevent_notif_chain, nb); > > + return err; > > +} > > + > > +/** > > + * netevent_unregister_notifier - unregister a netevent notifier block > > + * @nb: notifier > > + * > > + * Unregister a notifier previously registered by > > + * register_neigh_notifier(). The notifier is unlinked into the > > + * kernel structures and may then be reused. A negative errno code > > + * is returned on a failure. > > + */ > > + > > +int unregister_netevent_notifier(struct notifier_block *nb) > > +{ > > + return notifier_chain_unregister(&netevent_notif_chain, nb); > > +} > > + > > +/** > > + * call_netevent_notifiers - call all netevent notifier blocks > > + * @val: value passed unmodified to notifier function > > + * @v: pointer passed unmodified to notifier function > > + * > > + * Call all neighbour notifier blocks. Parameters and return value > > + * are as for notifier_call_chain(). 
> > + */ > > + > > +int call_netevent_notifiers(unsigned long val, void *v) > > +{ > > + return notifier_call_chain(&netevent_notif_chain, val, v); > > +} > > + > > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > > diff --git a/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > > new file mode 100644 > > index 0000000..ad7e7f4 > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.15/cxgb3_makefile_to_2_6_19.patch > > @@ -0,0 +1,12 @@ > > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > > +index 3434679..bb008b6 100755 > > +--- a/drivers/net/cxgb3/Makefile > > ++++ b/drivers/net/cxgb3/Makefile > > +@@ -1,6 +1,7 @@ > > + # > > + # Chelsio T3 driver > > + # > > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > > + > > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > > + > > diff --git a/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > > new file mode 100644 > > index 0000000..eb2285f > > --- /dev/null > > +++ b/kernel_patches/backport/2.6.15/linux_stuff_to_2_6_17.patch > > @@ -0,0 +1,24 @@ > > +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c > > +new file mode 100644 > > +index 0000000..58cf933 > > +--- /dev/null > > ++++ b/drivers/infiniband/core/genalloc.c > > +@@ -0,0 +1 @@ > > ++#include "src/genalloc.c" > > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > > +new file mode 100644 > > +index 0000000..58cf933 > > +--- /dev/null > > ++++ b/drivers/infiniband/core/netevent.c > > +@@ -0,0 +1 @@ > > ++#include "src/netevent.c" > > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > > +index 50fb1cd..456bfd0 100644 > > +--- a/drivers/infiniband/core/Makefile > > ++++ 
b/drivers/infiniband/core/Makefile > > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > > + > > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > > + uverbs_marshall.o > > ++ > > ++ib_core-y += genalloc.o netevent.o > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST -- MST From jgunthorpe at obsidianresearch.com Thu Jan 18 09:37:07 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Thu, 18 Jan 2007 10:37:07 -0700 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070118161430.GB4298@mellanox.co.il> References: <1169072570.7052.46.camel@brick.pathscale.com> <20070118043923.GC25305@mellanox.co.il> <45AF9210.4090401@prairieinet.net> <20070118161430.GB4298@mellanox.co.il> Message-ID: <20070118173707.GO12386@obsidianresearch.com> On Thu, Jan 18, 2007 at 06:14:31PM +0200, Michael S. Tsirkin wrote: > So the issue is that we are casting char *data which has no > alignment guarantees to 64 bit number. We really must find a way to > force 64 bit alignment for struct ib_user_mad all over. Would not > something like the following simple trick work? > struct ib_user_mad_hdr { > ............. > } __attribute__((aligned (8))); You might get a similar result if you change: struct ib_user_mad { struct ib_user_mad_hdr hdr; __u8 data[0]; }; To struct ib_user_mad { struct ib_user_mad_hdr hdr; __u64 data[0]; }; Which is more clear since it is the data[0] that has the alignment requirement not the ib_usr_mad_hdr.. Plus you only get the pad if you actually need it. 
Jason From ralph.campbell at qlogic.com Thu Jan 18 09:41:23 2007 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Thu, 18 Jan 2007 09:41:23 -0800 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070118161430.GB4298@mellanox.co.il> References: <1169072570.7052.46.camel@brick.pathscale.com> <20070118043923.GC25305@mellanox.co.il> <45AF9210.4090401@prairieinet.net> <20070118161430.GB4298@mellanox.co.il> Message-ID: <1169142083.7052.59.camel@brick.pathscale.com> On Thu, 2007-01-18 at 18:14 +0200, Michael S. Tsirkin wrote: > > Quoting John W. Marland : > > Subject: Re: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > > > Michael S. Tsirkin wrote: > > > > >>Quoting Ralph Campbell : > > >>Subject: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > >> > > >>IB/core - ib_umad can cause address alignment fault > > >> > > >>In user_mad.c, the definition for struct ib_umad_packet includes > > >>struct ib_user_mad at an odd 32-bit offset. When ib_umad_write() > > >>tries to assign rmpp_mad->mad_hdr.tid, there is an alignment fault on > > >>architectures which have strict alignment for load/stores. > > >>This patch fixes the problem by changing the offset on which > > >>struct ib_user_mad is defined within struct ib_umad_packet. > > >> > > >>Thanks go to John W. Marland for finding this. 
> > >> > > >>Signed-off-by: Ralph Campbell > > >> > > >>diff -r b1128b48dc99 drivers/infiniband/core/user_mad.c > > >>--- a/drivers/infiniband/core/user_mad.c Fri Jan 12 20:00:03 2007 +0000 > > >>+++ b/drivers/infiniband/core/user_mad.c Wed Jan 17 14:09:37 2007 -0800 > > >>@@ -125,7 +125,7 @@ struct ib_umad_packet { > > >> struct ib_mad_send_buf *msg; > > >> struct ib_mad_recv_wc *recv_wc; > > >> struct list_head list; > > >>- int length; > > >>+ long length; > > >> struct ib_user_mad mad; > > >> }; > > >> > > >> > > > > > >This does not make sense to me - do we have to replace all int fields with long > > >now? Looks like a compiler or makefile bug in your setup - struct fields should > > >be naturally aligned. > > > > > > > > > > > We should probably have given a more complete explanation. The > > unaligned access hits in two places, that I've tracked down so far. > > The one where it's easiest to see what's happening is in ib_umad_write. > > ______________________________________________________________________________________ > > if (!ib_response_mad(packet->msg->mad)) { > > tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; > > *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | > > (be64_to_cpup(tid) & 0xffffffff)); > > > > ---> this line causes the access problem > > rmpp_mad->mad_hdr.tid = *tid; > > } > > ________________________________________________________________________________________ > > The rmpp_mad variable is an ib_rmpp_mad pointer that is initialized > > from the packet->mad.data early in the function. > > Because the ib_umad_packet structure has a as it's last element an > > ib_user_mad structure, not a pointer to one, but the structure. > > This means that the Data[0] declaration at the end of the ib_umad > > structure is forced onto a 4 byte boundary. > > So the issue is that we are casting char *data which has no alignment guarantees > to 64 bit number. We really must find a way to force 64 bit alignment for > struct ib_user_mad all over. 
Would not something like the following simple trick work? > > struct ib_user_mad_hdr { > ............. > } __attribute__((aligned (8))); This would work but the 8 byte alignment isn't needed everywhere. The int -> long change is needed because struct ib_umad_packet includes struct ib_user_mad (which has 4 byte alignment) but is then cast to struct ib_mad_hdr which has 8 byte alignment. It is not the fault of the compiler. From mst at mellanox.co.il Thu Jan 18 09:46:29 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 19:46:29 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070118173707.GO12386@obsidianresearch.com> References: <20070118173707.GO12386@obsidianresearch.com> Message-ID: <20070118174629.GE9890@mellanox.co.il> > Quoting Jason Gunthorpe : > Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > On Thu, Jan 18, 2007 at 06:14:31PM +0200, Michael S. Tsirkin wrote: > > > So the issue is that we are casting char *data which has no > > alignment guarantees to 64 bit number. We really must find a way to > > force 64 bit alignment for struct ib_user_mad all over. Would not > > something like the following simple trick work? > > > struct ib_user_mad_hdr { > > ............. > > } __attribute__((aligned (8))); > > You might get a similar result if you change: > > struct ib_user_mad { > struct ib_user_mad_hdr hdr; > __u8 data[0]; > }; > > To > > struct ib_user_mad { > struct ib_user_mad_hdr hdr; > __u64 data[0]; > }; > > Which is more clear since it is the data[0] that has the alignment > requirement not the ib_usr_mad_hdr.. Hmm. Good idea. Patch? Need to audit the code for other cases of 'u8 data' being cast to a struct with a 64 bit field, BTW. > Plus you only get the pad if you actually need it. You mean, no pad if ib_user_mad_hdr is used separately? 
-- MST From jwm at prairieinet.net Thu Jan 18 09:53:35 2007 From: jwm at prairieinet.net (John W. Marland) Date: Thu, 18 Jan 2007 11:53:35 -0600 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070118161430.GB4298@mellanox.co.il> References: <1169072570.7052.46.camel@brick.pathscale.com> <20070118043923.GC25305@mellanox.co.il> <45AF9210.4090401@prairieinet.net> <20070118161430.GB4298@mellanox.co.il> Message-ID: <45AFB41F.7070207@prairieinet.net> Michael S. Tsirkin wrote: >>Quoting John W. Marland : >>Subject: Re: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 >> >>Michael S. Tsirkin wrote: >> >> >> >>>>Quoting Ralph Campbell : >>>>Subject: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 >>>> >>>>IB/core - ib_umad can cause address alignment fault >>>> >>>>In user_mad.c, the definition for struct ib_umad_packet includes >>>>struct ib_user_mad at an odd 32-bit offset. When ib_umad_write() >>>>tries to assign rmpp_mad->mad_hdr.tid, there is an alignment fault on >>>>architectures which have strict alignment for load/stores. >>>>This patch fixes the problem by changing the offset on which >>>>struct ib_user_mad is defined within struct ib_umad_packet. >>>> >>>>Thanks go to John W. Marland for finding this. >>>> >>>>Signed-off-by: Ralph Campbell >>>> >>>>diff -r b1128b48dc99 drivers/infiniband/core/user_mad.c >>>>--- a/drivers/infiniband/core/user_mad.c Fri Jan 12 20:00:03 2007 +0000 >>>>+++ b/drivers/infiniband/core/user_mad.c Wed Jan 17 14:09:37 2007 -0800 >>>>@@ -125,7 +125,7 @@ struct ib_umad_packet { >>>> struct ib_mad_send_buf *msg; >>>> struct ib_mad_recv_wc *recv_wc; >>>> struct list_head list; >>>>- int length; >>>>+ long length; >>>> struct ib_user_mad mad; >>>>}; >>>> >>>> >>>> >>>> >>>This does not make sense to me - do we have to replace all int fields with long >>>now? 
Looks like a compiler or makefile bug in your setup - struct fields should >>>be naturally aligned. >>> >>> >>> >>> >>> >> We should probably have given a more complete explanation. The >>unaligned access hits in two places, that I've tracked down so far. >> The one where it's easiest to see what's happening is in ib_umad_write. >>______________________________________________________________________________________ >> if (!ib_response_mad(packet->msg->mad)) { >> tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; >> *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | >> (be64_to_cpup(tid) & 0xffffffff)); >> >>---> this line causes the access problem >>rmpp_mad->mad_hdr.tid = *tid; >> } >>________________________________________________________________________________________ >> The rmpp_mad variable is an ib_rmpp_mad pointer that is initialized >> from the packet->mad.data early in the function. >> Because the ib_umad_packet structure has a as it's last element an >> ib_user_mad structure, not a pointer to one, but the structure. >> This means that the Data[0] declaration at the end of the ib_umad >> structure is forced onto a 4 byte boundary. >> >> > >So the issue is that we are casting char *data which has no alignment guarantees >to 64 bit number. We really must find a way to force 64 bit alignment for >struct ib_user_mad all over. Would not something like the following simple trick work? > >struct ib_user_mad_hdr { > ............. >} __attribute__((aligned (8))); > > In this case I don't think that will solve it. The memory area where the structure area is copied is one of those open ended declarations .i.e. ... __u8 Data[0] }; The allocation allows one of two different sizes of structures to be placed on that Data[0]. Which would STILL work fine if the ib_umad_packet and ib_user_mad and varied sized data area are all allocated in one lump. I've never cared for these open ended structures. This is a good reason how/why it will eventually bite you. 
....JW > > > From mst at mellanox.co.il Thu Jan 18 09:50:35 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 19:50:35 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <1169142083.7052.59.camel@brick.pathscale.com> References: <1169142083.7052.59.camel@brick.pathscale.com> Message-ID: <20070118175035.GF9890@mellanox.co.il> > The int -> long change is needed because struct ib_umad_packet > includes struct ib_user_mad (which has 4 byte alignment) but > is then cast to struct ib_mad_hdr which has 8 byte alignment. But I thought it is the data field which is cast to ib_mad_hdr and not ib_user_mad? If so Jason's suggestion of using u64 data[0] makes more sense. -- MST From ralph.campbell at qlogic.com Thu Jan 18 09:56:19 2007 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Thu, 18 Jan 2007 09:56:19 -0800 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <20070118175035.GF9890@mellanox.co.il> References: <1169142083.7052.59.camel@brick.pathscale.com> <20070118175035.GF9890@mellanox.co.il> Message-ID: <1169142979.7052.65.camel@brick.pathscale.com> On Thu, 2007-01-18 at 19:50 +0200, Michael S. Tsirkin wrote: > > The int -> long change is needed because struct ib_umad_packet > > includes struct ib_user_mad (which has 4 byte alignment) but > > is then cast to struct ib_mad_hdr which has 8 byte alignment. > > But I thought it is the data field which is cast to ib_mad_hdr > and not ib_user_mad? If so Jason's suggestion of using > u64 data[0] > makes more sense. That looks OK to me too. It would cover other MAD packet types like get/set GUIDs too. 
From mshefty at ichips.intel.com Thu Jan 18 09:57:37 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 18 Jan 2007 09:57:37 -0800 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <20070118175035.GF9890@mellanox.co.il> References: <1169142083.7052.59.camel@brick.pathscale.com> <20070118175035.GF9890@mellanox.co.il> Message-ID: <45AFB511.3000100@ichips.intel.com> > But I thought it is the data field which is cast to ib_mad_hdr > and not ib_user_mad? If so Jason's suggestion of using > u64 data[0] > makes more sense. I don't think that you can change struct ib_user_mad or struct ib_user_mad_hdr without breaking the ABI. - Sean From mst at mellanox.co.il Thu Jan 18 10:01:02 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 20:01:02 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <45AFB511.3000100@ichips.intel.com> References: <45AFB511.3000100@ichips.intel.com> Message-ID: <20070118180102.GG9890@mellanox.co.il> > Quoting r. Sean Hefty : > Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 > > > But I thought it is the data field which is cast to ib_mad_hdr > > and not ib_user_mad? If so Jason's suggestion of using > > u64 data[0] > > makes more sense. > > I don't think that you can change struct ib_user_mad or struct ib_user_mad_hdr > without breaking the ABI. This won't break ABI, since data is aligned already relative to start of struct. We only force the struct itself to be aligned, which has no ABI implications. 
-- MST From tom at opengridcomputing.com Thu Jan 18 10:17:35 2007 From: tom at opengridcomputing.com (Tom Tucker) Date: Thu, 18 Jan 2007 12:17:35 -0600 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <20070118180102.GG9890@mellanox.co.il> References: <45AFB511.3000100@ichips.intel.com> <20070118180102.GG9890@mellanox.co.il> Message-ID: <1169144255.26676.6.camel@trinity.ogc.int> Does changing the size of the structure break the ABI? On Thu, 2007-01-18 at 20:01 +0200, Michael S. Tsirkin wrote: > > Quoting r. Sean Hefty : > > Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 > > > > > But I thought it is the data field which is cast to ib_mad_hdr > > > and not ib_user_mad? If so Jason's suggestion of using > > > u64 data[0] > > > makes more sense. > > > > I don't think that you can change struct ib_user_mad or struct ib_user_mad_hdr > > without breaking the ABI. > > This won't break ABI, since data is aligned already relative to start of struct. > We only force the struct itself to be aligned, which has no ABI implications. > From jwm at systemfabricworks.com Thu Jan 18 10:22:21 2007 From: jwm at systemfabricworks.com (JWM) Date: Thu, 18 Jan 2007 12:22:21 -0600 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 References: <45AFB511.3000100@ichips.intel.com> <20070118180102.GG9890@mellanox.co.il> Message-ID: <00d401c73b2d$98b081e0$7401a8c0@Maelstrom> I've been looking at the code and it seems as though Sean is correct. The data comes in from userland through a if(copy_from_user(&packet->mad,buf, sizeof(struct ib_user_mad) + IB_MGMT_RMPP_HDR)) BTW - why is IB_MGMT_RMPP_HDR an enum? Why not a sizeof ? In any case the data element will be aligned in the packet->mad through compiler padding. The raw ib_user_mad coming in will not, or may be depending on how it was created etc. 
If it's allocated separately or just declared then it's not going to work. ....JW ----- Original Message ----- From: "Michael S. Tsirkin" To: "Sean Hefty" Cc: "Ralph Campbell" ; "John W. Marland" ; "openib-general" Sent: Thursday, January 18, 2007 12:01 PM Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 >> Quoting r. Sean Hefty : >> Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address >> alignment fault onia64 >> >> > But I thought it is the data field which is cast to ib_mad_hdr >> > and not ib_user_mad? If so Jason's suggestion of using >> > u64 data[0] >> > makes more sense. >> >> I don't think that you can change struct ib_user_mad or struct >> ib_user_mad_hdr >> without breaking the ABI. > > This won't break ABI, since data is aligned already relative to start of > struct. > We only force the struct itself to be aligned, which has no ABI > implications. > > -- > MST > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From mshefty at ichips.intel.com Thu Jan 18 10:40:54 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 18 Jan 2007 10:40:54 -0800 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <00d401c73b2d$98b081e0$7401a8c0@Maelstrom> References: <45AFB511.3000100@ichips.intel.com> <20070118180102.GG9890@mellanox.co.il> <00d401c73b2d$98b081e0$7401a8c0@Maelstrom> Message-ID: <45AFBF36.5080500@ichips.intel.com> JWM wrote: > if(copy_from_user(&packet->mad,buf, > sizeof(struct ib_user_mad) + IB_MGMT_RMPP_HDR)) > > BTW - why is IB_MGMT_RMPP_HDR an enum? Why not a sizeof ? No structure was ever defined for just this (standard MAD header + RMPP header only). 
- Sean From jgunthorpe at obsidianresearch.com Thu Jan 18 10:41:54 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Thu, 18 Jan 2007 11:41:54 -0700 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <20070118175035.GF9890@mellanox.co.il> References: <1169142083.7052.59.camel@brick.pathscale.com> <20070118175035.GF9890@mellanox.co.il> Message-ID: <20070118184154.GP12386@obsidianresearch.com> On Thu, Jan 18, 2007 at 07:50:35PM +0200, Michael S. Tsirkin wrote: > > The int -> long change is needed because struct ib_umad_packet > > includes struct ib_user_mad (which has 4 byte alignment) but > > is then cast to struct ib_mad_hdr which has 8 byte alignment. > But I thought it is the data field which is cast to ib_mad_hdr > and not ib_user_mad? If so Jason's suggestion of using > u64 data[0] > makes more sense. I'm not sure why, but on closer inspection the code in user_mad.c actually casts both mad.data[0] and mad to ib_mad_hdr (it looks 'correct' but inconsistent, see the last copy_from_user in ib_umad_write) So, changing the data[0] to u64 should still fix both since it will place the .data casting region on the correct alignment and increase the alignment requirement of ib_user_mad - but it is indirect that the 2nd cast is corrected.. > > Plus you only get the pad if you actually need it. > You mean, no pad if ib_user_mad_hdr is used separately? That and platforms that don't require 8 byte alignment for u64 will not generate pads. John: Open ended structures always require care when selecting the type used for the 0 size array so that it 'matches' the structures you intend to cast it to. 
Unless you are using it for a string, u8 is always a bad choice :> Jason From jgunthorpe at obsidianresearch.com Thu Jan 18 10:51:54 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Thu, 18 Jan 2007 11:51:54 -0700 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <1169144255.26676.6.camel@trinity.ogc.int> References: <45AFB511.3000100@ichips.intel.com> <20070118180102.GG9890@mellanox.co.il> <1169144255.26676.6.camel@trinity.ogc.int> Message-ID: <20070118185154.GQ12386@obsidianresearch.com> On Thu, Jan 18, 2007 at 12:17:35PM -0600, Tom Tucker wrote: > Does changing the size of the structure break the ABI? ib_user_mad_hdr is 56 bytes long, that is already a multiple of 8 so .data is already aligned on 8. Thus the size of ib_user_mad does not change and there is no ABI concern here. __u64 data[0] is still 0 size, just like __u8 data[0]. Really all that happens is that the compiler will mark ib_user_mad as requiring 8 byte alignment on ia64/etc and 4 byte on x86-64/etc, which gets you the pad you need in the ib_umad_packet structure. Jason From rdreier at cisco.com Thu Jan 18 10:56:01 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 18 Jan 2007 10:56:01 -0800 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: <200701172312.14840.hnguyen@linux.vnet.ibm.com> (Hoang-Nam Nguyen's message of "Wed, 17 Jan 2007 23:12:13 +0100") References: <200701172312.14840.hnguyen@linux.vnet.ibm.com> Message-ID: I've kind of lost the plot here. How does this patch fit in with the previous series of patches you posted? Does it replace them or go on top of them? Can please you resend me the full series of patch that remove the use of do_mmap(), with all cleanups and bug fixes included? 
And please roll up the fixes, I don't want one patch that adds a yield() inside a spinlock and then a later patch to fix it -- there's no sense in adding landmines for people potentially doing git bisect in the future. And also please try to split the patches so that they don't mix together two things -- please try to make the "remove obsolete prototypes" patch separate from the mmap fixes. Thanks... From rdreier at cisco.com Thu Jan 18 10:58:29 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 18 Jan 2007 10:58:29 -0800 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: <200701172312.14840.hnguyen@linux.vnet.ibm.com> (Hoang-Nam Nguyen's message of "Wed, 17 Jan 2007 23:12:13 +0100") References: <200701172312.14840.hnguyen@linux.vnet.ibm.com> Message-ID: > Roland, please note that I applied the previous patches to > your git tree for-2.6.21 before creating this patch. I also > realized a compile issue with the patch from Michael T. in > ehca_reqs.c regarding "return qp pointer in ib_wc". For this > I'll send another patch. Michael -- I didn't notice before but you do seem to have misread the ehca CQ polling code. The ehca CQ poll operation doesn't actually have the qp pointer readily available -- unless I'm mistaken, ehca will have to do an expensive search by QPN in a hash table to get the qp pointer. That doesn't seem acceptable for every CQ poll, so do you have any ideas of how to salvage this API change? - R. From rdreier at cisco.com Thu Jan 18 11:02:41 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 18 Jan 2007 11:02:41 -0800 Subject: [openib-general] [PATCH] fix thinko in libibverbs In-Reply-To: <20070118100612.GA26822@mellanox.co.il> (Michael S. 
Tsirkin's message of "Thu, 18 Jan 2007 12:06:12 +0200") References: <1168941329.12444.6.camel@mtls05.yok.mtl.com> <20070118100612.GA26822@mellanox.co.il> Message-ID: thanks, should be fixed now From rdreier at cisco.com Thu Jan 18 11:03:24 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 18 Jan 2007 11:03:24 -0800 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <20070118185154.GQ12386@obsidianresearch.com> (Jason Gunthorpe's message of "Thu, 18 Jan 2007 11:51:54 -0700") References: <45AFB511.3000100@ichips.intel.com> <20070118180102.GG9890@mellanox.co.il> <1169144255.26676.6.camel@trinity.ogc.int> <20070118185154.GQ12386@obsidianresearch.com> Message-ID: Yes, changing data[] to be __u64 seems like the easiest solution here. From swise at opengridcomputing.com Thu Jan 18 11:23:26 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 13:23:26 -0600 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to2.6.15 In-Reply-To: <20070118173327.GD9890@mellanox.co.il> References: <1169139292.15460.4.camel@stevo-desktop> <20070118173212.GC9890@mellanox.co.il> <20070118173327.GD9890@mellanox.co.il> Message-ID: <1169148206.15460.22.camel@stevo-desktop> Here is what I've coded up. It compiles ok on staging.openfabrics.org. You can pull this from git://staging.openfabrics.org/~swise/ofed_1_2 ofed_1_2 Steve. 
---- commit b46734fae0a7b519ce22569cce4f5ba2df33aa77 Author: Steve Wise Date: Thu Jan 18 11:07:29 2007 -0800 Backport to 2.6.15_ubuntu606 This is the kernel.org 2.6.15 backport with a few tweaks: - recasting addr paramater for __set_bit() and __clear_bit() - removed backport for __netif_rx_schedule_prep() Signed-off-by: Steve Wise diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/asm/bitops.h b/kernel_addons/backport/2.6.15_ubuntu606/include/asm/bitops.h new file mode 100644 index 0000000..58ebe41 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/asm/bitops.h @@ -0,0 +1,20 @@ +#ifndef BACKPORT_ASM_BITOPS_TO_2_6_15_UBUNTU606 +#define BACKPORT_ASM_BITOPS_TO_2_6_15_UBUNTU606 + +#include_next + +static __inline__ void __backport_set_bit(int nr, void * addr) +{ + __set_bit(nr, (volatile void *)addr); +} + +#define __set_bit __backport_set_bit + +static __inline__ void __backport_clear_bit(int nr, void * addr) +{ + __clear_bit(nr, (volatile void *)addr); +} + +#define __clear_bit __backport_clear_bit + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/bitops.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/bitops.h new file mode 100644 index 0000000..d9752fc --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/bitops.h @@ -0,0 +1,24 @@ +#include_next +#ifndef LINUX_BITOPS_BACKPORT_2_6_15 +#define LINUX_BITOPS_BACKPORT_2_6_15 + +static inline int fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +} + +#endif +#ifndef LINUX_BITOPS_BACKPORT_2_6_16 +#define LINUX_BITOPS_BACKPORT_2_6_16 + +static inline unsigned fls_long(unsigned long l) +{ + if (sizeof(l) == 4) + return fls(l); + return fls64(l); +} + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/device.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/device.h new file mode 100644 index 0000000..324f20e --- /dev/null +++ 
b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/device.h @@ -0,0 +1,9 @@ +#ifndef LINUX_DEVICE_BACKPORT_H +#define LINUX_DEVICE_BACKPORT_H + +#include_next + +#define add_uevent_var add_hotplug_env_var +#define uevent hotplug + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/fs.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/fs.h new file mode 100644 index 0000000..f0631fb --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/fs.h @@ -0,0 +1,8 @@ +#ifndef BACKPORT_LINUX_FS_H +#define BACKPORT_LINUX_FS_H + +#include_next + +#define i_private u.generic_ip + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/genalloc.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/inetdevice.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/inetdevice.h new file mode 100644 index 0000000..7a32313 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/inetdevice.h @@ -0,0 +1,28 @@ +#ifndef _LINUX_INETDEVICE_BACKPORT_TO_2_6_17 +#define _LINUX_INETDEVICE_BACKPORT_TO_2_6_17 + +#include_next +#include + +static inline struct net_device *xxx_ip_dev_find(u32 addr) +{ + struct net_device *dev; + u32 ip; + + read_lock(&dev_base_lock); + for (dev = dev_base; dev; dev = dev->next) { + ip = inet_select_addr(dev, 0, RT_SCOPE_LINK); + if (ip == addr) { + dev_hold(dev); + break; + } + } + read_unlock(&dev_base_lock); + + return dev; +} + +#define ip_dev_find xxx_ip_dev_find + +#endif + diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/interrupt.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define 
BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/kernel.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/kernel.h new file mode 100644 index 0000000..a37dcd5 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/kernel.h @@ -0,0 +1,7 @@ +#ifndef BACKPORT_KERNEL_H_2_6_19 +#define BACKPORT_KERNEL_H_2_6_19 + +#include_next +#include + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/lockdep.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/lockdep.h new file mode 100644 index 0000000..0c34f36 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/lockdep.h @@ -0,0 +1,355 @@ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar + * + * see Documentation/lockdep-design.txt for more details. 
+ */ +#ifndef __LINUX_LOCKDEP_H +#define __LINUX_LOCKDEP_H + +#if 0 +#include +#include +#include +#include +#endif + +#ifdef CONFIG_LOCKDEP + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit +{ + LOCK_USED = 0, + LOCK_USED_IN_HARDIRQ, + LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, + LOCK_ENABLED_HARDIRQS, + LOCK_USED_IN_HARDIRQ_READ, + LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS_READ, + LOCK_ENABLED_HARDIRQS_READ, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define LOCKF_USED (1 << LOCK_USED) +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) + +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) + +#define LOCKF_ENABLED_IRQS_READ \ + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +/* + * Lock-classes are keyed via unique addresses, by embedding the + * lockclass-key into the kernel (or module) .data section. (For + * static locks we use the lock address itself as the key.) 
+ */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +/* + * The lock-class itself: + */ +struct lock_class { + /* + * class-hash: + */ + struct list_head hash_entry; + + /* + * global list of all lock-classes: + */ + struct list_head lock_entry; + + struct lockdep_subclass_key *key; + unsigned int subclass; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + struct stack_trace usage_traces[LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + unsigned int version; + + /* + * Statistics counter: + */ + unsigned long ops; + + const char *name; + int name_version; +}; + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache; + const char *name; +}; + +/* + * Every lock has a list of other locks that were taken after it. + * We only grow the list, never remove from it: + */ +struct lock_list { + struct list_head entry; + struct lock_class *class; + struct stack_trace trace; +}; + +/* + * We record lock dependency chains, so that we can cache them: + */ +struct lock_chain { + struct list_head entry; + u64 chain_key; +}; + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. 
+ * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + struct lock_class *class; + unsigned long acquire_ip; + struct lockdep_map *instance; + + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest ontop of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also keep do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. [Note: hardirq and softirq contexts + * are separated from each other too.] 
+ * + * The following field is used to detect when we cross into an + * interrupt context: + */ + int irq_context; + int trylock; + int read; + int check; + int hardirqs_off; +}; + +/* + * Initialization, self-test and debugging-output methods: + */ +extern void lockdep_init(void); +extern void lockdep_info(void); +extern void lockdep_reset(void); +extern void lockdep_reset_lock(struct lockdep_map *lock); +extern void lockdep_free_key_range(void *start, unsigned long size); + +extern void lockdep_off(void); +extern void lockdep_on(void); +extern int lockdep_internal(void); + +/* + * These methods are used by specific locking variants (spinlocks, + * rwlocks, mutexes and rwsems) to pass init/acquire/release events + * to lockdep: + */ + +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key); + +/* + * Reinitialize a lock key - for cases where there is special locking or + * special initialization of locks so that the validator gets the scope + * of dependencies wrong: they are either too broad (they need a class-split) + * or they are too narrow (they suffer from a false class-split): + */ +#define lockdep_set_class(lock, key) \ + lockdep_init_map(&(lock)->dep_map, #key, key) +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map(&(lock)->dep_map, name, key) + +/* + * Acquire a lock. + * + * Values for "read": + * + * 0: exclusive (write) acquire + * 1: read-acquire (no recursion allowed) + * 2: read-acquire with same-instance recursion allowed + * + * Values for check: + * + * 0: disabled + * 1: simple checks (freeing, held-at-exit-time, etc.) 
+ * 2: full validation + */ +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip); + +extern void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +# define INIT_LOCKDEP .lockdep_recursion = 0, + +#else /* !LOCKDEP */ + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} + +static inline int lockdep_internal(void) +{ + return 0; +} + +# define lock_acquire(l, s, t, r, c, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lockdep_init() do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); } while (0) +# define INIT_LOCKDEP +# define lockdep_reset() do { debug_locks = 1; } while (0) +# define lockdep_free_key_range(start, size) do { } while (0) +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; +#endif /* !LOCKDEP */ + +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) +extern void early_init_irq_lock_class(void); +#else +# define early_init_irq_lock_class() do { } while (0) +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +extern void early_boot_irqs_off(void); +extern void early_boot_irqs_on(void); +#else +# define early_boot_irqs_off() do { } while (0) +# define early_boot_irqs_on() do { } while (0) +#endif + +/* + * For trivial one-depth nesting of a lock-class, the following + * global define can be used. (Subsystems with multiple levels + * of nesting should define their own lock-nesting subclasses.) 
+ */ +#define SINGLE_DEPTH_NESTING 1 + +/* + * Map the dependency ops to NOP or to real lockdep ops, depending + * on the per lock-class debug mode: + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define spin_release(l, n, i) lock_release(l, n, i) +#else +# define spin_acquire(l, s, t, i) do { } while (0) +# define spin_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) +# else +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) +# endif +# define rwlock_release(l, n, i) lock_release(l, n, i) +#else +# define rwlock_acquire(l, s, t, i) do { } while (0) +# define rwlock_acquire_read(l, s, t, i) do { } while (0) +# define rwlock_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define mutex_release(l, n, i) lock_release(l, n, i) +#else +# define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) +# else +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) +# endif +# define rwsem_release(l, n, i) lock_release(l, n, i) +#else +# define rwsem_acquire(l, s, t, i) do { } while (0) +# 
define rwsem_acquire_read(l, s, t, i) do { } while (0) +# define rwsem_release(l, n, i) do { } while (0) +#endif + +#endif /* __LINUX_LOCKDEP_H */ diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/log2.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/log2.h new file mode 100644 index 0000000..d02e1a5 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/log2.h @@ -0,0 +1,157 @@ +/* Integer base 2 logarithm calculation + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells at redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_LOG2_H +#define _LINUX_LOG2_H + +#include +#include + +/* + * deal with unrepresentable constant logarithms + */ +extern __attribute__((const, noreturn)) +int ____ilog2_NaN(void); + +/* + * non-constant log of base 2 calculators + * - the arch may override these in asm/bitops.h if they can be implemented + * more efficiently than using fls() and fls64() + * - the arch is not required to handle n==0 if implementing the fallback + */ +#ifndef CONFIG_ARCH_HAS_ILOG2_U32 +static inline __attribute__((const)) +int __ilog2_u32(u32 n) +{ + return fls(n) - 1; +} +#endif + +#ifndef CONFIG_ARCH_HAS_ILOG2_U64 +static inline __attribute__((const)) +int __ilog2_u64(u64 n) +{ + return fls64(n) - 1; +} +#endif + +/* + * round up to nearest power of two + */ +static inline __attribute__((const)) +unsigned long __roundup_pow_of_two(unsigned long n) +{ + return 1UL << fls_long(n - 1); +} + +/** + * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value + * @n - parameter + * + * constant-capable log of base 2 calculation + * - this can be used to initialise global variables from constant data, hence + * the massive ternary 
operator construction + * + * selects the appropriately-sized optimised version depending on sizeof(n) + */ +#define ilog2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n) < 1 ? ____ilog2_NaN() : \ + (n) & (1ULL << 63) ? 63 : \ + (n) & (1ULL << 62) ? 62 : \ + (n) & (1ULL << 61) ? 61 : \ + (n) & (1ULL << 60) ? 60 : \ + (n) & (1ULL << 59) ? 59 : \ + (n) & (1ULL << 58) ? 58 : \ + (n) & (1ULL << 57) ? 57 : \ + (n) & (1ULL << 56) ? 56 : \ + (n) & (1ULL << 55) ? 55 : \ + (n) & (1ULL << 54) ? 54 : \ + (n) & (1ULL << 53) ? 53 : \ + (n) & (1ULL << 52) ? 52 : \ + (n) & (1ULL << 51) ? 51 : \ + (n) & (1ULL << 50) ? 50 : \ + (n) & (1ULL << 49) ? 49 : \ + (n) & (1ULL << 48) ? 48 : \ + (n) & (1ULL << 47) ? 47 : \ + (n) & (1ULL << 46) ? 46 : \ + (n) & (1ULL << 45) ? 45 : \ + (n) & (1ULL << 44) ? 44 : \ + (n) & (1ULL << 43) ? 43 : \ + (n) & (1ULL << 42) ? 42 : \ + (n) & (1ULL << 41) ? 41 : \ + (n) & (1ULL << 40) ? 40 : \ + (n) & (1ULL << 39) ? 39 : \ + (n) & (1ULL << 38) ? 38 : \ + (n) & (1ULL << 37) ? 37 : \ + (n) & (1ULL << 36) ? 36 : \ + (n) & (1ULL << 35) ? 35 : \ + (n) & (1ULL << 34) ? 34 : \ + (n) & (1ULL << 33) ? 33 : \ + (n) & (1ULL << 32) ? 32 : \ + (n) & (1ULL << 31) ? 31 : \ + (n) & (1ULL << 30) ? 30 : \ + (n) & (1ULL << 29) ? 29 : \ + (n) & (1ULL << 28) ? 28 : \ + (n) & (1ULL << 27) ? 27 : \ + (n) & (1ULL << 26) ? 26 : \ + (n) & (1ULL << 25) ? 25 : \ + (n) & (1ULL << 24) ? 24 : \ + (n) & (1ULL << 23) ? 23 : \ + (n) & (1ULL << 22) ? 22 : \ + (n) & (1ULL << 21) ? 21 : \ + (n) & (1ULL << 20) ? 20 : \ + (n) & (1ULL << 19) ? 19 : \ + (n) & (1ULL << 18) ? 18 : \ + (n) & (1ULL << 17) ? 17 : \ + (n) & (1ULL << 16) ? 16 : \ + (n) & (1ULL << 15) ? 15 : \ + (n) & (1ULL << 14) ? 14 : \ + (n) & (1ULL << 13) ? 13 : \ + (n) & (1ULL << 12) ? 12 : \ + (n) & (1ULL << 11) ? 11 : \ + (n) & (1ULL << 10) ? 10 : \ + (n) & (1ULL << 9) ? 9 : \ + (n) & (1ULL << 8) ? 8 : \ + (n) & (1ULL << 7) ? 7 : \ + (n) & (1ULL << 6) ? 6 : \ + (n) & (1ULL << 5) ? 5 : \ + (n) & (1ULL << 4) ? 
4 : \ + (n) & (1ULL << 3) ? 3 : \ + (n) & (1ULL << 2) ? 2 : \ + (n) & (1ULL << 1) ? 1 : \ + (n) & (1ULL << 0) ? 0 : \ + ____ilog2_NaN() \ + ) : \ + (sizeof(n) <= 4) ? \ + __ilog2_u32(n) : \ + __ilog2_u64(n) \ + ) + +/** + * roundup_pow_of_two - round the given value up to nearest power of two + * @n - parameter + * + * round the given balue up to the nearest power of two + * - the result is undefined when n == 0 + * - this can be used to initialise global variables from constant data + */ +#define roundup_pow_of_two(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 1) ? 0 : \ + (1UL << (ilog2((n) - 1) + 1)) \ + ) : \ + __roundup_pow_of_two(n) \ + ) + +#endif /* _LINUX_LOG2_H */ diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/mutex.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/mutex.h new file mode 100644 index 0000000..ef5a1b4 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/mutex.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2006 Cisco Systems. All rights reserved. + * + * This file is released under the GPLv2. + */ + +/* mutex compatibility for pre-2.6.16 kernels */ + +#ifndef __LINUX_MUTEX_H +#define __LINUX_MUTEX_H + +#include +#include + +#define mutex semaphore +#define DEFINE_MUTEX(foo) DECLARE_MUTEX(foo) +#define mutex_init(foo) init_MUTEX(foo) +#define mutex_lock(foo) down(foo) +#define mutex_lock_interruptible(foo) down_interruptible(foo) +/* this function follows the spin_trylock() convention, so * + * it is negated to the down_trylock() return values! 
Be careful */ +#define mutex_trylock(foo) !down_trylock(foo) +#define mutex_unlock(foo) up(foo) + +#endif /* __LINUX_MUTEX_H */ diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/netdevice.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/netdevice.h new file mode 100644 index 0000000..225eeda --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/netdevice.h @@ -0,0 +1,22 @@ +#ifndef _LINUX_NETDEVICE_BACKPORT_TO_2_6_16 +#define _LINUX_NETDEVICE_BACKPORT_TO_2_6_16 + +#include_next + +static inline void netif_tx_lock(struct net_device *dev) +{ + spin_lock(&dev->xmit_lock); + dev->xmit_lock_owner = smp_processor_id(); +} + +static inline void netif_tx_unlock(struct net_device *dev) +{ + dev->xmit_lock_owner = -1; + spin_unlock(&dev->xmit_lock); +} + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/random.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/rwsem.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/rwsem.h new file mode 100644 index 0000000..1ad4e13 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/rwsem.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_RWSEM_BACKPORT_TO_2_6_17 +#define _LINUX_RWSEM_BACKPORT_TO_2_6_17 + +#include_next + +#define down_read_nested(sem, subclass) down_read(sem) + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/signal.h 
b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/signal.h new file mode 100644 index 0000000..78adbab --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/signal.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_SIGNAL_BACKPORT_2_6_17 +#define _LINUX_SIGNAL_BACKPORT_2_6_17 + +#include_next + +#define IRQF_SHARED SA_SHIRQ + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/skbuff.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/skbuff.h new file mode 100644 index 0000000..70bf011 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/skbuff.h @@ -0,0 +1,11 @@ +#ifndef LINUX_SKBUFF_H_BACKPORT +#define LINUX_SKBUFF_H_BACKPORT + +#include_next + +#define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW + +#define gso_size tso_size + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/slab.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/slab.h new file mode 100644 index 0000000..46ac6e5 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/slab.h @@ -0,0 +1,34 @@ +#include_next + +#include_next + +#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 +#define BACKPORT_LINUX_STRING_TO_2_6_18 + +static inline +void *kmemdup(const void *src, size_t len, gfp_t gfp) +{ + void *p; + + p = kmalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} + +#endif +#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 +#define BACKPORT_LINUX_STRING_TO_2_6_18 + +static inline +void *kmemdup(const void *src, size_t len, gfp_t gfp) +{ + void *p; + + p = kmalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/spinlock.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/spinlock.h new file mode 100644 index 0000000..db39389 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/spinlock.h @@ -0,0 +1,7 @@ +#ifndef BACKPORT_LINUX_SPINLOCK_H +#define 
BACKPORT_LINUX_SPINLOCK_H + +#include_next +#define spin_lock_nested(lock, subclass) spin_lock(lock) + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/types.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/types.h new file mode 100644 index 0000000..86e334f --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/types.h @@ -0,0 +1,6 @@ +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 +#define BACKPORT_LINUX_TYPES_TO_2_6_15 +#include_next + +#define BITS_PER_BYTE 8 +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/workqueue.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/workqueue.h new file mode 100644 index 0000000..cc8b2cd --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/workqueue.h @@ -0,0 +1,50 @@ +#ifndef BACKPORT_LINUX_WORKQUEUE_TO_2_6_19 +#define BACKPORT_LINUX_WORKQUEUE_TO_2_6_19 + +#include_next + +struct delayed_work { + struct work_struct work; +}; + +static inline void +backport_INIT_WORK(struct work_struct *work, void *func) +{ + INIT_WORK(work, func, work); +} + +static inline int backport_queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, + unsigned long delay) +{ + return queue_delayed_work(wq, &work->work, delay); +} + +static inline int +backport_cancel_delayed_work(struct delayed_work *work) +{ + return cancel_delayed_work(&work->work); +} + +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + + +#undef INIT_WORK +#define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) +#define INIT_DELAYED_WORK(_work, _func) INIT_WORK(&(_work)->work, _func) + +#undef DECLARE_WORK +#define DECLARE_WORK(n, f) \ + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) +#define DECLARE_DELAYED_WORK(n, f) \ + struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } + 
+#define queue_delayed_work backport_queue_delayed_work +#define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/net/inet_sock.h b/kernel_addons/backport/2.6.15_ubuntu606/include/net/inet_sock.h new file mode 100644 index 0000000..962da47 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/net/inet_sock.h @@ -0,0 +1,6 @@ +#ifndef NET_INET_SOCK_H +#define NET_INET_SOCK_H + +#include + +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/net/netevent.h b/kernel_addons/backport/2.6.15_ubuntu606/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * Changes: + */ +#ifdef __KERNEL__ + +#include + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/net/sock.h b/kernel_addons/backport/2.6.15_ubuntu606/include/net/sock.h new file mode 100644 index 0000000..f621a71 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/net/sock.h @@ -0,0 +1,8 @@ +#ifndef _NET_SOCK_SLES_BACKPORT_H +#define _NET_SOCK_SLES_BACKPORT_H + +#include_next + +#define sk_eat_skb(a, b, c) sk_eat_skb(a, b) + +#endif diff --git 
a/kernel_addons/backport/2.6.15_ubuntu606/include/scsi/scsi.h b/kernel_addons/backport/2.6.15_ubuntu606/include/scsi/scsi.h new file mode 100644 index 0000000..352330d --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/scsi/scsi.h @@ -0,0 +1,7 @@ +#ifndef _SCSI_SCSI_H_BACKPORT +#define _SCSI_SCSI_H_BACKPORT + +#include_next + +#define SCAN_WILD_CARD ~0 +#endif diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/src/genalloc.c b/kernel_addons/backport/2.6.15_ubuntu606/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c b/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c new file mode 100644 index 0000000..5ffadd1 --- /dev/null +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c @@ -0,0 +1,71 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Fixes: + */ + +#include +#include +#include +#include + +static struct notifier_block *netevent_notif_chain; + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). 
+ */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/kernel_patches/backport/2.6.15_ubuntu606/1_struct_path_revert_to_2_6_19.patch b/kernel_patches/backport/2.6.15_ubuntu606/1_struct_path_revert_to_2_6_19.patch new file mode 100644 index 0000000..27eb62e --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/1_struct_path_revert_to_2_6_19.patch @@ -0,0 +1,82 @@ +diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c +index a617ca7..4e16314 100644 +--- a/drivers/infiniband/core/uverbs_main.c ++++ b/drivers/infiniband/core/uverbs_main.c +@@ -534,9 +534,9 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + * module reference. + */ + filp->f_op = fops_get(&uverbs_event_fops); +- filp->f_path.mnt = mntget(uverbs_event_mnt); +- filp->f_path.dentry = dget(uverbs_event_mnt->mnt_root); +- filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; ++ filp->f_vfsmnt = mntget(uverbs_event_mnt); ++ filp->f_dentry = dget(uverbs_event_mnt->mnt_root); ++ filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_flags = O_RDONLY; + filp->f_mode = FMODE_READ; + filp->private_data = ev_file; +diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c +index b932bcb..ddbcabd 100644 +--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c ++++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c +@@ -1744,9 +1744,9 @@ static int ipath_assign_port(struct file *fp, + goto done; + } + +- i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; ++ i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE; + ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", +- (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); ++ 
(long)fp->f_dentry->d_inode->i_rdev, i_minor); + + if (i_minor) + ret = find_free_port(i_minor - 1, fp, uinfo); +diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c +index 79a60f0..d9ff283 100644 +--- a/drivers/infiniband/hw/ipath/ipath_fs.c ++++ b/drivers/infiniband/hw/ipath/ipath_fs.c +@@ -118,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, + u16 i; + struct ipath_devdata *dd; + +- dd = file->f_path.dentry->d_inode->i_private; ++ dd = file->f_dentry->d_inode->i_private; + + for (i = 0; i < NUM_COUNTERS; i++) + counters[i] = ipath_snap_cntr(dd, i); +@@ -138,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, + struct ipath_devdata *dd; + u64 guid; + +- dd = file->f_path.dentry->d_inode->i_private; ++ dd = file->f_dentry->d_inode->i_private; + + guid = be64_to_cpu(dd->ipath_guid); + +@@ -177,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, + u32 tmp, tmp2; + struct ipath_devdata *dd; + +- dd = file->f_path.dentry->d_inode->i_private; ++ dd = file->f_dentry->d_inode->i_private; + + /* so we only initialize non-zero fields. 
*/ + memset(portinfo, 0, sizeof portinfo); +@@ -324,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, + goto bail; + } + +- dd = file->f_path.dentry->d_inode->i_private; ++ dd = file->f_dentry->d_inode->i_private; + if (ipath_eeprom_read(dd, pos, tmp, count)) { + ipath_dev_err(dd, "failed to read from flash\n"); + ret = -ENXIO; +@@ -377,7 +377,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, + goto bail_tmp; + } + +- dd = file->f_path.dentry->d_inode->i_private; ++ dd = file->f_dentry->d_inode->i_private; + if (ipath_eeprom_write(dd, pos, tmp, count)) { + ret = -ENXIO; + ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/kernel_patches/backport/2.6.15_ubuntu606/2_misc_device_to_2_6_19.patch b/kernel_patches/backport/2.6.15_ubuntu606/2_misc_device_to_2_6_19.patch new file mode 100644 index 0000000..6601371 --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/2_misc_device_to_2_6_19.patch @@ -0,0 +1,33 @@ +diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c +index 81a5cdc..640508d 100644 +--- a/drivers/infiniband/core/ucma.c ++++ b/drivers/infiniband/core/ucma.c +@@ -842,7 +842,7 @@ static ssize_t show_abi_version(struct device *dev, + { + return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); + } +-static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); ++static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + + static int __init ucma_init(void) + { +@@ -852,7 +852,8 @@ static int __init ucma_init(void) + if (ret) + return ret; + +- ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); ++ ret = class_device_create_file(ucma_misc.class, ++ &class_device_attr_abi_version); + if (ret) { + printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); + goto err; +@@ -865,7 +866,8 @@ err: + + static void __exit ucma_cleanup(void) + { +- device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); ++ 
class_device_remove_file(ucma_misc.class, ++ &class_device_attr_abi_version); + misc_deregister(&ucma_misc); + idr_destroy(&ctx_idr); + } diff --git a/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch new file mode 100644 index 0000000..316d8d2 --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch @@ -0,0 +1,76 @@ +commit e795d092507d571d66f2ec98d3efdc7dd284bf80 +Author: Tom Tucker +Date: Sun Jul 30 20:44:19 2006 -0700 + + [NET] infiniband: Cleanup ib_addr module to use the netevents + + Signed-off-by: Tom Tucker + Signed-off-by: Steve Wise + Signed-off-by: David S. Miller + +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c +index 1205e80..d294bbc 100644 +--- a/drivers/infiniband/core/addr.c ++++ b/drivers/infiniband/core/addr.c +@@ -35,7 +35,6 @@ #include + #include + #include + #include +-#include + #include + + MODULE_AUTHOR("Sean Hefty"); +@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad + } + EXPORT_SYMBOL(rdma_addr_cancel); + +-static int netevent_callback(struct notifier_block *self, unsigned long event, +- void *ctx) ++static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, ++ struct packet_type *pkt, struct net_device *orig_dev) + { +- if (event == NETEVENT_NEIGH_UPDATE) { +- struct neighbour *neigh = ctx; ++ struct arphdr *arp_hdr; + +- if (neigh->dev->type == ARPHRD_INFINIBAND && +- (neigh->nud_state & NUD_VALID)) { +- set_timeout(jiffies); +- } +- } ++ arp_hdr = (struct arphdr *) skb->nh.raw; ++ ++ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || ++ arp_hdr->ar_op == htons(ARPOP_REPLY)) ++ set_timeout(jiffies); ++ ++ kfree_skb(skb); + return 0; + } + +-static struct notifier_block nb = { +- .notifier_call = netevent_callback ++static struct packet_type addr_arp = { ++ .type = __constant_htons(ETH_P_ARP), ++ .func = addr_arp_recv, ++ 
.af_packet_priv = (void*) 1, + }; + + static int addr_init(void) +@@ -351,13 +353,13 @@ static int addr_init(void) + if (!addr_wq) + return -ENOMEM; + +- register_netevent_notifier(&nb); ++ dev_add_pack(&addr_arp); + return 0; + } + + static void addr_cleanup(void) + { +- unregister_netevent_notifier(&nb); ++ dev_remove_pack(&addr_arp); + destroy_workqueue(addr_wq); + } + + diff --git a/kernel_patches/backport/2.6.15_ubuntu606/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15_ubuntu606/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.15_ubuntu606/ipoib_8111_to_2_6_16.patch b/kernel_patches/backport/2.6.15_ubuntu606/ipoib_8111_to_2_6_16.patch new file mode 100644 index 0000000..2975774 --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/ipoib_8111_to_2_6_16.patch @@ -0,0 +1,83 @@ +diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h +index 07deee8..501ee93 100644 +--- a/drivers/infiniband/ulp/ipoib/ipoib.h ++++ b/drivers/infiniband/ulp/ipoib/ipoib.h +@@ -217,6 +219,7 @@ struct ipoib_neigh { + + struct neighbour *neighbour; + ++ struct list_head all_neigh_list; + struct list_head list; + }; + +diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c +index 705eb1d..56022f5 100644 +--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c +@@ -85,6 +85,9 @@ struct workqueue_struct *ipoib_workqueue; + + struct ib_sa_client ipoib_sa_client; + ++static 
DEFINE_SPINLOCK(ipoib_all_neigh_list_lock); ++static LIST_HEAD(ipoib_all_neigh_list); ++ + static void ipoib_add_one(struct ib_device *device); + static void ipoib_remove_one(struct ib_device *device); + +@@ -773,6 +776,17 @@ static void ipoib_neigh_destructor(struct neighbour *n) + unsigned long flags; + struct ipoib_ah *ah = NULL; + ++ struct ipoib_neigh *tn, *nn = NULL; ++ spin_lock(&ipoib_all_neigh_list_lock); ++ list_for_each_entry(tn, &ipoib_all_neigh_list, all_neigh_list) ++ if (tn->neighbour == n) { ++ nn = tn; ++ break; ++ } ++ spin_unlock(&ipoib_all_neigh_list_lock); ++ if (!nn) ++ return; ++ + ipoib_dbg(priv, + "neigh_destructor for %06x " IPOIB_GID_FMT "\n", + IPOIB_QPN(n->ha), +@@ -806,6 +820,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) + *to_ipoib_neigh(neighbour) = neigh; + skb_queue_head_init(&neigh->queue); + ++ spin_lock(&ipoib_all_neigh_list_lock); ++ list_add_tail(&neigh->all_neigh_list, &ipoib_all_neigh_list); ++ neigh->neighbour->ops->destructor = ipoib_neigh_destructor; ++ spin_unlock(&ipoib_all_neigh_list_lock); ++ + return neigh; + } + +@@ -813,6 +832,17 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sk_buff *skb; ++ struct ipoib_neigh *nn; ++ spin_lock(&ipoib_all_neigh_list_lock); ++ list_del(&neigh->all_neigh_list); ++ list_for_each_entry(nn, &ipoib_all_neigh_list, all_neigh_list) ++ if (nn->neighbour->ops == neigh->neighbour->ops) ++ goto found; ++ ++ neigh->neighbour->ops->destructor = NULL; ++found: ++ spin_unlock(&ipoib_all_neigh_list_lock); ++ + *to_ipoib_neigh(neigh->neighbour) = NULL; + while ((skb = __skb_dequeue(&neigh->queue))) { + ++priv->stats.tx_dropped; +@@ -823,8 +853,6 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) + + static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) + { +- parms->neigh_destructor = ipoib_neigh_destructor; +- + return 0; + } 
+ diff --git a/kernel_patches/backport/2.6.15_ubuntu606/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.15_ubuntu606/linux_stuff_to_2_6_17.patch new file mode 100644 index 0000000..eb2285f --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/linux_stuff_to_2_6_17.patch @@ -0,0 +1,24 @@ +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +new file mode 100644 +index 0000000..58cf933 +--- /dev/null ++++ b/drivers/infiniband/core/netevent.c +@@ -0,0 +1 @@ ++#include "src/netevent.c" +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 50fb1cd..456bfd0 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o netevent.o diff --git a/kernel_patches/backport/2.6.15_ubuntu606/uverbs_to_2_6_17.patch b/kernel_patches/backport/2.6.15_ubuntu606/uverbs_to_2_6_17.patch new file mode 100644 index 0000000..497a203 --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/uverbs_to_2_6_17.patch @@ -0,0 +1,20 @@ +Index: gen2_linux/drivers/infiniband/core/uverbs_main.c +=================================================================== +--- gen2_linux.orig/drivers/infiniband/core/uverbs_main.c ++++ gen2_linux/drivers/infiniband/core/uverbs_main.c +@@ -815,12 +815,11 @@ static void ib_uverbs_remove_one(struct + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + } + +-static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, +- const char *dev_name, void *data, +- struct vfsmount *mnt) ++static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int 
flags, ++ const char *dev_name, void *data) + { + return get_sb_pseudo(fs_type, "infinibandevent:", NULL, +- INFINIBANDEVENTFS_MAGIC, mnt); ++ INFINIBANDEVENTFS_MAGIC); + } + + static struct file_system_type uverbs_event_fs = { diff --git a/ofed_scripts/configure b/ofed_scripts/configure index f5e1da4..0618d77 100755 --- a/ofed_scripts/configure +++ b/ofed_scripts/configure @@ -195,6 +195,9 @@ get_backport_dir() 2.6.14*) echo 2.6.14 ;; + 2.6.15-*-*) + echo 2.6.15_ubuntu606 + ;; 2.6.15*) echo 2.6.15 ;; From sashak at voltaire.com Thu Jan 18 11:44:03 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 21:44:03 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070118050014.GD25305@mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118004224.GQ21043@sashak.voltaire.com> <20070118050014.GD25305@mellanox.co.il> Message-ID: <20070118194403.GA23783@sashak.voltaire.com> On 07:00 Thu 18 Jan , Michael S. Tsirkin wrote: > > What about pure opensource - http://sourceware.org/pthreads-win32/? It > > is licensed under LGPL, I see on the net many positive reports about > > stability and usability. > > I used it to do a windows port of linux complib at some point and opensm > seemed to work fine with it. What it was lacking at that point was > support for 64 bit applications, and for some reason (which is > still unclear to me) there was a strong desire to run opensm in 64 bit mode. > Seems to have been fixed now, BTW. So this seems to be good option for OpenSM on Windows. Right? 
Sasha From swise at opengridcomputing.com Thu Jan 18 11:45:15 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 13:45:15 -0600 Subject: [openib-general] [PATCH ] ofascripts Create real libibverbs.a library instead of an empty file. Message-ID: <20070118194515.23135.54454.stgit@dell3.ogc.int> Signed-off-by: Steve Wise --- configure | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/configure b/configure index c0ff88b..216864d 100755 --- a/configure +++ b/configure @@ -600,7 +600,7 @@ ac_cv_func_ibv_register_driver=yes " [ ! -x ./configure ] && [ ! -x ./autogen.sh ] && continue [ ! -x ./configure ] && ./autogen.sh - echo > libibverbs.a + ar qc libibverbs.a echo "Running: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck ${PREFIX} ${LIBDIR} CPPFLAGS=\"-I../libibverbs/include\"" if ! ( $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck ${PREFIX} ${LIBDIR} \ CPPFLAGS="-I../libibverbs/include" LDFLAGS="-L.") From sashak at voltaire.com Thu Jan 18 11:53:40 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 18 Jan 2007 21:53:40 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45A664CC.8050200@dev.mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> Message-ID: <20070118195340.GB23783@sashak.voltaire.com> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: > > As for the mailing list it's openib-windows at openib.org. 
You can access > it here: http://openib.org/mailman/listinfo/openib-windows I found only references to svn://windows.openib.org, where 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | head -n 40' shows: ------------------------------------------------------------------------ r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line Set property svn:keywords "id" on all repository ------------------------------------------------------------------------ r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line [OPENSM] When running as a service, if all ports are down, use the first port. ------------------------------------------------------------------------ r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines [OPENSM] When trying to set to INIT the remote port of the given physical port in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no check whether the physical port in null (e.g., if it's disconnected). ------------------------------------------------------------------------ r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line [opensm] Base service status on results that were received from opensm log messages. ------------------------------------------------------------------------ r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line [OPENSM] missed fix for OPENSM logging to System Event Log ------------------------------------------------------------------------ r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! 2. bugfix: added message file for correct logging to System Event Log. 3. bugfix: wrong passing parameters in server mode; 4. 
bugfix: error in table of parameters ------------------------------------------------------------------------ r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line [opensm] Fix a trivial build break ------------------------------------------------------------------------ r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines if the guid2lid is corrupted, don't exit when running with -y option (don't exit on fatal) - just ignore the file Seems that development there was stopped in Aug 2006, and it doesn't have recent Win port patches. Am I looking in the wrong place? Sasha From HNGUYEN at de.ibm.com Thu Jan 18 11:57:54 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Thu, 18 Jan 2007 14:57:54 -0500 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: Message-ID: No problem. Will resend the full patch set for 2.6.21. Thanks Nam linuxppc-dev-bounces+hnguyen=de.ibm.com at ozlabs.org wrote on 18.01.2007 13:56:01: > I've kind of lost the plot here. How does this patch fit in with the > previous series of patches you posted? Does it replace them or go on > top of them? > > Can please you resend me the full series of patch that remove the use > of do_mmap(), with all cleanups and bug fixes included? And please > roll up the fixes, I don't want one patch that adds a yield() inside a > spinlock and then a later patch to fix it -- there's no sense in > adding landmines for people potentially doing git bisect in the > future. > > And also please try to split the patches so that they don't mix > together two things -- please try to make the "remove obsolete > prototypes" patch separate from the mmap fixes. > > Thanks... 
> _______________________________________________ > Linuxppc-dev mailing list > Linuxppc-dev at ozlabs.org > https://ozlabs.org/mailman/listinfo/linuxppc-dev From HNGUYEN at de.ibm.com Thu Jan 18 12:03:54 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Thu, 18 Jan 2007 15:03:54 -0500 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: Message-ID: > > Roland, please note that I applied the previous patches to > > your git tree for-2.6.21 before creating this patch. I also > > realized a compile issue with the patch from Michael T. in > > ehca_reqs.c regarding "return qp pointer in ib_wc". For this > > I'll send another patch. > Michael -- I didn't notice before but you do seem to have misread the > ehca CQ polling code. The ehca CQ poll operation doesn't actually > have the qp pointer readily available -- unless I'm mistaken, ehca > will have to do an expensive search by QPN in a hash table to get the > qp pointer. That doesn't seem acceptable for every CQ poll, so do you > have any ideas of how to salvage this API change? Yes, your concern is correct. From swise at opengridcomputing.com Thu Jan 18 12:25:54 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 14:25:54 -0600 Subject: [openib-general] [PATCH ] ofascripts Remove patches/user_fixes/roland_fixes.patch Message-ID: <20070118202554.25497.85011.stgit@dell3.ogc.int> This has been incorporated into roland's tree. Signed-off-by: Steve Wise --- patches/user_fixes/roland_fixes.patch | 31 ------------------------------- 1 files changed, 0 insertions(+), 31 deletions(-) diff --git a/patches/user_fixes/roland_fixes.patch b/patches/user_fixes/roland_fixes.patch deleted file mode 100644 index 3f02d27..0000000 --- a/patches/user_fixes/roland_fixes.patch +++ /dev/null @@ -1,31 +0,0 @@ -asprintf returns the number of bytes printed, -so the way to check for failure is < 0. - -Signed-off-by: Michael S. 
Tsirkin - -diff --git g/src/userspace/libibverbs/src/device.c b/src/userspace/libibverbs/src/device.c -index f7ef224..96f906c 100644 ---- a/src/userspace/libibverbs/src/device.c -+++ b/src/userspace/libibverbs/src/device.c -@@ -111,7 +111,7 @@ struct ibv_context *ibv_open_device(struct ibv_device *device) - int cmd_fd; - struct ibv_context *context; - -- if (asprintf(&devpath, "/dev/infiniband/%s", device->dev_name)) -+ if (asprintf(&devpath, "/dev/infiniband/%s", device->dev_name) < 0) - return NULL; - - /* -diff --git a/src/userspace/libibverbs/src/sysfs.c b/src/userspace/libibverbs/src/sysfs.c -index 392a675..85aee39 100644 ---- a/src/userspace/libibverbs/src/sysfs.c -+++ b/src/userspace/libibverbs/src/sysfs.c -@@ -82,7 +82,7 @@ int ibv_read_sysfs_file(const char *dir, const char *file, - int fd; - int len; - -- if (asprintf(&path, "%s/%s", dir, file)) -+ if (asprintf(&path, "%s/%s", dir, file) < 0) - return -1; - - fd = open(path, O_RDONLY); From swise at opengridcomputing.com Thu Jan 18 12:28:27 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 14:28:27 -0600 Subject: [openib-general] [PATCH 1/4] ofabuild Fixed syntax error. Message-ID: <20070118202827.25557.11503.stgit@dell3.ogc.int> Signed-off-by: Steve Wise --- build_ofa_user.sh | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build_ofa_user.sh b/build_ofa_user.sh index 82a10ee..2182d96 100755 --- a/build_ofa_user.sh +++ b/build_ofa_user.sh @@ -338,14 +338,14 @@ check_build() ex tar xzf ${dest}/${proj}/${projdir}.tgz cd ${tmpdir}/${projdir} - if ! ( ./configure ${WITH_PACKAGES} > ${configure_log} 2>&1 ) + if ! ( ./configure ${WITH_PACKAGES} > ${configure_log} 2>&1 ) ; then echo "Failed: ./configure ${WITH_PACKAGES}" | tee -a ${report} echo exit 1 fi echo "Passed: ./configure ${WITH_PACKAGES}" | tee -a ${report} - if ! ( make all > ${make_log} 2>&1 ) + if ! 
( make all > ${make_log} 2>&1 ) ; then echo "Failed: make all" | tee -a ${report} echo exit 1 From swise at opengridcomputing.com Thu Jan 18 12:28:33 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 14:28:33 -0600 Subject: [openib-general] [PATCH 4/4] ofabuild Use the rdma_ucm-abi3 branch for the librdmacm git tree. In-Reply-To: <20070118202827.25557.11503.stgit@dell3.ogc.int> References: <20070118202827.25557.11503.stgit@dell3.ogc.int> Message-ID: <20070118202833.25557.49832.stgit@dell3.ogc.int> Signed-off-by: Steve Wise --- build_ofa_user.sh | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/build_ofa_user.sh b/build_ofa_user.sh index f27d286..904a6da 100755 --- a/build_ofa_user.sh +++ b/build_ofa_user.sh @@ -52,7 +52,7 @@ srptools_branch="master" ipoibtools_git="git://git.openfabrics.org/~vlad/ipoibtools.git" ipoibtools_branch="master" librdmacm_git="git://git.openfabrics.org/~shefty/librdmacm.git" -librdmacm_branch="multicast" +librdmacm_branch="rdma_ucm-abi3" dapl_git="git://git.openfabrics.org/~ardavis/dapl.git" dapl_branch="rdma_ucm" imgen_git="git://git.openfabrics.org/~mst/imgen.git" From swise at opengridcomputing.com Thu Jan 18 12:28:29 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 14:28:29 -0600 Subject: [openib-general] [PATCH 2/4] ofabuild Changes for Chelsio T3 Support. 
In-Reply-To: <20070118202827.25557.11503.stgit@dell3.ogc.int> References: <20070118202827.25557.11503.stgit@dell3.ogc.int> Message-ID: <20070118202829.25557.47797.stgit@dell3.ogc.int> Signed-off-by: Steve Wise --- build_ofa_kernel.sh | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/build_ofa_kernel.sh b/build_ofa_kernel.sh index 0643066..08a0b7f 100755 --- a/build_ofa_kernel.sh +++ b/build_ofa_kernel.sh @@ -33,6 +33,7 @@ WITH_PACKAGES=" \ --with-user_access-mod \ --with-mthca-mod \ --with-core-mod \ +--with-cxgb3-mod \ --with-addr_trans-mod" arch=${arch:-"$(uname -m)"} @@ -83,6 +84,7 @@ get_src() include/scsi/iscsi_if.h \ include/scsi/libiscsi.h \ include/scsi/scsi_transport_iscsi.h \ + drivers/net/cxgb3 \ ` echo ref: refs/heads/${git_branch} > .git/HEAD From swise at opengridcomputing.com Thu Jan 18 12:28:31 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 14:28:31 -0600 Subject: [openib-general] [PATCH 3/4] ofabuild Create empty archive file for libibverbs.a instead of empty file. In-Reply-To: <20070118202827.25557.11503.stgit@dell3.ogc.int> References: <20070118202827.25557.11503.stgit@dell3.ogc.int> Message-ID: <20070118202831.25557.47484.stgit@dell3.ogc.int> Signed-off-by: Steve Wise --- build_ofa_user.sh | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/build_ofa_user.sh b/build_ofa_user.sh index 2182d96..f27d286 100755 --- a/build_ofa_user.sh +++ b/build_ofa_user.sh @@ -184,7 +184,7 @@ ac_cv_func_ibv_dofork_range=yes \ ac_cv_func_ibv_register_driver=yes " echo "Running: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" - echo > libibverbs.a + ar qc libibverbs.a if ! ( $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS="-I../libibverbs/include" LDFLAGS="-L." 
> ${conflog}.${dir} 2>&1 ) then echo "Failed to execute: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck CPPFLAGS=\"-I../libibverbs/include\" LDFLAGS=\"-L.\"" From mst at mellanox.co.il Thu Jan 18 13:14:09 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 23:14:09 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause addressalignment fault onia64 In-Reply-To: <1169144255.26676.6.camel@trinity.ogc.int> References: <1169144255.26676.6.camel@trinity.ogc.int> Message-ID: <20070118211409.GH9890@mellanox.co.il> > On Thu, 2007-01-18 at 20:01 +0200, Michael S. Tsirkin wrote: > > > Quoting r. Sean Hefty : > > > Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 > > > > > > > But I thought it is the data field which is cast to ib_mad_hdr > > > > and not ib_user_mad? If so Jason's suggestion of using > > > > u64 data[0] > > > > makes more sense. > > > > > > I don't think that you can change struct ib_user_mad or struct ib_user_mad_hdr > > > without breaking the ABI. > > > > This won't break ABI, since data is aligned already relative to start of struct. > > We only force the struct itself to be aligned, which has no ABI implications. > > > Quoting Tom Tucker : > Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause addressalignment fault onia64 > > > Does changing the size of the structure break the ABI? Size is the same, too. ABI is not broken. -- MST From mst at mellanox.co.il Thu Jan 18 13:16:02 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 18 Jan 2007 23:16:02 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 In-Reply-To: <20070118185154.GQ12386@obsidianresearch.com> References: <20070118185154.GQ12386@obsidianresearch.com> Message-ID: <20070118211602.GI9890@mellanox.co.il> > Quoting Jason Gunthorpe : > Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault onia64 > > On Thu, Jan 18, 2007 at 12:17:35PM -0600, Tom Tucker wrote: > > > Does changing the size of the structure break the ABI? > > ib_user_mad_hdr is 56 bytes long, that is already a multiple of 8 so > .data is already aligned on 8. Thus the size of ib_user_mad does not > change and there is no ABI concern here. > > __u64 data[0] is still 0 size, just like __u8 data[0]. > > Really all that happens is that the compiler will mark ib_user_mad as > requiring 8 byte alignment on ia64/etc and 4 byte on x86-64/etc, which > gets you the pad you need in the ib_umad_packet structure. I believe you are right. Can you post a patch that does this? I'm not in the lab now. -- MST From mst at mellanox.co.il Thu Jan 18 13:20:15 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 23:20:15 +0200 Subject: [openib-general] [PATCH ] ofascripts Create real libibverbs.a library instead of an empty file. In-Reply-To: <20070118194515.23135.54454.stgit@dell3.ogc.int> References: <20070118194515.23135.54454.stgit@dell3.ogc.int> Message-ID: <20070118212015.GL9890@mellanox.co.il> I thought we did this already. Vlad? Quoting Steve Wise : Subject: [PATCH ] ofascripts Create real libibverbs.a library instead of an empty file. Signed-off-by: Steve Wise --- configure | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/configure b/configure index c0ff88b..216864d 100755 --- a/configure +++ b/configure @@ -600,7 +600,7 @@ ac_cv_func_ibv_register_driver=yes " [ ! -x ./configure ] && [ ! -x ./autogen.sh ] && continue [ ! 
-x ./configure ] && ./autogen.sh - echo > libibverbs.a + ar qc libibverbs.a echo "Running: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck ${PREFIX} ${LIBDIR} CPPFLAGS=\"-I../libibverbs/include\"" if ! ( $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck ${PREFIX} ${LIBDIR} \ CPPFLAGS="-I../libibverbs/include" LDFLAGS="-L.") -- MST From mst at mellanox.co.il Thu Jan 18 13:18:40 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 23:18:40 +0200 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: References: Message-ID: <20070118211840.GJ9890@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability > > > Roland, please note that I applied the previous patches to > > your git tree for-2.6.21 before creating this patch. I also > > realized a compile issue with the patch from Michael T. in > > ehca_reqs.c regarding "return qp pointer in ib_wc". For this > > I'll send another patch. > > Michael -- I didn't notice before but you do seem to have misread the > ehca CQ polling code. The ehca CQ poll operation doesn't actually > have the qp pointer readily available -- unless I'm mistaken, ehca > will have to do an expensive search by QPN in a hash table to get the > qp pointer. That doesn't seem acceptable for every CQ poll, so do you > have any ideas of how to salvage this API change? qpnum is actually only required from poll cq if srq is supported. so ehca can just fill in NULL. -- MST From mst at mellanox.co.il Thu Jan 18 13:22:29 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 23:22:29 +0200 Subject: [openib-general] [PATCH 4/4] ofabuild Use the rdma_ucm-abi3 branch for the librdmacm git tree. 
In-Reply-To: <20070118202833.25557.49832.stgit@dell3.ogc.int> References: <20070118202833.25557.49832.stgit@dell3.ogc.int> Message-ID: <20070118212229.GM9890@mellanox.co.il> I do not think we want that. Sean indicated that OFED should take master and guess it's his decision as the maintainer. Quoting Steve Wise : Subject: [PATCH 4/4] ofabuild Use the rdma_ucm-abi3 branch for the librdmacm git tree. Signed-off-by: Steve Wise --- build_ofa_user.sh | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/build_ofa_user.sh b/build_ofa_user.sh index f27d286..904a6da 100755 --- a/build_ofa_user.sh +++ b/build_ofa_user.sh @@ -52,7 +52,7 @@ srptools_branch="master" ipoibtools_git="git://git.openfabrics.org/~vlad/ipoibtools.git" ipoibtools_branch="master" librdmacm_git="git://git.openfabrics.org/~shefty/librdmacm.git" -librdmacm_branch="multicast" +librdmacm_branch="rdma_ucm-abi3" dapl_git="git://git.openfabrics.org/~ardavis/dapl.git" dapl_branch="rdma_ucm" imgen_git="git://git.openfabrics.org/~mst/imgen.git" -- MST From swise at opengridcomputing.com Thu Jan 18 13:27:41 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 15:27:41 -0600 Subject: [openib-general] [PATCH 4/4] ofabuild Use the rdma_ucm-abi3 branch for the librdmacm git tree. In-Reply-To: <20070118212229.GM9890@mellanox.co.il> References: <20070118202833.25557.49832.stgit@dell3.ogc.int> <20070118212229.GM9890@mellanox.co.il> Message-ID: <1169155661.27984.3.camel@stevo-desktop> On Thu, 2007-01-18 at 23:22 +0200, Michael S. Tsirkin wrote: > I do not think we want that. > Sean indicated that OFED should take master and guess it's his decision > as the maintainer. > I thought he said rdma_ucm-abi3. http://openib.org/pipermail/openib-general/2007-January/031572.html Steve. 
From rdreier at cisco.com Thu Jan 18 13:26:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 18 Jan 2007 13:26:58 -0800 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: <20070118211840.GJ9890@mellanox.co.il> (Michael S. Tsirkin's message of "Thu, 18 Jan 2007 23:18:40 +0200") References: <20070118211840.GJ9890@mellanox.co.il> Message-ID: > qpnum is actually only required from poll cq if srq is supported. > so ehca can just fill in NULL. actually the IB spec requires QPN in the work completion if either SRQ or base queue management extensions are supported. I'm not sure why though. It seems at least for current code it is fine for ehca to report wc->qp as NULL. I've update the patch in my for-2.6.21 branch to do that, and pushed it out. From swise at opengridcomputing.com Thu Jan 18 13:28:46 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 15:28:46 -0600 Subject: [openib-general] [PATCH ] ofascripts Create real libibverbs.a library instead of an empty file. In-Reply-To: <20070118212015.GL9890@mellanox.co.il> References: <20070118194515.23135.54454.stgit@dell3.ogc.int> <20070118212015.GL9890@mellanox.co.il> Message-ID: <1169155726.27984.5.camel@stevo-desktop> On Thu, 2007-01-18 at 23:20 +0200, Michael S. Tsirkin wrote: > I thought we did this already. Vlad? > I just pulled and it wasn't there so I posted this patch. > Quoting Steve Wise : > Subject: [PATCH ] ofascripts Create real libibverbs.a library instead of an empty file. > > > Signed-off-by: Steve Wise > --- > > configure | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > diff --git a/configure b/configure > index c0ff88b..216864d 100755 > --- a/configure > +++ b/configure > @@ -600,7 +600,7 @@ ac_cv_func_ibv_register_driver=yes " > [ ! -x ./configure ] && [ ! -x ./autogen.sh ] && continue > [ ! 
-x ./configure ] && ./autogen.sh > > - echo > libibverbs.a > + ar qc libibverbs.a > echo "Running: $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck ${PREFIX} ${LIBDIR} CPPFLAGS=\"-I../libibverbs/include\"" > if ! ( $ENV ./configure --cache-file=${CACHEFILE} --disable-libcheck ${PREFIX} ${LIBDIR} \ > CPPFLAGS="-I../libibverbs/include" LDFLAGS="-L.") > From mst at mellanox.co.il Thu Jan 18 13:40:22 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 23:40:22 +0200 Subject: [openib-general] [PATCH ] ofascripts Create real libibverbs.a library insteadof an empty file. In-Reply-To: <1169155726.27984.5.camel@stevo-desktop> References: <1169155726.27984.5.camel@stevo-desktop> Message-ID: <20070118214022.GO9890@mellanox.co.il> > Quoting Steve Wise : > Subject: Re: [PATCH ] ofascripts Create real libibverbs.a library insteadof an empty file. > > On Thu, 2007-01-18 at 23:20 +0200, Michael S. Tsirkin wrote: > > I thought we did this already. Vlad? > > > > I just pulled and it wasn't there so I posted this patch. > Thanks! BTW if ${AR} is defined we should be using that. -- MST From mst at mellanox.co.il Thu Jan 18 13:50:23 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 18 Jan 2007 23:50:23 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070118194403.GA23783@sashak.voltaire.com> References: <20070118194403.GA23783@sashak.voltaire.com> Message-ID: <20070118215023.GP9890@mellanox.co.il> > Quoting Sasha Khapyorsky : > Subject: Re: win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] > > On 07:00 Thu 18 Jan , Michael S. Tsirkin wrote: > > > What about pure opensource - http://sourceware.org/pthreads-win32/? It > > > is licensed under LGPL, I see on the net many positive reports about > > > stability and usability. > > > > I used it to do a windows port of linux complib at some point and opensm > > seemed to work fine with it. 
What it was lacking at that point was > > support for 64 bit applications, and for some reason (which is > > still unclear to me) there was a strong desire to run opensm in 64 bit mode. > > Seems to have been fixed now, BTW. > > So this seems to be good option for OpenSM on Windows. Right? No idea. Distributing a copy of the pthread DLL with opensm does not look like a problem. But is it worth it? -- MST From swise at opengridcomputing.com Thu Jan 18 14:01:48 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 16:01:48 -0600 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 Message-ID: <1169157708.27984.10.camel@stevo-desktop> Loading ib_local_sa crashes on sles9sp3 with a BUG() trap. Is this a known issue? ----------- [cut here ] --------- [please bite here ] --------- Kernel BUG at workqueue:308 invalid operand: 0000 [1] SMP CPU 0 Pid: 7309, comm: modprobe Tainted: GF U (2.6.5-7.244-smp SLES9_SP3_BRANCH-200512121832250000) RIP: 0010:[] {__create_workqueue+33} RSP: 0000:000001003f4f7d38 EFLAGS: 00010202 RAX: 000000000000000b RBX: ffffffffa02b4e69 RCX: 0000000000000bb8 RDX: 0000000000000bb8 RSI: 0000000000000001 RDI: ffffffffa02b4e69 RBP: 0000000000000001 R08: 000000000000003d R09: 0000000000000000 R10: 00000000000493e0 R11: 0000000000000001 R12: ffffffff803e3fc0 R13: 000000000000000a R14: ffffffff803e3fc0 R15: 0000000000000000 FS: 0000002a95894b00(0000) GS:ffffffff8057cc00(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000539ccf CR3: 0000000000101000 CR4: 00000000000006e0 Process modprobe (pid: 7309, threadinfo 000001003f4f6000, task 000001001fcdc9e0) Stack: 000001003fe3f350 ffffffff803e4020 ffffffffa02b7180 ffffffff803e3fc0 000000000000000a ffffffff803e3fc0 ffffffffa02b2300 ffffffffa02bd08f ffffffff803e4020 ffffffff8015d4dd Call Trace:{:ib_local_sa:sa_db_init+143} {sys_init_module+6589} {generic_file_read+187} {:ib_local_sa:sa_db_init+0} {vfs_read+244} 
{sys_read+157} {system_call+124} Code: 0f 0b 17 66 38 80 ff ff ff ff 34 01 66 66 90 48 8b 3d 61 50 RIP {__create_workqueue+33} RSP <000001003f4f7d38> From swise at opengridcomputing.com Thu Jan 18 14:04:29 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 16:04:29 -0600 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 In-Reply-To: <1169157708.27984.10.camel@stevo-desktop> References: <1169157708.27984.10.camel@stevo-desktop> Message-ID: <1169157869.27984.12.camel@stevo-desktop> It hit this: struct workqueue_struct *__create_workqueue(const char *name, int singlethread) { int cpu, destroy = 0; struct workqueue_struct *wq; struct task_struct *p; BUG_ON(strlen(name) > 10); On Thu, 2007-01-18 at 16:01 -0600, Steve Wise wrote: > Loading ib_local_sa crashes on sles9sp3 with a BUG() trap. Is this a > known issue? > > ----------- [cut here ] --------- [please bite here ] --------- > Kernel BUG at workqueue:308 > invalid operand: 0000 [1] SMP > CPU 0 > Pid: 7309, comm: modprobe Tainted: GF U (2.6.5-7.244-smp SLES9_SP3_BRANCH-200512121832250000) > RIP: 0010:[] {__create_workqueue+33} > RSP: 0000:000001003f4f7d38 EFLAGS: 00010202 > RAX: 000000000000000b RBX: ffffffffa02b4e69 RCX: 0000000000000bb8 > RDX: 0000000000000bb8 RSI: 0000000000000001 RDI: ffffffffa02b4e69 > RBP: 0000000000000001 R08: 000000000000003d R09: 0000000000000000 > R10: 00000000000493e0 R11: 0000000000000001 R12: ffffffff803e3fc0 > R13: 000000000000000a R14: ffffffff803e3fc0 R15: 0000000000000000 > FS: 0000002a95894b00(0000) GS:ffffffff8057cc00(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 0000000000539ccf CR3: 0000000000101000 CR4: 00000000000006e0 > Process modprobe (pid: 7309, threadinfo 000001003f4f6000, task 000001001fcdc9e0) > Stack: 000001003fe3f350 ffffffff803e4020 ffffffffa02b7180 ffffffff803e3fc0 > 000000000000000a ffffffff803e3fc0 ffffffffa02b2300 ffffffffa02bd08f > ffffffff803e4020 
ffffffff8015d4dd > Call Trace:{:ib_local_sa:sa_db_init+143} {sys_init_module+6589} > {generic_file_read+187} {:ib_local_sa:sa_db_init+0} > {vfs_read+244} {sys_read+157} > {system_call+124} > > Code: 0f 0b 17 66 38 80 ff ff ff ff 34 01 66 66 90 48 8b 3d 61 50 > RIP {__create_workqueue+33} RSP <000001003f4f7d38> > From sean.hefty at intel.com Thu Jan 18 14:09:29 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 18 Jan 2007 14:09:29 -0800 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 In-Reply-To: <1169157708.27984.10.camel@stevo-desktop> Message-ID: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> Can you see if the call to create_singlethread_workqueue() passes in a string longer than 8 characters? If so, try reducing the size of the string. I've committed a change to the local_sa.c file this morning to avoid this sort of backport issue, if this is indeed what it is. - Sean >Loading ib_local_sa crashes on sles9sp3 with a BUG() trap. Is this a >known issue? 
> >----------- [cut here ] --------- [please bite here ] --------- >Kernel BUG at workqueue:308 >invalid operand: 0000 [1] SMP >CPU 0 >Pid: 7309, comm: modprobe Tainted: GF U (2.6.5-7.244-smp SLES9_SP3_BRANCH- >200512121832250000) >RIP: 0010:[] {__create_workqueue+33} >RSP: 0000:000001003f4f7d38 EFLAGS: 00010202 >RAX: 000000000000000b RBX: ffffffffa02b4e69 RCX: 0000000000000bb8 >RDX: 0000000000000bb8 RSI: 0000000000000001 RDI: ffffffffa02b4e69 >RBP: 0000000000000001 R08: 000000000000003d R09: 0000000000000000 >R10: 00000000000493e0 R11: 0000000000000001 R12: ffffffff803e3fc0 >R13: 000000000000000a R14: ffffffff803e3fc0 R15: 0000000000000000 >FS: 0000002a95894b00(0000) GS:ffffffff8057cc00(0000) knlGS:0000000000000000 >CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b >CR2: 0000000000539ccf CR3: 0000000000101000 CR4: 00000000000006e0 >Process modprobe (pid: 7309, threadinfo 000001003f4f6000, task >000001001fcdc9e0) >Stack: 000001003fe3f350 ffffffff803e4020 ffffffffa02b7180 ffffffff803e3fc0 > 000000000000000a ffffffff803e3fc0 ffffffffa02b2300 ffffffffa02bd08f > ffffffff803e4020 ffffffff8015d4dd >Call Trace:{:ib_local_sa:sa_db_init+143} >{sys_init_module+6589} > {generic_file_read+187} >{:ib_local_sa:sa_db_init+0} > {vfs_read+244} {sys_read+157} > {system_call+124} > >Code: 0f 0b 17 66 38 80 ff ff ff ff 34 01 66 66 90 48 8b 3d 61 50 >RIP {__create_workqueue+33} RSP <000001003f4f7d38> > > > >_______________________________________________ >openib-general mailing list >openib-general at openib.org >http://openib.org/mailman/listinfo/openib-general > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sashak at voltaire.com Thu Jan 18 14:20:32 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 19 Jan 2007 00:20:32 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070118215023.GP9890@mellanox.co.il> References: <20070118194403.GA23783@sashak.voltaire.com> 
<20070118215023.GP9890@mellanox.co.il> Message-ID: <20070118222032.GD23783@sashak.voltaire.com> On 23:50 Thu 18 Jan , Michael S. Tsirkin wrote: > > Quoting Sasha Khapyorsky : > > Subject: Re: win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] > > > > On 07:00 Thu 18 Jan , Michael S. Tsirkin wrote: > > > > What about pure opensource - http://sourceware.org/pthreads-win32/? It > > > > is licensed under LGPL, I see on the net many positive reports about > > > > stability and usability. > > > > > > I used it to do a windows port of linux complib at some point and opensm > > > seemed to work fine with it. What it was lacking at that point was > > > support for 64 bit applications, and for some reason (which is > > > still unclear to me) there was a strong desire to run opensm in 64 bit mode. > > > Seems to have been fixed now, BTW. > > > > So this seems to be good option for OpenSM on Windows. Right? > > No idea. Distributing a copy of the pthread DLL with opensm does not > look like a problem. But is it worth it? Sure, it makes windows porting much more transparent and let us to use standard *nix stuff w/out #ifndef WIN32. Other (generic) benefit is that posix is more standard and powerful than wrappers like complib. Sasha From swise at opengridcomputing.com Thu Jan 18 14:18:41 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 18 Jan 2007 16:18:41 -0600 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 In-Reply-To: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> References: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> Message-ID: <1169158721.27984.16.camel@stevo-desktop> On Thu, 2007-01-18 at 14:09 -0800, Sean Hefty wrote: > Can you see if the call to create_singlethread_workqueue() passes in a string > longer than 8 characters? If so, try reducing the size of the string. 
> > I've committed a change to the local_sa.c file this morning to avoid this sort > of backport issue, if this is indeed what it is. > > - Sean > That's it! I changed the workqueue name to "lsa" and it worked. So the patch in the ofed tree needs to be updated... Steve. From hch at infradead.org Thu Jan 18 14:27:20 2007 From: hch at infradead.org (Christoph Hellwig) Date: Thu, 18 Jan 2007 22:27:20 +0000 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: References: <200701172312.14840.hnguyen@linux.vnet.ibm.com> Message-ID: <20070118222720.GA5385@infradead.org> On Thu, Jan 18, 2007 at 10:56:01AM -0800, Roland Dreier wrote: > I've kind of lost the plot here. How does this patch fit in with the > previous series of patches you posted? Does it replace them or go on > top of them? It's a cleanup ontop of the actual fix. From jriotto at cisco.com Thu Jan 18 14:46:42 2007 From: jriotto at cisco.com (Jamie Riotto (jriotto)) Date: Thu, 18 Jan 2007 14:46:42 -0800 Subject: [openib-general] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... Message-ID: <944AD9DA9232E346ADF590C41BFFEC41034984C9@xmb-sjc-232.amer.cisco.com> Dear Open Fabrics Community, I am currently Chairperson for the Enterprise Working Group (EWG) committee for OFA. By way of quick introduction, the EWG was formed to promote a single commercial release of the OFA stack, in order to encourage all participating OFA members to test, package and support a unified release. The alternative was to have each company take different snap shots at different times, and release independently of one another (which we felt would confuse and frustrate customers). This resulted in the successful release of OFED 1.1, and continues to drive the process in the form of OFED 1.2 which is the next scheduled release. 
In order to ensure fair and balanced management of this process, I am seeking a Co-Chair to help drive these efforts moving forward. The ideal person would have a fair understanding of the technical issues involved in a release, and perhaps more importantly, a broad view of the evolving industry requirements in order to help steer the inevitable trade-offs made between new technology introductions and current customer stability. If you, or someone you know, would be willing to contribute to the OFA community in this regard, please consider submitting them as a nomination for Co-Chair of the EWG. All nominations should be submitted to me directly at jriotto at cisco.com. Thank you very much - jamie Jamie Riotto Sr. Director Engineering Server Virtualization Business Unit (SVBU) Cisco Systems 408-853-7813 jriotto at cisco.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From jgunthorpe at obsidianresearch.com Thu Jan 18 15:07:44 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Thu, 18 Jan 2007 16:07:44 -0700 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 Message-ID: <20070118230744.GR12386@obsidianresearch.com> Make the untyped data region in ib_user_mad u64 aligned so that casting ib_user_mad to structs with u64s in them works on ia64. 
--- include/rdma/ib_user_mad.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h index 44537aa..d66b15e 100644 --- a/include/rdma/ib_user_mad.h +++ b/include/rdma/ib_user_mad.h @@ -98,7 +98,7 @@ struct ib_user_mad_hdr { */ struct ib_user_mad { struct ib_user_mad_hdr hdr; - __u8 data[0]; + __u64 data[0]; }; /** -- 1.4.1.1 From ftillier.sst at gmail.com Thu Jan 18 15:12:39 2007 From: ftillier.sst at gmail.com (Fabian Tillier) Date: Thu, 18 Jan 2007 15:12:39 -0800 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070118215023.GP9890@mellanox.co.il> References: <20070118194403.GA23783@sashak.voltaire.com> <20070118215023.GP9890@mellanox.co.il> Message-ID: <79ae2f320701181512r651c7e4dl51e92d7f781cdda8@mail.gmail.com> Hi Folks, On 1/18/07, Michael S. Tsirkin wrote: > > Quoting Sasha Khapyorsky : > > Subject: Re: win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] > > > > On 07:00 Thu 18 Jan , Michael S. Tsirkin wrote: > > > > What about pure opensource - http://sourceware.org/pthreads-win32/? It > > > > is licensed under LGPL, I see on the net many positive reports about > > > > stability and usability. > > > > > > I used it to do a windows port of linux complib at some point and opensm > > > seemed to work fine with it. What it was lacking at that point was > > > support for 64 bit applications, and for some reason (which is > > > still unclear to me) there was a strong desire to run opensm in 64 bit mode. > > > Seems to have been fixed now, BTW. > > > > So this seems to be good option for OpenSM on Windows. Right? > > No idea. Distributing a copy of the pthread DLL with opensm does not > look like a problem. But is it worth it? 
Something like the pthread DLL (or even better the static lib version) seems like it would be light-weight enough that it might be worth it given the benefits in keeping OpenSM in sync between Linux and Windows. A larger package might be a deterrent to users, but that's something the IB IHVs that support OpenSM on Windows need to find out for themselves. I think keeping OpenSM as a single executable or an executable and pthread DLL that can be simply copied into place on the target system is highly desirable. Whether OpenSM uses pthreads or complib threads or any other kind of thread internally doesn't really matter to users as long as performance, stability, and usability aren't negatively affected. This obviously doesn't help with the logging issue, but it's a step in making the code simpler and more maintainable. -Fab From eitan at sw053.yok.mtl.com Thu Jan 18 21:25:13 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Fri, 19 Jan 2007 07:25:13 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-19:normal completion Message-ID: <200701190525.l0J5PD2p014788@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Thu_Jan_18_10:20:08_2007 7b4f75 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 
4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From zc_kyo at hotmail.com Thu Jan 18 21:30:14 2007 From: zc_kyo at hotmail.com (zhang chao) Date: Fri, 19 Jan 2007 05:30:14 +0000 Subject: [openib-general] OFED embedded in CentOS 4.4 doesn't work Message-ID: Hi, openib maillist: I have a cluster installed CentOS 4.4 -- which embedded the OFED packages(under /usr/ofed directory). All infiniband drivers and libraries have been installed and I have configured IPoIB, it also works well. The OFED version is 1.0 -- maybe. Now I am trying to install MVAPICH so that I can run my MPI applications over Infiniband. The MVAPICH's version is 0.9.8 -- the latest stable version. I modified make.mvapich.gen2 script_(set the IBHOME to /usr/ofed, and set the IBHOMELIB to /usr/ofed/lib64, this directory contains libibverbs.so, libibcommon.so....., etc.), the installation was successful (MVAPICH recognized my HCA adapter -- Mellonox PCI-Express SDR, and it seems that there were no errors during configure, make and install). Then I wrote a simple mpihello.c program to verify the installation. This program just printf "helloworld" in every process. 
I used mpicc to compile it and when I run it, the problem occurs: [eric at cfx1 testcodes]$ /usr/local/mvapich/bin/mpirun -np 4 -hostfile hostfile2 mpihello libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libopensm.so: /usr/ofed/lib64/infiniband/libopensm.so: undefined symbol: ib_error_str libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libopensm.so: /usr/ofed/lib64/infiniband/libopensm.so: undefined symbol: ib_error_str libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libopensm.so: /usr/ofed/lib64/infiniband/libopensm.so: undefined symbol: ib_error_str libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libopensm.so: /usr/ofed/lib64/infiniband/libopensm.so: undefined symbol: ib_error_str libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libosmcomp-1.2.1.so: /usr/ofed/lib64/infiniband/libosmcomp-1.2.1.so: undefined symbol: osm_log libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libosmcomp.so: /usr/ofed/lib64/infiniband/libosmcomp.so: undefined symbol: osm_log libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libosmvendor-1.2.1.so: /usr/ofed/lib64/infiniband/libosmvendor-1.2.1.so: undefined symbol: ib_error_str libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libosmvendor.so: /usr/ofed/lib64/infiniband/libosmvendor.so: undefined symbol: ib_error_str libibverbs: Warning: couldn't load driver /usr/ofed/lib64/infiniband/libosmvendor_openib.so: /usr/ofed/lib64/infiniband/libosmvendor_openib.so: undefined symbol: ib_error_str mpirun: executable version 1 does not match our version 3. done. I have two questions here: 1. Why the libibverbs wants to find the libraries in /usr/ofed/lib64/infiniband directory? The libraries are under /usr/ofed/lib64 directory but I still copied all the libraries files into the /usr/ofed/lib64/infiniband, whereas the problems still exist. 2. What does the error messages list above mean? 
How to solve it? I have also tried the command: /usr/local/mvapich/bin/mpirun_rsh -np 4 -hostfile ./hostfile2 ./mpihello , this also cannot be executed, the error message is the same. Thanks. Any suggestions are greatly appreciated. Eric 2006-01-19 _________________________________________________________________ 享用世界上最大的电子邮件系统― MSN Hotmail。 http://www.hotmail.com From mst at mellanox.co.il Fri Jan 19 01:11:09 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 19 Jan 2007 11:11:09 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070118230744.GR12386@obsidianresearch.com> References: <20070118230744.GR12386@obsidianresearch.com> Message-ID: <20070119091109.GB28197@mellanox.co.il> > Quoting Jason Gunthorpe : > Subject: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > Make the untyped data region in ib_user_mad u64 aligned so that casting > ib_user_mad to structs with u64s in them works on ia64. > --- > include/rdma/ib_user_mad.h | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) You forgot the S.O.B. line. -- MST From nirvana.code at gmail.com Fri Jan 19 03:12:05 2007 From: nirvana.code at gmail.com (kalash nainwal) Date: Fri, 19 Jan 2007 16:42:05 +0530 Subject: [openib-general] cq callbacks in hardirq Message-ID: Hi, Is there a sound reasoning behind cq callbacks being made in hardirq (and why it should not be called in softirq)? Or is it assumed to be solely the consumer's responsibility to return from callbacks ASAP? Thanks and regards, -Kalash From mst at mellanox.co.il Fri Jan 19 05:10:24 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Fri, 19 Jan 2007 15:10:24 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070118230744.GR12386@obsidianresearch.com> References: <20070118230744.GR12386@obsidianresearch.com> Message-ID: <20070119131024.GD28197@mellanox.co.il> > Quoting Jason Gunthorpe : > Subject: [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > Make the untyped data region in ib_user_mad u64 aligned so that casting > ib_user_mad to structs with u64s in them works on ia64. Note this does not affect the ABI - ib_user_mad format stays the same, the only effect this has is in how ib_user_mad is placed within larger kernel-only structures. So this makes sense to me, and I think this is small enough to be 2.6.20 material. Sean? Roland? Jason, while the patch is a one-liner, I think you should indicate that you can certify the Developer's Certificate of Origin 1.1 -- MST From swise at opengridcomputing.com Fri Jan 19 06:28:16 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 19 Jan 2007 08:28:16 -0600 Subject: [openib-general] [Fwd: Re: [PATCH 1/10] cxgb3 - main header files] Message-ID: <1169216896.15842.6.camel@stevo-desktop> Hey Roland, Jeff has pulled in the Chelsio Ethernet driver. If you are ready to merge in the RDMA driver, you can pull it from git://staging.openfabrics.org/~swise/cxgb3.git for-roland Thanks, Steve. -------- Forwarded Message -------- From: Jeff Garzik To: Divy Le Ray Cc: netdev at vger.kernel.org, linux-kernel at vger.kernel.org, swise at opengridcomputing.com Subject: Re: [PATCH 1/10] cxgb3 - main header files Date: Thu, 18 Jan 2007 22:05:02 -0500 Divy Le Ray wrote: > Jeff Garzik wrote: >> Divy Le Ray wrote: >>> From: Divy Le Ray >>> >>> This patch implements the main header files of >>> the Chelsio T3 network driver. 
>>> >>> Signed-off-by: Divy Le Ray >> >> Once you think it's ready, email me a URL to a single patch that adds >> the driver to the latest linux-2.6.git kernel. Include in the email a >> description of the driver and signed-off-by line, which will get >> directly included in the git changelog. >> >> Adding new drivers is a bit special, because we want to merge it as a >> single changeset, but that would create a patch too large to review on >> the common kernel mailing lists. > Jeff, > > You can grab the monolithic patch at this URL: > http://service.chelsio.com/kernel.org/cxgb3.patch.bz2 applied to netdev-2.6.git#upstream I'm really counting on Chelsio to actively maintain this driver, unlike the abandonware you guys first submitted. Jeff From halr at voltaire.com Fri Jan 19 06:34:30 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 19 Jan 2007 09:34:30 -0500 Subject: [openib-general] [PATCH] opensm: osm_console: don't poll invalid file descriptors In-Reply-To: <20070118104100.GC22422@sashak.voltaire.com> References: <20070118104100.GC22422@sashak.voltaire.com> Message-ID: <1169217269.28635.914.camel@hal.voltaire.com> On Thu, 2007-01-18 at 05:41, Sasha Khapyorsky wrote: > Don't poll invalid file descriptors (-1), when listening socket is > disabled or connection is not accepted yet. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From halr at voltaire.com Fri Jan 19 06:35:23 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 19 Jan 2007 09:35:23 -0500 Subject: [openib-general] [PATCH] opensm: osm_console: don't poll for POLLOUT events. In-Reply-To: <20070118104223.GD22422@sashak.voltaire.com> References: <20070118104100.GC22422@sashak.voltaire.com> <20070118104223.GD22422@sashak.voltaire.com> Message-ID: <1169217305.28635.916.camel@hal.voltaire.com> On Thu, 2007-01-18 at 05:42, Sasha Khapyorsky wrote: > Then connection is accepted POLLOUT event is up most of the time (but > not handled) and osm_console() just loops without doing something. 
> > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From HNGUYEN at de.ibm.com Fri Jan 19 06:45:35 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Fri, 19 Jan 2007 09:45:35 -0500 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: Message-ID: openib-general-bounces at openib.org wrote on 18.01.2007 16:26:58: > > qpnum is actually only required from poll cq if srq is supported. > > so ehca can just fill in NULL. > actually the IB spec requires QPN in the work completion if either SRQ > or base queue management extensions are supported. I'm not sure why > though. It seems at least for current code it is fine for ehca to > report wc->qp as NULL. > I've update the patch in my for-2.6.21 branch to do that, and pushed > it out. Just want to make sure I understand this properly: One day, if eHCA supports srq, will I have to set qp pointer with a valid address or can I still leave it to NULL? Roland, you are talking about QPN. By that you mean also qp pointer? Regards Nam From swise at opengridcomputing.com Fri Jan 19 08:20:29 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 19 Jan 2007 10:20:29 -0600 Subject: [openib-general] libibverbs regression Message-ID: <1169223629.15842.43.camel@stevo-desktop> Roland, I think this breaks things. The man page for asprintf() sez it returns the number of bytes upon success. The patch below assumes non-zero is a failure. So all requests to get the sysfs path will fail. Steve. 
commit dd3d43d89e398f23c4824d26f2698446ff2d120f Author: Dotan Barak Date: Tue Jan 16 12:21:25 2007 -0800 Handle asprintf memory allocation failures Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier diff --git a/src/device.c b/src/device.c index 3bfbca6..f7ef224 100644 --- a/src/device.c +++ b/src/device.c @@ -111,7 +111,8 @@ struct ibv_context *ibv_open_device(stru int cmd_fd; struct ibv_context *context; - asprintf(&devpath, "/dev/infiniband/%s", device->dev_name); + if (asprintf(&devpath, "/dev/infiniband/%s", device->dev_name)) + return NULL; /* * We'll only be doing writes, but we need O_RDWR in case the diff --git a/src/sysfs.c b/src/sysfs.c index e274c03..392a675 100644 --- a/src/sysfs.c +++ b/src/sysfs.c @@ -82,7 +82,8 @@ int ibv_read_sysfs_file(const char *dir, int fd; int len; - asprintf(&path, "%s/%s", dir, file); + if (asprintf(&path, "%s/%s", dir, file)) + return -1; fd = open(path, O_RDONLY); if (fd < 0) { From swise at opengridcomputing.com Fri Jan 19 08:34:24 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 19 Jan 2007 10:34:24 -0600 Subject: [openib-general] libibverbs regression In-Reply-To: <1169223629.15842.43.camel@stevo-desktop> References: <1169223629.15842.43.camel@stevo-desktop> Message-ID: <1169224464.15842.44.camel@stevo-desktop> On Fri, 2007-01-19 at 10:20 -0600, Steve Wise wrote: > Roland, > > I think this breaks things. The man page for asprintf() sez it returns > the number of bytes upon success. The patch below assumes non-zero is a > failure. So all requests to get the sysfs > path will fail. > > Steve. Ignore this. I was on a down level tree. Sorry. Steve. 
From rdreier at cisco.com Fri Jan 19 09:07:57 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 19 Jan 2007 09:07:57 -0800 Subject: [openib-general] [Fwd: Re: [PATCH 1/10] cxgb3 - main header files] In-Reply-To: <1169216896.15842.6.camel@stevo-desktop> (Steve Wise's message of "Fri, 19 Jan 2007 08:28:16 -0600") References: <1169216896.15842.6.camel@stevo-desktop> Message-ID: > Jeff has pulled in the Chelsio Ethernet driver. If you are ready to > merge in the RDMA driver, you can pull it from Yes, I saw that... OK, I'll get serious about reviewing the RDMA stuff. From vlad at dev.mellanox.co.il Fri Jan 19 09:50:09 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Fri, 19 Jan 2007 19:50:09 +0200 Subject: [openib-general] [PATCH RFC 05/10] ofed_1_2 Backport cxgb3 to2.6.15 In-Reply-To: <1169148206.15460.22.camel@stevo-desktop> References: <1169139292.15460.4.camel@stevo-desktop> <20070118173212.GC9890@mellanox.co.il> <20070118173327.GD9890@mellanox.co.il> <1169148206.15460.22.camel@stevo-desktop> Message-ID: <1169229011.23809.87.camel@swlab50.yok.mtl.com> Applied. Regards, Vladimir On Thu, 2007-01-18 at 13:23 -0600, Steve Wise wrote: > Here is what I've coded up. It compiles ok on staging.openfabrics.org. > You can pull this from > > git://staging.openfabrics.org/~swise/ofed_1_2 ofed_1_2 > > Steve. 
> > ---- > > commit b46734fae0a7b519ce22569cce4f5ba2df33aa77 > Author: Steve Wise > Date: Thu Jan 18 11:07:29 2007 -0800 > > Backport to 2.6.15_ubuntu606 > > This is the kernel.org 2.6.15 backport with a few tweaks: > > - recasting addr paramater for __set_bit() and __clear_bit() > - removed backport for __netif_rx_schedule_prep() > > Signed-off-by: Steve Wise > > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/asm/bitops.h b/kernel_addons/backport/2.6.15_ubuntu606/include/asm/bitops.h > new file mode 100644 > index 0000000..58ebe41 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/asm/bitops.h > @@ -0,0 +1,20 @@ > +#ifndef BACKPORT_ASM_BITOPS_TO_2_6_15_UBUNTU606 > +#define BACKPORT_ASM_BITOPS_TO_2_6_15_UBUNTU606 > + > +#include_next > + > +static __inline__ void __backport_set_bit(int nr, void * addr) > +{ > + __set_bit(nr, (volatile void *)addr); > +} > + > +#define __set_bit __backport_set_bit > + > +static __inline__ void __backport_clear_bit(int nr, void * addr) > +{ > + __clear_bit(nr, (volatile void *)addr); > +} > + > +#define __clear_bit __backport_clear_bit > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/bitops.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/bitops.h > new file mode 100644 > index 0000000..d9752fc > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/bitops.h > @@ -0,0 +1,24 @@ > +#include_next > +#ifndef LINUX_BITOPS_BACKPORT_2_6_15 > +#define LINUX_BITOPS_BACKPORT_2_6_15 > + > +static inline int fls64(__u64 x) > +{ > + __u32 h = x >> 32; > + if (h) > + return fls(h) + 32; > + return fls(x); > +} > + > +#endif > +#ifndef LINUX_BITOPS_BACKPORT_2_6_16 > +#define LINUX_BITOPS_BACKPORT_2_6_16 > + > +static inline unsigned fls_long(unsigned long l) > +{ > + if (sizeof(l) == 4) > + return fls(l); > + return fls64(l); > +} > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/device.h 
b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/device.h > new file mode 100644 > index 0000000..324f20e > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/device.h > @@ -0,0 +1,9 @@ > +#ifndef LINUX_DEVICE_BACKPORT_H > +#define LINUX_DEVICE_BACKPORT_H > + > +#include_next > + > +#define add_uevent_var add_hotplug_env_var > +#define uevent hotplug > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/fs.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/fs.h > new file mode 100644 > index 0000000..f0631fb > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/fs.h > @@ -0,0 +1,8 @@ > +#ifndef BACKPORT_LINUX_FS_H > +#define BACKPORT_LINUX_FS_H > + > +#include_next > + > +#define i_private u.generic_ip > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/genalloc.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/genalloc.h > new file mode 100644 > index 0000000..3c23c68 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/genalloc.h > @@ -0,0 +1,42 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > + > + > +/* > + * General purpose special memory pool descriptor. > + */ > +struct gen_pool { > + rwlock_t lock; > + struct list_head chunks; /* list of chunks in this pool */ > + int min_alloc_order; /* minimum allocation order */ > +}; > + > +/* > + * General purpose special memory pool chunk descriptor. 
> + */ > +struct gen_pool_chunk { > + spinlock_t lock; > + struct list_head next_chunk; /* next chunk in pool */ > + unsigned long start_addr; /* starting address of memory chunk */ > + unsigned long end_addr; /* ending address of memory chunk */ > + unsigned long bits[0]; /* bitmap for allocating memory chunk */ > +}; > + > +extern struct gen_pool *ib_gen_pool_create(int, int); > +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); > +extern void ib_gen_pool_destroy(struct gen_pool *); > +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); > +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); > + > +#define gen_pool_create ib_gen_pool_create > +#define gen_pool_add ib_gen_pool_add > +#define gen_pool_destroy ib_gen_pool_destroy > +#define gen_pool_alloc ib_gen_pool_alloc > +#define gen_pool_free ib_gen_pool_free > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/inetdevice.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/inetdevice.h > new file mode 100644 > index 0000000..7a32313 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/inetdevice.h > @@ -0,0 +1,28 @@ > +#ifndef _LINUX_INETDEVICE_BACKPORT_TO_2_6_17 > +#define _LINUX_INETDEVICE_BACKPORT_TO_2_6_17 > + > +#include_next > +#include > + > +static inline struct net_device *xxx_ip_dev_find(u32 addr) > +{ > + struct net_device *dev; > + u32 ip; > + > + read_lock(&dev_base_lock); > + for (dev = dev_base; dev; dev = dev->next) { > + ip = inet_select_addr(dev, 0, RT_SCOPE_LINK); > + if (ip == addr) { > + dev_hold(dev); > + break; > + } > + } > + read_unlock(&dev_base_lock); > + > + return dev; > +} > + > +#define ip_dev_find xxx_ip_dev_find > + > +#endif > + > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/interrupt.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/interrupt.h > new file mode 100644 > index 0000000..66e66a9 > --- /dev/null > +++ 
b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/interrupt.h > @@ -0,0 +1,17 @@ > +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 > +#include_next > + > +static inline int > +backport_request_irq(unsigned int irq, > + irqreturn_t (*handler)(int, void *), > + unsigned long flags, const char *dev_name, void *dev_id) > +{ > + return request_irq(irq, > + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, > + flags, dev_name, dev_id); > +} > + > +#define request_irq backport_request_irq > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/kernel.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/kernel.h > new file mode 100644 > index 0000000..a37dcd5 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/kernel.h > @@ -0,0 +1,7 @@ > +#ifndef BACKPORT_KERNEL_H_2_6_19 > +#define BACKPORT_KERNEL_H_2_6_19 > + > +#include_next > +#include > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/lockdep.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/lockdep.h > new file mode 100644 > index 0000000..0c34f36 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/lockdep.h > @@ -0,0 +1,355 @@ > +/* > + * Runtime locking correctness validator > + * > + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar > + * > + * see Documentation/lockdep-design.txt for more details. 
> + */ > +#ifndef __LINUX_LOCKDEP_H > +#define __LINUX_LOCKDEP_H > + > +#if 0 > +#include > +#include > +#include > +#include > +#endif > + > +#ifdef CONFIG_LOCKDEP > + > +/* > + * Lock-class usage-state bits: > + */ > +enum lock_usage_bit > +{ > + LOCK_USED = 0, > + LOCK_USED_IN_HARDIRQ, > + LOCK_USED_IN_SOFTIRQ, > + LOCK_ENABLED_SOFTIRQS, > + LOCK_ENABLED_HARDIRQS, > + LOCK_USED_IN_HARDIRQ_READ, > + LOCK_USED_IN_SOFTIRQ_READ, > + LOCK_ENABLED_SOFTIRQS_READ, > + LOCK_ENABLED_HARDIRQS_READ, > + LOCK_USAGE_STATES > +}; > + > +/* > + * Usage-state bitmasks: > + */ > +#define LOCKF_USED (1 << LOCK_USED) > +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) > +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) > +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) > +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) > + > +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) > +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) > + > +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) > +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) > +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) > +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) > + > +#define LOCKF_ENABLED_IRQS_READ \ > + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) > +#define LOCKF_USED_IN_IRQ_READ \ > + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) > + > +#define MAX_LOCKDEP_SUBCLASSES 8UL > + > +/* > + * Lock-classes are keyed via unique addresses, by embedding the > + * lockclass-key into the kernel (or module) .data section. (For > + * static locks we use the lock address itself as the key.) 
> + */ > +struct lockdep_subclass_key { > + char __one_byte; > +} __attribute__ ((__packed__)); > + > +struct lock_class_key { > + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; > +}; > + > +/* > + * The lock-class itself: > + */ > +struct lock_class { > + /* > + * class-hash: > + */ > + struct list_head hash_entry; > + > + /* > + * global list of all lock-classes: > + */ > + struct list_head lock_entry; > + > + struct lockdep_subclass_key *key; > + unsigned int subclass; > + > + /* > + * IRQ/softirq usage tracking bits: > + */ > + unsigned long usage_mask; > + struct stack_trace usage_traces[LOCK_USAGE_STATES]; > + > + /* > + * These fields represent a directed graph of lock dependencies, > + * to every node we attach a list of "forward" and a list of > + * "backward" graph nodes. > + */ > + struct list_head locks_after, locks_before; > + > + /* > + * Generation counter, when doing certain classes of graph walking, > + * to ensure that we check one node only once: > + */ > + unsigned int version; > + > + /* > + * Statistics counter: > + */ > + unsigned long ops; > + > + const char *name; > + int name_version; > +}; > + > +/* > + * Map the lock object (the lock instance) to the lock-class object. > + * This is embedded into specific lock instances: > + */ > +struct lockdep_map { > + struct lock_class_key *key; > + struct lock_class *class_cache; > + const char *name; > +}; > + > +/* > + * Every lock has a list of other locks that were taken after it. > + * We only grow the list, never remove from it: > + */ > +struct lock_list { > + struct list_head entry; > + struct lock_class *class; > + struct stack_trace trace; > +}; > + > +/* > + * We record lock dependency chains, so that we can cache them: > + */ > +struct lock_chain { > + struct list_head entry; > + u64 chain_key; > +}; > + > +struct held_lock { > + /* > + * One-way hash of the dependency chain up to this point. We > + * hash the hashes step by step as the dependency chain grows. 
> + * > + * We use it for dependency-caching and we skip detection > + * passes and dependency-updates if there is a cache-hit, so > + * it is absolutely critical for 100% coverage of the validator > + * to have a unique key value for every unique dependency path > + * that can occur in the system, to make a unique hash value > + * as likely as possible - hence the 64-bit width. > + * > + * The task struct holds the current hash value (initialized > + * with zero), here we store the previous hash value: > + */ > + u64 prev_chain_key; > + struct lock_class *class; > + unsigned long acquire_ip; > + struct lockdep_map *instance; > + > + /* > + * The lock-stack is unified in that the lock chains of interrupt > + * contexts nest ontop of process context chains, but we 'separate' > + * the hashes by starting with 0 if we cross into an interrupt > + * context, and we also keep do not add cross-context lock > + * dependencies - the lock usage graph walking covers that area > + * anyway, and we'd just unnecessarily increase the number of > + * dependencies otherwise. [Note: hardirq and softirq contexts > + * are separated from each other too.] 
> + * > + * The following field is used to detect when we cross into an > + * interrupt context: > + */ > + int irq_context; > + int trylock; > + int read; > + int check; > + int hardirqs_off; > +}; > + > +/* > + * Initialization, self-test and debugging-output methods: > + */ > +extern void lockdep_init(void); > +extern void lockdep_info(void); > +extern void lockdep_reset(void); > +extern void lockdep_reset_lock(struct lockdep_map *lock); > +extern void lockdep_free_key_range(void *start, unsigned long size); > + > +extern void lockdep_off(void); > +extern void lockdep_on(void); > +extern int lockdep_internal(void); > + > +/* > + * These methods are used by specific locking variants (spinlocks, > + * rwlocks, mutexes and rwsems) to pass init/acquire/release events > + * to lockdep: > + */ > + > +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, > + struct lock_class_key *key); > + > +/* > + * Reinitialize a lock key - for cases where there is special locking or > + * special initialization of locks so that the validator gets the scope > + * of dependencies wrong: they are either too broad (they need a class-split) > + * or they are too narrow (they suffer from a false class-split): > + */ > +#define lockdep_set_class(lock, key) \ > + lockdep_init_map(&(lock)->dep_map, #key, key) > +#define lockdep_set_class_and_name(lock, key, name) \ > + lockdep_init_map(&(lock)->dep_map, name, key) > + > +/* > + * Acquire a lock. > + * > + * Values for "read": > + * > + * 0: exclusive (write) acquire > + * 1: read-acquire (no recursion allowed) > + * 2: read-acquire with same-instance recursion allowed > + * > + * Values for check: > + * > + * 0: disabled > + * 1: simple checks (freeing, held-at-exit-time, etc.) 
> + * 2: full validation > + */ > +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, > + int trylock, int read, int check, unsigned long ip); > + > +extern void lock_release(struct lockdep_map *lock, int nested, > + unsigned long ip); > + > +# define INIT_LOCKDEP .lockdep_recursion = 0, > + > +#else /* !LOCKDEP */ > + > +static inline void lockdep_off(void) > +{ > +} > + > +static inline void lockdep_on(void) > +{ > +} > + > +static inline int lockdep_internal(void) > +{ > + return 0; > +} > + > +# define lock_acquire(l, s, t, r, c, i) do { } while (0) > +# define lock_release(l, n, i) do { } while (0) > +# define lockdep_init() do { } while (0) > +# define lockdep_info() do { } while (0) > +# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0) > +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) > +# define lockdep_set_class_and_name(lock, key, name) \ > + do { (void)(key); } while (0) > +# define INIT_LOCKDEP > +# define lockdep_reset() do { debug_locks = 1; } while (0) > +# define lockdep_free_key_range(start, size) do { } while (0) > +/* > + * The class key takes no space if lockdep is disabled: > + */ > +struct lock_class_key { }; > +#endif /* !LOCKDEP */ > + > +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) > +extern void early_init_irq_lock_class(void); > +#else > +# define early_init_irq_lock_class() do { } while (0) > +#endif > + > +#ifdef CONFIG_TRACE_IRQFLAGS > +extern void early_boot_irqs_off(void); > +extern void early_boot_irqs_on(void); > +#else > +# define early_boot_irqs_off() do { } while (0) > +# define early_boot_irqs_on() do { } while (0) > +#endif > + > +/* > + * For trivial one-depth nesting of a lock-class, the following > + * global define can be used. (Subsystems with multiple levels > + * of nesting should define their own lock-nesting subclasses.) 
> + */ > +#define SINGLE_DEPTH_NESTING 1 > + > +/* > + * Map the dependency ops to NOP or to real lockdep ops, depending > + * on the per lock-class debug mode: > + */ > + > +#ifdef CONFIG_DEBUG_LOCK_ALLOC > +# ifdef CONFIG_PROVE_LOCKING > +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) > +# else > +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) > +# endif > +# define spin_release(l, n, i) lock_release(l, n, i) > +#else > +# define spin_acquire(l, s, t, i) do { } while (0) > +# define spin_release(l, n, i) do { } while (0) > +#endif > + > +#ifdef CONFIG_DEBUG_LOCK_ALLOC > +# ifdef CONFIG_PROVE_LOCKING > +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) > +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) > +# else > +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) > +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) > +# endif > +# define rwlock_release(l, n, i) lock_release(l, n, i) > +#else > +# define rwlock_acquire(l, s, t, i) do { } while (0) > +# define rwlock_acquire_read(l, s, t, i) do { } while (0) > +# define rwlock_release(l, n, i) do { } while (0) > +#endif > + > +#ifdef CONFIG_DEBUG_LOCK_ALLOC > +# ifdef CONFIG_PROVE_LOCKING > +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) > +# else > +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) > +# endif > +# define mutex_release(l, n, i) lock_release(l, n, i) > +#else > +# define mutex_acquire(l, s, t, i) do { } while (0) > +# define mutex_release(l, n, i) do { } while (0) > +#endif > + > +#ifdef CONFIG_DEBUG_LOCK_ALLOC > +# ifdef CONFIG_PROVE_LOCKING > +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) > +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) > +# else > +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) > +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) > +# endif > +# 
define rwsem_release(l, n, i) lock_release(l, n, i) > +#else > +# define rwsem_acquire(l, s, t, i) do { } while (0) > +# define rwsem_acquire_read(l, s, t, i) do { } while (0) > +# define rwsem_release(l, n, i) do { } while (0) > +#endif > + > +#endif /* __LINUX_LOCKDEP_H */ > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/log2.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/log2.h > new file mode 100644 > index 0000000..d02e1a5 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/log2.h > @@ -0,0 +1,157 @@ > +/* Integer base 2 logarithm calculation > + * > + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. > + * Written by David Howells (dhowells at redhat.com) > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. 
> + */ > + > +#ifndef _LINUX_LOG2_H > +#define _LINUX_LOG2_H > + > +#include > +#include > + > +/* > + * deal with unrepresentable constant logarithms > + */ > +extern __attribute__((const, noreturn)) > +int ____ilog2_NaN(void); > + > +/* > + * non-constant log of base 2 calculators > + * - the arch may override these in asm/bitops.h if they can be implemented > + * more efficiently than using fls() and fls64() > + * - the arch is not required to handle n==0 if implementing the fallback > + */ > +#ifndef CONFIG_ARCH_HAS_ILOG2_U32 > +static inline __attribute__((const)) > +int __ilog2_u32(u32 n) > +{ > + return fls(n) - 1; > +} > +#endif > + > +#ifndef CONFIG_ARCH_HAS_ILOG2_U64 > +static inline __attribute__((const)) > +int __ilog2_u64(u64 n) > +{ > + return fls64(n) - 1; > +} > +#endif > + > +/* > + * round up to nearest power of two > + */ > +static inline __attribute__((const)) > +unsigned long __roundup_pow_of_two(unsigned long n) > +{ > + return 1UL << fls_long(n - 1); > +} > + > +/** > + * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value > + * @n - parameter > + * > + * constant-capable log of base 2 calculation > + * - this can be used to initialise global variables from constant data, hence > + * the massive ternary operator construction > + * > + * selects the appropriately-sized optimised version depending on sizeof(n) > + */ > +#define ilog2(n) \ > +( \ > + __builtin_constant_p(n) ? ( \ > + (n) < 1 ? ____ilog2_NaN() : \ > + (n) & (1ULL << 63) ? 63 : \ > + (n) & (1ULL << 62) ? 62 : \ > + (n) & (1ULL << 61) ? 61 : \ > + (n) & (1ULL << 60) ? 60 : \ > + (n) & (1ULL << 59) ? 59 : \ > + (n) & (1ULL << 58) ? 58 : \ > + (n) & (1ULL << 57) ? 57 : \ > + (n) & (1ULL << 56) ? 56 : \ > + (n) & (1ULL << 55) ? 55 : \ > + (n) & (1ULL << 54) ? 54 : \ > + (n) & (1ULL << 53) ? 53 : \ > + (n) & (1ULL << 52) ? 52 : \ > + (n) & (1ULL << 51) ? 51 : \ > + (n) & (1ULL << 50) ? 50 : \ > + (n) & (1ULL << 49) ? 49 : \ > + (n) & (1ULL << 48) ? 
48 : \ > + (n) & (1ULL << 47) ? 47 : \ > + (n) & (1ULL << 46) ? 46 : \ > + (n) & (1ULL << 45) ? 45 : \ > + (n) & (1ULL << 44) ? 44 : \ > + (n) & (1ULL << 43) ? 43 : \ > + (n) & (1ULL << 42) ? 42 : \ > + (n) & (1ULL << 41) ? 41 : \ > + (n) & (1ULL << 40) ? 40 : \ > + (n) & (1ULL << 39) ? 39 : \ > + (n) & (1ULL << 38) ? 38 : \ > + (n) & (1ULL << 37) ? 37 : \ > + (n) & (1ULL << 36) ? 36 : \ > + (n) & (1ULL << 35) ? 35 : \ > + (n) & (1ULL << 34) ? 34 : \ > + (n) & (1ULL << 33) ? 33 : \ > + (n) & (1ULL << 32) ? 32 : \ > + (n) & (1ULL << 31) ? 31 : \ > + (n) & (1ULL << 30) ? 30 : \ > + (n) & (1ULL << 29) ? 29 : \ > + (n) & (1ULL << 28) ? 28 : \ > + (n) & (1ULL << 27) ? 27 : \ > + (n) & (1ULL << 26) ? 26 : \ > + (n) & (1ULL << 25) ? 25 : \ > + (n) & (1ULL << 24) ? 24 : \ > + (n) & (1ULL << 23) ? 23 : \ > + (n) & (1ULL << 22) ? 22 : \ > + (n) & (1ULL << 21) ? 21 : \ > + (n) & (1ULL << 20) ? 20 : \ > + (n) & (1ULL << 19) ? 19 : \ > + (n) & (1ULL << 18) ? 18 : \ > + (n) & (1ULL << 17) ? 17 : \ > + (n) & (1ULL << 16) ? 16 : \ > + (n) & (1ULL << 15) ? 15 : \ > + (n) & (1ULL << 14) ? 14 : \ > + (n) & (1ULL << 13) ? 13 : \ > + (n) & (1ULL << 12) ? 12 : \ > + (n) & (1ULL << 11) ? 11 : \ > + (n) & (1ULL << 10) ? 10 : \ > + (n) & (1ULL << 9) ? 9 : \ > + (n) & (1ULL << 8) ? 8 : \ > + (n) & (1ULL << 7) ? 7 : \ > + (n) & (1ULL << 6) ? 6 : \ > + (n) & (1ULL << 5) ? 5 : \ > + (n) & (1ULL << 4) ? 4 : \ > + (n) & (1ULL << 3) ? 3 : \ > + (n) & (1ULL << 2) ? 2 : \ > + (n) & (1ULL << 1) ? 1 : \ > + (n) & (1ULL << 0) ? 0 : \ > + ____ilog2_NaN() \ > + ) : \ > + (sizeof(n) <= 4) ? 
\ > + __ilog2_u32(n) : \ > + __ilog2_u64(n) \ > + ) > + > +/** > + * roundup_pow_of_two - round the given value up to nearest power of two > + * @n - parameter > + * > + * round the given value up to the nearest power of two > + * - the result is undefined when n == 0 > + * - this can be used to initialise global variables from constant data > + */ > +#define roundup_pow_of_two(n) \ > +( \ > + __builtin_constant_p(n) ? ( \ > + (n == 1) ? 0 : \ > + (1UL << (ilog2((n) - 1) + 1)) \ > + ) : \ > + __roundup_pow_of_two(n) \ > + ) > + > +#endif /* _LINUX_LOG2_H */ > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/mutex.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/mutex.h > new file mode 100644 > index 0000000..ef5a1b4 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/mutex.h > @@ -0,0 +1,25 @@ > +/* > + * Copyright (c) 2006 Cisco Systems. All rights reserved. > + * > + * This file is released under the GPLv2. > + */ > + > +/* mutex compatibility for pre-2.6.16 kernels */ > + > +#ifndef __LINUX_MUTEX_H > +#define __LINUX_MUTEX_H > + > +#include > +#include > + > +#define mutex semaphore > +#define DEFINE_MUTEX(foo) DECLARE_MUTEX(foo) > +#define mutex_init(foo) init_MUTEX(foo) > +#define mutex_lock(foo) down(foo) > +#define mutex_lock_interruptible(foo) down_interruptible(foo) > +/* this function follows the spin_trylock() convention, so * > + * it is negated to the down_trylock() return values!
Be careful */ > +#define mutex_trylock(foo) !down_trylock(foo) > +#define mutex_unlock(foo) up(foo) > + > +#endif /* __LINUX_MUTEX_H */ > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/netdevice.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/netdevice.h > new file mode 100644 > index 0000000..225eeda > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/netdevice.h > @@ -0,0 +1,22 @@ > +#ifndef _LINUX_NETDEVICE_BACKPORT_TO_2_6_16 > +#define _LINUX_NETDEVICE_BACKPORT_TO_2_6_16 > + > +#include_next > + > +static inline void netif_tx_lock(struct net_device *dev) > +{ > + spin_lock(&dev->xmit_lock); > + dev->xmit_lock_owner = smp_processor_id(); > +} > + > +static inline void netif_tx_unlock(struct net_device *dev) > +{ > + dev->xmit_lock_owner = -1; > + spin_unlock(&dev->xmit_lock); > +} > + > +#undef SET_ETHTOOL_OPS > +#define SET_ETHTOOL_OPS(netdev, ops) \ > + (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/random.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/random.h > new file mode 100644 > index 0000000..2ea2e1f > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/random.h > @@ -0,0 +1,15 @@ > +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 > +#include_next > + > +static inline u32 backport_random32(void) > +{ > + u32 v; > + > + get_random_bytes(&v, sizeof(u32)); > + return v; > +} > + > +#define random32 backport_random32 > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/rwsem.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/rwsem.h > new file mode 100644 > index 0000000..1ad4e13 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/rwsem.h > @@ -0,0 +1,8 @@ > +#ifndef _LINUX_RWSEM_BACKPORT_TO_2_6_17 > +#define _LINUX_RWSEM_BACKPORT_TO_2_6_17 > + > +#include_next > + > +#define 
down_read_nested(sem, subclass) down_read(sem) > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/signal.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/signal.h > new file mode 100644 > index 0000000..78adbab > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/signal.h > @@ -0,0 +1,8 @@ > +#ifndef _LINUX_SIGNAL_BACKPORT_2_6_17 > +#define _LINUX_SIGNAL_BACKPORT_2_6_17 > + > +#include_next > + > +#define IRQF_SHARED SA_SHIRQ > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/skbuff.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/skbuff.h > new file mode 100644 > index 0000000..70bf011 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/skbuff.h > @@ -0,0 +1,11 @@ > +#ifndef LINUX_SKBUFF_H_BACKPORT > +#define LINUX_SKBUFF_H_BACKPORT > + > +#include_next > + > +#define CHECKSUM_PARTIAL CHECKSUM_HW > +#define CHECKSUM_COMPLETE CHECKSUM_HW > + > +#define gso_size tso_size > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/slab.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/slab.h > new file mode 100644 > index 0000000..46ac6e5 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/slab.h > @@ -0,0 +1,34 @@ > +#include_next > + > +#include_next > + > +#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > +#define BACKPORT_LINUX_STRING_TO_2_6_18 > + > +static inline > +void *kmemdup(const void *src, size_t len, gfp_t gfp) > +{ > + void *p; > + > + p = kmalloc(len, gfp); > + if (p) > + memcpy(p, src, len); > + return p; > +} > + > +#endif > +#ifndef BACKPORT_LINUX_STRING_TO_2_6_18 > +#define BACKPORT_LINUX_STRING_TO_2_6_18 > + > +static inline > +void *kmemdup(const void *src, size_t len, gfp_t gfp) > +{ > + void *p; > + > + p = kmalloc(len, gfp); > + if (p) > + memcpy(p, src, len); > + return p; > +} > + > +#endif > diff --git 
a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/spinlock.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/spinlock.h > new file mode 100644 > index 0000000..db39389 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/spinlock.h > @@ -0,0 +1,7 @@ > +#ifndef BACKPORT_LINUX_SPINLOCK_H > +#define BACKPORT_LINUX_SPINLOCK_H > + > +#include_next > +#define spin_lock_nested(lock, subclass) spin_lock(lock) > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/types.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/types.h > new file mode 100644 > index 0000000..86e334f > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/types.h > @@ -0,0 +1,6 @@ > +#ifndef BACKPORT_LINUX_TYPES_TO_2_6_15 > +#define BACKPORT_LINUX_TYPES_TO_2_6_15 > +#include_next > + > +#define BITS_PER_BYTE 8 > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/linux/workqueue.h b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/workqueue.h > new file mode 100644 > index 0000000..cc8b2cd > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/linux/workqueue.h > @@ -0,0 +1,50 @@ > +#ifndef BACKPORT_LINUX_WORKQUEUE_TO_2_6_19 > +#define BACKPORT_LINUX_WORKQUEUE_TO_2_6_19 > + > +#include_next > + > +struct delayed_work { > + struct work_struct work; > +}; > + > +static inline void > +backport_INIT_WORK(struct work_struct *work, void *func) > +{ > + INIT_WORK(work, func, work); > +} > + > +static inline int backport_queue_delayed_work(struct workqueue_struct *wq, > + struct delayed_work *work, > + unsigned long delay) > +{ > + return queue_delayed_work(wq, &work->work, delay); > +} > + > +static inline int > +backport_cancel_delayed_work(struct delayed_work *work) > +{ > + return cancel_delayed_work(&work->work); > +} > + > +static inline void > +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) > +{ > + 
cancel_rearming_delayed_workqueue(wq, &work->work); > +} > + > + > +#undef INIT_WORK > +#define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) > +#define INIT_DELAYED_WORK(_work, _func) INIT_WORK(&(_work)->work, _func) > + > +#undef DECLARE_WORK > +#define DECLARE_WORK(n, f) \ > + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) > +#define DECLARE_DELAYED_WORK(n, f) \ > + struct delayed_work n = { .work = __WORK_INITIALIZER(n.work, f, &(n.work)) } > + > +#define queue_delayed_work backport_queue_delayed_work > +#define cancel_delayed_work backport_cancel_delayed_work > +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/net/inet_sock.h b/kernel_addons/backport/2.6.15_ubuntu606/include/net/inet_sock.h > new file mode 100644 > index 0000000..962da47 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/net/inet_sock.h > @@ -0,0 +1,6 @@ > +#ifndef NET_INET_SOCK_H > +#define NET_INET_SOCK_H > + > +#include > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/net/netevent.h b/kernel_addons/backport/2.6.15_ubuntu606/include/net/netevent.h > new file mode 100644 > index 0000000..e5d2162 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/net/netevent.h > @@ -0,0 +1,33 @@ > +#ifndef _NET_EVENT_H > +#define _NET_EVENT_H > + > +/* > + * Generic netevent notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * Changes: > + */ > +#ifdef __KERNEL__ > + > +#include > + > +struct netevent_redirect { > + struct dst_entry *old; > + struct dst_entry *new; > +}; > + > +enum netevent_notif_type { > + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ > + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ > + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ > +}; > + > +extern int register_netevent_notifier(struct notifier_block *nb); > 
+extern int unregister_netevent_notifier(struct notifier_block *nb); > +extern int call_netevent_notifiers(unsigned long val, void *v); > + > +#endif > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/net/sock.h b/kernel_addons/backport/2.6.15_ubuntu606/include/net/sock.h > new file mode 100644 > index 0000000..f621a71 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/net/sock.h > @@ -0,0 +1,8 @@ > +#ifndef _NET_SOCK_SLES_BACKPORT_H > +#define _NET_SOCK_SLES_BACKPORT_H > + > +#include_next > + > +#define sk_eat_skb(a, b, c) sk_eat_skb(a, b) > + > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/scsi/scsi.h b/kernel_addons/backport/2.6.15_ubuntu606/include/scsi/scsi.h > new file mode 100644 > index 0000000..352330d > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/scsi/scsi.h > @@ -0,0 +1,7 @@ > +#ifndef _SCSI_SCSI_H_BACKPORT > +#define _SCSI_SCSI_H_BACKPORT > + > +#include_next > + > +#define SCAN_WILD_CARD ~0 > +#endif > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/src/genalloc.c b/kernel_addons/backport/2.6.15_ubuntu606/include/src/genalloc.c > new file mode 100644 > index 0000000..75ae68c > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/src/genalloc.c > @@ -0,0 +1,198 @@ > +/* > + * Basic general purpose allocator for managing special purpose memory > + * not managed by the regular kmalloc/kfree interface. > + * Uses for this includes on-device special memory, uncached memory > + * etc. > + * > + * Copyright 2005 (C) Jes Sorensen > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. 
> + */ > + > +#include > +#include > + > + > +/** > + * gen_pool_create - create a new special memory pool > + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents > + * @nid: node id of the node the pool structure should be allocated on, or -1 > + * > + * Create a new special memory pool that can be used to manage special purpose > + * memory not managed by the regular kmalloc/kfree interface. > + */ > +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) > +{ > + struct gen_pool *pool; > + > + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); > + if (pool != NULL) { > + rwlock_init(&pool->lock); > + INIT_LIST_HEAD(&pool->chunks); > + pool->min_alloc_order = min_alloc_order; > + } > + return pool; > +} > +EXPORT_SYMBOL(gen_pool_create); > + > +/** > + * gen_pool_add - add a new chunk of special memory to the pool > + * @pool: pool to add new memory chunk to > + * @addr: starting address of memory chunk to add to pool > + * @size: size in bytes of the memory chunk to add to pool > + * @nid: node id of the node the chunk structure and bitmap should be > + * allocated on, or -1 > + * > + * Add a new chunk of special memory to the specified pool. 
> + */ > +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, > + int nid) > +{ > + struct gen_pool_chunk *chunk; > + int nbits = size >> pool->min_alloc_order; > + int nbytes = sizeof(struct gen_pool_chunk) + > + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; > + > + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); > + if (unlikely(chunk == NULL)) > + return -1; > + > + memset(chunk, 0, nbytes); > + spin_lock_init(&chunk->lock); > + chunk->start_addr = addr; > + chunk->end_addr = addr + size; > + > + write_lock(&pool->lock); > + list_add(&chunk->next_chunk, &pool->chunks); > + write_unlock(&pool->lock); > + > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_add); > + > +/** > + * gen_pool_destroy - destroy a special memory pool > + * @pool: pool to destroy > + * > + * Destroy the specified special memory pool. Verifies that there are no > + * outstanding allocations. > + */ > +void gen_pool_destroy(struct gen_pool *pool) > +{ > + struct list_head *_chunk, *_next_chunk; > + struct gen_pool_chunk *chunk; > + int order = pool->min_alloc_order; > + int bit, end_bit; > + > + > + write_lock(&pool->lock); > + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + list_del(&chunk->next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + bit = find_next_bit(chunk->bits, end_bit, 0); > + BUG_ON(bit < end_bit); > + > + kfree(chunk); > + } > + kfree(pool); > + return; > +} > +EXPORT_SYMBOL(gen_pool_destroy); > + > +/** > + * gen_pool_alloc - allocate special memory from the pool > + * @pool: pool to allocate from > + * @size: number of bytes to allocate from the pool > + * > + * Allocate the requested number of bytes from the specified pool. > + * Uses a first-fit algorithm. 
> + */ > +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long addr, flags; > + int order = pool->min_alloc_order; > + int nbits, bit, start_bit, end_bit; > + > + if (size == 0) > + return 0; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + end_bit = (chunk->end_addr - chunk->start_addr) >> order; > + end_bit -= nbits + 1; > + > + spin_lock_irqsave(&chunk->lock, flags); > + bit = -1; > + while (bit + 1 < end_bit) { > + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); > + if (bit >= end_bit) > + break; > + > + start_bit = bit; > + if (nbits > 1) { > + bit = find_next_bit(chunk->bits, bit + nbits, > + bit + 1); > + if (bit - start_bit < nbits) > + continue; > + } > + > + addr = chunk->start_addr + > + ((unsigned long)start_bit << order); > + while (nbits--) > + __set_bit(start_bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + read_unlock(&pool->lock); > + return addr; > + } > + spin_unlock_irqrestore(&chunk->lock, flags); > + } > + read_unlock(&pool->lock); > + return 0; > +} > +EXPORT_SYMBOL(gen_pool_alloc); > + > +/** > + * gen_pool_free - free allocated special memory back to the pool > + * @pool: pool to free to > + * @addr: starting address of memory to free back to pool > + * @size: size in bytes of memory to free > + * > + * Free previously allocated special memory back to the specified pool. 
> + */ > +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) > +{ > + struct list_head *_chunk; > + struct gen_pool_chunk *chunk; > + unsigned long flags; > + int order = pool->min_alloc_order; > + int bit, nbits; > + > + nbits = (size + (1UL << order) - 1) >> order; > + > + read_lock(&pool->lock); > + list_for_each(_chunk, &pool->chunks) { > + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); > + > + if (addr >= chunk->start_addr && addr < chunk->end_addr) { > + BUG_ON(addr + size > chunk->end_addr); > + spin_lock_irqsave(&chunk->lock, flags); > + bit = (addr - chunk->start_addr) >> order; > + while (nbits--) > + __clear_bit(bit++, &chunk->bits); > + spin_unlock_irqrestore(&chunk->lock, flags); > + break; > + } > + } > + BUG_ON(nbits > 0); > + read_unlock(&pool->lock); > +} > +EXPORT_SYMBOL(gen_pool_free); > diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c b/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c > new file mode 100644 > index 0000000..5ffadd1 > --- /dev/null > +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c > @@ -0,0 +1,71 @@ > +/* > + * Network event notifiers > + * > + * Authors: > + * Tom Tucker > + * Steve Wise > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * Fixes: > + */ > + > +#include > +#include > +#include > +#include > + > +static struct notifier_block *netevent_notif_chain; > + > +/** > + * register_netevent_notifier - register a netevent notifier block > + * @nb: notifier > + * > + * Register a notifier to be called when a netevent occurs. > + * The notifier passed is linked into the kernel structures and must > + * not be reused until it has been unregistered. A negative errno code > + * is returned on a failure. 
> + */ > +int register_netevent_notifier(struct notifier_block *nb) > +{ > + int err; > + > + err = notifier_chain_register(&netevent_notif_chain, nb); > + return err; > +} > + > +/** > + * unregister_netevent_notifier - unregister a netevent notifier block > + * @nb: notifier > + * > + * Unregister a notifier previously registered by > + * register_netevent_notifier(). The notifier is unlinked from the > + * kernel structures and may then be reused. A negative errno code > + * is returned on a failure. > + */ > + > +int unregister_netevent_notifier(struct notifier_block *nb) > +{ > + return notifier_chain_unregister(&netevent_notif_chain, nb); > +} > + > +/** > + * call_netevent_notifiers - call all netevent notifier blocks > + * @val: value passed unmodified to notifier function > + * @v: pointer passed unmodified to notifier function > + * > + * Call all netevent notifier blocks. Parameters and return value > + * are as for notifier_call_chain(). > + */ > + > +int call_netevent_notifiers(unsigned long val, void *v) > +{ > + return notifier_call_chain(&netevent_notif_chain, val, v); > +} > + > +EXPORT_SYMBOL_GPL(register_netevent_notifier); > +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); > +EXPORT_SYMBOL_GPL(call_netevent_notifiers); > diff --git a/kernel_patches/backport/2.6.15_ubuntu606/1_struct_path_revert_to_2_6_19.patch b/kernel_patches/backport/2.6.15_ubuntu606/1_struct_path_revert_to_2_6_19.patch > new file mode 100644 > index 0000000..27eb62e > --- /dev/null > +++ b/kernel_patches/backport/2.6.15_ubuntu606/1_struct_path_revert_to_2_6_19.patch > @@ -0,0 +1,82 @@ > +diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c > +index a617ca7..4e16314 100644 > +--- a/drivers/infiniband/core/uverbs_main.c > ++++ b/drivers/infiniband/core/uverbs_main.c > +@@ -534,9 +534,9 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, > + * module reference.
> + */ > + filp->f_op = fops_get(&uverbs_event_fops); > +- filp->f_path.mnt = mntget(uverbs_event_mnt); > +- filp->f_path.dentry = dget(uverbs_event_mnt->mnt_root); > +- filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; > ++ filp->f_vfsmnt = mntget(uverbs_event_mnt); > ++ filp->f_dentry = dget(uverbs_event_mnt->mnt_root); > ++ filp->f_mapping = filp->f_dentry->d_inode->i_mapping; > + filp->f_flags = O_RDONLY; > + filp->f_mode = FMODE_READ; > + filp->private_data = ev_file; > +diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c > +index b932bcb..ddbcabd 100644 > +--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c > ++++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c > +@@ -1744,9 +1744,9 @@ static int ipath_assign_port(struct file *fp, > + goto done; > + } > + > +- i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; > ++ i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE; > + ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", > +- (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); > ++ (long)fp->f_dentry->d_inode->i_rdev, i_minor); > + > + if (i_minor) > + ret = find_free_port(i_minor - 1, fp, uinfo); > +diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c > +index 79a60f0..d9ff283 100644 > +--- a/drivers/infiniband/hw/ipath/ipath_fs.c > ++++ b/drivers/infiniband/hw/ipath/ipath_fs.c > +@@ -118,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, > + u16 i; > + struct ipath_devdata *dd; > + > +- dd = file->f_path.dentry->d_inode->i_private; > ++ dd = file->f_dentry->d_inode->i_private; > + > + for (i = 0; i < NUM_COUNTERS; i++) > + counters[i] = ipath_snap_cntr(dd, i); > +@@ -138,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, > + struct ipath_devdata *dd; > + u64 guid; > + > +- dd = file->f_path.dentry->d_inode->i_private; > ++ dd = 
file->f_dentry->d_inode->i_private; > + > + guid = be64_to_cpu(dd->ipath_guid); > + > +@@ -177,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, > + u32 tmp, tmp2; > + struct ipath_devdata *dd; > + > +- dd = file->f_path.dentry->d_inode->i_private; > ++ dd = file->f_dentry->d_inode->i_private; > + > + /* so we only initialize non-zero fields. */ > + memset(portinfo, 0, sizeof portinfo); > +@@ -324,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, > + goto bail; > + } > + > +- dd = file->f_path.dentry->d_inode->i_private; > ++ dd = file->f_dentry->d_inode->i_private; > + if (ipath_eeprom_read(dd, pos, tmp, count)) { > + ipath_dev_err(dd, "failed to read from flash\n"); > + ret = -ENXIO; > +@@ -377,7 +377,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, > + goto bail_tmp; > + } > + > +- dd = file->f_path.dentry->d_inode->i_private; > ++ dd = file->f_dentry->d_inode->i_private; > + if (ipath_eeprom_write(dd, pos, tmp, count)) { > + ret = -ENXIO; > + ipath_dev_err(dd, "failed to write to flash\n"); > diff --git a/kernel_patches/backport/2.6.15_ubuntu606/2_misc_device_to_2_6_19.patch b/kernel_patches/backport/2.6.15_ubuntu606/2_misc_device_to_2_6_19.patch > new file mode 100644 > index 0000000..6601371 > --- /dev/null > +++ b/kernel_patches/backport/2.6.15_ubuntu606/2_misc_device_to_2_6_19.patch > @@ -0,0 +1,33 @@ > +diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c > +index 81a5cdc..640508d 100644 > +--- a/drivers/infiniband/core/ucma.c > ++++ b/drivers/infiniband/core/ucma.c > +@@ -842,7 +842,7 @@ static ssize_t show_abi_version(struct device *dev, > + { > + return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); > + } > +-static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); > ++static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); > + > + static int __init ucma_init(void) > + { > +@@ -852,7 +852,8 @@ static int __init 
ucma_init(void) > + if (ret) > + return ret; > + > +- ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); > ++ ret = class_device_create_file(ucma_misc.class, > ++ &class_device_attr_abi_version); > + if (ret) { > + printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); > + goto err; > +@@ -865,7 +866,8 @@ err: > + > + static void __exit ucma_cleanup(void) > + { > +- device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); > ++ class_device_remove_file(ucma_misc.class, > ++ &class_device_attr_abi_version); > + misc_deregister(&ucma_misc); > + idr_destroy(&ctx_idr); > + } > diff --git a/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch > new file mode 100644 > index 0000000..316d8d2 > --- /dev/null > +++ b/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch > @@ -0,0 +1,76 @@ > +commit e795d092507d571d66f2ec98d3efdc7dd284bf80 > +Author: Tom Tucker > +Date: Sun Jul 30 20:44:19 2006 -0700 > + > + [NET] infiniband: Cleanup ib_addr module to use the netevents > + > + Signed-off-by: Tom Tucker > + Signed-off-by: Steve Wise > + Signed-off-by: David S. 
Miller > + > +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c > +index 1205e80..d294bbc 100644 > +--- a/drivers/infiniband/core/addr.c > ++++ b/drivers/infiniband/core/addr.c > +@@ -35,7 +35,6 @@ #include > + #include > + #include > + #include > +-#include > + #include > + > + MODULE_AUTHOR("Sean Hefty"); > +@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad > + } > + EXPORT_SYMBOL(rdma_addr_cancel); > + > +-static int netevent_callback(struct notifier_block *self, unsigned long event, > +- void *ctx) > ++static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, > ++ struct packet_type *pkt, struct net_device *orig_dev) > + { > +- if (event == NETEVENT_NEIGH_UPDATE) { > +- struct neighbour *neigh = ctx; > ++ struct arphdr *arp_hdr; > + > +- if (neigh->dev->type == ARPHRD_INFINIBAND && > +- (neigh->nud_state & NUD_VALID)) { > +- set_timeout(jiffies); > +- } > +- } > ++ arp_hdr = (struct arphdr *) skb->nh.raw; > ++ > ++ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || > ++ arp_hdr->ar_op == htons(ARPOP_REPLY)) > ++ set_timeout(jiffies); > ++ > ++ kfree_skb(skb); > + return 0; > + } > + > +-static struct notifier_block nb = { > +- .notifier_call = netevent_callback > ++static struct packet_type addr_arp = { > ++ .type = __constant_htons(ETH_P_ARP), > ++ .func = addr_arp_recv, > ++ .af_packet_priv = (void*) 1, > + }; > + > + static int addr_init(void) > +@@ -351,13 +353,13 @@ static int addr_init(void) > + if (!addr_wq) > + return -ENOMEM; > + > +- register_netevent_notifier(&nb); > ++ dev_add_pack(&addr_arp); > + return 0; > + } > + > + static void addr_cleanup(void) > + { > +- unregister_netevent_notifier(&nb); > ++ dev_remove_pack(&addr_arp); > + destroy_workqueue(addr_wq); > + } > + > + > diff --git a/kernel_patches/backport/2.6.15_ubuntu606/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.15_ubuntu606/cxgb3_makefile_to_2_6_19.patch > new file mode 100644 > index 0000000..ad7e7f4 > --- /dev/null > +++ 
b/kernel_patches/backport/2.6.15_ubuntu606/cxgb3_makefile_to_2_6_19.patch > @@ -0,0 +1,12 @@ > +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile > +index 3434679..bb008b6 100755 > +--- a/drivers/net/cxgb3/Makefile > ++++ b/drivers/net/cxgb3/Makefile > +@@ -1,6 +1,7 @@ > + # > + # Chelsio T3 driver > + # > ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) > + > + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o > + > diff --git a/kernel_patches/backport/2.6.15_ubuntu606/ipoib_8111_to_2_6_16.patch b/kernel_patches/backport/2.6.15_ubuntu606/ipoib_8111_to_2_6_16.patch > new file mode 100644 > index 0000000..2975774 > --- /dev/null > +++ b/kernel_patches/backport/2.6.15_ubuntu606/ipoib_8111_to_2_6_16.patch > @@ -0,0 +1,83 @@ > +diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h > +index 07deee8..501ee93 100644 > +--- a/drivers/infiniband/ulp/ipoib/ipoib.h > ++++ b/drivers/infiniband/ulp/ipoib/ipoib.h > +@@ -217,6 +219,7 @@ struct ipoib_neigh { > + > + struct neighbour *neighbour; > + > ++ struct list_head all_neigh_list; > + struct list_head list; > + }; > + > +diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c > +index 705eb1d..56022f5 100644 > +--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > ++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > +@@ -85,6 +85,9 @@ struct workqueue_struct *ipoib_workqueue; > + > + struct ib_sa_client ipoib_sa_client; > + > ++static DEFINE_SPINLOCK(ipoib_all_neigh_list_lock); > ++static LIST_HEAD(ipoib_all_neigh_list); > ++ > + static void ipoib_add_one(struct ib_device *device); > + static void ipoib_remove_one(struct ib_device *device); > + > +@@ -773,6 +776,17 @@ static void ipoib_neigh_destructor(struct neighbour *n) > + unsigned long flags; > + struct ipoib_ah *ah = NULL; > + > ++ struct ipoib_neigh *tn, *nn = NULL; > ++ spin_lock(&ipoib_all_neigh_list_lock); > ++ list_for_each_entry(tn, &ipoib_all_neigh_list, all_neigh_list) > ++ if 
(tn->neighbour == n) { > ++ nn = tn; > ++ break; > ++ } > ++ spin_unlock(&ipoib_all_neigh_list_lock); > ++ if (!nn) > ++ return; > ++ > + ipoib_dbg(priv, > + "neigh_destructor for %06x " IPOIB_GID_FMT "\n", > + IPOIB_QPN(n->ha), > +@@ -806,6 +820,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) > + *to_ipoib_neigh(neighbour) = neigh; > + skb_queue_head_init(&neigh->queue); > + > ++ spin_lock(&ipoib_all_neigh_list_lock); > ++ list_add_tail(&neigh->all_neigh_list, &ipoib_all_neigh_list); > ++ neigh->neighbour->ops->destructor = ipoib_neigh_destructor; > ++ spin_unlock(&ipoib_all_neigh_list_lock); > ++ > + return neigh; > + } > + > +@@ -813,6 +832,17 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) > + { > + struct ipoib_dev_priv *priv = netdev_priv(dev); > + struct sk_buff *skb; > ++ struct ipoib_neigh *nn; > ++ spin_lock(&ipoib_all_neigh_list_lock); > ++ list_del(&neigh->all_neigh_list); > ++ list_for_each_entry(nn, &ipoib_all_neigh_list, all_neigh_list) > ++ if (nn->neighbour->ops == neigh->neighbour->ops) > ++ goto found; > ++ > ++ neigh->neighbour->ops->destructor = NULL; > ++found: > ++ spin_unlock(&ipoib_all_neigh_list_lock); > ++ > + *to_ipoib_neigh(neigh->neighbour) = NULL; > + while ((skb = __skb_dequeue(&neigh->queue))) { > + ++priv->stats.tx_dropped; > +@@ -823,8 +853,6 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) > + > + static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) > + { > +- parms->neigh_destructor = ipoib_neigh_destructor; > +- > + return 0; > + } > + > diff --git a/kernel_patches/backport/2.6.15_ubuntu606/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.15_ubuntu606/linux_stuff_to_2_6_17.patch > new file mode 100644 > index 0000000..eb2285f > --- /dev/null > +++ b/kernel_patches/backport/2.6.15_ubuntu606/linux_stuff_to_2_6_17.patch > @@ -0,0 +1,24 @@ > +diff --git a/drivers/infiniband/core/genalloc.c 
b/drivers/infiniband/core/genalloc.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/genalloc.c > +@@ -0,0 +1 @@ > ++#include "src/genalloc.c" > +diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +new file mode 100644 > +index 0000000..58cf933 > +--- /dev/null > ++++ b/drivers/infiniband/core/netevent.c > +@@ -0,0 +1 @@ > ++#include "src/netevent.c" > +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > +index 50fb1cd..456bfd0 100644 > +--- a/drivers/infiniband/core/Makefile > ++++ b/drivers/infiniband/core/Makefile > +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > + > + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > + uverbs_marshall.o > ++ > ++ib_core-y += genalloc.o netevent.o > diff --git a/kernel_patches/backport/2.6.15_ubuntu606/uverbs_to_2_6_17.patch b/kernel_patches/backport/2.6.15_ubuntu606/uverbs_to_2_6_17.patch > new file mode 100644 > index 0000000..497a203 > --- /dev/null > +++ b/kernel_patches/backport/2.6.15_ubuntu606/uverbs_to_2_6_17.patch > @@ -0,0 +1,20 @@ > +Index: gen2_linux/drivers/infiniband/core/uverbs_main.c > +=================================================================== > +--- gen2_linux.orig/drivers/infiniband/core/uverbs_main.c > ++++ gen2_linux/drivers/infiniband/core/uverbs_main.c > +@@ -815,12 +815,11 @@ static void ib_uverbs_remove_one(struct > + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); > + } > + > +-static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, > +- const char *dev_name, void *data, > +- struct vfsmount *mnt) > ++static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, > ++ const char *dev_name, void *data) > + { > + return get_sb_pseudo(fs_type, "infinibandevent:", NULL, > +- INFINIBANDEVENTFS_MAGIC, mnt); > ++ INFINIBANDEVENTFS_MAGIC); > + } > + > + static struct file_system_type uverbs_event_fs = { > diff --git 
a/ofed_scripts/configure b/ofed_scripts/configure > index f5e1da4..0618d77 100755 > --- a/ofed_scripts/configure > +++ b/ofed_scripts/configure > @@ -195,6 +195,9 @@ get_backport_dir() > 2.6.14*) > echo 2.6.14 > ;; > + 2.6.15-*-*) > + echo 2.6.15_ubuntu606 > + ;; > 2.6.15*) > echo 2.6.15 > ;; > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Fri Jan 19 10:42:36 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 19 Jan 2007 12:42:36 -0600 Subject: [openib-general] abi_version file not created on SLES9SP3 or RHEL4U4 Message-ID: <1169232156.1120.8.camel@stevo-desktop> Sean, I'm testing the latest ofed_1_2 code on RHEL4U4 and SLES9SP3. After loading the rdma_ucm module, there isn't a abi_version file installed in /sys/class/misc/rdma_cm. There is no error logged from the ucma so I _think_ it created the file ok in ucma_init(). Have you seen this? Steve. From sean.hefty at intel.com Fri Jan 19 10:48:01 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 19 Jan 2007 10:48:01 -0800 Subject: [openib-general] abi_version file not created on SLES9SP3 or RHEL4U4 In-Reply-To: <1169232156.1120.8.camel@stevo-desktop> Message-ID: <000001c73bfa$58df7210$ff0da8c0@amr.corp.intel.com> >I'm testing the latest ofed_1_2 code on RHEL4U4 and SLES9SP3. After >loading the rdma_ucm module, there isn't a abi_version file installed >in /sys/class/misc/rdma_cm. There is no error logged from the ucma so I >_think_ it created the file ok in ucma_init(). > >Have you seen this? I think the issue is that there's not a /sys/class/misc, but the kernels have been patched enough to appear that things work. Woody likely has more details on this, as I know this is one of the areas that he has to modify as part of creating his backport patches. 
The librdmacm works around this by looking in /sys/class/infiniband_ucma as well, and if that fails, just assumes the latest version of the ABI. The backport patches create the abi file under infiniband_ucma. - Sean From swise at opengridcomputing.com Fri Jan 19 10:54:54 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 19 Jan 2007 12:54:54 -0600 Subject: [openib-general] abi_version file not created on SLES9SP3 or RHEL4U4 In-Reply-To: <000001c73bfa$58df7210$ff0da8c0@amr.corp.intel.com> References: <000001c73bfa$58df7210$ff0da8c0@amr.corp.intel.com> Message-ID: <1169232894.1120.11.camel@stevo-desktop> On Fri, 2007-01-19 at 10:48 -0800, Sean Hefty wrote: > >I'm testing the latest ofed_1_2 code on RHEL4U4 and SLES9SP3. After > >loading the rdma_ucm module, there isn't a abi_version file installed > >in /sys/class/misc/rdma_cm. There is no error logged from the ucma so I > >_think_ it created the file ok in ucma_init(). > > > >Have you seen this? > > I think the issue is that there's not a /sys/class/misc, but the kernels have > been patched enough to appear that things work. Woody likely has more details > on this, as I know this is one of the areas that he has to modify as part of > creating his backport patches. > > The librdmacm works around this by looking in /sys/class/infiniband_ucma as > well, and if that fails, just assumes the latest version of the ABI. The > backport patches create the abi file under infiniband_ucma. > > - Sean There's /sys/class/misc/rdma_cm/dev file but no abi_version, and there is no /sys/class/infiniband_ucma directory. I didn't see any backport patches that affect this, but maybe I missed it. 
From jgunthorpe at obsidianresearch.com Fri Jan 19 10:58:49 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Fri, 19 Jan 2007 11:58:49 -0700 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070119131024.GD28197@mellanox.co.il> References: <20070118230744.GR12386@obsidianresearch.com> <20070119131024.GD28197@mellanox.co.il> Message-ID: <20070119185849.GT12386@obsidianresearch.com> On Fri, Jan 19, 2007 at 03:10:24PM +0200, Michael S. Tsirkin wrote: > Jason, while the patch is a one-liner, I think you should > indicate that you can certify the Developer's Certificate of Origin 1.1 Ah yes, sorry. I should have included that. I do agree to the DCO for this patch. --------------- Make the untyped data region in ib_user_mad u64 aligned so that casting ib_user_mad to structs with u64s in them works on ia64. Signed-off-by: Jason Gunthorpe --- include/rdma/ib_user_mad.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h index 44537aa..d66b15e 100644 --- a/include/rdma/ib_user_mad.h +++ b/include/rdma/ib_user_mad.h @@ -98,7 +98,7 @@ struct ib_user_mad_hdr { */ struct ib_user_mad { struct ib_user_mad_hdr hdr; - __u8 data[0]; + __u64 data[0]; }; /** -- 1.4.1.1 From robert.j.woodruff at intel.com Fri Jan 19 11:06:56 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Fri, 19 Jan 2007 11:06:56 -0800 Subject: [openib-general] abi_version file not created on SLES9SP3 or RHEL4U4 In-Reply-To: <1169232156.1120.8.camel@stevo-desktop> Message-ID: Steve wrote, >Sean, >I'm testing the latest ofed_1_2 code on RHEL4U4 and SLES9SP3. After >loading the rdma_ucm module, there isn't a abi_version file installed >in /sys/class/misc/rdma_cm. There is no error logged from the ucma so I >_think_ it created the file ok in ucma_init(). >Have you seen this? 
I ran into the same problem when backporting Sean's latest multicast+sa_cache branches to EL4. This backport patch can be applied to sean's latest code to allow it to work on EL4. I also had to make other fixes to get Sean latest code base to work on EL4 My complete backport patch set is located in my directory on the open fabrics server under, /home/woody/ofa_patches_and_backports/trunk/kernel/backports/EL4-U4 diff -Naurp linux-2.6.9/drivers/infiniband/core/ucma.c linux-2.6.9-openib-drivers-git011107-fixups/drivers/infiniband/core/ucma .c --- linux-2.6.9/drivers/infiniband/core/ucma.c 2007-01-11 15:54:57.000000000 -0800 +++ linux-2.6.9-openib-drivers-git011107-fixups/drivers/infiniband/core/ucma .c 2007-01-12 13:19:15.000000000 -0800 @@ -1034,13 +1034,13 @@ static struct miscdevice ucma_misc = { .fops = &ucma_fops, }; -static ssize_t show_abi_version(struct device *dev, - struct device_attribute *attr, - char *buf) +static struct class *ucma_class; +static ssize_t show_abi_version(struct class *class_dev, char *buf) { - return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); + return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } -static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + static int __init ucma_init(void) { @@ -1050,22 +1050,28 @@ static int __init ucma_init(void) if (ret) return ret; - ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); - if (ret) { - printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); - goto err; - } - return 0; + ucma_class = class_create(THIS_MODULE, "infiniband_ucma"); + if (IS_ERR(ucma_class)) { + printk(KERN_ERR "rdma_ucm: couldn't create class infiniband_ucma\n"); + goto err; + } + + ret = class_create_file(ucma_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); + goto err; + } + + return 0; err: - misc_deregister(&ucma_misc); - return ret; + 
misc_deregister(&ucma_misc); + return ret; } + static void __exit ucma_cleanup(void) { - device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); - idr_destroy(&ctx_idr); } module_init(ucma_init); From swise at opengridcomputing.com Fri Jan 19 11:18:45 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 19 Jan 2007 13:18:45 -0600 Subject: [openib-general] ibv_cmd_create_qp() question Message-ID: <1169234325.1120.20.camel@stevo-desktop> Roland, I've uncovered a problem while debugging my rhel4u4 chelsio backport and I have question for you. libcxgb3's iwch_create_qp() calls ibv_cmd_create_qp(). If that returns success, it then mmap()s the wq memory. If mmap() fails (which is what's happening on rhel4u4), it then calls ibv_cmd_destroy_qp(). However the qp->context field isn't initialized at this point so ibv_cmd_destroy_qp() faults. Q: Should ibv_cmd_create_qp() set the context field since ibv_cmd_destroy_qp() requires it for destroying the object? Thanks, Steve. From DaveS at Mellanox.com Fri Jan 19 12:13:30 2007 From: DaveS at Mellanox.com (Dave Sheffler) Date: Fri, 19 Jan 2007 12:13:30 -0800 Subject: [openib-general] [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... Message-ID: <1E3DCD1C63492545881FACB6063A57C1BCCD48@mtiexch01.mti.com> I nominate Aviram Gutman as co-chair of the EWG of the IBTA . He is Sr. Director of SW Development at Mellanox. He has been involved for almost 7 years and is working on both IB and Ethernet in his current capacity . All Mellanox developed and released software to date has been contributed as open source software . Dave Sheffler VP Worldwide Sales Mellanox Technologies, Inc. 
Phone: (408) 916-0003 Email: daves at mellanox.com -----Original Message----- From: Dave Sheffler Sent: Friday, January 19, 2007 12:08 PM To: 'Jamie Riotto (jriotto)'; openfabrics-ewg at openib.org; openib-promoters at openib.org; openib-general at openib.org Subject: RE: [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... I nominate Aviram Gutman Sr. Director of SW Development at Mellanox , active in both IB and Ethernet development . Dave Sheffler VP Worldwide Sales Mellanox Technologies, Inc. Phone: (408) 916-0003 Email: daves at mellanox.com -----Original Message----- From: openib-promoters-bounces at openib.org [mailto:openib-promoters-bounces at openib.org]On Behalf Of Jamie Riotto (jriotto) Sent: Thursday, January 18, 2007 2:47 PM To: openfabrics-ewg at openib.org; openib-promoters at openib.org; openib-general at openib.org Subject: [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... Dear Open Fabrics Community, I am currently Chairperson for the Enterprise Working Group (EWG) committee for OFA. By way of quick introduction, the EWG was formed to promote a single commercial release of the OFA stack, in order to encourage all participating OFA members to test, package and support a unified release. The alternative was to have each company take different snap shots at different times, and release independently of one another (which we felt would confuse and frustrate customers). This resulted in the successful release of OFED 1.1, and continues to drive the process in the form of OFED 1.2 which is the next scheduled release. In order to insure fair and balanced management of this process, I am seeking a Co-Chair to help drive these efforts moving forward. 
The ideal person would have a fair understanding of the technical issues involved in a release, and perhaps more importantly, a broad view of the evolving industry requirements in order to help steer the inevitable trade-offs made between new technology introductions and current customer stability. If you, or someone you know, would be willing to contribute to the OFA community in this regard, please consider submitting them as a nomination for Co-Chair of the EWG. All nominations should be submitted to me directly at jriotto at cisco.com. Thank you very much - jamie Jamie Riotto Sr. Director Engineering Server Virtualization Business Unit (SVBU) Cisco Systems 408-853-7813 jriotto at cisco.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Fri Jan 19 12:28:51 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 19 Jan 2007 12:28:51 -0800 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: (Hoang-Nam Nguyen's message of "Fri, 19 Jan 2007 09:45:35 -0500") References: Message-ID: > > actually the IB spec requires QPN in the work completion if either SRQ > > or base queue management extensions are supported. I'm not sure why > > though. It seems at least for current code it is fine for ehca to > > report wc->qp as NULL. > > I've update the patch in my for-2.6.21 branch to do that, and pushed > > it out. > Just want to make sure I understand this properly: > One day, if eHCA supports srq, will I have to set qp pointer with a > valid address or can I still leave it to NULL? I thought ehca hardware can't do SRQ? But anyway, the idea behind this API change is that for SRQ receive completions, then the ib_wc will have the QP pointer that the receive happened on rather than just the QPN. > Roland, you are talking about QPN. By that you mean also qp pointer? Well, I was quoting the IB spec which talks about QPN.
But we're replacing that (fairly useless) info with a pointer to the actual QP, so that the consumer can get to qp_context. -R. From rdreier at cisco.com Fri Jan 19 12:30:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 19 Jan 2007 12:30:30 -0800 Subject: [openib-general] ibv_cmd_create_qp() question In-Reply-To: <1169234325.1120.20.camel@stevo-desktop> (Steve Wise's message of "Fri, 19 Jan 2007 13:18:45 -0600") References: <1169234325.1120.20.camel@stevo-desktop> Message-ID: > I've uncovered a problem while debugging my rhel4u4 chelsio backport and > I have question for you. libcxgb3's iwch_create_qp() calls > ibv_cmd_create_qp(). If that returns success, it then mmap()s the wq > memory. If mmap() fails (which is what's happening on rhel4u4), it then > calls ibv_cmd_destroy_qp(). However the qp->context field isn't > initialized at this point so ibv_cmd_destroy_qp() faults. > > Q: Should ibv_cmd_create_qp() set the context field since > ibv_cmd_destroy_qp() requires it for destroying the object? Yes, I see the problem. Hmm, setting context in the create commands looks like the best way to handle this. Can you make a patch to do that (we should fix all the create commands and move setting context from verbs.c to cmd.c)? Thanks... From tnguyen at pantasys.com Fri Jan 19 12:28:36 2007 From: tnguyen at pantasys.com (Tung M. Nguyen) Date: Fri, 19 Jan 2007 12:28:36 -0800 Subject: [openib-general] [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... In-Reply-To: <1E3DCD1C63492545881FACB6063A57C1BCCD48@mtiexch01.mti.com> References: <1E3DCD1C63492545881FACB6063A57C1BCCD48@mtiexch01.mti.com> Message-ID: <002b01c73c08$664c4d70$8c28010a@EXECTMN> PANTA seconds this nomination. 
Regards, Tung _____ From: openib-promoters-bounces at openib.org [mailto:openib-promoters-bounces at openib.org] On Behalf Of Dave Sheffler Sent: Friday, January 19, 2007 12:14 PM To: Dave Sheffler; Jamie Riotto (jriotto); openfabrics-ewg at openib.org; openib-promoters at openib.org; openib-general at openib.org Cc: Aviram Gutman Subject: Re: [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... I nominate Aviram Gutman as co-chair of the EWG of the IBTA . He is Sr. Director of SW Development at Mellanox. He has been involved for almost 7 years and is working on both IB and Ethernet in his current capacity . All Mellanox developed and released software to date has been contributed as open source software . Dave Sheffler VP Worldwide Sales Mellanox Technologies, Inc. Phone: (408) 916-0003 Email: daves at mellanox.com -----Original Message----- From: Dave Sheffler Sent: Friday, January 19, 2007 12:08 PM To: 'Jamie Riotto (jriotto)'; openfabrics-ewg at openib.org; openib-promoters at openib.org; openib-general at openib.org Subject: RE: [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... I nominate Aviram Gutman Sr. Director of SW Development at Mellanox , active in both IB and Ethernet development . Dave Sheffler VP Worldwide Sales Mellanox Technologies, Inc. Phone: (408) 916-0003 Email: daves at mellanox.com -----Original Message----- From: openib-promoters-bounces at openib.org [mailto:openib-promoters-bounces at openib.org]On Behalf Of Jamie Riotto (jriotto) Sent: Thursday, January 18, 2007 2:47 PM To: openfabrics-ewg at openib.org; openib-promoters at openib.org; openib-general at openib.org Subject: [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... Dear Open Fabrics Community, I am currently Chairperson for the Enterprise Working Group (EWG) committee for OFA. 
By way of quick introduction, the EWG was formed to promote a single commercial release of the OFA stack, in order to encourage all participating OFA members to test, package and support a unified release. The alternative was to have each company take different snap shots at different times, and release independently of one another (which we felt would confuse and frustrate customers). This resulted in the successful release of OFED 1.1, and continues to drive the process in the form of OFED 1.2 which is the next scheduled release. In order to insure fair and balanced management of this process, I am seeking a Co-Chair to help drive these efforts moving forward. The ideal person would have a fair understanding of the technical issues invovled in a release, and perhaps more importantly, a broad view of the evolving industry requirements in order to help steer the inevitable trade-offs made between new technology introductions and current customer stability. If you, or someone you know, would be willing to contribute to the OFA community in this regard, please consider submitting them as a nomination for Co-Chair of the EWG. All nominations should be submitted to me directly at jriotto at cisco.com. Thank you very much - jamie Jamie Riotto Sr. Director Engineering Server Virtualization Business Unit (SVBU) Cisco Systems 408-853-7813 jriotto at cisco.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From HNGUYEN at de.ibm.com Fri Jan 19 12:47:38 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Fri, 19 Jan 2007 15:47:38 -0500 Subject: [openib-general] [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability In-Reply-To: Message-ID: > > Just want to make sure I understand this properly: > > One day, if eHCA supports srq, will I have to set qp pointer with a > > valid address or can I still leave it to NULL? > I though ehca hardware can't do SRQ? Yes, current ehca hardware can't SRQ. 
Our next generation eHCA will support SRQ. From DaveS at Mellanox.com Fri Jan 19 12:07:55 2007 From: DaveS at Mellanox.com (Dave Sheffler) Date: Fri, 19 Jan 2007 12:07:55 -0800 Subject: [openib-general] [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... Message-ID: <1E3DCD1C63492545881FACB6063A57C1BCCD47@mtiexch01.mti.com> I nominate Aviram Gutman Sr. Director of SW Development at Mellanox , active in both IB and Ethernet development . Dave Sheffler VP Worldwide Sales Mellanox Technologies, Inc. Phone: (408) 916-0003 Email: daves at mellanox.com -----Original Message----- From: openib-promoters-bounces at openib.org [mailto:openib-promoters-bounces at openib.org]On Behalf Of Jamie Riotto (jriotto) Sent: Thursday, January 18, 2007 2:47 PM To: openfabrics-ewg at openib.org; openib-promoters at openib.org; openib-general at openib.org Subject: [Openib-promoters] OFA EWG (Enterprise Working Group) seeking Co-Chair nominations... Dear Open Fabrics Community, I am currently Chairperson for the Enterprise Working Group (EWG) committee for OFA. By way of quick introduction, the EWG was formed to promote a single commercial release of the OFA stack, in order to encourage all participating OFA members to test, package and support a unified release. The alternative was to have each company take different snap shots at different times, and release independently of one another (which we felt would confuse and frustrate customers). This resulted in the successful release of OFED 1.1, and continues to drive the process in the form of OFED 1.2 which is the next scheduled release. In order to insure fair and balanced management of this process, I am seeking a Co-Chair to help drive these efforts moving forward. 
The ideal person would have a fair understanding of the technical issues involved in a release, and perhaps more importantly, a broad view of the evolving industry requirements in order to help steer the inevitable trade-offs made between new technology introductions and current customer stability. If you, or someone you know, would be willing to contribute to the OFA community in this regard, please consider submitting them as a nomination for Co-Chair of the EWG. All nominations should be submitted to me directly at jriotto at cisco.com. Thank you very much - jamie Jamie Riotto Sr. Director Engineering Server Virtualization Business Unit (SVBU) Cisco Systems 408-853-7813 jriotto at cisco.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From swise at opengridcomputing.com Fri Jan 19 13:16:59 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 19 Jan 2007 15:16:59 -0600 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. Message-ID: <20070119211659.18220.52930.stgit@dell3.ogc.int> From: Steve Wise If the ibv_cmd_* create function succeeds, then the object context pointer must be set by that function so that the corresponding destroy function will work.
Signed-off-by: Steve Wise --- src/cmd.c | 21 ++++++++++++++------- src/verbs.c | 16 ++-------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/cmd.c b/src/cmd.c index b267603..f7d3fde 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -209,7 +209,8 @@ int ibv_cmd_alloc_pd(struct ibv_context VALGRIND_MAKE_MEM_DEFINED(resp, resp_size); - pd->handle = resp->pd_handle; + pd->handle = resp->pd_handle; + pd->context = context; return 0; } @@ -250,6 +251,7 @@ int ibv_cmd_reg_mr(struct ibv_pd *pd, vo mr->handle = resp->mr_handle; mr->lkey = resp->lkey; mr->rkey = resp->rkey; + mr->context = pd->context; return 0; } @@ -289,8 +291,9 @@ static int ibv_cmd_create_cq_v2(struct i VALGRIND_MAKE_MEM_DEFINED(resp, sizeof resp_size); - cq->handle = resp->cq_handle; - cq->cqe = resp->cqe; + cq->handle = resp->cq_handle; + cq->cqe = resp->cqe; + cq->context = context; return 0; } @@ -317,8 +320,9 @@ int ibv_cmd_create_cq(struct ibv_context VALGRIND_MAKE_MEM_DEFINED(resp, resp_size); - cq->handle = resp->cq_handle; - cq->cqe = resp->cqe; + cq->handle = resp->cq_handle; + cq->cqe = resp->cqe; + cq->context = context; return 0; } @@ -458,7 +462,8 @@ int ibv_cmd_create_srq(struct ibv_pd *pd VALGRIND_MAKE_MEM_DEFINED(resp, resp_size); - srq->handle = resp->srq_handle; + srq->handle = resp->srq_handle; + srq->context = pd->context; if (abi_ver > 5) { attr->attr.max_wr = resp->max_wr; @@ -607,6 +612,7 @@ int ibv_cmd_create_qp(struct ibv_pd *pd, qp->handle = resp->qp_handle; qp->qp_num = resp->qpn; + qp->context = pd->context; if (abi_ver > 3) { attr->cap.max_recv_sge = resp->max_recv_sge; @@ -1025,7 +1031,8 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); - ah->handle = resp.handle; + ah->handle = resp.handle; + ah->context = pd->context; return 0; } diff --git a/src/verbs.c b/src/verbs.c index 6ac56d3..f719572 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -134,13 +134,7 @@ int ibv_query_pkey(struct ibv_context *c struct ibv_pd 
*ibv_alloc_pd(struct ibv_context *context) { - struct ibv_pd *pd; - - pd = context->ops.alloc_pd(context); - if (pd) - pd->context = context; - - return pd; + return context->ops.alloc_pd(context); } int ibv_dealloc_pd(struct ibv_pd *pd) @@ -158,7 +152,6 @@ struct ibv_mr *ibv_reg_mr(struct ibv_pd mr = pd->context->ops.reg_mr(pd, addr, length, access); if (mr) { - mr->context = pd->context; mr->pd = pd; mr->addr = addr; mr->length = length; @@ -248,7 +241,6 @@ struct ibv_cq *ibv_create_cq(struct ibv_ comp_vector); if (cq) { - cq->context = context; cq->cq_context = cq_context; cq->comp_events_completed = 0; cq->async_events_completed = 0; @@ -308,7 +300,6 @@ struct ibv_srq *ibv_create_srq(struct ib srq = pd->context->ops.create_srq(pd, srq_init_attr); if (srq) { - srq->context = pd->context; srq->srq_context = srq_init_attr->srq_context; srq->pd = pd; srq->events_completed = 0; @@ -342,7 +333,6 @@ struct ibv_qp *ibv_create_qp(struct ibv_ struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr); if (qp) { - qp->context = pd->context; qp->qp_context = qp_init_attr->qp_context; qp->pd = pd; qp->send_cq = qp_init_attr->send_cq; @@ -397,10 +387,8 @@ struct ibv_ah *ibv_create_ah(struct ibv_ { struct ibv_ah *ah = pd->context->ops.create_ah(pd, attr); - if (ah) { - ah->context = pd->context; + if (ah) ah->pd = pd; - } return ah; } From sashak at voltaire.com Fri Jan 19 13:31:36 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 19 Jan 2007 23:31:36 +0200 Subject: [openib-general] [PATCH] opensm: AC_CONFIG_SRCDIR file change Message-ID: <20070119213136.GA8193@sashak.voltaire.com> Use osm_opensm.c file in AC_CONFIG_SRCDIR ac macros instead of minor *_ctrl.c one. 
Signed-off-by: Sasha Khapyorsky --- osm/configure.in | 2 +- osm/opensm/configure.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/osm/configure.in b/osm/configure.in index 75f3cb0..76297c8 100644 --- a/osm/configure.in +++ b/osm/configure.in @@ -2,7 +2,7 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.57) AC_INIT(opensm, 3.0.0, openib-general at openib.org) -AC_CONFIG_SRCDIR([opensm/osm_sa_service_record_ctrl.c]) +AC_CONFIG_SRCDIR([opensm/osm_opensm.c]) AC_CONFIG_AUX_DIR(config) AM_INIT_AUTOMAKE(opensm, 3.0.0) diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in index cecf932..893e3b1 100644 --- a/osm/opensm/configure.in +++ b/osm/opensm/configure.in @@ -2,7 +2,7 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.57) AC_INIT(opensm, 2.1.0, openib-general at openib.org) -AC_CONFIG_SRCDIR([osm_sa_service_record_ctrl.c]) +AC_CONFIG_SRCDIR([osm_opensm.c]) AC_CONFIG_AUX_DIR(config) AM_CONFIG_HEADER(config.h) AM_INIT_AUTOMAKE -- 1.4.4.2.gfc82d From arlin.r.davis at intel.com Fri Jan 19 15:42:47 2007 From: arlin.r.davis at intel.com (Arlin Davis) Date: Fri, 19 Jan 2007 15:42:47 -0800 Subject: [openib-general] [PATCH] uDAPL - rdma_ucm branch: add changes to support rr/init exchange Message-ID: <000001c73c23$86a59930$4297070a@amr.corp.intel.com> Some uDAPL changes to support exchanging and validation of the device responder_resources and the initiator_depth during connection establishment. 
Signed-off by: Arlin Davis ardavis at ichips.intel.com diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c old mode 100644 new mode 100755 index 0f24244..8bdd0eb --- a/dapl/openib_cma/dapl_ib_cm.c +++ b/dapl/openib_cma/dapl_ib_cm.c @@ -259,6 +259,18 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, new_conn->sp = conn->sp; new_conn->hca = conn->hca; + /* Get requesters connect data, setup for accept */ + new_conn->params.responder_resources = + DAPL_MIN(event->param.conn.initiator_depth, + conn->hca->ib_trans.max_rdma_rd_in); + new_conn->params.initiator_depth = + DAPL_MIN(event->param.conn.responder_resources, + conn->hca->ib_trans.max_rdma_rd_out); + + new_conn->params.flow_control = event->param.conn.flow_control; + new_conn->params.rnr_retry_count = event->param.conn.rnr_retry_count; + new_conn->params.retry_count = event->param.conn.retry_count; + /* save private data */ if (event->param.conn.private_data_len) { dapl_os_memcpy(new_conn->p_data, @@ -279,7 +291,8 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, event->param.conn.private_data, event->param.conn.private_data_len); dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: " - "REQ: IP SRC %x PORT %d DST %x PORT %d\n", + "REQ: IP SRC %x PORT %d DST %x PORT %d " + "rr %d init %d\n", ntohl(((struct sockaddr_in *) &ipaddr->src_addr)->sin_addr.s_addr), ntohs(((struct sockaddr_in *) @@ -287,7 +300,9 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, ntohl(((struct sockaddr_in *) &ipaddr->dst_addr)->sin_addr.s_addr), ntohs(((struct sockaddr_in *) - &ipaddr->dst_addr)->sin_port)); + &ipaddr->dst_addr)->sin_port), + new_conn->params.responder_resources, + new_conn->params.initiator_depth); } return new_conn; } @@ -556,8 +571,8 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle, /* Setup QP/CM parameters and private data in cm_id */ (void)dapl_os_memzero(&conn->params, sizeof(conn->params)); - conn->params.responder_resources = 
IB_TARGET_MAX; - conn->params.initiator_depth = IB_INITIATOR_DEPTH; + conn->params.responder_resources = conn->hca->ib_trans.max_rdma_rd_in; + conn->params.initiator_depth = conn->hca->ib_trans.max_rdma_rd_out; conn->params.flow_control = 1; conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT; conn->params.retry_count = IB_RC_RETRY_COUNT; @@ -814,7 +829,6 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle; int ret; DAT_RETURN dat_status; - struct rdma_conn_param conn_params; dapl_dbg_log(DAPL_DBG_TYPE_CM, " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n", @@ -867,16 +881,10 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, ep_ptr->qp_handle = cr_conn; ep_ptr->cm_handle = cr_conn; cr_conn->ep = ep_ptr; + cr_conn->params.private_data = p_data; + cr_conn->params.private_data_len = p_size; - memset(&conn_params, 0, sizeof(conn_params)); - conn_params.private_data = p_data; - conn_params.private_data_len = p_size; - conn_params.responder_resources = IB_TARGET_MAX; - conn_params.initiator_depth = IB_INITIATOR_DEPTH; - conn_params.flow_control = 1; - conn_params.rnr_retry_count = IB_RNR_RETRY_COUNT; - - ret = rdma_accept(cr_conn->cm_id, &conn_params); + ret = rdma_accept(cr_conn->cm_id, &cr_conn->params); if (ret) { dapl_dbg_log(DAPL_DBG_TYPE_ERR," accept: ERROR %d\n", ret); dat_status = dapl_convert_errno(ret, "accept"); diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c old mode 100644 new mode 100755 index 6bb35f6..0606312 --- a/dapl/openib_cma/dapl_ib_util.c +++ b/dapl/openib_cma/dapl_ib_util.c @@ -469,6 +469,9 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA *hca_ptr, ia_attr->num_vendor_attr = 0; ia_attr->vendor_attr = NULL; ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge; + /* save rd_atom for peer validation during connect requests */ + hca_ptr->ib_trans.max_rdma_rd_in = dev_attr.max_qp_rd_atom; + hca_ptr->ib_trans.max_rdma_rd_out = dev_attr.max_qp_rd_atom; 
dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " query_hca: (ver=%x) ep %d ep_q %d evd %d evd_q %d\n", diff --git a/dapl/openib_cma/dapl_ib_util.h b/dapl/openib_cma/dapl_ib_util.h old mode 100644 new mode 100755 index 1fadb08..457d26b --- a/dapl/openib_cma/dapl_ib_util.h +++ b/dapl/openib_cma/dapl_ib_util.h @@ -73,10 +73,7 @@ typedef ib_hca_handle_t dapl_ibal_ca_t; #define IB_ARP_RETRY_COUNT 15 /* 60 sec total */ #define IB_ROUTE_TIMEOUT 4000 /* 4 sec */ #define IB_ROUTE_RETRY_COUNT 15 /* 60 sec total */ -#define IB_REQ_MRA_TIMEOUT 27 /* a little over 9 minutes */ #define IB_MAX_AT_RETRY 3 -#define IB_TARGET_MAX 4 /* max_qp_ous_rd_atom */ -#define IB_INITIATOR_DEPTH 4 /* max_qp_init_rd_atom */ typedef enum { IB_CME_CONNECTED, @@ -272,6 +269,9 @@ typedef struct _ib_hca_transport ib_async_qp_handler_t async_qp_error; uint8_t max_cm_timeout; uint8_t max_cm_retries; + /* device attributes */ + int max_rdma_rd_in; + int max_rdma_rd_out; } ib_hca_transport_t; From halr at voltaire.com Fri Jan 19 16:08:04 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 19 Jan 2007 19:08:04 -0500 Subject: [openib-general] [PATCH] opensm: AC_CONFIG_SRCDIR file change In-Reply-To: <20070119213136.GA8193@sashak.voltaire.com> References: <20070119213136.GA8193@sashak.voltaire.com> Message-ID: <1169251612.28635.33304.camel@hal.voltaire.com> On Fri, 2007-01-19 at 16:31, Sasha Khapyorsky wrote: > Use osm_opensm.c file in AC_CONFIG_SRCDIR ac macros instead of minor > *_ctrl.c one. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From hnguyen at linux.vnet.ibm.com Fri Jan 19 13:49:32 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Fri, 19 Jan 2007 22:49:32 +0100 Subject: [openib-general] [PATCH 2.6.20 0/2] ehca: fix yield and spinlock conflicts Message-ID: <200701192249.33587.hnguyen@linux.vnet.ibm.com> Hello Roland! 
Here is patch set for ehca with the following bug fixes: * Fix unproper use of yield within spinlock context * Fix mismatched spin_unlock in irq handler Thanks Nam ehca_cq.c | 5 ++++- ehca_irq.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) From hnguyen at linux.vnet.ibm.com Fri Jan 19 13:51:01 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Fri, 19 Jan 2007 22:51:01 +0100 Subject: [openib-general] [PATCH 2.6.20 2/2] ehca: ehca_irq.c: fix mismatched spin_unlock in irq handler Message-ID: <200701192251.01888.hnguyen@linux.vnet.ibm.com> Hello Roland! This is a patch for ehca_irq.c that fixes an unproper use of spin_unlock in irq handler. Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_irq.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index e7209af..93788d8 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -440,7 +440,7 @@ void ehca_tasklet_eq(unsigned long data) cq = idr_find(&ehca_cq_idr, token); if (cq == NULL) { - spin_unlock(&ehca_cq_idr_lock); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); break; } From hnguyen at linux.vnet.ibm.com Fri Jan 19 13:50:10 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Fri, 19 Jan 2007 22:50:10 +0100 Subject: [openib-general] [PATCH 2.6.20 1/2] ehca: ehca_cq.c: fix unproper use of yield within spinlock context Message-ID: <200701192250.10765.hnguyen@linux.vnet.ibm.com> Hello Roland! This is a patch for ehca_cq.c that fixes unproper use of yield within spinlock context. 
Thanks Nam Signed-off-by Hoang-Nam Nguyen --- ehca_cq.c | 5 ++++- 1 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 93995b6..6074c89 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -344,8 +344,11 @@ int ehca_destroy_cq(struct ib_cq *cq) unsigned long flags; spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_callbacks) + while (my_cq->nr_callbacks) { + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); yield(); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + } idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); From sashak at voltaire.com Fri Jan 19 18:54:46 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 20 Jan 2007 04:54:46 +0200 Subject: [openib-general] [PATCH 0/2] opensm: _ctrl layer cleanup Message-ID: <20070120025446.GG8193@sashak.voltaire.com> Hi Hal, I found that opensm mad processing _ctrl layer is non-functional, OTOH it costs us 11k lines of code and 17k in run-time. So there are two patches which clean this up (for SM and SA related MADs). Any real objections against this? Sasha From sashak at voltaire.com Fri Jan 19 18:56:51 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 20 Jan 2007 04:56:51 +0200 Subject: [openib-general] [PATCH 1/2] opensm: cleanup sm related _ctrl layer In-Reply-To: <20070120025446.GG8193@sashak.voltaire.com> References: <20070120025446.GG8193@sashak.voltaire.com> Message-ID: <20070120025651.GH8193@sashak.voltaire.com> This cleanups sm related _ctrl layer. 
Signed-off-by: Sasha Khapyorsky --- osm/include/Makefile.am | 11 -- osm/include/opensm/osm_lin_fwd_rcv.h | 15 +- osm/include/opensm/osm_lin_fwd_rcv_ctrl.h | 233 ------------------------ osm/include/opensm/osm_mcast_fwd_rcv.h | 15 +- osm/include/opensm/osm_mcast_fwd_rcv_ctrl.h | 235 ------------------------ osm/include/opensm/osm_node_desc_rcv.h | 15 +- osm/include/opensm/osm_node_desc_rcv_ctrl.h | 232 ------------------------ osm/include/opensm/osm_node_info_rcv.h | 19 +- osm/include/opensm/osm_node_info_rcv_ctrl.h | 261 --------------------------- osm/include/opensm/osm_pkey_rcv.h | 17 +- osm/include/opensm/osm_pkey_rcv_ctrl.h | 248 ------------------------- osm/include/opensm/osm_port_info_rcv.h | 8 +- osm/include/opensm/osm_port_info_rcv_ctrl.h | 261 --------------------------- osm/include/opensm/osm_slvl_map_rcv.h | 17 +- osm/include/opensm/osm_slvl_map_rcv_ctrl.h | 261 --------------------------- osm/include/opensm/osm_sm.h | 44 +++--- osm/include/opensm/osm_sminfo_rcv.h | 21 +-- osm/include/opensm/osm_sminfo_rcv_ctrl.h | 232 ------------------------ osm/include/opensm/osm_sw_info_rcv.h | 19 +- osm/include/opensm/osm_sw_info_rcv_ctrl.h | 261 --------------------------- osm/include/opensm/osm_trap_rcv.h | 22 ++-- osm/include/opensm/osm_trap_rcv_ctrl.h | 232 ------------------------ osm/include/opensm/osm_vl_arb_rcv.h | 17 +- osm/include/opensm/osm_vl_arb_rcv_ctrl.h | 261 --------------------------- osm/opensm/Makefile.am | 22 ++-- osm/opensm/osm_lin_fwd_rcv.c | 6 +- osm/opensm/osm_lin_fwd_rcv_ctrl.c | 125 ------------- osm/opensm/osm_mcast_fwd_rcv.c | 6 +- osm/opensm/osm_mcast_fwd_rcv_ctrl.c | 125 ------------- osm/opensm/osm_node_desc_rcv.c | 6 +- osm/opensm/osm_node_desc_rcv_ctrl.c | 127 ------------- osm/opensm/osm_node_info_rcv.c | 6 +- osm/opensm/osm_node_info_rcv_ctrl.c | 127 ------------- osm/opensm/osm_pkey_rcv.c | 6 +- osm/opensm/osm_pkey_rcv_ctrl.c | 116 ------------ osm/opensm/osm_port_info_rcv.c | 6 +- osm/opensm/osm_port_info_rcv_ctrl.c | 128 
------------- osm/opensm/osm_slvl_map_rcv.c | 6 +- osm/opensm/osm_slvl_map_rcv_ctrl.c | 127 ------------- osm/opensm/osm_sm.c | 141 +++++++-------- osm/opensm/osm_sminfo_rcv.c | 6 +- osm/opensm/osm_sminfo_rcv_ctrl.c | 127 ------------- osm/opensm/osm_sw_info_rcv.c | 6 +- osm/opensm/osm_sw_info_rcv_ctrl.c | 126 ------------- osm/opensm/osm_trap_rcv.c | 6 +- osm/opensm/osm_trap_rcv_ctrl.c | 126 ------------- osm/opensm/osm_vl_arb_rcv.c | 6 +- osm/opensm/osm_vl_arb_rcv_ctrl.c | 127 ------------- 48 files changed, 230 insertions(+), 4337 deletions(-) diff --git a/osm/include/Makefile.am b/osm/include/Makefile.am index d6bdd84..bc23723 100644 --- a/osm/include/Makefile.am +++ b/osm/include/Makefile.am @@ -11,7 +11,6 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_sa_path_record.h \ $(srcdir)/opensm/osm_lid_mgr.h \ $(srcdir)/opensm/osm_vl_arb_rcv.h \ - $(srcdir)/opensm/osm_lin_fwd_rcv_ctrl.h \ $(srcdir)/opensm/osm_pkey_rcv.h \ $(srcdir)/opensm/osm_port.h \ $(srcdir)/opensm/osm_sm_state_mgr.h \ @@ -23,15 +22,12 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_subnet.h \ $(srcdir)/opensm/osm_sweep_fail_ctrl.h \ $(srcdir)/opensm/osm_matrix.h \ - $(srcdir)/opensm/osm_trap_rcv_ctrl.h \ $(srcdir)/opensm/osm_sa_mcmember_record_ctrl.h \ $(srcdir)/opensm/osm_sa_service_record_ctrl.h \ - $(srcdir)/opensm/osm_pkey_rcv_ctrl.h \ $(srcdir)/opensm/osm_sa_lft_record.h \ $(srcdir)/opensm/osm_sa_mft_record.h \ $(srcdir)/opensm/osm_resp.h \ $(srcdir)/opensm/osm_partition.h \ - $(srcdir)/opensm/osm_slvl_map_rcv_ctrl.h \ $(srcdir)/opensm/osm_sa_pkey_record_ctrl.h \ $(srcdir)/opensm/osm_helper.h \ $(srcdir)/opensm/osm_sa_portinfo_record.h \ @@ -45,7 +41,6 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_req.h \ $(srcdir)/opensm/osm_mcm_info.h \ $(srcdir)/opensm/osm_mtl_bind.h \ - $(srcdir)/opensm/osm_sminfo_rcv_ctrl.h \ $(srcdir)/opensm/osm_sa_pkey_record.h \ $(srcdir)/opensm/osm_sa_lft_record_ctrl.h \ $(srcdir)/opensm/osm_sa_mft_record_ctrl.h \ @@ -58,7 +53,6 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_sa_slvl_record.h \ 
$(srcdir)/opensm/osm_opensm.h \ $(srcdir)/opensm/osm_sa.h \ - $(srcdir)/opensm/osm_vl_arb_rcv_ctrl.h \ $(srcdir)/opensm/osm_port_profile.h \ $(srcdir)/opensm/osm_sminfo_rcv.h \ $(srcdir)/opensm/osm_multicast.h \ @@ -82,9 +76,7 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_mad_pool.h \ $(srcdir)/opensm/osm_remote_sm.h \ $(srcdir)/opensm/osm_sa_class_port_info_ctrl.h \ - $(srcdir)/opensm/osm_node_info_rcv_ctrl.h \ $(srcdir)/opensm/osm_link_mgr.h \ - $(srcdir)/opensm/osm_mcast_fwd_rcv_ctrl.h \ $(srcdir)/opensm/osm_msgdef.h \ $(srcdir)/opensm/osm_sa_node_record.h \ $(srcdir)/opensm/st.h \ @@ -93,19 +85,16 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_pkey_mgr.h \ $(srcdir)/opensm/osm_sa_mad_ctrl.h \ $(srcdir)/opensm/osm_req_ctrl.h \ - $(srcdir)/opensm/osm_sw_info_rcv.h \ $(srcdir)/opensm/osm_sa_link_record.h \ $(srcdir)/opensm/osm_mcm_port.h \ $(srcdir)/opensm/osm_log.h \ $(srcdir)/opensm/osm_mcast_fwd_rcv.h \ - $(srcdir)/opensm/osm_node_desc_rcv_ctrl.h \ $(srcdir)/opensm/osm_fwd_tbl.h \ $(srcdir)/opensm/osm_db_pack.h \ $(srcdir)/opensm/osm_sm_mad_ctrl.h \ $(srcdir)/opensm/osm_slvl_map_rcv.h \ $(srcdir)/opensm/osm_sa_node_record_ctrl.h \ $(srcdir)/opensm/osm_attrib_req.h \ - $(srcdir)/opensm/osm_port_info_rcv_ctrl.h \ $(srcdir)/opensm/osm_node_desc_rcv.h \ $(srcdir)/opensm/osm_stats.h \ $(srcdir)/opensm/osm_sa_path_record_ctrl.h \ diff --git a/osm/include/opensm/osm_lin_fwd_rcv.h b/osm/include/opensm/osm_lin_fwd_rcv.h index 77b4e4a..4fe5b90 100644 --- a/osm/include/opensm/osm_lin_fwd_rcv.h +++ b/osm/include/opensm/osm_lin_fwd_rcv.h @@ -96,10 +96,9 @@ BEGIN_C_DECLS */ typedef struct _osm_lft_rcv { - osm_subn_t *p_subn; - osm_log_t *p_log; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_log_t *p_log; + cl_plock_t *p_lock; } osm_lft_rcv_t; /* * FIELDS @@ -229,14 +228,14 @@ ib_api_status_t osm_lft_rcv_init( * SYNOPSIS */ void osm_lft_rcv_process( - IN const osm_lft_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* 
context * [in] Pointer to an osm_lft_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's LFT attribute. * diff --git a/osm/include/opensm/osm_lin_fwd_rcv_ctrl.h b/osm/include/opensm/osm_lin_fwd_rcv_ctrl.h deleted file mode 100644 index a9c6192..0000000 --- a/osm/include/opensm/osm_lin_fwd_rcv_ctrl.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -/* - * Abstract: - * Declaration of osm_lft_rcv_ctrl_t. - * This object represents a controller that receives the IBA - * LFT attribute from a node. - * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_LFT_RCV_CTRL_H_ -#define _OSM_LFT_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/LFT Receive Controller -* NAME -* LFT Receive Controller -* -* DESCRIPTION -* The LFT Receive Controller object -* encapsulates the information -* needed to receive the NodeDescription attribute from a node. -* -* The LFT Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: LFT Receive Controller/osm_lft_rcv_ctrl_t -* NAME -* osm_lft_rcv_ctrl_t -* -* DESCRIPTION -* LFT Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_lft_rcv_ctrl -{ - osm_lft_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_lft_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the LFT Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* LFT Receive Controller object -*********/ - -/****f* OpenSM: LFT Receive Controller/osm_lft_rcv_ctrl_construct -* NAME -* osm_lft_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a LFT Receive Controller object. 
-* -* SYNOPSIS -*/ -void -osm_lft_rcv_ctrl_construct( - IN osm_lft_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a LFT Receive Controller object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_lft_rcv_ctrl_init, osm_lft_rcv_ctrl_destroy -* -* Calling osm_lft_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_lft_rcv_ctrl_init. -* -* SEE ALSO -* LFT Receive Controller object, osm_lft_rcv_ctrl_init, -* osm_lft_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: LFT Receive Controller/osm_lft_rcv_ctrl_destroy -* NAME -* osm_lft_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_lft_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void -osm_lft_rcv_ctrl_destroy( - IN osm_lft_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* LFT Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_lft_rcv_ctrl_construct or osm_lft_rcv_ctrl_init. -* -* SEE ALSO -* LFT Receive Controller object, osm_lft_rcv_ctrl_construct, -* osm_lft_rcv_ctrl_init -*********/ - -/****f* OpenSM: LFT Receive Controller/osm_lft_rcv_ctrl_init -* NAME -* osm_lft_rcv_ctrl_init -* -* DESCRIPTION -* The osm_lft_rcv_ctrl_init function initializes a -* LFT Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_lft_rcv_ctrl_init( - IN osm_lft_rcv_ctrl_t* const p_ctrl, - IN osm_lft_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_lft_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_lft_rcv_t object. -* -* p_log -* [in] Pointer to the log object. 
-* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the LFT Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other LFT Receive Controller methods. -* -* SEE ALSO -* LFT Receive Controller object, osm_lft_rcv_ctrl_construct, -* osm_lft_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* OSM_LFT_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_mcast_fwd_rcv.h b/osm/include/opensm/osm_mcast_fwd_rcv.h index c18a748..3a2de28 100644 --- a/osm/include/opensm/osm_mcast_fwd_rcv.h +++ b/osm/include/opensm/osm_mcast_fwd_rcv.h @@ -96,10 +96,9 @@ BEGIN_C_DECLS */ typedef struct _osm_mft_rcv { - osm_subn_t *p_subn; - osm_log_t *p_log; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_log_t *p_log; + cl_plock_t *p_lock; } osm_mft_rcv_t; /* * FIELDS @@ -233,14 +232,14 @@ osm_mft_rcv_init( */ void osm_mft_rcv_process( - IN const osm_mft_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_mft_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's MFT attribute. * diff --git a/osm/include/opensm/osm_mcast_fwd_rcv_ctrl.h b/osm/include/opensm/osm_mcast_fwd_rcv_ctrl.h deleted file mode 100644 index f64a0c8..0000000 --- a/osm/include/opensm/osm_mcast_fwd_rcv_ctrl.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_mft_rcv_ctrl_t. - * This object represents a controller that receives the IBA - * Multicast Forwarding Table attribute from a node (specifically, a - * switch). - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_MFT_RCV_CTRL_H_ -#define _OSM_MFT_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/MFT Receive Controller -* NAME -* MFT Receive Controller -* -* DESCRIPTION -* The MFT Receive Controller object -* encapsulates the information -* needed to receive the Multicast Forwarding Table -* attribute from a node. -* -* The MFT Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: MFT Receive Controller/osm_mft_rcv_ctrl_t -* NAME -* osm_mft_rcv_ctrl_t -* -* DESCRIPTION -* MFT Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_mft_rcv_ctrl -{ - osm_mft_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_mft_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the MFT Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* MFT Receive Controller object -*********/ - -/****f* OpenSM: MFT Receive Controller/osm_mft_rcv_ctrl_construct -* NAME -* osm_mft_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a MFT Receive Controller object. -* -* SYNOPSIS -*/ -void -osm_mft_rcv_ctrl_construct( - IN osm_mft_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a MFT Receive Controller object to construct. -* -* RETURN VALUE -* This function does not return a value. 
-* -* NOTES -* Allows calling osm_mft_rcv_ctrl_init, osm_mft_rcv_ctrl_destroy -* -* Calling osm_mft_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_mft_rcv_ctrl_init. -* -* SEE ALSO -* MFT Receive Controller object, osm_mft_rcv_ctrl_init, -* osm_mft_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: MFT Receive Controller/osm_mft_rcv_ctrl_destroy -* NAME -* osm_mft_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_mft_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void -osm_mft_rcv_ctrl_destroy( - IN osm_mft_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* MFT Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_mft_rcv_ctrl_construct or osm_mft_rcv_ctrl_init. -* -* SEE ALSO -* MFT Receive Controller object, osm_mft_rcv_ctrl_construct, -* osm_mft_rcv_ctrl_init -*********/ - -/****f* OpenSM: MFT Receive Controller/osm_mft_rcv_ctrl_init -* NAME -* osm_mft_rcv_ctrl_init -* -* DESCRIPTION -* The osm_mft_rcv_ctrl_init function initializes a -* MFT Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_mft_rcv_ctrl_init( - IN osm_mft_rcv_ctrl_t* const p_ctrl, - IN osm_mft_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_mft_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_mft_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the MFT Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other MFT Receive Controller methods. 
-* -* SEE ALSO -* MFT Receive Controller object, osm_mft_rcv_ctrl_construct, -* osm_mft_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* OSM_MFT_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_node_desc_rcv.h b/osm/include/opensm/osm_node_desc_rcv.h index 8803dc9..b228d58 100644 --- a/osm/include/opensm/osm_node_desc_rcv.h +++ b/osm/include/opensm/osm_node_desc_rcv.h @@ -97,10 +97,9 @@ BEGIN_C_DECLS */ typedef struct _osm_nd_rcv { - osm_subn_t *p_subn; - osm_log_t *p_log; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_log_t *p_log; + cl_plock_t *p_lock; } osm_nd_rcv_t; /* * FIELDS @@ -230,14 +229,14 @@ ib_api_status_t osm_nd_rcv_init( * SYNOPSIS */ void osm_nd_rcv_process( - IN const osm_nd_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_nd_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's NodeInfo attribute. * diff --git a/osm/include/opensm/osm_node_desc_rcv_ctrl.h b/osm/include/opensm/osm_node_desc_rcv_ctrl.h deleted file mode 100644 index a3c612b..0000000 --- a/osm/include/opensm/osm_node_desc_rcv_ctrl.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_nd_rcv_ctrl_t. - * This object represents a controller that receives the IBA NodeDescription - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_ND_RCV_CTRL_H_ -#define _OSM_ND_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Node Description Receive Controller -* NAME -* Node Description Receive Controller -* -* DESCRIPTION -* The Node Description Receive Controller object encapsulates the information -* needed to receive the NodeDescription attribute from a node. -* -* The Node Description Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: Node Description Receive Controller/osm_nd_rcv_ctrl_t -* NAME -* osm_nd_rcv_ctrl_t -* -* DESCRIPTION -* Node Description Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_nd_rcv_ctrl -{ - osm_nd_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_nd_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Node Description Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Node Description Receive Controller object -*********/ - -/****f* OpenSM: Node Description Receive Controller/osm_nd_rcv_ctrl_construct -* NAME -* osm_nd_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Node Description Receive Controller object. 
-* -* SYNOPSIS -*/ -void -osm_nd_rcv_ctrl_construct( - IN osm_nd_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Node Description Receive Controller object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_nd_rcv_ctrl_init, osm_nd_rcv_ctrl_destroy -* -* Calling osm_nd_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_nd_rcv_ctrl_init. -* -* SEE ALSO -* Node Description Receive Controller object, osm_nd_rcv_ctrl_init, -* osm_nd_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: Node Description Receive Controller/osm_nd_rcv_ctrl_destroy -* NAME -* osm_nd_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_nd_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void -osm_nd_rcv_ctrl_destroy( - IN osm_nd_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Node Description Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_nd_rcv_ctrl_construct or osm_nd_rcv_ctrl_init. -* -* SEE ALSO -* Node Description Receive Controller object, osm_nd_rcv_ctrl_construct, -* osm_nd_rcv_ctrl_init -*********/ - -/****f* OpenSM: Node Description Receive Controller/osm_nd_rcv_ctrl_init -* NAME -* osm_nd_rcv_ctrl_init -* -* DESCRIPTION -* The osm_nd_rcv_ctrl_init function initializes a -* Node Description Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_nd_rcv_ctrl_init( - IN osm_nd_rcv_ctrl_t* const p_ctrl, - IN osm_nd_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_nd_rcv_ctrl_t object to initialize. 
-* -* p_rcv -* [in] Pointer to an osm_nd_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Node Description Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Node Description Receive Controller methods. -* -* SEE ALSO -* Node Description Receive Controller object, osm_nd_rcv_ctrl_construct, -* osm_nd_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* OSM_ND_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_node_info_rcv.h b/osm/include/opensm/osm_node_info_rcv.h index 88d14a8..cae2f7f 100644 --- a/osm/include/opensm/osm_node_info_rcv.h +++ b/osm/include/opensm/osm_node_info_rcv.h @@ -100,12 +100,11 @@ BEGIN_C_DECLS */ typedef struct _osm_ni_rcv { - osm_subn_t *p_subn; - osm_req_t *p_gen_req; - osm_log_t *p_log; - osm_state_mgr_t *p_state_mgr; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_req_t *p_gen_req; + osm_log_t *p_log; + osm_state_mgr_t *p_state_mgr; + cl_plock_t *p_lock; } osm_ni_rcv_t; /* * FIELDS @@ -279,14 +278,14 @@ boolean_t osm_ni_rcv_is_inited( * SYNOPSIS */ void osm_ni_rcv_process( - IN const osm_ni_rcv_t* const p_ctrl, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_ni_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's NodeInfo attribute. * diff --git a/osm/include/opensm/osm_node_info_rcv_ctrl.h b/osm/include/opensm/osm_node_info_rcv_ctrl.h deleted file mode 100644 index 2765a8e..0000000 --- a/osm/include/opensm/osm_node_info_rcv_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_ni_rcv_ctrl_t. - * This object represents a controller that receives the IBA NodeInfo - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_NI_RCV_CTRL_H_ -#define _OSM_NI_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Node Info Receive Controller -* NAME -* Node Info Receive Controller -* -* DESCRIPTION -* The Node Info Receive Controller object encapsulates -* the information needed to receive the NodeInfo attribute from a node. -* -* The Node Info Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: Node Info Receive Controller/osm_ni_rcv_ctrl_t -* NAME -* osm_ni_rcv_ctrl_t -* -* DESCRIPTION -* Node Info Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_ni_rcv_ctrl -{ - osm_ni_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_ni_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Node Info Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Node Info Receive Controller object -* Node Info Receiver object -*********/ - -/****f* OpenSM: Node Info Receive Controller/osm_ni_rcv_ctrl_construct -* NAME -* osm_ni_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Node Info Receive Controller object. -* -* SYNOPSIS -*/ -void osm_ni_rcv_ctrl_construct( - IN osm_ni_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Node Info Receive Controller -* object to construct. 
-* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_ni_rcv_ctrl_init, osm_ni_rcv_ctrl_destroy, -* and osm_ni_rcv_ctrl_is_inited. -* -* Calling osm_ni_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_ni_rcv_ctrl_init. -* -* SEE ALSO -* Node Info Receive Controller object, osm_ni_rcv_ctrl_init, -* osm_ni_rcv_ctrl_destroy, osm_ni_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Node Info Receive Controller/osm_ni_rcv_ctrl_destroy -* NAME -* osm_ni_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_ni_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_ni_rcv_ctrl_destroy( - IN osm_ni_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Node Info Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_ni_rcv_ctrl_construct or osm_ni_rcv_ctrl_init. -* -* SEE ALSO -* Node Info Receive Controller object, osm_ni_rcv_ctrl_construct, -* osm_ni_rcv_ctrl_init -*********/ - -/****f* OpenSM: Node Info Receive Controller/osm_ni_rcv_ctrl_init -* NAME -* osm_ni_rcv_ctrl_init -* -* DESCRIPTION -* The osm_ni_rcv_ctrl_init function initializes a -* Node Info Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_ni_rcv_ctrl_init( - IN osm_ni_rcv_ctrl_t* const p_ctrl, - IN osm_ni_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_ni_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_ni_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. 
-* -* RETURN VALUES -* CL_SUCCESS if the Node Info Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Node Info Receive Controller methods. -* -* SEE ALSO -* Node Info Receive Controller object, osm_ni_rcv_ctrl_construct, -* osm_ni_rcv_ctrl_destroy, osm_ni_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Node Info Receive Controller/osm_ni_rcv_ctrl_is_inited -* NAME -* osm_ni_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_ni_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_ni_rcv_ctrl_is_inited( - IN const osm_ni_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_ni_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_ni_rcv_ctrl_construct or osm_ni_rcv_ctrl_init must be -* called before using this function. -* -* SEE ALSO -* Node Info Receive Controller object, osm_ni_rcv_ctrl_construct, -* osm_ni_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_NI_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_pkey_rcv.h b/osm/include/opensm/osm_pkey_rcv.h index 6f973be..20829b2 100644 --- a/osm/include/opensm/osm_pkey_rcv.h +++ b/osm/include/opensm/osm_pkey_rcv.h @@ -86,11 +86,10 @@ BEGIN_C_DECLS */ typedef struct _osm_pkey_rcv { - osm_subn_t *p_subn; - osm_req_t *p_req; - osm_log_t *p_log; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_req_t *p_req; + osm_log_t *p_log; + cl_plock_t *p_lock; } osm_pkey_rcv_t; /* * FIELDS @@ -227,14 +226,14 @@ ib_api_status_t osm_pkey_rcv_init( * SYNOPSIS */ void osm_pkey_rcv_process( - IN const osm_pkey_rcv_t* const p_ctrl, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_pkey_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's SLtoVL attribute. 
* diff --git a/osm/include/opensm/osm_pkey_rcv_ctrl.h b/osm/include/opensm/osm_pkey_rcv_ctrl.h deleted file mode 100644 index af51ce8..0000000 --- a/osm/include/opensm/osm_pkey_rcv_ctrl.h +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#ifndef _OSM_PKEY_RCV_CTRL_H_ -#define _OSM_PKEY_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/P_Key Table Receive Controller -* NAME -* P_Key Receive Controller -* -* DESCRIPTION -* The P_Key Receive Controller object encapsulates -* the information needed to get or set P_Key table of a port. -* -* The P_Key Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Yael Kalka, Mellanox -* -*********/ - -/****s* OpenSM: P_Key Receive Controller/osm_pkey_rcv_ctrl_t -* NAME -* osm_pkey_rcv_ctrl_t -* -* DESCRIPTION -* P_Key Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_pkey_rcv_ctrl -{ - osm_pkey_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_pkey_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the P_Key Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* P_Key Receive Controller object -* P_Key Receiver object -*********/ - -/****f* OpenSM: P_Key Receive Controller/osm_pkey_rcv_ctrl_construct -* NAME -* osm_pkey_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a P_Key Receive Controller object. -* -* SYNOPSIS -*/ -void osm_pkey_rcv_ctrl_construct( - IN osm_pkey_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a P_Key Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. 
-* -* NOTES -* Allows calling osm_pkey_rcv_ctrl_init, osm_pkey_rcv_ctrl_destroy, -* and osm_pkey_rcv_ctrl_is_inited. -* -* Calling osm_pkey_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_pkey_rcv_ctrl_init. -* -* SEE ALSO -* P_Key Receive Controller object, osm_pkey_rcv_ctrl_init, -* osm_pkey_rcv_ctrl_destroy, osm_pkey_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: P_Key Receive Controller/osm_pkey_rcv_ctrl_destroy -* NAME -* osm_pkey_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_pkey_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_pkey_rcv_ctrl_destroy( - IN osm_pkey_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* P_Key Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_pkey_rcv_ctrl_construct or osm_pkey_rcv_ctrl_init. -* -* SEE ALSO -* P_Key Receive Controller object, osm_pkey_rcv_ctrl_construct, -* osm_pkey_rcv_ctrl_init -*********/ - -/****f* OpenSM: P_Key Receive Controller/osm_pkey_rcv_ctrl_init -* NAME -* osm_pkey_rcv_ctrl_init -* -* DESCRIPTION -* The osm_pkey_rcv_ctrl_init function initializes a -* P_Key Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_pkey_rcv_ctrl_init( - IN osm_pkey_rcv_ctrl_t* const p_ctrl, - IN osm_pkey_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pkey_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_pkey_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. 
-* -* RETURN VALUES -* CL_SUCCESS if the P_Key Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other P_Key Receive Controller methods. -* -* SEE ALSO -* P_Key Receive Controller object, osm_pkey_rcv_ctrl_construct, -* osm_pkey_rcv_ctrl_destroy, osm_pkey_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: P_Key Receive Controller/osm_pkey_rcv_ctrl_is_inited -* NAME -* osm_pkey_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_pkey_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_pkey_rcv_ctrl_is_inited( - IN const osm_pkey_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pkey_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_pkey_rcv_ctrl_construct or osm_pkey_rcv_ctrl_init must be -* called before using this function. -* -* SEE ALSO -* P_Key Receive Controller object, osm_pkey_rcv_ctrl_construct, -* osm_pkey_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_PKEY_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_port_info_rcv.h b/osm/include/opensm/osm_port_info_rcv.h index 834d21e..35f399c 100644 --- a/osm/include/opensm/osm_port_info_rcv.h +++ b/osm/include/opensm/osm_port_info_rcv.h @@ -248,14 +248,14 @@ ib_api_status_t osm_pi_rcv_init( * SYNOPSIS */ void osm_pi_rcv_process( - IN const osm_pi_rcv_t* const p_ctrl, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_pi_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's PortInfo attribute. * diff --git a/osm/include/opensm/osm_port_info_rcv_ctrl.h b/osm/include/opensm/osm_port_info_rcv_ctrl.h deleted file mode 100644 index 8337bf7..0000000 --- a/osm/include/opensm/osm_port_info_rcv_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. 
All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_pi_rcv_ctrl_t. - * This object represents a controller that receives the IBA NodeInfo - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_PI_RCV_CTRL_H_ -#define _OSM_PI_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Port Info Receive Controller -* NAME -* Port Info Receive Controller -* -* DESCRIPTION -* The Port Info Receive Controller object encapsulates -* the information needed to receive the NodeInfo attribute from a node. -* -* The Port Info Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: Port Info Receive Controller/osm_pi_rcv_ctrl_t -* NAME -* osm_pi_rcv_ctrl_t -* -* DESCRIPTION -* Port Info Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_pi_rcv_ctrl -{ - osm_pi_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_pi_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Port Info Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Port Info Receive Controller object -* Port Info Receiver object -*********/ - -/****f* OpenSM: Port Info Receive Controller/osm_pi_rcv_ctrl_construct -* NAME -* osm_pi_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Port Info Receive Controller object. -* -* SYNOPSIS -*/ -void osm_pi_rcv_ctrl_construct( - IN osm_pi_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Port Info Receive Controller -* object to construct. 
-* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_pi_rcv_ctrl_init, osm_pi_rcv_ctrl_destroy, -* and osm_pi_rcv_ctrl_is_inited. -* -* Calling osm_pi_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_pi_rcv_ctrl_init. -* -* SEE ALSO -* Port Info Receive Controller object, osm_pi_rcv_ctrl_init, -* osm_pi_rcv_ctrl_destroy, osm_pi_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Port Info Receive Controller/osm_pi_rcv_ctrl_destroy -* NAME -* osm_pi_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_pi_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_pi_rcv_ctrl_destroy( - IN osm_pi_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Port Info Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_pi_rcv_ctrl_construct or osm_pi_rcv_ctrl_init. -* -* SEE ALSO -* Port Info Receive Controller object, osm_pi_rcv_ctrl_construct, -* osm_pi_rcv_ctrl_init -*********/ - -/****f* OpenSM: Port Info Receive Controller/osm_pi_rcv_ctrl_init -* NAME -* osm_pi_rcv_ctrl_init -* -* DESCRIPTION -* The osm_pi_rcv_ctrl_init function initializes a -* Port Info Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_pi_rcv_ctrl_init( - IN osm_pi_rcv_ctrl_t* const p_ctrl, - IN osm_pi_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pi_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_pi_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. 
-* -* RETURN VALUES -* CL_SUCCESS if the Port Info Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Port Info Receive Controller methods. -* -* SEE ALSO -* Port Info Receive Controller object, osm_pi_rcv_ctrl_construct, -* osm_pi_rcv_ctrl_destroy, osm_pi_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Port Info Receive Controller/osm_pi_rcv_ctrl_is_inited -* NAME -* osm_pi_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_pi_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_pi_rcv_ctrl_is_inited( - IN const osm_pi_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pi_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_pi_rcv_ctrl_construct or osm_pi_rcv_ctrl_init must be -* called before using this function. -* -* SEE ALSO -* Port Info Receive Controller object, osm_pi_rcv_ctrl_construct, -* osm_pi_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_PI_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_slvl_map_rcv.h b/osm/include/opensm/osm_slvl_map_rcv.h index 40e1a93..6a1e440 100644 --- a/osm/include/opensm/osm_slvl_map_rcv.h +++ b/osm/include/opensm/osm_slvl_map_rcv.h @@ -98,11 +98,10 @@ BEGIN_C_DECLS */ typedef struct _osm_slvl_rcv { - osm_subn_t *p_subn; - osm_req_t *p_req; - osm_log_t *p_log; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_req_t *p_req; + osm_log_t *p_log; + cl_plock_t *p_lock; } osm_slvl_rcv_t; /* * FIELDS @@ -239,14 +238,14 @@ ib_api_status_t osm_slvl_rcv_init( * SYNOPSIS */ void osm_slvl_rcv_process( - IN const osm_slvl_rcv_t* const p_ctrl, - IN osm_madw_t* const p_madw ); + IN void* context, + IN void* p_data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_slvl_rcv_t object. * -* p_madw +* p_data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's SLtoVL attribute. 
* diff --git a/osm/include/opensm/osm_slvl_map_rcv_ctrl.h b/osm/include/opensm/osm_slvl_map_rcv_ctrl.h deleted file mode 100644 index 51cf084..0000000 --- a/osm/include/opensm/osm_slvl_map_rcv_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_pi_rcv_ctrl_t. - * This object represents a controller that receives the IBA NodeInfo - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.3 $ - */ - -#ifndef _OSM_SLVL_RCV_CTRL_H_ -#define _OSM_SLVL_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/SltoVL Map Table Receive Controller -* NAME -* Slvl Map Receive Controller -* -* DESCRIPTION -* The Slvl Map Receive Controller object encapsulates -* the information needed to get or set SLtoVL Map of a port. -* -* The Slvl Map Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Eitan Zahavi, Mellanox -* -*********/ - -/****s* OpenSM: Slvl Map Receive Controller/osm_slvl_rcv_ctrl_t -* NAME -* osm_slvl_rcv_ctrl_t -* -* DESCRIPTION -* Slvl Map Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_slvl_rcv_ctrl -{ - osm_slvl_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_slvl_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Slvl Map Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Slvl Map Receive Controller object -* Slvl Map Receiver object -*********/ - -/****f* OpenSM: Slvl Map Receive Controller/osm_slvl_rcv_ctrl_construct -* NAME -* osm_slvl_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Slvl Map Receive Controller object. -* -* SYNOPSIS -*/ -void osm_slvl_rcv_ctrl_construct( - IN osm_slvl_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Slvl Map Receive Controller -* object to construct. 
-* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_slvl_rcv_ctrl_init, osm_slvl_rcv_ctrl_destroy, -* and osm_slvl_rcv_ctrl_is_inited. -* -* Calling osm_slvl_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_slvl_rcv_ctrl_init. -* -* SEE ALSO -* Slvl Map Receive Controller object, osm_slvl_rcv_ctrl_init, -* osm_slvl_rcv_ctrl_destroy, osm_slvl_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Slvl Map Receive Controller/osm_slvl_rcv_ctrl_destroy -* NAME -* osm_slvl_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_slvl_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_slvl_rcv_ctrl_destroy( - IN osm_slvl_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Slvl Map Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_slvl_rcv_ctrl_construct or osm_slvl_rcv_ctrl_init. -* -* SEE ALSO -* Slvl Map Receive Controller object, osm_slvl_rcv_ctrl_construct, -* osm_slvl_rcv_ctrl_init -*********/ - -/****f* OpenSM: Slvl Map Receive Controller/osm_slvl_rcv_ctrl_init -* NAME -* osm_slvl_rcv_ctrl_init -* -* DESCRIPTION -* The osm_slvl_rcv_ctrl_init function initializes a -* Slvl Map Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_slvl_rcv_ctrl_init( - IN osm_slvl_rcv_ctrl_t* const p_ctrl, - IN osm_slvl_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_slvl_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_slvl_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. 
-* -* RETURN VALUES -* CL_SUCCESS if the Slvl Map Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Slvl Map Receive Controller methods. -* -* SEE ALSO -* Slvl Map Receive Controller object, osm_slvl_rcv_ctrl_construct, -* osm_slvl_rcv_ctrl_destroy, osm_slvl_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Slvl Map Receive Controller/osm_slvl_rcv_ctrl_is_inited -* NAME -* osm_slvl_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_slvl_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_slvl_rcv_ctrl_is_inited( - IN const osm_slvl_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_slvl_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_slvl_rcv_ctrl_construct or osm_slvl_rcv_ctrl_init must be -* called before using this function. -* -* SEE ALSO -* Slvl Map Receive Controller object, osm_slvl_rcv_ctrl_construct, -* osm_slvl_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_SLVL_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sm.h b/osm/include/opensm/osm_sm.h index ea93144..a3894d8 100644 --- a/osm/include/opensm/osm_sm.h +++ b/osm/include/opensm/osm_sm.h @@ -62,25 +62,25 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include #include -#include -#include +#include +#include #include -#include -#include +#include +#include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -145,14 +145,10 @@ typedef struct _osm_sm osm_req_ctrl_t req_ctrl; osm_resp_t resp; osm_ni_rcv_t ni_rcv; - osm_ni_rcv_ctrl_t ni_rcv_ctrl; osm_pi_rcv_t pi_rcv; - osm_pi_rcv_ctrl_t pi_rcv_ctrl; osm_nd_rcv_t nd_rcv; - osm_nd_rcv_ctrl_t nd_rcv_ctrl; osm_sm_mad_ctrl_t mad_ctrl; osm_si_rcv_t si_rcv; - osm_si_rcv_ctrl_t si_rcv_ctrl; 
osm_state_mgr_ctrl_t state_mgr_ctrl; osm_lid_mgr_t lid_mgr; osm_ucast_mgr_t ucast_mgr; @@ -160,22 +156,26 @@ typedef struct _osm_sm osm_state_mgr_t state_mgr; osm_drop_mgr_t drop_mgr; osm_lft_rcv_t lft_rcv; - osm_lft_rcv_ctrl_t lft_rcv_ctrl; osm_mft_rcv_t mft_rcv; - osm_mft_rcv_ctrl_t mft_rcv_ctrl; osm_sweep_fail_ctrl_t sweep_fail_ctrl; osm_sminfo_rcv_t sm_info_rcv; - osm_sminfo_rcv_ctrl_t sm_info_rcv_ctrl; osm_trap_rcv_t trap_rcv; - osm_trap_rcv_ctrl_t trap_rcv_ctrl; osm_sm_state_mgr_t sm_state_mgr; osm_mcast_mgr_t mcast_mgr; osm_slvl_rcv_t slvl_rcv; - osm_slvl_rcv_ctrl_t slvl_rcv_ctrl; osm_vla_rcv_t vla_rcv; - osm_vla_rcv_ctrl_t vla_rcv_ctrl; osm_pkey_rcv_t pkey_rcv; - osm_pkey_rcv_ctrl_t pkey_rcv_ctrl; + cl_disp_reg_handle_t ni_disp_h; + cl_disp_reg_handle_t pi_disp_h; + cl_disp_reg_handle_t nd_disp_h; + cl_disp_reg_handle_t si_disp_h; + cl_disp_reg_handle_t lft_disp_h; + cl_disp_reg_handle_t mft_disp_h; + cl_disp_reg_handle_t sm_info_disp_h; + cl_disp_reg_handle_t trap_disp_h; + cl_disp_reg_handle_t slvl_disp_h; + cl_disp_reg_handle_t vla_disp_h; + cl_disp_reg_handle_t pkey_disp_h; } osm_sm_t; /* * FIELDS diff --git a/osm/include/opensm/osm_sminfo_rcv.h b/osm/include/opensm/osm_sminfo_rcv.h index eba5ebe..0d9ffc0 100644 --- a/osm/include/opensm/osm_sminfo_rcv.h +++ b/osm/include/opensm/osm_sminfo_rcv.h @@ -100,14 +100,13 @@ BEGIN_C_DECLS */ typedef struct _osm_sminfo_rcv { - osm_subn_t *p_subn; - osm_stats_t *p_stats; - osm_log_t *p_log; - osm_resp_t *p_resp; + osm_subn_t *p_subn; + osm_stats_t *p_stats; + osm_log_t *p_log; + osm_resp_t *p_resp; osm_state_mgr_t *p_state_mgr; - struct _osm_sm_state_mgr *p_sm_state_mgr; - cl_plock_t *p_lock; - + struct _osm_sm_state_mgr *p_sm_state_mgr; + cl_plock_t *p_lock; } osm_sminfo_rcv_t; /* * FIELDS @@ -265,14 +264,14 @@ ib_api_status_t osm_sminfo_rcv_init( * SYNOPSIS */ void osm_sminfo_rcv_process( - IN const osm_sminfo_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * 
PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_sminfo_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's SMInfo attribute. * diff --git a/osm/include/opensm/osm_sminfo_rcv_ctrl.h b/osm/include/opensm/osm_sminfo_rcv_ctrl.h deleted file mode 100644 index 511440f..0000000 --- a/osm/include/opensm/osm_sminfo_rcv_ctrl.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -/* - * Abstract: - * Declaration of osm_sminfo_rcv_ctrl_t. - * This object represents a controller that receives the IBA SMInfo - * attribute from a node. - * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_SMINFO_RCV_CTRL_H_ -#define _OSM_SMINFO_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/SMInfo Receive Controller -* NAME -* SMInfo Receive Controller -* -* DESCRIPTION -* The SMInfo Receive Controller object encapsulates the information -* needed to receive the SMInfo attribute from a node. -* -* The SMInfo Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: SMInfo Receive Controller/osm_sminfo_rcv_ctrl_t -* NAME -* osm_sminfo_rcv_ctrl_t -* -* DESCRIPTION -* SMInfo Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_sminfo_rcv_ctrl -{ - osm_sminfo_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_sminfo_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the SMInfo Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* SMInfo Receive Controller object -*********/ - -/****f* OpenSM: SMInfo Receive Controller/osm_sminfo_rcv_ctrl_construct -* NAME -* osm_sminfo_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a SMInfo Receive Controller object. 
-* -* SYNOPSIS -*/ -void -osm_sminfo_rcv_ctrl_construct( - IN osm_sminfo_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a SMInfo Receive Controller object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_sminfo_rcv_ctrl_init, osm_sminfo_rcv_ctrl_destroy -* -* Calling osm_sminfo_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_sminfo_rcv_ctrl_init. -* -* SEE ALSO -* SMInfo Receive Controller object, osm_sminfo_rcv_ctrl_init, -* osm_sminfo_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: SMInfo Receive Controller/osm_sminfo_rcv_ctrl_destroy -* NAME -* osm_sminfo_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_sminfo_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void -osm_sminfo_rcv_ctrl_destroy( - IN osm_sminfo_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* SMInfo Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_sminfo_rcv_ctrl_construct or osm_sminfo_rcv_ctrl_init. -* -* SEE ALSO -* SMInfo Receive Controller object, osm_sminfo_rcv_ctrl_construct, -* osm_sminfo_rcv_ctrl_init -*********/ - -/****f* OpenSM: SMInfo Receive Controller/osm_sminfo_rcv_ctrl_init -* NAME -* osm_sminfo_rcv_ctrl_init -* -* DESCRIPTION -* The osm_sminfo_rcv_ctrl_init function initializes a -* SMInfo Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_sminfo_rcv_ctrl_init( - IN osm_sminfo_rcv_ctrl_t* const p_ctrl, - IN osm_sminfo_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_sminfo_rcv_ctrl_t object to initialize. 
-* -* p_rcv -* [in] Pointer to an osm_sminfo_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* IB_SUCCESS if the SMInfo Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other SMInfo Receive Controller methods. -* -* SEE ALSO -* SMInfo Receive Controller object, osm_sminfo_rcv_ctrl_construct, -* osm_sminfo_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* OSM_SMINFO_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sw_info_rcv.h b/osm/include/opensm/osm_sw_info_rcv.h index d58a832..41b4238 100644 --- a/osm/include/opensm/osm_sw_info_rcv.h +++ b/osm/include/opensm/osm_sw_info_rcv.h @@ -99,12 +99,11 @@ BEGIN_C_DECLS */ typedef struct _osm_si_rcv { - osm_subn_t *p_subn; - osm_log_t *p_log; - osm_req_t *p_req; - osm_state_mgr_t *p_state_mgr; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_log_t *p_log; + osm_req_t *p_req; + osm_state_mgr_t *p_state_mgr; + cl_plock_t *p_lock; } osm_si_rcv_t; /* * FIELDS @@ -278,14 +277,14 @@ boolean_t osm_si_rcv_is_inited( * SYNOPSIS */ void osm_si_rcv_process( - IN const osm_si_rcv_t* const p_ctrl, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_si_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's SwitchInfo attribute. * diff --git a/osm/include/opensm/osm_sw_info_rcv_ctrl.h b/osm/include/opensm/osm_sw_info_rcv_ctrl.h deleted file mode 100644 index 3483bf5..0000000 --- a/osm/include/opensm/osm_sw_info_rcv_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_si_rcv_ctrl_t. - * This object represents a controller that receives the IBA SwitchInfo - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_SI_RCV_CTRL_H_ -#define _OSM_SI_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Switch Info Receive Controller -* NAME -* Switch Info Receive Controller -* -* DESCRIPTION -* The Switch Info Receive Controller object encapsulates -* the information needed to receive the SwitchInfo attribute from a node. -* -* The Switch Info Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: Switch Info Receive Controller/osm_si_rcv_ctrl_t -* NAME -* osm_si_rcv_ctrl_t -* -* DESCRIPTION -* Switch Info Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_si_rcv_ctrl -{ - osm_si_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_si_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Switch Info Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Switch Info Receive Controller object -* Switch Info Receiver object -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_si_rcv_ctrl_construct -* NAME -* osm_si_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Switch Info Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_si_rcv_ctrl_construct( - IN osm_si_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Switch Info Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_si_rcv_ctrl_init, osm_si_rcv_ctrl_destroy, -* and osm_si_rcv_ctrl_is_inited. -* -* Calling osm_si_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_si_rcv_ctrl_init. -* -* SEE ALSO -* Switch Info Receive Controller object, osm_si_rcv_ctrl_init, -* osm_si_rcv_ctrl_destroy, osm_si_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_si_rcv_ctrl_destroy -* NAME -* osm_si_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_si_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_si_rcv_ctrl_destroy( - IN osm_si_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Switch Info Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_si_rcv_ctrl_construct or osm_si_rcv_ctrl_init. -* -* SEE ALSO -* Switch Info Receive Controller object, osm_si_rcv_ctrl_construct, -* osm_si_rcv_ctrl_init -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_si_rcv_ctrl_init -* NAME -* osm_si_rcv_ctrl_init -* -* DESCRIPTION -* The osm_si_rcv_ctrl_init function initializes a -* Switch Info Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_si_rcv_ctrl_init( - IN osm_si_rcv_ctrl_t* const p_ctrl, - IN osm_si_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_si_rcv_ctrl_t object to initialize. 
-* -* p_rcv -* [in] Pointer to an osm_si_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Switch Info Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Switch Info Receive Controller methods. -* -* SEE ALSO -* Switch Info Receive Controller object, osm_si_rcv_ctrl_construct, -* osm_si_rcv_ctrl_destroy, osm_si_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_si_rcv_ctrl_is_inited -* NAME -* osm_si_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_si_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_si_rcv_ctrl_is_inited( - IN const osm_si_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_si_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_si_rcv_ctrl_construct or osm_si_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* Switch Info Receive Controller object, osm_si_rcv_ctrl_construct, -* osm_si_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_SI_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_trap_rcv.h b/osm/include/opensm/osm_trap_rcv.h index 813ee43..cc565db 100644 --- a/osm/include/opensm/osm_trap_rcv.h +++ b/osm/include/opensm/osm_trap_rcv.h @@ -101,13 +101,13 @@ BEGIN_C_DECLS */ typedef struct _osm_trap_rcv { - osm_subn_t *p_subn; - osm_stats_t *p_stats; - osm_log_t *p_log; - osm_resp_t *p_resp; - osm_state_mgr_t *p_state_mgr; - cl_plock_t *p_lock; - cl_event_wheel_t trap_aging_tracker; + osm_subn_t *p_subn; + osm_stats_t *p_stats; + osm_log_t *p_log; + osm_resp_t *p_resp; + osm_state_mgr_t *p_state_mgr; + cl_plock_t *p_lock; + cl_event_wheel_t trap_aging_tracker; } osm_trap_rcv_t; /* * FIELDS @@ -263,14 +263,14 @@ ib_api_status_t osm_trap_rcv_init( * SYNOPSIS */ void osm_trap_rcv_process( - IN osm_trap_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_trap_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's Trap attribute. * diff --git a/osm/include/opensm/osm_trap_rcv_ctrl.h b/osm/include/opensm/osm_trap_rcv_ctrl.h deleted file mode 100644 index be31c62..0000000 --- a/osm/include/opensm/osm_trap_rcv_ctrl.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_trap_rcv_ctrl_t. - * This object represents a controller that receives the IBA Trap - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.2 $ - */ - -#ifndef _OSM_TRAP_RCV_CTRL_H_ -#define _OSM_TRAP_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/***** OpenSM/Trap Receive Controller -* NAME -* Trap Receive Controller -* -* DESCRIPTION -* The Trap Receive Controller object encapsulates the information -* needed to receive the Trap attribute from a node. -* -* The Trap Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Yael Kalka, Mellanox -* -*********/ - -/****s* OpenSM: Trap Receive Controller/osm_trap_rcv_ctrl_t -* NAME -* osm_trap_rcv_ctrl_t -* -* DESCRIPTION -* Trap Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_trap_rcv_ctrl -{ - osm_trap_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_trap_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Trap Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Trap Receive Controller object -*********/ - -/****f* OpenSM: Trap Receive Controller/osm_trap_rcv_ctrl_construct -* NAME -* osm_trap_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Trap Receive Controller object. -* -* SYNOPSIS -*/ -void -osm_trap_rcv_ctrl_construct( - IN osm_trap_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Trap Receive Controller object to construct. -* -* RETURN VALUE -* This function does not return a value. 
-* -* NOTES -* Allows calling osm_trap_rcv_ctrl_init, osm_trap_rcv_ctrl_destroy -* -* Calling osm_trap_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_trap_rcv_ctrl_init. -* -* SEE ALSO -* Trap Receive Controller object, osm_trap_rcv_ctrl_init, -* osm_trap_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: Trap Receive Controller/osm_trap_rcv_ctrl_destroy -* NAME -* osm_trap_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_trap_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void -osm_trap_rcv_ctrl_destroy( - IN osm_trap_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Trap Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_trap_rcv_ctrl_construct or osm_trap_rcv_ctrl_init. -* -* SEE ALSO -* Trap Receive Controller object, osm_trap_rcv_ctrl_construct, -* osm_trap_rcv_ctrl_init -*********/ - -/****f* OpenSM: Trap Receive Controller/osm_trap_rcv_ctrl_init -* NAME -* osm_trap_rcv_ctrl_init -* -* DESCRIPTION -* The osm_trap_rcv_ctrl_init function initializes a -* Trap Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_trap_rcv_ctrl_init( - IN osm_trap_rcv_ctrl_t* const p_ctrl, - IN osm_trap_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_trap_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_trap_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* IB_SUCCESS if the Trap Receive Controller object was initialized -* successfully. 
-* -* NOTES -* Allows calling other Trap Receive Controller methods. -* -* SEE ALSO -* Trap Receive Controller object, osm_trap_rcv_ctrl_construct, -* osm_trap_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* OSM_TRAP_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_vl_arb_rcv.h b/osm/include/opensm/osm_vl_arb_rcv.h index 428dffc..4027a2d 100644 --- a/osm/include/opensm/osm_vl_arb_rcv.h +++ b/osm/include/opensm/osm_vl_arb_rcv.h @@ -98,11 +98,10 @@ BEGIN_C_DECLS */ typedef struct _osm_vla_rcv { - osm_subn_t *p_subn; - osm_req_t *p_req; - osm_log_t *p_log; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_req_t *p_req; + osm_log_t *p_log; + cl_plock_t *p_lock; } osm_vla_rcv_t; /* * FIELDS @@ -239,14 +238,14 @@ ib_api_status_t osm_vla_rcv_init( * SYNOPSIS */ void osm_vla_rcv_process( - IN const osm_vla_rcv_t* const p_ctrl, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_vla_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's SLtoVL attribute. * diff --git a/osm/include/opensm/osm_vl_arb_rcv_ctrl.h b/osm/include/opensm/osm_vl_arb_rcv_ctrl.h deleted file mode 100644 index d005b47..0000000 --- a/osm/include/opensm/osm_vl_arb_rcv_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_vla_rcv_ctrl_t. - * This object represents a controller that set or get resp the - * IBA VL Arbitration Table attribute from a port. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.3 $ - */ - -#ifndef _OSM_VLA_RCV_CTRL_H_ -#define _OSM_VLA_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/VL Arbitration Table Receive Controller -* NAME -* VL Arbitration Receive Controller -* -* DESCRIPTION -* The VL Arbitration Receive Controller object encapsulates -* the information needed to get or set VL Arbitration of a port. -* -* The VL Arbitration Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Eitan Zahavi, Mellanox -* -*********/ - -/****s* OpenSM: VL Arbitration Receive Controller/osm_vla_rcv_ctrl_t -* NAME -* osm_vla_rcv_ctrl_t -* -* DESCRIPTION -* VL Arbitration Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_vla_rcv_ctrl -{ - osm_vla_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_vla_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the VL Arbitration Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* VL Arbitration Receive Controller object -* VL Arbitration Receiver object -*********/ - -/****f* OpenSM: VL Arbitration Receive Controller/osm_vla_rcv_ctrl_construct -* NAME -* osm_vla_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a VL Arbitration Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_vla_rcv_ctrl_construct( - IN osm_vla_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a VL Arbitration Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_vla_rcv_ctrl_init, osm_vla_rcv_ctrl_destroy, -* and osm_vla_rcv_ctrl_is_inited. -* -* Calling osm_vla_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_vla_rcv_ctrl_init. -* -* SEE ALSO -* VL Arbitration Receive Controller object, osm_vla_rcv_ctrl_init, -* osm_vla_rcv_ctrl_destroy, osm_vla_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: VL Arbitration Receive Controller/osm_vla_rcv_ctrl_destroy -* NAME -* osm_vla_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_vla_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_vla_rcv_ctrl_destroy( - IN osm_vla_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* VL Arbitration Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_vla_rcv_ctrl_construct or osm_vla_rcv_ctrl_init. -* -* SEE ALSO -* VL Arbitration Receive Controller object, osm_vla_rcv_ctrl_construct, -* osm_vla_rcv_ctrl_init -*********/ - -/****f* OpenSM: VL Arbitration Receive Controller/osm_vla_rcv_ctrl_init -* NAME -* osm_vla_rcv_ctrl_init -* -* DESCRIPTION -* The osm_vla_rcv_ctrl_init function initializes a -* VL Arbitration Receive Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t osm_vla_rcv_ctrl_init( - IN osm_vla_rcv_ctrl_t* const p_ctrl, - IN osm_vla_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_vla_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_vla_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the VL Arbitration Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other VL Arbitration Receive Controller methods. -* -* SEE ALSO -* VL Arbitration Receive Controller object, osm_vla_rcv_ctrl_construct, -* osm_vla_rcv_ctrl_destroy, osm_vla_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: VL Arbitration Receive Controller/osm_vla_rcv_ctrl_is_inited -* NAME -* osm_vla_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_vla_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_vla_rcv_ctrl_is_inited( - IN const osm_vla_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_vla_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_vla_rcv_ctrl_construct or osm_vla_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* VL Arbitration Receive Controller object, osm_vla_rcv_ctrl_construct, -* osm_vla_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_VLA_RCV_CTRL_H_ */ diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am index dd6dbae..d9e63d4 100644 --- a/osm/opensm/Makefile.am +++ b/osm/opensm/Makefile.am @@ -30,15 +30,15 @@ bin_PROGRAMS = opensm opensm_SOURCES = main.c osm_console.c osm_db_files.c \ osm_db_pack.c osm_drop_mgr.c osm_fwd_tbl.c \ osm_inform.c osm_lid_mgr.c osm_lin_fwd_rcv.c \ - osm_lin_fwd_rcv_ctrl.c osm_lin_fwd_tbl.c osm_link_mgr.c \ - osm_matrix.c osm_mcast_fwd_rcv.c osm_mcast_fwd_rcv_ctrl.c \ + osm_lin_fwd_tbl.c osm_link_mgr.c \ + osm_matrix.c osm_mcast_fwd_rcv.c \ osm_mcast_mgr.c osm_mcast_tbl.c osm_mcm_info.c \ osm_mcm_port.c osm_mtree.c osm_multicast.c osm_node.c \ - osm_node_desc_rcv.c osm_node_desc_rcv_ctrl.c \ - osm_node_info_rcv.c osm_node_info_rcv_ctrl.c \ + osm_node_desc_rcv.c \ + osm_node_info_rcv.c \ osm_opensm.c osm_pkey.c osm_pkey_mgr.c osm_pkey_rcv.c \ - osm_pkey_rcv_ctrl.c osm_port.c \ - osm_port_info_rcv.c osm_port_info_rcv_ctrl.c \ + osm_port.c \ + osm_port_info_rcv.c \ osm_remote_sm.c osm_req.c osm_req_ctrl.c \ osm_resp.c osm_sa.c osm_sa_class_port_info.c \ osm_sa_class_port_info_ctrl.c osm_sa_informinfo.c \ @@ -59,19 +59,19 @@ opensm_SOURCES = main.c osm_console.c osm_db_files.c \ osm_sa_sminfo_record_ctrl.c osm_sa_vlarb_record.c \ osm_sa_vlarb_record_ctrl.c osm_sa_sw_info_record.c \ osm_sa_sw_info_record_ctrl.c osm_service.c \ - osm_slvl_map_rcv.c osm_slvl_map_rcv_ctrl.c \ + osm_slvl_map_rcv.c \ osm_sm.c osm_sminfo_rcv.c \ - osm_sminfo_rcv_ctrl.c osm_sm_mad_ctrl.c \ + osm_sm_mad_ctrl.c \ osm_sm_state_mgr.c osm_state_mgr.c \ osm_state_mgr_ctrl.c osm_subnet.c \ osm_sweep_fail_ctrl.c osm_sw_info_rcv.c \ - osm_sw_info_rcv_ctrl.c osm_switch.c \ + osm_switch.c \ osm_prtn.c osm_prtn_config.c osm_qos.c osm_router.c \ - osm_trap_rcv.c osm_trap_rcv_ctrl.c \ + osm_trap_rcv.c \ osm_ucast_mgr.c osm_ucast_updn.c \ 
osm_ucast_file.c osm_ucast_ftree.c \ osm_vl15intf.c osm_vl_arb_rcv.c \ - osm_vl_arb_rcv_ctrl.c st.c + st.c if OSMV_OPENIB opensm_CFLAGS = -Wall $(OSMV_CFLAGS) -fno-strict-aliasing -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1 opensm_CXXFLAGS = -Wall $(OSMV_CFLAGS) -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1 diff --git a/osm/opensm/osm_lin_fwd_rcv.c b/osm/opensm/osm_lin_fwd_rcv.c index 9750bf0..eda10d8 100644 --- a/osm/opensm/osm_lin_fwd_rcv.c +++ b/osm/opensm/osm_lin_fwd_rcv.c @@ -104,9 +104,11 @@ osm_lft_rcv_init( **********************************************************************/ void osm_lft_rcv_process( - IN const osm_lft_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_lft_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_smp_t *p_smp; uint32_t block_num; osm_switch_t *p_sw; diff --git a/osm/opensm/osm_lin_fwd_rcv_ctrl.c b/osm/opensm/osm_lin_fwd_rcv_ctrl.c deleted file mode 100644 index 6d8b664..0000000 --- a/osm/opensm/osm_lin_fwd_rcv_ctrl.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_lft_rcv_ctrl_t. - * This object represents the LFT Receive controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_lft_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_lft_rcv_process( ((osm_lft_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_lft_rcv_ctrl_construct( - IN osm_lft_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_lft_rcv_ctrl_destroy( - IN osm_lft_rcv_ctrl_t* const p_ctrl ) -{ 
- CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_lft_rcv_ctrl_init( - IN osm_lft_rcv_ctrl_t* const p_ctrl, - IN osm_lft_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_lft_rcv_ctrl_init ); - - osm_lft_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_LFT, - __osm_lft_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_lft_rcv_ctrl_init: ERR 1601: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_mcast_fwd_rcv.c b/osm/opensm/osm_mcast_fwd_rcv.c index 39b33c2..19a4788 100644 --- a/osm/opensm/osm_mcast_fwd_rcv.c +++ b/osm/opensm/osm_mcast_fwd_rcv.c @@ -109,9 +109,11 @@ osm_mft_rcv_init( **********************************************************************/ void osm_mft_rcv_process( - IN const osm_mft_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_mft_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_smp_t *p_smp; uint32_t block_num; uint8_t position; diff --git a/osm/opensm/osm_mcast_fwd_rcv_ctrl.c b/osm/opensm/osm_mcast_fwd_rcv_ctrl.c deleted file mode 100644 index be0a3e8..0000000 --- a/osm/opensm/osm_mcast_fwd_rcv_ctrl.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Implementation of osm_mft_rcv_ctrl_t. - * This object represents the LFT Receive controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_mft_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_mft_rcv_process( ((osm_mft_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_mft_rcv_ctrl_construct( - IN osm_mft_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_mft_rcv_ctrl_destroy( - IN osm_mft_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_mft_rcv_ctrl_init( - IN osm_mft_rcv_ctrl_t* const p_ctrl, - IN osm_mft_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_mft_rcv_ctrl_init ); - - osm_mft_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_MFT, - __osm_mft_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_mft_rcv_ctrl_init: ERR 0901: " - "Dispatcher registration failed\n" ); - status = 
IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_node_desc_rcv.c b/osm/opensm/osm_node_desc_rcv.c index d759a50..13c5a93 100644 --- a/osm/opensm/osm_node_desc_rcv.c +++ b/osm/opensm/osm_node_desc_rcv.c @@ -138,9 +138,11 @@ osm_nd_rcv_init( **********************************************************************/ void osm_nd_rcv_process( - IN const osm_nd_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_nd_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; cl_qmap_t *p_guid_tbl; ib_node_desc_t *p_nd; ib_smp_t *p_smp; diff --git a/osm/opensm/osm_node_desc_rcv_ctrl.c b/osm/opensm/osm_node_desc_rcv_ctrl.c deleted file mode 100644 index 693bf5c..0000000 --- a/osm/opensm/osm_node_desc_rcv_ctrl.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_nd_rcv_ctrl_t. - * This object represents the NodeDescription request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_nd_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_nd_rcv_process( ((osm_nd_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_nd_rcv_ctrl_construct( - IN osm_nd_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_nd_rcv_ctrl_destroy( - IN osm_nd_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ 
-ib_api_status_t -osm_nd_rcv_ctrl_init( - IN osm_nd_rcv_ctrl_t* const p_ctrl, - IN osm_nd_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_nd_rcv_ctrl_init ); - - osm_nd_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_NODE_DESC, - __osm_nd_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_nd_rcv_ctrl_init: ERR 0C01: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_node_info_rcv.c b/osm/opensm/osm_node_info_rcv.c index bc093ea..5cbd3b7 100644 --- a/osm/opensm/osm_node_info_rcv.c +++ b/osm/opensm/osm_node_info_rcv.c @@ -1016,9 +1016,11 @@ osm_ni_rcv_init( **********************************************************************/ void osm_ni_rcv_process( - IN const osm_ni_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_ni_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; cl_qmap_t *p_guid_tbl; ib_node_info_t *p_ni; ib_smp_t *p_smp; diff --git a/osm/opensm/osm_node_info_rcv_ctrl.c b/osm/opensm/osm_node_info_rcv_ctrl.c deleted file mode 100644 index 3548d77..0000000 --- a/osm/opensm/osm_node_info_rcv_ctrl.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_ni_rcv_ctrl_t. - * This object represents the NodeInfo request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_ni_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_ni_rcv_process( ((osm_ni_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_ni_rcv_ctrl_construct( - IN osm_ni_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_ni_rcv_ctrl_destroy( - IN osm_ni_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_ni_rcv_ctrl_init( - IN osm_ni_rcv_ctrl_t* const p_ctrl, - IN osm_ni_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_ni_rcv_ctrl_init ); - - osm_ni_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_NODE_INFO, - __osm_ni_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_ni_rcv_ctrl_init: ERR 0E01: " - "Dispatcher registration failed\n" ); - status = 
IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_pkey_rcv.c b/osm/opensm/osm_pkey_rcv.c index 3dd569e..76af9fc 100644 --- a/osm/opensm/osm_pkey_rcv.c +++ b/osm/opensm/osm_pkey_rcv.c @@ -109,9 +109,11 @@ osm_pkey_rcv_init( */ void osm_pkey_rcv_process( - IN const osm_pkey_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_pkey_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; cl_qmap_t *p_guid_tbl; ib_pkey_table_t *p_pkey_tbl; ib_smp_t *p_smp; diff --git a/osm/opensm/osm_pkey_rcv_ctrl.c b/osm/opensm/osm_pkey_rcv_ctrl.c deleted file mode 100644 index 1d7323a..0000000 --- a/osm/opensm/osm_pkey_rcv_ctrl.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -static void -__osm_pkey_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_pkey_rcv_process( ((osm_pkey_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_pkey_rcv_ctrl_construct( - IN osm_pkey_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_pkey_rcv_ctrl_destroy( - IN osm_pkey_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_pkey_rcv_ctrl_init( - IN osm_pkey_rcv_ctrl_t* const p_ctrl, - IN osm_pkey_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( 
p_log, osm_pkey_rcv_ctrl_init ); - - osm_pkey_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_PKEY, - __osm_pkey_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_pkey_rcv_ctrl_init: ERR 4901: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_port_info_rcv.c b/osm/opensm/osm_port_info_rcv.c index ced745d..66141e1 100644 --- a/osm/opensm/osm_port_info_rcv.c +++ b/osm/opensm/osm_port_info_rcv.c @@ -652,9 +652,11 @@ osm_pi_rcv_process_set( **********************************************************************/ void osm_pi_rcv_process( - IN const osm_pi_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_pi_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; cl_qmap_t *p_guid_tbl; ib_port_info_t *p_pi; ib_smp_t *p_smp; diff --git a/osm/opensm/osm_port_info_rcv_ctrl.c b/osm/opensm/osm_port_info_rcv_ctrl.c deleted file mode 100644 index a3f9a23..0000000 --- a/osm/opensm/osm_port_info_rcv_ctrl.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_pi_rcv_ctrl_t. - * This object represents the PortInfo request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_pi_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_pi_rcv_process( ((osm_pi_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_pi_rcv_ctrl_construct( - IN osm_pi_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_pi_rcv_ctrl_destroy( - IN osm_pi_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_pi_rcv_ctrl_init( - IN osm_pi_rcv_ctrl_t* const p_ctrl, - IN osm_pi_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_pi_rcv_ctrl_init ); - - osm_pi_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_PORT_INFO, - __osm_pi_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_pi_rcv_ctrl_init: ERR 1001: " - "Dispatcher registration failed\n" ); - status = 
IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_slvl_map_rcv.c b/osm/opensm/osm_slvl_map_rcv.c index 899f677..3fa3a7e 100644 --- a/osm/opensm/osm_slvl_map_rcv.c +++ b/osm/opensm/osm_slvl_map_rcv.c @@ -121,9 +121,11 @@ osm_slvl_rcv_init( */ void osm_slvl_rcv_process( - IN const osm_slvl_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *p_data ) { + osm_slvl_rcv_t *p_rcv = context; + osm_madw_t *p_madw = p_data; cl_qmap_t *p_guid_tbl; ib_slvl_table_t *p_slvl_tbl; ib_smp_t *p_smp; diff --git a/osm/opensm/osm_slvl_map_rcv_ctrl.c b/osm/opensm/osm_slvl_map_rcv_ctrl.c deleted file mode 100644 index 4d21a3d..0000000 --- a/osm/opensm/osm_slvl_map_rcv_ctrl.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_slvl_rcv_ctrl_t. - * This object represents the SLtoVL request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_slvl_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_slvl_rcv_process( ((osm_slvl_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_slvl_rcv_ctrl_construct( - IN osm_slvl_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_slvl_rcv_ctrl_destroy( - IN osm_slvl_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ 
-ib_api_status_t -osm_slvl_rcv_ctrl_init( - IN osm_slvl_rcv_ctrl_t* const p_ctrl, - IN osm_slvl_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_slvl_rcv_ctrl_init ); - - osm_slvl_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_SLVL, - __osm_slvl_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_slvl_rcv_ctrl_init: ERR 2D01: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sm.c b/osm/opensm/osm_sm.c index 71fd847..a2b531f 100644 --- a/osm/opensm/osm_sm.c +++ b/osm/opensm/osm_sm.c @@ -140,14 +140,10 @@ osm_sm_construct( osm_req_ctrl_construct( &p_sm->req_ctrl ); osm_resp_construct( &p_sm->resp ); osm_ni_rcv_construct( &p_sm->ni_rcv ); - osm_ni_rcv_ctrl_construct( &p_sm->ni_rcv_ctrl ); osm_pi_rcv_construct( &p_sm->pi_rcv ); - osm_pi_rcv_ctrl_construct( &p_sm->pi_rcv_ctrl ); osm_nd_rcv_construct( &p_sm->nd_rcv ); - osm_nd_rcv_ctrl_construct( &p_sm->nd_rcv_ctrl ); osm_sm_mad_ctrl_construct( &p_sm->mad_ctrl ); osm_si_rcv_construct( &p_sm->si_rcv ); - osm_si_rcv_ctrl_construct( &p_sm->si_rcv_ctrl ); osm_lid_mgr_construct( &p_sm->lid_mgr ); osm_ucast_mgr_construct( &p_sm->ucast_mgr ); osm_link_mgr_construct( &p_sm->link_mgr ); @@ -155,21 +151,14 @@ osm_sm_construct( osm_state_mgr_ctrl_construct( &p_sm->state_mgr_ctrl ); osm_drop_mgr_construct( &p_sm->drop_mgr ); osm_lft_rcv_construct( &p_sm->lft_rcv ); - osm_lft_rcv_ctrl_construct( &p_sm->lft_rcv_ctrl ); osm_mft_rcv_construct( &p_sm->mft_rcv ); - osm_mft_rcv_ctrl_construct( &p_sm->mft_rcv_ctrl ); osm_sweep_fail_ctrl_construct( &p_sm->sweep_fail_ctrl ); osm_sminfo_rcv_construct( 
&p_sm->sm_info_rcv ); - osm_sminfo_rcv_ctrl_construct( &p_sm->sm_info_rcv_ctrl ); osm_trap_rcv_construct( &p_sm->trap_rcv ); - osm_trap_rcv_ctrl_construct( &p_sm->trap_rcv_ctrl ); osm_sm_state_mgr_construct( &p_sm->sm_state_mgr ); osm_slvl_rcv_construct( &p_sm->slvl_rcv ); - osm_slvl_rcv_ctrl_construct( &p_sm->slvl_rcv_ctrl ); osm_vla_rcv_construct( &p_sm->vla_rcv ); - osm_vla_rcv_ctrl_construct( &p_sm->vla_rcv_ctrl ); osm_pkey_rcv_construct( &p_sm->pkey_rcv ); - osm_pkey_rcv_ctrl_construct( &p_sm->pkey_rcv_ctrl ); osm_mcast_mgr_construct( &p_sm->mcast_mgr ); } @@ -206,18 +195,18 @@ osm_sm_shutdown( * dispatcher are complete. */ osm_sm_mad_ctrl_destroy( &p_sm->mad_ctrl ); - osm_trap_rcv_ctrl_destroy( &p_sm->trap_rcv_ctrl ); - osm_sminfo_rcv_ctrl_destroy( &p_sm->sm_info_rcv_ctrl ); osm_req_ctrl_destroy( &p_sm->req_ctrl ); - osm_ni_rcv_ctrl_destroy( &p_sm->ni_rcv_ctrl ); - osm_pi_rcv_ctrl_destroy( &p_sm->pi_rcv_ctrl ); - osm_si_rcv_ctrl_destroy( &p_sm->si_rcv_ctrl ); - osm_nd_rcv_ctrl_destroy( &p_sm->nd_rcv_ctrl ); - osm_lft_rcv_ctrl_destroy( &p_sm->lft_rcv_ctrl ); - osm_mft_rcv_ctrl_destroy( &p_sm->mft_rcv_ctrl ); - osm_slvl_rcv_ctrl_destroy( &p_sm->slvl_rcv_ctrl ); - osm_vla_rcv_ctrl_destroy( &p_sm->vla_rcv_ctrl ); - osm_pkey_rcv_ctrl_destroy( &p_sm->pkey_rcv_ctrl ); + cl_disp_unregister(p_sm->ni_disp_h); + cl_disp_unregister(p_sm->pi_disp_h); + cl_disp_unregister(p_sm->si_disp_h); + cl_disp_unregister(p_sm->nd_disp_h); + cl_disp_unregister(p_sm->lft_disp_h); + cl_disp_unregister(p_sm->mft_disp_h); + cl_disp_unregister(p_sm->sm_info_disp_h); + cl_disp_unregister(p_sm->trap_disp_h); + cl_disp_unregister(p_sm->slvl_disp_h); + cl_disp_unregister(p_sm->vla_disp_h); + cl_disp_unregister(p_sm->pkey_disp_h); osm_sweep_fail_ctrl_destroy( &p_sm->sweep_fail_ctrl ); osm_state_mgr_ctrl_destroy( &p_sm->state_mgr_ctrl ); @@ -323,22 +312,12 @@ osm_sm_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_ni_rcv_ctrl_init( &p_sm->ni_rcv_ctrl, - &p_sm->ni_rcv, p_log, p_disp ); 
- if( status != IB_SUCCESS ) - goto Exit; - status = osm_pi_rcv_init( &p_sm->pi_rcv, &p_sm->req, p_subn, p_log, &p_sm->state_mgr, p_lock ); if( status != IB_SUCCESS ) goto Exit; - status = osm_pi_rcv_ctrl_init( &p_sm->pi_rcv_ctrl, - &p_sm->pi_rcv, p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_si_rcv_init( &p_sm->si_rcv, p_sm->p_subn, p_sm->p_log, @@ -347,20 +326,10 @@ osm_sm_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_si_rcv_ctrl_init( &p_sm->si_rcv_ctrl, - &p_sm->si_rcv, p_sm->p_log, p_sm->p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_nd_rcv_init( &p_sm->nd_rcv, p_subn, p_log, p_lock ); if( status != IB_SUCCESS ) goto Exit; - status = osm_nd_rcv_ctrl_init( &p_sm->nd_rcv_ctrl, - &p_sm->nd_rcv, p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_lid_mgr_init( &p_sm->lid_mgr, &p_sm->req, p_sm->p_subn, @@ -414,20 +383,10 @@ osm_sm_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_lft_rcv_ctrl_init( &p_sm->lft_rcv_ctrl, - &p_sm->lft_rcv, p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_mft_rcv_init( &p_sm->mft_rcv, p_subn, p_log, p_lock ); if( status != IB_SUCCESS ) goto Exit; - status = osm_mft_rcv_ctrl_init( &p_sm->mft_rcv_ctrl, - &p_sm->mft_rcv, p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_sweep_fail_ctrl_init( &p_sm->sweep_fail_ctrl, p_log, &p_sm->state_mgr, p_disp ); if( status != IB_SUCCESS ) @@ -443,12 +402,6 @@ osm_sm_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_sminfo_rcv_ctrl_init( &p_sm->sm_info_rcv_ctrl, - &p_sm->sm_info_rcv, - p_sm->p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_trap_rcv_init( &p_sm->trap_rcv, p_subn, p_stats, @@ -456,11 +409,6 @@ osm_sm_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_trap_rcv_ctrl_init( &p_sm->trap_rcv_ctrl, - &p_sm->trap_rcv, p_sm->p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_sm_state_mgr_init( 
&p_sm->sm_state_mgr, &p_sm->state_mgr, p_sm->p_subn, &p_sm->req, p_sm->p_log ); @@ -477,29 +425,70 @@ osm_sm_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_slvl_rcv_ctrl_init( &p_sm->slvl_rcv_ctrl, - &p_sm->slvl_rcv, p_sm->p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_vla_rcv_init( &p_sm->vla_rcv, &p_sm->req, p_subn, p_log, p_lock ); if( status != IB_SUCCESS ) goto Exit; - status = osm_vla_rcv_ctrl_init( &p_sm->vla_rcv_ctrl, - &p_sm->vla_rcv, p_sm->p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_pkey_rcv_init( &p_sm->pkey_rcv, &p_sm->req, p_subn, p_log, p_lock ); if( status != IB_SUCCESS ) goto Exit; - status = osm_pkey_rcv_ctrl_init( &p_sm->pkey_rcv_ctrl, - &p_sm->pkey_rcv, p_sm->p_log, p_disp ); - if( status != IB_SUCCESS ) + p_sm->ni_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_INFO, + osm_ni_rcv_process, &p_sm->ni_rcv); + if( p_sm->ni_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->pi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PORT_INFO, + osm_pi_rcv_process, &p_sm->pi_rcv); + if( p_sm->pi_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->si_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SWITCH_INFO, + osm_si_rcv_process, &p_sm->si_rcv); + if( p_sm->si_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->nd_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_DESC, + osm_nd_rcv_process, &p_sm->nd_rcv); + if( p_sm->nd_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->lft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_LFT, + osm_lft_rcv_process, &p_sm->lft_rcv); + if( p_sm->lft_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->mft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MFT, + osm_mft_rcv_process, &p_sm->mft_rcv); + if( p_sm->mft_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->sm_info_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SM_INFO, + osm_sminfo_rcv_process, + &p_sm->sm_info_rcv); + if( p_sm->sm_info_disp_h == CL_DISP_INVALID_HANDLE 
) + goto Exit; + + p_sm->trap_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NOTICE, + osm_trap_rcv_process, &p_sm->trap_rcv); + if( p_sm->trap_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->slvl_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SLVL, + osm_slvl_rcv_process, &p_sm->slvl_rcv); + if( p_sm->slvl_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->vla_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_VL_ARB, + osm_vla_rcv_process, &p_sm->vla_rcv); + if( p_sm->vla_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sm->pkey_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PKEY, + osm_pkey_rcv_process, &p_sm->pkey_rcv); + if( p_sm->pkey_disp_h == CL_DISP_INVALID_HANDLE ) goto Exit; /* diff --git a/osm/opensm/osm_sminfo_rcv.c b/osm/opensm/osm_sminfo_rcv.c index 27d55e4..51b588e 100644 --- a/osm/opensm/osm_sminfo_rcv.c +++ b/osm/opensm/osm_sminfo_rcv.c @@ -717,9 +717,11 @@ __osm_sminfo_rcv_process_set_response( **********************************************************************/ void osm_sminfo_rcv_process( - IN const osm_sminfo_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_sminfo_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_smp_t *p_smp; osm_smi_context_t *p_smi_context; diff --git a/osm/opensm/osm_sminfo_rcv_ctrl.c b/osm/opensm/osm_sminfo_rcv_ctrl.c deleted file mode 100644 index 593b2ae..0000000 --- a/osm/opensm/osm_sminfo_rcv_ctrl.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_sminfo_rcv_ctrl_t. - * This object represents the SMInfo request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_sminfo_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_sminfo_rcv_process( ((osm_sminfo_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_sminfo_rcv_ctrl_construct( - IN osm_sminfo_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_sminfo_rcv_ctrl_destroy( - IN osm_sminfo_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_sminfo_rcv_ctrl_init( - IN osm_sminfo_rcv_ctrl_t* const p_ctrl, - IN osm_sminfo_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_sminfo_rcv_ctrl_init ); - - osm_sminfo_rcv_ctrl_construct( p_ctrl ); - - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_SM_INFO, - __osm_sminfo_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_sminfo_rcv_ctrl_init: ERR 3001: " - "Dispatcher 
registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sw_info_rcv.c b/osm/opensm/osm_sw_info_rcv.c index 6647d16..fe3fe9f 100644 --- a/osm/opensm/osm_sw_info_rcv.c +++ b/osm/opensm/osm_sw_info_rcv.c @@ -581,9 +581,11 @@ osm_si_rcv_init( **********************************************************************/ void osm_si_rcv_process( - IN const osm_si_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_si_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; cl_qmap_t *p_node_guid_tbl; ib_switch_info_t *p_si; ib_smp_t *p_smp; diff --git a/osm/opensm/osm_sw_info_rcv_ctrl.c b/osm/opensm/osm_sw_info_rcv_ctrl.c deleted file mode 100644 index 4287ba6..0000000 --- a/osm/opensm/osm_sw_info_rcv_ctrl.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Implementation of osm_si_rcv_ctrl_t. - * This object represents the SwitchInfo request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_si_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_si_rcv_process( ((osm_si_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_si_rcv_ctrl_construct( - IN osm_si_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_si_rcv_ctrl_destroy( - IN osm_si_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ 
-ib_api_status_t -osm_si_rcv_ctrl_init( - IN osm_si_rcv_ctrl_t* const p_ctrl, - IN osm_si_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_si_rcv_ctrl_init ); - - osm_si_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_SWITCH_INFO, - __osm_si_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_si_rcv_ctrl_init: ERR 3701: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_trap_rcv.c b/osm/opensm/osm_trap_rcv.c index 164d96a..f994844 100644 --- a/osm/opensm/osm_trap_rcv.c +++ b/osm/opensm/osm_trap_rcv.c @@ -742,9 +742,11 @@ __osm_trap_rcv_process_response( **********************************************************************/ void osm_trap_rcv_process( - IN osm_trap_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_trap_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_smp_t *p_smp; OSM_LOG_ENTER( p_rcv->p_log, osm_trap_rcv_process ); diff --git a/osm/opensm/osm_trap_rcv_ctrl.c b/osm/opensm/osm_trap_rcv_ctrl.c deleted file mode 100644 index c0a475a..0000000 --- a/osm/opensm/osm_trap_rcv_ctrl.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_trap_rcv_ctrl_t. - * This object represents the Trap request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.3 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_trap_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_trap_rcv_process( ((osm_trap_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_trap_rcv_ctrl_construct( - IN osm_trap_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_trap_rcv_ctrl_destroy( - IN osm_trap_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_trap_rcv_ctrl_init( - IN osm_trap_rcv_ctrl_t* const p_ctrl, - IN osm_trap_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_trap_rcv_ctrl_init ); - - osm_trap_rcv_ctrl_construct( p_ctrl ); - - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_NOTICE, - __osm_trap_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_trap_rcv_ctrl_init: ERR 3901: " - "Dispatcher registration failed\n" ); - 
status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_vl_arb_rcv.c b/osm/opensm/osm_vl_arb_rcv.c index 0ebfe7e..930360a 100644 --- a/osm/opensm/osm_vl_arb_rcv.c +++ b/osm/opensm/osm_vl_arb_rcv.c @@ -121,9 +121,11 @@ osm_vla_rcv_init( */ void osm_vla_rcv_process( - IN const osm_vla_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_vla_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; cl_qmap_t *p_guid_tbl; ib_vl_arb_table_t *p_vla_tbl; ib_smp_t *p_smp; diff --git a/osm/opensm/osm_vl_arb_rcv_ctrl.c b/osm/opensm/osm_vl_arb_rcv_ctrl.c deleted file mode 100644 index fe23856..0000000 --- a/osm/opensm/osm_vl_arb_rcv_ctrl.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_vla_rcv_ctrl_t. - * This object represents the Vl Arbitration request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_vla_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_vla_rcv_process( ((osm_vla_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_vla_rcv_ctrl_construct( - IN osm_vla_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_vla_rcv_ctrl_destroy( - IN osm_vla_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ 
-ib_api_status_t -osm_vla_rcv_ctrl_init( - IN osm_vla_rcv_ctrl_t* const p_ctrl, - IN osm_vla_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_vla_rcv_ctrl_init ); - - osm_vla_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_VL_ARB, - __osm_vla_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_vla_rcv_ctrl_init: ERR 4001: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - -- 1.4.4.2.gfc82d From sashak at voltaire.com Fri Jan 19 18:57:48 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 20 Jan 2007 04:57:48 +0200 Subject: [openib-general] [PATCH 2/2] opensm: cleanup sa related _ctrl layer In-Reply-To: <20070120025446.GG8193@sashak.voltaire.com> References: <20070120025446.GG8193@sashak.voltaire.com> Message-ID: <20070120025748.GI8193@sashak.voltaire.com> Cleanup SA related *_ctrl layer. 
Signed-off-by: Sasha Khapyorsky --- osm/include/Makefile.am | 18 -- osm/include/opensm/osm_sa.h | 82 +++--- osm/include/opensm/osm_sa_class_port_info.h | 19 +- osm/include/opensm/osm_sa_class_port_info_ctrl.h | 261 ------------------ osm/include/opensm/osm_sa_guidinfo_record.h | 21 +- osm/include/opensm/osm_sa_guidinfo_record_ctrl.h | 230 ---------------- osm/include/opensm/osm_sa_informinfo.h | 28 +- osm/include/opensm/osm_sa_informinfo_ctrl.h | 261 ------------------ osm/include/opensm/osm_sa_lft_record.h | 22 +- osm/include/opensm/osm_sa_lft_record_ctrl.h | 233 ---------------- osm/include/opensm/osm_sa_link_record.h | 21 +- osm/include/opensm/osm_sa_link_record_ctrl.h | 261 ------------------ osm/include/opensm/osm_sa_mcmember_record.h | 24 +- osm/include/opensm/osm_sa_mcmember_record_ctrl.h | 262 ------------------ osm/include/opensm/osm_sa_mft_record.h | 22 +- osm/include/opensm/osm_sa_mft_record_ctrl.h | 231 ---------------- osm/include/opensm/osm_sa_multipath_record.h | 8 +- osm/include/opensm/osm_sa_multipath_record_ctrl.h | 260 ------------------ osm/include/opensm/osm_sa_node_record.h | 21 +- osm/include/opensm/osm_sa_node_record_ctrl.h | 231 ---------------- osm/include/opensm/osm_sa_path_record.h | 20 +- osm/include/opensm/osm_sa_path_record_ctrl.h | 261 ------------------ osm/include/opensm/osm_sa_pkey_record.h | 21 +- osm/include/opensm/osm_sa_pkey_record_ctrl.h | 218 --------------- osm/include/opensm/osm_sa_portinfo_record.h | 20 +- osm/include/opensm/osm_sa_portinfo_record_ctrl.h | 231 ---------------- osm/include/opensm/osm_sa_service_record.h | 23 +- osm/include/opensm/osm_sa_service_record_ctrl.h | 230 ---------------- osm/include/opensm/osm_sa_slvl_record.h | 21 +- osm/include/opensm/osm_sa_slvl_record_ctrl.h | 231 ---------------- osm/include/opensm/osm_sa_sminfo_record.h | 22 +- osm/include/opensm/osm_sa_sminfo_record_ctrl.h | 231 ---------------- osm/include/opensm/osm_sa_sw_info_record.h | 8 +- 
osm/include/opensm/osm_sa_sw_info_record_ctrl.h | 259 ------------------ osm/include/opensm/osm_sa_vlarb_record.h | 20 +- osm/include/opensm/osm_sa_vlarb_record_ctrl.h | 231 ---------------- osm/opensm/Makefile.am | 34 ++-- osm/opensm/osm_sa.c | 293 ++++++++------------- osm/opensm/osm_sa_class_port_info.c | 6 +- osm/opensm/osm_sa_class_port_info_ctrl.c | 126 --------- osm/opensm/osm_sa_guidinfo_record.c | 6 +- osm/opensm/osm_sa_guidinfo_record_ctrl.c | 124 --------- osm/opensm/osm_sa_informinfo.c | 12 +- osm/opensm/osm_sa_informinfo_ctrl.c | 154 ----------- osm/opensm/osm_sa_lft_record.c | 6 +- osm/opensm/osm_sa_lft_record_ctrl.c | 124 --------- osm/opensm/osm_sa_link_record.c | 6 +- osm/opensm/osm_sa_link_record_ctrl.c | 128 --------- osm/opensm/osm_sa_mcmember_record.c | 6 +- osm/opensm/osm_sa_mcmember_record_ctrl.c | 132 --------- osm/opensm/osm_sa_mft_record.c | 6 +- osm/opensm/osm_sa_mft_record_ctrl.c | 123 --------- osm/opensm/osm_sa_multipath_record.c | 6 +- osm/opensm/osm_sa_multipath_record_ctrl.c | 128 --------- osm/opensm/osm_sa_node_record.c | 6 +- osm/opensm/osm_sa_node_record_ctrl.c | 125 --------- osm/opensm/osm_sa_path_record.c | 6 +- osm/opensm/osm_sa_path_record_ctrl.c | 126 --------- osm/opensm/osm_sa_pkey_record.c | 6 +- osm/opensm/osm_sa_pkey_record_ctrl.c | 113 -------- osm/opensm/osm_sa_portinfo_record.c | 6 +- osm/opensm/osm_sa_portinfo_record_ctrl.c | 125 --------- osm/opensm/osm_sa_service_record.c | 6 +- osm/opensm/osm_sa_service_record_ctrl.c | 125 --------- osm/opensm/osm_sa_slvl_record.c | 6 +- osm/opensm/osm_sa_slvl_record_ctrl.c | 126 --------- osm/opensm/osm_sa_sminfo_record.c | 6 +- osm/opensm/osm_sa_sminfo_record_ctrl.c | 125 --------- osm/opensm/osm_sa_sw_info_record.c | 6 +- osm/opensm/osm_sa_sw_info_record_ctrl.c | 123 --------- osm/opensm/osm_sa_vlarb_record.c | 6 +- osm/opensm/osm_sa_vlarb_record_ctrl.c | 126 --------- osm/opensm/osm_state_mgr.c | 1 + 73 files changed, 405 insertions(+), 6747 deletions(-) diff --git 
a/osm/include/Makefile.am b/osm/include/Makefile.am index bc23723..b49cf21 100644 --- a/osm/include/Makefile.am +++ b/osm/include/Makefile.am @@ -5,9 +5,6 @@ nobase_pkginclude_HEADERS = iba/ib_types.h EXTRA_DIST = \ $(srcdir)/opensm/osm_version.h \ - $(srcdir)/opensm/osm_sa_portinfo_record_ctrl.h \ - $(srcdir)/opensm/osm_sa_guidinfo_record_ctrl.h \ - $(srcdir)/opensm/osm_sa_multipath_record_ctrl.h \ $(srcdir)/opensm/osm_sa_path_record.h \ $(srcdir)/opensm/osm_lid_mgr.h \ $(srcdir)/opensm/osm_vl_arb_rcv.h \ @@ -18,17 +15,13 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_rand_fwd_tbl.h \ $(srcdir)/opensm/osm_sa_vlarb_record.h \ $(srcdir)/opensm/osm_madw.h \ - $(srcdir)/opensm/osm_sa_sminfo_record_ctrl.h \ $(srcdir)/opensm/osm_subnet.h \ $(srcdir)/opensm/osm_sweep_fail_ctrl.h \ $(srcdir)/opensm/osm_matrix.h \ - $(srcdir)/opensm/osm_sa_mcmember_record_ctrl.h \ - $(srcdir)/opensm/osm_sa_service_record_ctrl.h \ $(srcdir)/opensm/osm_sa_lft_record.h \ $(srcdir)/opensm/osm_sa_mft_record.h \ $(srcdir)/opensm/osm_resp.h \ $(srcdir)/opensm/osm_partition.h \ - $(srcdir)/opensm/osm_sa_pkey_record_ctrl.h \ $(srcdir)/opensm/osm_helper.h \ $(srcdir)/opensm/osm_sa_portinfo_record.h \ $(srcdir)/opensm/osm_sa_guidinfo_record.h \ @@ -37,13 +30,10 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_sa_response.h \ $(srcdir)/opensm/osm_node.h \ $(srcdir)/opensm/osm_console.h \ - $(srcdir)/opensm/osm_sa_slvl_record_ctrl.h \ $(srcdir)/opensm/osm_req.h \ $(srcdir)/opensm/osm_mcm_info.h \ $(srcdir)/opensm/osm_mtl_bind.h \ $(srcdir)/opensm/osm_sa_pkey_record.h \ - $(srcdir)/opensm/osm_sa_lft_record_ctrl.h \ - $(srcdir)/opensm/osm_sa_mft_record_ctrl.h \ $(srcdir)/opensm/osm_inform.h \ $(srcdir)/opensm/osm_path.h \ $(srcdir)/opensm/osm_lin_fwd_rcv.h \ @@ -61,12 +51,10 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_umadt.h \ $(srcdir)/opensm/osm_node_info_rcv.h \ $(srcdir)/opensm/osm_base.h \ - $(srcdir)/opensm/osm_sa_vlarb_record_ctrl.h \ $(srcdir)/opensm/osm_sa_sminfo_record.h \ $(srcdir)/opensm/osm_mcast_mgr.h \ 
$(srcdir)/opensm/osm_errors.h \ $(srcdir)/opensm/osm_mtree.h \ - $(srcdir)/opensm/osm_sa_informinfo_ctrl.h \ $(srcdir)/opensm/osm_sm.h \ $(srcdir)/opensm/osm_trap_rcv.h \ $(srcdir)/opensm/osm_lin_fwd_tbl.h \ @@ -75,7 +63,6 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_sa_informinfo.h \ $(srcdir)/opensm/osm_mad_pool.h \ $(srcdir)/opensm/osm_remote_sm.h \ - $(srcdir)/opensm/osm_sa_class_port_info_ctrl.h \ $(srcdir)/opensm/osm_link_mgr.h \ $(srcdir)/opensm/osm_msgdef.h \ $(srcdir)/opensm/osm_sa_node_record.h \ @@ -93,15 +80,10 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_db_pack.h \ $(srcdir)/opensm/osm_sm_mad_ctrl.h \ $(srcdir)/opensm/osm_slvl_map_rcv.h \ - $(srcdir)/opensm/osm_sa_node_record_ctrl.h \ $(srcdir)/opensm/osm_attrib_req.h \ $(srcdir)/opensm/osm_node_desc_rcv.h \ $(srcdir)/opensm/osm_stats.h \ - $(srcdir)/opensm/osm_sa_path_record_ctrl.h \ - $(srcdir)/opensm/osm_sa_link_record_ctrl.h \ - $(srcdir)/opensm/osm_sw_info_rcv_ctrl.h \ $(srcdir)/opensm/osm_sa_mcmember_record.h \ - $(srcdir)/opensm/osm_sa_sw_info_record_ctrl.h \ $(srcdir)/opensm/osm_sa_sw_info_record.h \ $(srcdir)/opensm/osm_vl15intf.h \ $(srcdir)/opensm/osm_drop_mgr.h \ diff --git a/osm/include/opensm/osm_sa.h b/osm/include/opensm/osm_sa.h index 1508f44..ea60341 100644 --- a/osm/include/opensm/osm_sa.h +++ b/osm/include/opensm/osm_sa.h @@ -60,24 +60,24 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -151,55 +151,59 @@ typedef struct _osm_sa osm_sa_mad_ctrl_t mad_ctrl; osm_sa_resp_t resp; osm_cpi_rcv_t cpi_rcv; - osm_cpi_rcv_ctrl_t cpi_rcv_ctrl; osm_nr_rcv_t nr_rcv; - osm_nr_rcv_ctrl_t nr_rcv_ctrl; osm_pir_rcv_t pir_rcv; - 
osm_pir_rcv_ctrl_t pir_rcv_ctrl; osm_gir_rcv_t gir_rcv; - osm_gir_rcv_ctrl_t gir_rcv_ctrl; osm_lr_rcv_t lr_rcv; - osm_lr_rcv_ctrl_t lr_rcv_ctrl; osm_pr_rcv_t pr_rcv; - osm_pr_rcv_ctrl_t pr_rcv_ctrl; osm_smir_rcv_t smir_rcv; - osm_smir_ctrl_t smir_ctrl; osm_mcmr_recv_t mcmr_rcv; - osm_mcmr_rcv_ctrl_t mcmr_rcv_ctlr; osm_sr_rcv_t sr_rcv; - osm_sr_rcv_ctrl_t sr_rcv_ctrl; #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) osm_mpr_rcv_t mpr_rcv; - osm_mpr_rcv_ctrl_t mpr_rcv_ctrl; #endif /* InformInfo Receiver */ osm_infr_rcv_t infr_rcv; - osm_infr_rcv_ctrl_t infr_rcv_ctrl; /* VL Arbitrartion Query */ osm_vlarb_rec_rcv_t vlarb_rec_rcv; - osm_vlarb_rec_rcv_ctrl_t vlarb_rec_rcv_ctrl; /* SLtoVL Map Query */ osm_slvl_rec_rcv_t slvl_rec_rcv; - osm_slvl_rec_rcv_ctrl_t slvl_rec_rcv_ctrl; /* P_Key table Query */ osm_pkey_rec_rcv_t pkey_rec_rcv; - osm_pkey_rec_rcv_ctrl_t pkey_rec_rcv_ctrl; /* LinearForwardingTable Query */ osm_lftr_rcv_t lftr_rcv; - osm_lftr_rcv_ctrl_t lftr_rcv_ctrl; /* SwitchInfo Query */ osm_sir_rcv_t sir_rcv; - osm_sir_rcv_ctrl_t sir_rcv_ctrl; /* MulticastForwardingTable Query */ osm_mftr_rcv_t mftr_rcv; - osm_mftr_rcv_ctrl_t mftr_rcv_ctrl; + + cl_disp_reg_handle_t cpi_disp_h; + cl_disp_reg_handle_t nr_disp_h; + cl_disp_reg_handle_t pir_disp_h; + cl_disp_reg_handle_t gir_disp_h; + cl_disp_reg_handle_t lr_disp_h; + cl_disp_reg_handle_t pr_disp_h; + cl_disp_reg_handle_t smir_disp_h; + cl_disp_reg_handle_t mcmr_disp_h; + cl_disp_reg_handle_t sr_disp_h; +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + cl_disp_reg_handle_t mpr_disp_h; +#endif + cl_disp_reg_handle_t infr_disp_h; + cl_disp_reg_handle_t infir_disp_h; + cl_disp_reg_handle_t vlarb_disp_h; + cl_disp_reg_handle_t slvl_disp_h; + cl_disp_reg_handle_t pkey_disp_h; + cl_disp_reg_handle_t lft_disp_h; + cl_disp_reg_handle_t sir_disp_h; + cl_disp_reg_handle_t mft_disp_h; } osm_sa_t; /* * FIELDS @@ -234,24 +238,14 @@ typedef struct _osm_sa * * nr * -* nr_ctrl -* * pir_rcv * -* 
pir_rcv_ctrl -* * lr * -* lr_ctrl -* * pr * -* pr_ctrl -* * smir * -* smir_ctrl -* * SEE ALSO * SM object *********/ diff --git a/osm/include/opensm/osm_sa_class_port_info.h b/osm/include/opensm/osm_sa_class_port_info.h index 9caba7a..ca159a2 100644 --- a/osm/include/opensm/osm_sa_class_port_info.h +++ b/osm/include/opensm/osm_sa_class_port_info.h @@ -100,12 +100,11 @@ BEGIN_C_DECLS */ typedef struct _osm_cpi_rcv { - osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - + osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; } osm_cpi_rcv_t; /* * FIELDS @@ -243,14 +242,14 @@ osm_cpi_rcv_init( */ void osm_cpi_rcv_process( - IN osm_cpi_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_cpi_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the ClassPortInfo attribute. * diff --git a/osm/include/opensm/osm_sa_class_port_info_ctrl.h b/osm/include/opensm/osm_sa_class_port_info_ctrl.h deleted file mode 100644 index 447617c..0000000 --- a/osm/include/opensm/osm_sa_class_port_info_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_cpi_rcv_ctrl_t. - * This object represents a controller that receives the IBA ClassPortInfo - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.2 $ - */ - -#ifndef _OSM_CPICTRL_H_ -#define _OSM_CPICTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Class Port Info Receive Controller -* NAME -* Class Port Info Receive Controller -* -* DESCRIPTION -* The Class Port Info Receive Controller object encapsulates -* the information requested by the ClassPortInfo attribute. -* -* The ClassPortInfo Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Eitan Zahavi, Mellanox -* -*********/ - -/****s* OpenSM: ClassPort Info Receive Controller/osm_cpi_rcv_ctrl_t -* NAME -* osm_cpi_rcv_ctrl_t -* -* DESCRIPTION -* ClassPort Info Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_cpi_rcv_ctrl -{ - osm_cpi_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_cpi_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the ClassPort Info Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Class Port Info Receive Controller object -* Class Port Info Receiver object -*********/ - -/****f* OpenSM: Class Port Info Receive Controller/osm_cpi_rcv_ctrl_construct -* NAME -* osm_cpi_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Class Port Info Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_cpi_rcv_ctrl_construct( - IN osm_cpi_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Class Port Info Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_cpi_rcv_ctrl_init, osm_cpi_rcv_ctrl_destroy, -* and osm_cpi_rcv_ctrl_is_inited. -* -* Calling osm_cpi_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_cpi_rcv_ctrl_init. -* -* SEE ALSO -* Class Port Info Receive Controller object, osm_cpi_rcv_ctrl_init, -* osm_cpi_rcv_ctrl_destroy, osm_cpi_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Class Port Info Receive Controller/osm_cpi_rcv_ctrl_destroy -* NAME -* osm_cpi_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_cpi_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_cpi_rcv_ctrl_destroy( - IN osm_cpi_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Class Port Info Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_cpi_rcv_ctrl_construct or osm_cpi_rcv_ctrl_init. -* -* SEE ALSO -* Class Port Info Receive Controller object, osm_cpi_rcv_ctrl_construct, -* osm_cpi_rcv_ctrl_init -*********/ - -/****f* OpenSM: Class Port Info Receive Controller/osm_cpi_rcv_ctrl_init -* NAME -* osm_cpi_rcv_ctrl_init -* -* DESCRIPTION -* The osm_cpi_rcv_ctrl_init function initializes a -* Class Port Info Receive Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t osm_cpi_rcv_ctrl_init( - IN osm_cpi_rcv_ctrl_t* const p_ctrl, - IN osm_cpi_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_cpi_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_cpi_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Class Port Info Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Class Port Info Receive Controller methods. -* -* SEE ALSO -* Class Port Info Receive Controller object, osm_cpi_rcv_ctrl_construct, -* osm_cpi_rcv_ctrl_destroy, osm_cpi_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Class Port Info Receive Controller/osm_cpi_rcv_ctrl_is_inited -* NAME -* osm_cpi_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_cpi_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_cpi_rcv_ctrl_is_inited( - IN const osm_cpi_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_cpi_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_cpi_rcv_ctrl_construct or osm_cpi_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* Class Port Info Receive Controller object, osm_cpi_rcv_ctrl_construct, -* osm_cpi_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_CPICTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_guidinfo_record.h b/osm/include/opensm/osm_sa_guidinfo_record.h index f22c525..5c23cf9 100644 --- a/osm/include/opensm/osm_sa_guidinfo_record.h +++ b/osm/include/opensm/osm_sa_guidinfo_record.h @@ -98,13 +98,12 @@ BEGIN_C_DECLS */ typedef struct _osm_gir_rcv { - const osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pool; - + const osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t pool; } osm_gir_rcv_t; /* * FIELDS @@ -253,14 +252,14 @@ osm_gir_rcv_init( */ void osm_gir_rcv_process( - IN osm_gir_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_gir_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's GUIDInfoRecord attribute. * diff --git a/osm/include/opensm/osm_sa_guidinfo_record_ctrl.h b/osm/include/opensm/osm_sa_guidinfo_record_ctrl.h deleted file mode 100644 index b9d0cfb..0000000 --- a/osm/include/opensm/osm_sa_guidinfo_record_ctrl.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_sa_gir_rec_rcv_ctrl_t. - * This object represents a controller that receives the IBA GUID Info - * record query from SA client. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - */ - -#ifndef _OSM_GIR_CTRL_H_ -#define _OSM_GIR_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/GUID Info Record Receive Controller -* NAME -* GUID Info Record Receive Controller -* -* DESCRIPTION -* The GUID Info Record Receive Controller object encapsulates -* the information needed to handle GUID Info record query from SA client. -* -* The GUID Info Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Hal Rosenstock, Voltaire -* -*********/ - -/****s* OpenSM: GUID Info Record Receive Controller/osm_gir_rcv_ctrl_t -* NAME -* osm_gir_rcv_ctrl_t -* -* DESCRIPTION -* GUID Info Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_gir_rcv_ctrl -{ - osm_gir_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_gir_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the GUID Info Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* GUID Info Record Receive Controller object -* GUID Info Record Receiver object -*********/ - -/****f* OpenSM: GUID Info Record Receive Controller/osm_gir_rec_rcv_ctrl_construct -* NAME -* osm_gir_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a GUID Info Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_gir_rcv_ctrl_construct( - IN osm_gir_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a GUID Info Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_gir_rcv_ctrl_init, osm_gir_rcv_ctrl_destroy -* -* Calling osm_gir_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_gir_rcv_ctrl_init. -* -* SEE ALSO -* GUID Info Record Receive Controller object, osm_gir_rcv_ctrl_init, -* osm_gir_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: GUID Info Record Receive Controller/osm_gir_rcv_ctrl_destroy -* NAME -* osm_gir_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_gir_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_gir_rcv_ctrl_destroy( - IN osm_gir_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* GUIDInfo Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_gir_rcv_ctrl_construct or osm_gir_rcv_ctrl_init. -* -* SEE ALSO -* GUIDInfo Record Receive Controller object, osm_gir_rcv_ctrl_construct, -* osm_gir_rcv_ctrl_init -*********/ - -/****f* OpenSM: GUID Info Record Receive Controller/osm_gir_rcv_ctrl_init -* NAME -* osm_gir_rcv_ctrl_init -* -* DESCRIPTION -* The osm_gir_rcv_ctrl_init function initializes a -* GUID Info Record Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_gir_rcv_ctrl_init( - IN osm_gir_rcv_ctrl_t* const p_ctrl, - IN osm_gir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_gir_rcv_ctrl_t object to initialize. 
-* -* p_rcv -* [in] Pointer to an osm_gir_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the GUID Info Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other GUID Info Record Receive Controller methods. -* -* SEE ALSO -* GUID Info Record Receive Controller object, osm_gir_rcv_ctrl_construct, -* osm_gir_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_GIR_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_informinfo.h b/osm/include/opensm/osm_sa_informinfo.h index c22c1eb..c935995 100644 --- a/osm/include/opensm/osm_sa_informinfo.h +++ b/osm/include/opensm/osm_sa_informinfo.h @@ -102,12 +102,12 @@ BEGIN_C_DECLS */ typedef struct _osm_infr_rcv { - osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pool; + osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t pool; } osm_infr_rcv_t; /* * FIELDS @@ -249,14 +249,14 @@ osm_infr_rcv_init( */ void osm_infr_rcv_process( - IN osm_infr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_infr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's InformInfo attribute. * NOTES @@ -277,14 +277,14 @@ osm_infr_rcv_process( */ void osm_infir_rcv_process( - IN osm_infr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_infr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's InformInfo Record attribute. 
* NOTES diff --git a/osm/include/opensm/osm_sa_informinfo_ctrl.h b/osm/include/opensm/osm_sa_informinfo_ctrl.h deleted file mode 100644 index 6213748..0000000 --- a/osm/include/opensm/osm_sa_informinfo_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_infr_rcv_ctrl_t. - * This object represents a controller that receives the IBA InfromInfo - * Set method attribute from a node. 
- * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.3 $ - */ - -#ifndef _OSM_INFR_RCV_CTRL_H_ -#define _OSM_INFR_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/InformInfo Receive Controller -* NAME -* InformInfo Receive Controller -* -* DESCRIPTION -* The InformInfo Receive Controller object encapsulates -* the information needed to receive the InformInfo attribute from a node. -* -* The InformInfo Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Eitan Zahavi, Mellanox -* -*********/ - -/****s* OpenSM: InformInfo Receive Controller/osm_infr_rcv_ctrl_t -* NAME -* osm_infr_rcv_ctrl_t -* -* DESCRIPTION -* InformInfo Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_infr_rcv_ctrl -{ - osm_infr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - cl_disp_reg_handle_t h_disp2; -} osm_infr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the InformInfo Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* InformInfo Receive Controller object -* InformInfo Receiver object -*********/ - -/****f* OpenSM: InformInfo Receive Controller/osm_infr_rcv_ctrl_construct -* NAME -* osm_infr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a InformInfo Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_infr_rcv_ctrl_construct( - IN osm_infr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a InformInfo Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_infr_rcv_ctrl_init, osm_infr_rcv_ctrl_destroy, -* and osm_infr_rcv_ctrl_is_inited. -* -* Calling osm_infr_rcv_ctrl_construct is a prerequisite to calling any -* other method except osm_infr_rcv_ctrl_init. -* -* SEE ALSO -* InformInfo Receive Controller object, osm_infr_rcv_ctrl_init, -* osm_infr_rcv_ctrl_destroy, osm_infr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: InformInfo Receive Controller/osm_infr_rcv_ctrl_destroy -* NAME -* osm_infr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_infr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_infr_rcv_ctrl_destroy( - IN osm_infr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* InformInfo Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_infr_rcv_ctrl_construct or osm_infr_rcv_ctrl_init. -* -* SEE ALSO -* InformInfo Receive Controller object, osm_infr_rcv_ctrl_construct, -* osm_infr_rcv_ctrl_init -*********/ - -/****f* OpenSM: InformInfo Receive Controller/osm_infr_rcv_ctrl_init -* NAME -* osm_infr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_infr_rcv_ctrl_init function initializes a -* InformInfo Receive Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t osm_infr_rcv_ctrl_init( - IN osm_infr_rcv_ctrl_t* const p_ctrl, - IN osm_infr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_infr_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_infr_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the InformInfo Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other InformInfo Receive Controller methods. -* -* SEE ALSO -* InformInfo Receive Controller object, osm_infr_rcv_ctrl_construct, -* osm_infr_rcv_ctrl_destroy, osm_infr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: InformInfo Receive Controller/osm_infr_rcv_ctrl_is_inited -* NAME -* osm_infr_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_infr_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_infr_rcv_ctrl_is_inited( - IN const osm_infr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_infr_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_infr_rcv_ctrl_construct or osm_infr_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* InformInfo Receive Controller object, osm_infr_rcv_ctrl_construct, -* osm_infr_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_INFR_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_lft_record.h b/osm/include/opensm/osm_sa_lft_record.h index 8d3e596..c055e9e 100644 --- a/osm/include/opensm/osm_sa_lft_record.h +++ b/osm/include/opensm/osm_sa_lft_record.h @@ -100,13 +100,13 @@ BEGIN_C_DECLS */ typedef struct _osm_lft { - osm_subn_t* p_subn; - osm_stats_t* p_stats; - osm_sa_resp_t* p_resp; - osm_mad_pool_t* p_mad_pool; - osm_log_t* p_log; - cl_plock_t* p_lock; - cl_qlock_pool_t pool; + osm_subn_t* p_subn; + osm_stats_t* p_stats; + osm_sa_resp_t* p_resp; + osm_mad_pool_t* p_mad_pool; + osm_log_t* p_log; + cl_plock_t* p_lock; + cl_qlock_pool_t pool; } osm_lftr_rcv_t; /* * FIELDS @@ -254,14 +254,14 @@ ib_api_status_t osm_lftr_rcv_init( * SYNOPSIS */ void osm_lftr_rcv_process( - IN osm_lftr_rcv_t* const p_ctrl, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_lftr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the switch node's LinearForwardingTable attribute. * diff --git a/osm/include/opensm/osm_sa_lft_record_ctrl.h b/osm/include/opensm/osm_sa_lft_record_ctrl.h deleted file mode 100644 index 4869851..0000000 --- a/osm/include/opensm/osm_sa_lft_record_ctrl.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_lftr_rcv_ctrl_t. - * This object represents a controller that receives the IBA - * LinearForwardingTable attribute from a switch. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_LFTR_RCV_CTRL_H_ -#define _OSM_LFTR_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Linear Forwarding Table Receive Controller -* NAME -* Linear Forwarding Table Record Receive Controller -* -* DESCRIPTION -* The Linear Forwarding Table Receive Controller object encapsulates -* the information needed to receive the LinearFowardingTable attribute -* from a switch node. -* -* The Linear Forwarding Table Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Eitan Zahavi, Mellanox Technologies LTD -* -*********/ - -/****s* OpenSM: Linear Forwarding Table Receive Controller/osm_lftr_rcv_ctrl_t -* NAME -* osm_lftr_rcv_ctrl_t -* -* DESCRIPTION -* Linear Forwarding Table Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_lftr_rcv_ctrl -{ - osm_lftr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_lftr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Linear Forwarding Table Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. 
-* -* SEE ALSO -* Linear Forwarding Table Receive Controller object -* Linear Forwarding Table Receiver object -*********/ - -/****f* OpenSM: Linear Forwarding Table Receive Controller/osm_lftr_rcv_ctrl_construct -* NAME -* osm_lftr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Linear Forwarding Table Receive -* Controller object. -* -* SYNOPSIS -*/ -void osm_lftr_rcv_ctrl_construct( - IN osm_lftr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Linear Forwarding Table Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_lftr_rcv_ctrl_init, osm_lftr_rcv_ctrl_destroy -* -* Calling osm_lftr_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_lftr_rcv_ctrl_init. -* -* SEE ALSO -* Linear Forwarding Table Receive Controller object, osm_lftr_rcv_ctrl_init, -* osm_lftr_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: Linear Forwarding Table Receive Controller/osm_lftr_rcv_ctrl_destroy -* NAME -* osm_lftr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_lftr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_lftr_rcv_ctrl_destroy( - IN osm_lftr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Linear Forwarding Table Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_lftr_rcv_ctrl_construct or osm_lftr_rcv_ctrl_init. 
-* -* SEE ALSO -* Linear Forwarding Table Receive Controller object, osm_lftr_rcv_ctrl_construct, -* osm_lftr_rcv_ctrl_init -*********/ - -/****f* OpenSM: Linear Forwarding Table Receive Controller/osm_lftr_rcv_ctrl_init -* NAME -* osm_lftr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_lftr_rcv_ctrl_init function initializes a -* Linear Forwarding Table Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_lftr_rcv_ctrl_init( - IN osm_lftr_rcv_ctrl_t* const p_ctrl, - IN osm_lftr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_lftr_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_lftr_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Linear Forwarding Table Receive Controller object -* was initialized successfully. -* -* NOTES -* Allows calling other Linear Forwarding Table Receive Controller methods. 
-* -* SEE ALSO -* Linear Forwarding Table Receive Controller object, -* osm_lftr_rcv_ctrl_construct, osm_lftr_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_LFTR_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_link_record.h b/osm/include/opensm/osm_sa_link_record.h index 3390b35..adc3be8 100644 --- a/osm/include/opensm/osm_sa_link_record.h +++ b/osm/include/opensm/osm_sa_link_record.h @@ -100,13 +100,12 @@ BEGIN_C_DECLS */ typedef struct _osm_lr_rcv { - osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t lr_pool; - + osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t lr_pool; } osm_lr_rcv_t; /* * FIELDS @@ -253,14 +252,14 @@ osm_lr_rcv_init( * SYNOPSIS */ void osm_lr_rcv_process( - IN osm_lr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_lr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's Link Record attribute. * diff --git a/osm/include/opensm/osm_sa_link_record_ctrl.h b/osm/include/opensm/osm_sa_link_record_ctrl.h deleted file mode 100644 index 166b850..0000000 --- a/osm/include/opensm/osm_sa_link_record_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_lr_rcv_ctrl_t. - * This object represents a controller that receives the IBA Link Record - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_LR_CTRL_H_ -#define _OSM_LR_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Link Record Receive Controller -* NAME -* Link Record Receive Controller -* -* DESCRIPTION -* The Link Record Receive Controller object encapsulates -* the information needed to receive the LinkRecord attribute from a node. -* -* The Link Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Ranjit Pandit, Intel -* -*********/ - -/****s* OpenSM: Link Record Receive Controller/osm_lr_rcv_ctrl_t -* NAME -* osm_lr_rcv_ctrl_t -* -* DESCRIPTION -* Link Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_lr_rcv_ctrl -{ - osm_lr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_lr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Link Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Link Record Receive Controller object -* Link Record Receiver object -*********/ - -/****f* OpenSM: Link Record Receive Controller/osm_lr_rcv_ctrl_construct -* NAME -* osm_lr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Link Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_lr_rcv_ctrl_construct( - IN osm_lr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Link Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_lr_rcv_ctrl_init, osm_lr_rcv_ctrl_destroy, -* and osm_lr_rcv_ctrl_is_inited. -* -* Calling osm_lr_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_lr_rcv_ctrl_init. -* -* SEE ALSO -* Link Record Receive Controller object, osm_lr_rcv_ctrl_init, -* osm_lr_rcv_ctrl_destroy, osm_lr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Link Record Receive Controller/osm_lr_rcv_ctrl_destroy -* NAME -* osm_lr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_lr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_lr_rcv_ctrl_destroy( - IN osm_lr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Link Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_lr_rcv_ctrl_construct or osm_lr_rcv_ctrl_init. -* -* SEE ALSO -* Link Record Receive Controller object, osm_lr_rcv_ctrl_construct, -* osm_lr_rcv_ctrl_init -*********/ - -/****f* OpenSM: Link Record Receive Controller/osm_lr_rcv_ctrl_init -* NAME -* osm_lr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_lr_rcv_ctrl_init function initializes a -* Link Record Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_lr_rcv_ctrl_init( - IN osm_lr_rcv_ctrl_t* const p_ctrl, - IN osm_lr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_lr_rcv_ctrl_t object to initialize. 
-* -* p_rcv -* [in] Pointer to an osm_lr_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Link Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Link Record Receive Controller methods. -* -* SEE ALSO -* Link Record Receive Controller object, osm_lr_rcv_ctrl_construct, -* osm_lr_rcv_ctrl_destroy, osm_lr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Link Record Receive Controller/osm_lr_rcv_ctrl_is_inited -* NAME -* osm_lr_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_lr_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_lr_rcv_ctrl_is_inited( - IN const osm_lr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_lr_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_lr_rcv_ctrl_construct or osm_lr_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* Link Record Receive Controller object, osm_lr_rcv_ctrl_construct, -* osm_lr_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_LR_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_mcmember_record.h b/osm/include/opensm/osm_sa_mcmember_record.h index 802f3e8..c87d100 100644 --- a/osm/include/opensm/osm_sa_mcmember_record.h +++ b/osm/include/opensm/osm_sa_mcmember_record.h @@ -103,14 +103,14 @@ BEGIN_C_DECLS typedef struct _osm_mcmr { - osm_subn_t *p_subn; - osm_sm_t *p_sm; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - uint16_t mlid_ho; - cl_qlock_pool_t pool; + osm_subn_t *p_subn; + osm_sm_t *p_sm; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + uint16_t mlid_ho; + cl_qlock_pool_t pool; } osm_mcmr_recv_t; @@ -253,14 +253,14 @@ ib_api_status_t osm_mcmr_rcv_init( * SYNOPSIS */ void osm_mcmr_rcv_process( - IN osm_mcmr_recv_t* const p_ctrl, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_mcmr_recv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's MCMemberRecord attribute. * diff --git a/osm/include/opensm/osm_sa_mcmember_record_ctrl.h b/osm/include/opensm/osm_sa_mcmember_record_ctrl.h deleted file mode 100644 index a73b817..0000000 --- a/osm/include/opensm/osm_sa_mcmember_record_ctrl.h +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_mcmr_rcv_ctrl_t. - * This object represents a controller that receives the IBA MCMemberRecord - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - - -#ifndef _OSM_MCMRCTRL_H -#define _OSM_MCMRCTRL_H - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/MCMember Receive Controller -* NAME -* MCMember Receive Controller -* -* DESCRIPTION -* The MCMember Receive Controller object encapsulates -* the information needed to receive the MCMemberRecord attribute from a node. -* -* The MCMember Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Ranjit Pandit, Intel -* -*********/ - -/****s* OpenSM: MCMember Receive Controller/osm_mcmr_rcv_ctrl_t -* NAME -* osm_mcmr_rcv_ctrl_t -* -* DESCRIPTION -* MCMember Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_mcmr_rcv_ctrl -{ - osm_mcmr_recv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_mcmr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the MCMember Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* MCMember Receive Controller object -* MCMember Receiver object -*********/ - -/****f* OpenSM: MCMember Receive Controller/osm_mcmr_rcv_ctrl_construct -* NAME -* osm_mcmr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a MCMember Receive Controller object. -* -* SYNOPSIS -*/ -void osm_mcmr_rcv_ctrl_construct( - IN osm_mcmr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a MCMember Receive Controller -* object to construct. 
-* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_mcmr_rcv_ctrl_init, osm_mcmr_rcv_ctrl_destroy, -* and osm_mcmr_ctrl_is_inited. -* -* Calling osm_mcmr_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_mcmr_rcv_ctrl_init. -* -* SEE ALSO -* MCMember Receive Controller object, osm_mcmr_rcv_ctrl_init, -* osm_mcmr_rcv_ctrl_destroy, osm_mcmr_ctrl_is_inited -*********/ - -/****f* OpenSM: MCMember Receive Controller/osm_mcmr_rcv_ctrl_destroy -* NAME -* osm_mcmr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_mcmr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_mcmr_rcv_ctrl_destroy( - IN osm_mcmr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* MCMember Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_mcmr_rcv_ctrl_construct or osm_mcmr_rcv_ctrl_init. -* -* SEE ALSO -* MCMember Receive Controller object, osm_mcmr_rcv_ctrl_construct, -* osm_mcmr_rcv_ctrl_init -*********/ - -/****f* OpenSM: MCMember Receive Controller/osm_mcmr_rcv_ctrl_init -* NAME -* osm_mcmr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_mcmr_rcv_ctrl_init function initializes a -* MCMember Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_mcmr_rcv_ctrl_init( - IN osm_mcmr_rcv_ctrl_t* const p_ctrl, - IN osm_mcmr_recv_t* const p_mcmr, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_mcmr_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_mcmr_recv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. 
-* -* RETURN VALUES -* CL_SUCCESS if the MCMember Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other MCMember Receive Controller methods. -* -* SEE ALSO -* MCMember Receive Controller object, osm_mcmr_rcv_ctrl_construct, -* osm_mcmr_rcv_ctrl_destroy, osm_mcmr_ctrl_is_inited -*********/ - -/****f* OpenSM: MCMember Receive Controller/osm_mcmr_ctrl_is_inited -* NAME -* osm_mcmr_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_mcmr_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_mcmr_ctrl_is_inited( - IN const osm_mcmr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_mcmr_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_mcmr_rcv_ctrl_construct or osm_mcmr_rcv_ctrl_init must be -* called before using this function. -* -* SEE ALSO -* MCMember Receive Controller object, osm_mcmr_rcv_ctrl_construct, -* osm_mcmr_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_MCMRCTRL_H */ diff --git a/osm/include/opensm/osm_sa_mft_record.h b/osm/include/opensm/osm_sa_mft_record.h index f961206..e63e4ef 100644 --- a/osm/include/opensm/osm_sa_mft_record.h +++ b/osm/include/opensm/osm_sa_mft_record.h @@ -99,13 +99,13 @@ BEGIN_C_DECLS */ typedef struct _osm_mft { - osm_subn_t* p_subn; - osm_stats_t* p_stats; - osm_sa_resp_t* p_resp; - osm_mad_pool_t* p_mad_pool; - osm_log_t* p_log; - cl_plock_t* p_lock; - cl_qlock_pool_t pool; + osm_subn_t* p_subn; + osm_stats_t* p_stats; + osm_sa_resp_t* p_resp; + osm_mad_pool_t* p_mad_pool; + osm_log_t* p_log; + cl_plock_t* p_lock; + cl_qlock_pool_t pool; } osm_mftr_rcv_t; /* * FIELDS @@ -253,14 +253,14 @@ ib_api_status_t osm_mftr_rcv_init( * SYNOPSIS */ void osm_mftr_rcv_process( - IN osm_mftr_rcv_t* const p_ctrl, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to 
an osm_mftr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the switch node's MulticastForwardingTable attribute. * diff --git a/osm/include/opensm/osm_sa_mft_record_ctrl.h b/osm/include/opensm/osm_sa_mft_record_ctrl.h deleted file mode 100644 index a28374d..0000000 --- a/osm/include/opensm/osm_sa_mft_record_ctrl.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -/* - * Abstract: - * Declaration of osm_mftr_rcv_ctrl_t. - * This object represents a controller that receives the IBA - * MulticastForwardingTable attribute from a switch. - * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - */ - -#ifndef _OSM_MFTR_RCV_CTRL_H_ -#define _OSM_MFTR_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Multicast Forwarding Table Receive Controller -* NAME -* Multicast Forwarding Table Record Receive Controller -* -* DESCRIPTION -* The Multicast Forwarding Table Receive Controller object encapsulates -* the information needed to receive the MulticastFowardingTable attribute -* from a switch node. -* -* The Multicast Forwarding Table Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Hal Rosenstock, Voltaire -* -*********/ - -/****s* OpenSM: Multicast Forwarding Table Receive Controller/osm_mftr_rcv_ctrl_t -* NAME -* osm_mftr_rcv_ctrl_t -* -* DESCRIPTION -* Multicast Forwarding Table Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_mftr_rcv_ctrl -{ - osm_mftr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; -} osm_mftr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Multicast Forwarding Table Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. 
-* -* SEE ALSO -* Multicast Forwarding Table Receive Controller object -* Multicast Forwarding Table Receiver object -*********/ - -/****f* OpenSM: Multicast Forwarding Table Receive Controller/osm_mftr_rcv_ctrl_construct -* NAME -* osm_mftr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Multicast Forwarding Table Receive -* Controller object. -* -* SYNOPSIS -*/ -void osm_mftr_rcv_ctrl_construct( - IN osm_mftr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Multicast Forwarding Table Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_mftr_rcv_ctrl_init, osm_mftr_rcv_ctrl_destroy -* -* Calling osm_mftr_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_mftr_rcv_ctrl_init. -* -* SEE ALSO -* Multicast Forwarding Table Receive Controller object, osm_mftr_rcv_ctrl_init, -* osm_mftr_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: Multicast Forwarding Table Receive Controller/osm_mftr_rcv_ctrl_destroy -* NAME -* osm_mftr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_mftr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_mftr_rcv_ctrl_destroy( - IN osm_mftr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Multicast Forwarding Table Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_mftr_rcv_ctrl_construct or osm_mftr_rcv_ctrl_init. 
-* -* SEE ALSO -* Multicast Forwarding Table Receive Controller object, osm_mftr_rcv_ctrl_construct, -* osm_mftr_rcv_ctrl_init -*********/ - -/****f* OpenSM: Multicast Forwarding Table Receive Controller/osm_mftr_rcv_ctrl_init -* NAME -* osm_mftr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_mftr_rcv_ctrl_init function initializes a -* Multicast Forwarding Table Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_mftr_rcv_ctrl_init( - IN osm_mftr_rcv_ctrl_t* const p_ctrl, - IN osm_mftr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_mftr_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_mftr_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Multicast Forwarding Table Receive Controller object -* was initialized successfully. -* -* NOTES -* Allows calling other Multicast Forwarding Table Receive Controller methods. -* -* SEE ALSO -* Multicast Forwarding Table Receive Controller object, -* osm_mftr_rcv_ctrl_construct, osm_mftr_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_MFTR_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_multipath_record.h b/osm/include/opensm/osm_sa_multipath_record.h index d647227..6f99ec9 100644 --- a/osm/include/opensm/osm_sa_multipath_record.h +++ b/osm/include/opensm/osm_sa_multipath_record.h @@ -247,14 +247,14 @@ osm_mpr_rcv_init( */ void osm_mpr_rcv_process( - IN osm_mpr_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_mpr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's MultiPathRecord attribute. 
* diff --git a/osm/include/opensm/osm_sa_multipath_record_ctrl.h b/osm/include/opensm/osm_sa_multipath_record_ctrl.h deleted file mode 100644 index ca618f9..0000000 --- a/osm/include/opensm/osm_sa_multipath_record_ctrl.h +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright (c) 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_mpr_rcv_ctrl_t. - * This object represents a controller that receives the IBA - * MultiPathRecord attribute from a node. 
- * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - */ - -#ifndef _OSM_MPRCTRL_H_ -#define _OSM_MPRCTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/MultiPath Record Receive Controller -* NAME -* MultiPath Record Receive Controller -* -* DESCRIPTION -* The MultiPath Record Receive Controller object encapsulates -* the information needed to receive the MultiPathRecord attribute from a node. -* -* The MultiPath record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Hal Rosenstock, Voltaire -* -*********/ - -/****s* OpenSM: MultiPath Record Receive Controller/osm_mpr_rcv_ctrl_t -* NAME -* osm_mpr_rcv_ctrl_t -* -* DESCRIPTION -* MultiPath Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_mpr_rcv_ctrl -{ - osm_mpr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_mpr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the MultiPath Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* MultiPath Record Receive Controller object -* MultiPath Record Receiver object -*********/ - -/****f* OpenSM: MultiPath Record Receive Controller/osm_pr_rcv_ctrl_construct -* NAME -* osm_mpr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a MultiPath Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_mpr_rcv_ctrl_construct( - IN osm_mpr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a MultiPath Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_mpr_rcv_ctrl_init, osm_mpr_rcv_ctrl_destroy, -* and osm_mpr_rcv_ctrl_is_inited. -* -* Calling osm_mpr_rcv_ctrl_construct is a prerequisite to calling any -* other method except osm_mpr_rcv_ctrl_init. -* -* SEE ALSO -* MultiPath Record Receive Controller object, osm_mpr_rcv_ctrl_init, -* osm_mpr_rcv_ctrl_destroy, osm_mpr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: MultiPath Record Receive Controller/osm_mpr_rcv_ctrl_destroy -* NAME -* osm_mpr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_mpr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_mpr_rcv_ctrl_destroy( - IN osm_mpr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* MultiPath Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_mpr_rcv_ctrl_construct or osm_mpr_rcv_ctrl_init. -* -* SEE ALSO -* MultiPath Record Receive Controller object, osm_mpr_rcv_ctrl_construct, -* osm_mpr_rcv_ctrl_init -*********/ - -/****f* OpenSM: MultiPath Record Receive Controller/osm_mpr_rcv_ctrl_init -* NAME -* osm_mpr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_mpr_rcv_ctrl_init function initializes a -* MultiPath Record Receive Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t osm_mpr_rcv_ctrl_init( - IN osm_mpr_rcv_ctrl_t* const p_ctrl, - IN osm_mpr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_mpr_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_mpr_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the MultiPath Record Receive Controller object was -* initialized successfully. -* -* NOTES -* Allows calling other MultiPath Record Receive Controller methods. -* -* SEE ALSO -* MultiPath Record Receive Controller object, osm_pr_rcv_ctrl_construct, -* osm_mpr_rcv_ctrl_destroy, osm_mpr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: MultiPath Record Receive Controller/osm_mpr_rcv_ctrl_is_inited -* NAME -* osm_mpr_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_mpr_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_mpr_rcv_ctrl_is_inited( - IN const osm_mpr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_mpr_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_mpr_rcv_ctrl_construct or osm_mpr_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* MultiPath Record Receive Controller object, osm_mpr_rcv_ctrl_construct, -* osm_mpr_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_MPRCTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_node_record.h b/osm/include/opensm/osm_sa_node_record.h index f796eda..c0e8988 100644 --- a/osm/include/opensm/osm_sa_node_record.h +++ b/osm/include/opensm/osm_sa_node_record.h @@ -99,13 +99,12 @@ BEGIN_C_DECLS */ typedef struct _osm_nr_recv { - const osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pool; - + const osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t pool; } osm_nr_rcv_t; /* * FIELDS @@ -252,14 +251,14 @@ ib_api_status_t osm_nr_rcv_init( * SYNOPSIS */ void osm_nr_rcv_process( - IN osm_nr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_nr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's NodeRecord attribute. * diff --git a/osm/include/opensm/osm_sa_node_record_ctrl.h b/osm/include/opensm/osm_sa_node_record_ctrl.h deleted file mode 100644 index 02b60b3..0000000 --- a/osm/include/opensm/osm_sa_node_record_ctrl.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_nr_rcv_ctrl_t. - * This object represents a controller that receives the IBA NodeInfo - * record from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_NR_CTRL_H_ -#define _OSM_NR_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Node Record Receive Controller -* NAME -* Node Record Receive Controller -* -* DESCRIPTION -* The Node Record Receive Controller object encapsulates -* the information needed to receive the NodeInfo attribute from a node. -* -* The Node Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Anil S Keshavamurthy, Intel -* -*********/ - -/****s* OpenSM: Node Record Receive Controller/osm_nr_rcv_ctrl_t -* NAME -* osm_nr_rcv_ctrl_t -* -* DESCRIPTION -* Node Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_nr_ctrl -{ - osm_nr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_nr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Node Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Node Record Receive Controller object -* Node Record Receiver object -*********/ - -/****f* OpenSM: Node Record Receive Controller/osm_nr_rcv_ctrl_construct -* NAME -* osm_nr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Node Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_nr_rcv_ctrl_construct( - IN osm_nr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Node Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_nr_rcv_ctrl_init, osm_nr_rcv_ctrl_destroy, -* -* Calling osm_nr_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_nr_rcv_ctrl_init. -* -* SEE ALSO -* Node Record Receive Controller object, osm_nr_rcv_ctrl_init, -* osm_nr_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: Node Record Receive Controller/osm_nr_rcv_ctrl_destroy -* NAME -* osm_nr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_nr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_nr_rcv_ctrl_destroy( - IN osm_nr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Node Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_nr_rcv_ctrl_construct or osm_nr_rcv_ctrl_init. -* -* SEE ALSO -* Node Record Receive Controller object, osm_nr_rcv_ctrl_construct, -* osm_nr_rcv_ctrl_init -*********/ - -/****f* OpenSM: Node Record Receive Controller/osm_nr_rcv_ctrl_init -* NAME -* osm_nr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_nr_rcv_ctrl_init function initializes a -* Node Record Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_nr_rcv_ctrl_init( - IN osm_nr_rcv_ctrl_t* const p_ctrl, - IN osm_nr_rcv_t* const p_nr, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_nr_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_nr_rcv_t object. 
-* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Node Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Node Record Receive Controller methods. -* -* SEE ALSO -* Node Record Receive Controller object, osm_nr_rcv_ctrl_construct, -* osm_nr_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_NR_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_path_record.h b/osm/include/opensm/osm_sa_path_record.h index 9196960..29667ef 100644 --- a/osm/include/opensm/osm_sa_path_record.h +++ b/osm/include/opensm/osm_sa_path_record.h @@ -102,12 +102,12 @@ BEGIN_C_DECLS */ typedef struct _osm_pr_rcv { - osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pr_pool; + osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t pr_pool; } osm_pr_rcv_t; /* * FIELDS @@ -248,14 +248,14 @@ osm_pr_rcv_init( */ void osm_pr_rcv_process( - IN osm_pr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_pr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's PathRecord attribute. * diff --git a/osm/include/opensm/osm_sa_path_record_ctrl.h b/osm/include/opensm/osm_sa_path_record_ctrl.h deleted file mode 100644 index 7fba54a..0000000 --- a/osm/include/opensm/osm_sa_path_record_ctrl.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_pr_rcv_ctrl_t. - * This object represents a controller that receives the IBA PathRecord - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_PRCTRL_H_ -#define _OSM_PRCTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Path Record Receive Controller -* NAME -* Path Record Receive Controller -* -* DESCRIPTION -* The Path Record Receive Controller object encapsulates -* the information needed to receive the PathRecord attribute from a node. -* -* The Path record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Ranjit Pandit, Intel -* -*********/ - -/****s* OpenSM: Path Record Receive Controller/osm_pr_rcv_ctrl_t -* NAME -* osm_pr_rcv_ctrl_t -* -* DESCRIPTION -* Path Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_pr_rcv_ctrl -{ - osm_pr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_pr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Path Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Path Record Receive Controller object -* Path Record Receiver object -*********/ - -/****f* OpenSM: Path Record Receive Controller/osm_pr_rcv_ctrl_construct -* NAME -* osm_pr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Path Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_pr_rcv_ctrl_construct( - IN osm_pr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Path Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_pr_rcv_ctrl_init, osm_pr_rcv_ctrl_destroy, -* and osm_pr_rcv_ctrl_is_inited. -* -* Calling osm_pr_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_pr_rcv_ctrl_init. -* -* SEE ALSO -* Path Record Receive Controller object, osm_pr_rcv_ctrl_init, -* osm_pr_rcv_ctrl_destroy, osm_pr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Path Record Receive Controller/osm_pr_rcv_ctrl_destroy -* NAME -* osm_pr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_pr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_pr_rcv_ctrl_destroy( - IN osm_pr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Path Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_pr_rcv_ctrl_construct or osm_pr_rcv_ctrl_init. -* -* SEE ALSO -* Path Record Receive Controller object, osm_pr_rcv_ctrl_construct, -* osm_pr_rcv_ctrl_init -*********/ - -/****f* OpenSM: Path Record Receive Controller/osm_pr_rcv_ctrl_init -* NAME -* osm_pr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_pr_rcv_ctrl_init function initializes a -* Path Record Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_pr_rcv_ctrl_init( - IN osm_pr_rcv_ctrl_t* const p_ctrl, - IN osm_pr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pr_rcv_ctrl_t object to initialize. 
-* -* p_rcv -* [in] Pointer to an osm_pr_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Path Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Path Record Receive Controller methods. -* -* SEE ALSO -* Path Record Receive Controller object, osm_pr_rcv_ctrl_construct, -* osm_pr_rcv_ctrl_destroy, osm_pr_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Path Record Receive Controller/osm_pr_rcv_ctrl_is_inited -* NAME -* osm_pr_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_pr_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_pr_rcv_ctrl_is_inited( - IN const osm_pr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pr_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_pr_rcv_ctrl_construct or osm_pr_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* Path Record Receive Controller object, osm_pr_rcv_ctrl_construct, -* osm_pr_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_PRCTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_pkey_record.h b/osm/include/opensm/osm_sa_pkey_record.h index f4eb73d..aceab9a 100644 --- a/osm/include/opensm/osm_sa_pkey_record.h +++ b/osm/include/opensm/osm_sa_pkey_record.h @@ -87,13 +87,12 @@ BEGIN_C_DECLS */ typedef struct _osm_pkey_rec_rcv { - const osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pool; - + const osm_subn_t* p_subn; + osm_sa_resp_t* p_resp; + osm_mad_pool_t* p_mad_pool; + osm_log_t* p_log; + cl_plock_t* p_lock; + cl_qlock_pool_t pool; } osm_pkey_rec_rcv_t; /* * FIELDS @@ -242,14 +241,14 @@ osm_pkey_rec_rcv_init( */ void osm_pkey_rec_rcv_process( - IN osm_pkey_rec_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_pkey_rec_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the P_Key Record Query attribute. * diff --git a/osm/include/opensm/osm_sa_pkey_record_ctrl.h b/osm/include/opensm/osm_sa_pkey_record_ctrl.h deleted file mode 100644 index e655692..0000000 --- a/osm/include/opensm/osm_sa_pkey_record_ctrl.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef _OSM_PKEY_REC_CTRL_H_ -#define _OSM_PKEY_REC_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/P_Key Record Receive Controller -* NAME -* P_Key Record Receive Controller -* -* DESCRIPTION -* The P_Key Record Receive Controller object encapsulates -* the information needed to handle P_Key record query from SA client. -* -* The P_Key Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. 
-* -* AUTHOR -* Yael Kalka, Mellanox -* -*********/ - -/****s* OpenSM: P_Key Record Receive Controller/osm_pkey_rec_rcv_ctrl_t -* NAME -* osm_pkey_rec_rcv_ctrl_t -* -* DESCRIPTION -* P_Key Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_pkey_rec_rcv_ctrl -{ - osm_pkey_rec_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_pkey_rec_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the P_Key Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* P_Key Record Receive Controller object -* P_Key Record Receiver object -*********/ - -/****f* OpenSM: P_Key Record Receive Controller/osm_pkey_rec_rcv_ctrl_construct -* NAME -* osm_pkey_rec_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a P_Key Record Receive Controller object. -* -* SYNOPSIS -*/ -void osm_pkey_rec_rcv_ctrl_construct( - IN osm_pkey_rec_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a P_Key Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_pkey_rec_rcv_ctrl_init, osm_pkey_rec_rcv_ctrl_destroy -* -* Calling osm_pkey_rec_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_pkey_rec_rcv_ctrl_init. -* -* SEE ALSO -* P_Key Record Receive Controller object, osm_pkey_rec_rcv_ctrl_init, -* osm_pkey_rec_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: P_Key Record Receive Controller/osm_pkey_rec_rcv_ctrl_destroy -* NAME -* osm_pkey_rec_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_pkey_rec_rcv_ctrl_destroy function destroys the object, releasing -* all resources. 
-* -* SYNOPSIS -*/ -void osm_pkey_rec_rcv_ctrl_destroy( - IN osm_pkey_rec_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* P_Key Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_pkey_rec_rcv_ctrl_construct or osm_pkey_rec_rcv_ctrl_init. -* -* SEE ALSO -* P_Key Record Receive Controller object, osm_pkey_rec_rcv_ctrl_construct, -* osm_pkey_rec_rcv_ctrl_init -*********/ - -/****f* OpenSM: P_Key Record Receive Controller/osm_pkey_rec_rcv_ctrl_init -* NAME -* osm_pkey_rec_rcv_ctrl_init -* -* DESCRIPTION -* The osm_pkey_rec_rcv_ctrl_init function initializes a -* P_Key Record Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_pkey_rec_rcv_ctrl_init( - IN osm_pkey_rec_rcv_ctrl_t* const p_ctrl, - IN osm_pkey_rec_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pkey_rec_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_pkey_rec_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the P_Key Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other P_Key Record Receive Controller methods. 
-* -* SEE ALSO -* P_Key Record Receive Controller object, osm_pkey_rec_rcv_ctrl_construct, -* osm_pkey_rec_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_PKEY_REC_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_portinfo_record.h b/osm/include/opensm/osm_sa_portinfo_record.h index 6cd7e87..19cbfbf 100644 --- a/osm/include/opensm/osm_sa_portinfo_record.h +++ b/osm/include/opensm/osm_sa_portinfo_record.h @@ -100,12 +100,12 @@ BEGIN_C_DECLS */ typedef struct _osm_pir_rcv { - osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pool; + osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t pool; } osm_pir_rcv_t; /* * FIELDS @@ -254,14 +254,14 @@ osm_pir_rcv_init( */ void osm_pir_rcv_process( - IN osm_pir_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_pir_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's PortInfoRecord attribute. * diff --git a/osm/include/opensm/osm_sa_portinfo_record_ctrl.h b/osm/include/opensm/osm_sa_portinfo_record_ctrl.h deleted file mode 100644 index 313fe03..0000000 --- a/osm/include/opensm/osm_sa_portinfo_record_ctrl.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_pir_rcv_ctrl_t. - * This object represents a controller that receives the IBA PortInfo - * attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_PIR_CTRL_H_ -#define _OSM_PIR_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/PortInfo Record Receive Controller -* NAME -* PortInfo Record Receive Controller -* -* DESCRIPTION -* The PortInfo Record Receive Controller object encapsulates -* the information needed to receive the PortInfo attribute from a node. -* -* The PortInfo Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Ranjit Pandit, Intel -* -*********/ - -/****s* OpenSM: PortInfo Record Receive Controller/osm_pir_rcv_ctrl_t -* NAME -* osm_pir_rcv_ctrl_t -* -* DESCRIPTION -* PortInfo Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_pir_rcv_ctrl -{ - osm_pir_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_pir_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the PortInfo Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* PortInfo Record Receive Controller object -* PortInfo Record Receiver object -*********/ - -/****f* OpenSM: PortInfo Record Receive Controller/osm_pir_rcv_ctrl_construct -* NAME -* osm_pir_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a PortInfo Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_pir_rcv_ctrl_construct( - IN osm_pir_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a PortInfo Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_pir_rcv_ctrl_init, osm_pir_rcv_ctrl_destroy -* -* Calling osm_pir_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_pir_rcv_ctrl_init. -* -* SEE ALSO -* PortInfo Record Receive Controller object, osm_pir_rcv_ctrl_init, -* osm_pir_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: PortInfo Record Receive Controller/osm_pir_rcv_ctrl_destroy -* NAME -* osm_pir_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_pir_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_pir_rcv_ctrl_destroy( - IN osm_pir_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* PortInfo Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_pir_rcv_ctrl_construct or osm_pir_rcv_ctrl_init. -* -* SEE ALSO -* PortInfo Record Receive Controller object, osm_pir_rcv_ctrl_construct, -* osm_pir_rcv_ctrl_init -*********/ - -/****f* OpenSM: PortInfo Record Receive Controller/osm_pir_rcv_ctrl_init -* NAME -* osm_pir_rcv_ctrl_init -* -* DESCRIPTION -* The osm_pir_rcv_ctrl_init function initializes a -* PortInfo Record Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_pir_rcv_ctrl_init( - IN osm_pir_rcv_ctrl_t* const p_ctrl, - IN osm_pir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_pir_rcv_ctrl_t object to initialize. 
-* -* p_rcv -* [in] Pointer to an osm_pir_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the PortInfo Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other PortInfo Record Receive Controller methods. -* -* SEE ALSO -* PortInfo Record Receive Controller object, osm_pir_rcv_ctrl_construct, -* osm_pir_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_PIR_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_service_record.h b/osm/include/opensm/osm_sa_service_record.h index 8fb9ce4..d485509 100644 --- a/osm/include/opensm/osm_sa_service_record.h +++ b/osm/include/opensm/osm_sa_service_record.h @@ -102,14 +102,13 @@ BEGIN_C_DECLS */ typedef struct _osm_sr_rcv { - osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t sr_pool; - cl_timer_t sr_timer; - + osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t sr_pool; + cl_timer_t sr_timer; } osm_sr_rcv_t; /* * FIELDS @@ -250,14 +249,14 @@ osm_sr_rcv_init( */ void osm_sr_rcv_process( - IN osm_sr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_sr_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's ServiceRecord attribute. * NOTES diff --git a/osm/include/opensm/osm_sa_service_record_ctrl.h b/osm/include/opensm/osm_sa_service_record_ctrl.h deleted file mode 100644 index 1e1bd99..0000000 --- a/osm/include/opensm/osm_sa_service_record_ctrl.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 
- * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_sr_rcv_ctrl_t. - * This object represents a controller that receives the IBA Service - * record attribute from a node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_SRCTRL_H_ -#define _OSM_SRCTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Service Record Receive Controller -* NAME -* Service Record Receive Controller -* -* DESCRIPTION -* The Service Record Receive Controller object encapsulates -* the information needed to receive the Service Record attribute from a node. -* -* The Service Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Anil S Keshavamurthy, Intel -* -*********/ - -/****s* OpenSM: Service Record Receive Controller/osm_sr_rcv_ctrl_t -* NAME -* osm_sr_rcv_ctrl_t -* -* DESCRIPTION -* Service Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_sr_rcv_ctrl -{ - osm_sr_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_sr_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Service Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Service Record Receiver object -*********/ - -/****f* OpenSM: Service Record Receive Controller/osm_sr_rcv_ctrl_construct -* NAME -* osm_sr_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Service Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_sr_rcv_ctrl_construct( - IN osm_sr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Service Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_sr_rcv_ctrl_init, osm_sr_rcv_ctrl_destroy, -* -* Calling osm_sr_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_psr_rcv_ctrl_init. -* -* SEE ALSO -* Service Record Receive Controller object, osm_sr_rcv_ctrl_init, -* osm_sr_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: Service Record Receive Controller/osm_sr_rcv_ctrl_destroy -* NAME -* osm_sr_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_sr_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_sr_rcv_ctrl_destroy( - IN osm_sr_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Service Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_sr_rcv_ctrl_construct or osm_sr_rcv_ctrl_init. -* -* SEE ALSO -* Service Record Receive Controller object, osm_sr_rcv_ctrl_construct, -* osm_sr_rcv_ctrl_init -*********/ - -/****f* OpenSM: Service Record Receive Controller/osm_sr_rcv_ctrl_init -* NAME -* osm_sr_rcv_ctrl_init -* -* DESCRIPTION -* The osm_sr_rcv_ctrl_init function initializes a -* Service Record Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_sr_rcv_ctrl_init( - IN osm_sr_rcv_ctrl_t* const p_ctrl, - IN osm_sr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_sr_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_sr_rcv_t object. 
-* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* IB_SUCCESS if the osm_sr_rcv_t Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Service Record Receive Controller methods. -* -* SEE ALSO -* Service Record Receive Controller object, osm_sr_rcv_ctrl_construct, -* osm_sr_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_SRCTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_slvl_record.h b/osm/include/opensm/osm_sa_slvl_record.h index f4409d1..a5ce9b4 100644 --- a/osm/include/opensm/osm_sa_slvl_record.h +++ b/osm/include/opensm/osm_sa_slvl_record.h @@ -100,13 +100,12 @@ BEGIN_C_DECLS */ typedef struct _osm_slvl_rec_rcv { - const osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pool; - + const osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t pool; } osm_slvl_rec_rcv_t; /* * FIELDS @@ -255,14 +254,14 @@ osm_slvl_rec_rcv_init( */ void osm_slvl_rec_rcv_process( - IN osm_slvl_rec_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_slvl_rec_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the SLtoVL Map Record Query attribute. * diff --git a/osm/include/opensm/osm_sa_slvl_record_ctrl.h b/osm/include/opensm/osm_sa_slvl_record_ctrl.h deleted file mode 100644 index 6fdf77a..0000000 --- a/osm/include/opensm/osm_sa_slvl_record_ctrl.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_sa_slvl_rec_rcv_ctrl_t. - * This object represents a controller that receives the IBA VL Arbitration - * record query from SA client. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.3 $ - */ - -#ifndef _OSM_SLVL_REC_CTRL_H_ -#define _OSM_SLVL_REC_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/SLtoVL Record Receive Controller -* NAME -* SLtoVL Record Receive Controller -* -* DESCRIPTION -* The SLtoVL Mapping Record Receive Controller object encapsulates -* the information needed to handle SLtoVL Mapping record query from SA client. -* -* The SLtoVL Mapping Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Eitan Zahavi, Mellanox -* -*********/ - -/****s* OpenSM: SLtoVL Mapping Record Receive Controller/osm_slvl_rec_rcv_ctrl_t -* NAME -* osm_slvl_rec_rcv_ctrl_t -* -* DESCRIPTION -* SLtoVL Mapping Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_slvl_rec_rcv_ctrl -{ - osm_slvl_rec_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_slvl_rec_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the SLtoVL Mapping Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* SLtoVL Mapping Record Receive Controller object -* SLtoVL Mapping Record Receiver object -*********/ - -/****f* OpenSM: SLtoVL Mapping Record Receive Controller/osm_slvl_rec_rcv_ctrl_construct -* NAME -* osm_slvl_rec_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a SLtoVL Mapping Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_slvl_rec_rcv_ctrl_construct( - IN osm_slvl_rec_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a SLtoVL Mapping Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_slvl_rec_rcv_ctrl_init, osm_slvl_rec_rcv_ctrl_destroy -* -* Calling osm_slvl_rec_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_slvl_rec_rcv_ctrl_init. -* -* SEE ALSO -* SLtoVL Mapping Record Receive Controller object, osm_slvl_rec_rcv_ctrl_init, -* osm_slvl_rec_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: SLtoVL Mapping Record Receive Controller/osm_slvl_rec_rcv_ctrl_destroy -* NAME -* osm_slvl_rec_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_slvl_rec_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_slvl_rec_rcv_ctrl_destroy( - IN osm_slvl_rec_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* SLtoVL Mapping Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_slvl_rec_rcv_ctrl_construct or osm_slvl_rec_rcv_ctrl_init. -* -* SEE ALSO -* SLtoVL Mapping Record Receive Controller object, osm_slvl_rec_rcv_ctrl_construct, -* osm_slvl_rec_rcv_ctrl_init -*********/ - -/****f* OpenSM: SLtoVL Mapping Record Receive Controller/osm_slvl_rec_rcv_ctrl_init -* NAME -* osm_slvl_rec_rcv_ctrl_init -* -* DESCRIPTION -* The osm_slvl_rec_rcv_ctrl_init function initializes a -* SLtoVL Mapping Record Receive Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t osm_slvl_rec_rcv_ctrl_init( - IN osm_slvl_rec_rcv_ctrl_t* const p_ctrl, - IN osm_slvl_rec_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_slvl_rec_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_slvl_rec_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the SLtoVL Mapping Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other SLtoVL Mapping Record Receive Controller methods. -* -* SEE ALSO -* SLtoVL Mapping Record Receive Controller object, osm_slvl_rec_rcv_ctrl_construct, -* osm_slvl_rec_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_SLVL_REC_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_sminfo_record.h b/osm/include/opensm/osm_sa_sminfo_record.h index cafc09b..0020c4f 100644 --- a/osm/include/opensm/osm_sa_sminfo_record.h +++ b/osm/include/opensm/osm_sa_sminfo_record.h @@ -99,13 +99,13 @@ BEGIN_C_DECLS */ typedef struct _osm_smir { - osm_subn_t* p_subn; - osm_stats_t* p_stats; - osm_sa_resp_t* p_resp; - osm_mad_pool_t* p_mad_pool; - osm_log_t* p_log; - cl_plock_t* p_lock; - cl_qlock_pool_t pool; + osm_subn_t* p_subn; + osm_stats_t* p_stats; + osm_sa_resp_t* p_resp; + osm_mad_pool_t* p_mad_pool; + osm_log_t* p_log; + cl_plock_t* p_lock; + cl_qlock_pool_t pool; } osm_smir_rcv_t; /* * FIELDS @@ -236,14 +236,14 @@ ib_api_status_t osm_smir_rcv_init( * SYNOPSIS */ void osm_smir_rcv_process( - IN osm_smir_rcv_t* const p_ctrl, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_smir_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's SMInfoRecord attribute. 
* diff --git a/osm/include/opensm/osm_sa_sminfo_record_ctrl.h b/osm/include/opensm/osm_sa_sminfo_record_ctrl.h deleted file mode 100644 index d51a947..0000000 --- a/osm/include/opensm/osm_sa_sminfo_record_ctrl.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_smir_ctrl_t. - * This object represents a controller that receives the IBA SMInfo - * attribute from a node. 
- * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_SMIR_CTRL_H_ -#define _OSM_SMIR_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/SM Info Receive Controller -* NAME -* SM Info Receive Controller -* -* DESCRIPTION -* The SM Info Receive Controller object encapsulates -* the information needed to receive the SMInfo attribute from a node. -* -* The SM Info Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Ranjit Pandit, Intel -* -*********/ - -/****s* OpenSM: SM Info Receive Controller/osm_smir_ctrl_t -* NAME -* osm_smir_ctrl_t -* -* DESCRIPTION -* SM Info Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_smir_ctrl -{ - osm_smir_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_smir_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the SM Info Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* SM Info Receive Controller object -* SM Info Receiver object -*********/ - -/****f* OpenSM: SM Info Receive Controller/osm_smir_ctrl_construct -* NAME -* osm_smir_ctrl_construct -* -* DESCRIPTION -* This function constructs a SM Info Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_smir_ctrl_construct( - IN osm_smir_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a SM Info Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_smir_ctrl_init, osm_smir_ctrl_destroy -* -* Calling osm_smir_ctrl_construct is a prerequisite to calling any other -* method except osm_smir_ctrl_init. -* -* SEE ALSO -* SM Info Receive Controller object, osm_smir_ctrl_init, -* osm_smir_ctrl_destroy -*********/ - -/****f* OpenSM: SM Info Receive Controller/osm_smir_ctrl_destroy -* NAME -* osm_smir_ctrl_destroy -* -* DESCRIPTION -* The osm_smir_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_smir_ctrl_destroy( - IN osm_smir_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* SM Info Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_smir_ctrl_construct or osm_smir_ctrl_init. -* -* SEE ALSO -* SM Info Receive Controller object, osm_smir_ctrl_construct, -* osm_smir_ctrl_init -*********/ - -/****f* OpenSM: SM Info Receive Controller/osm_smir_ctrl_init -* NAME -* osm_smir_ctrl_init -* -* DESCRIPTION -* The osm_smir_ctrl_init function initializes a -* SM Info Receive Controller object for use. -* -* SYNOPSIS -*/ -ib_api_status_t osm_smir_ctrl_init( - IN osm_smir_ctrl_t* const p_ctrl, - IN osm_smir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_smir_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_smir_t object. -* -* p_log -* [in] Pointer to the log object. 
-* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the SM Info Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other SM Info Receive Controller methods. -* -* SEE ALSO -* SM Info Receive Controller object, osm_smir_ctrl_construct, -* osm_smir_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_SMIR_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_sw_info_record.h b/osm/include/opensm/osm_sa_sw_info_record.h index c6b421f..674b2f7 100644 --- a/osm/include/opensm/osm_sa_sw_info_record.h +++ b/osm/include/opensm/osm_sa_sw_info_record.h @@ -280,14 +280,14 @@ boolean_t osm_sir_rcv_is_inited( * SYNOPSIS */ void osm_sir_rcv_process( - IN osm_sir_rcv_t* const p_ctrl, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_ctrl +* context * [in] Pointer to an osm_sir_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the node's SwitchInfo attribute. * diff --git a/osm/include/opensm/osm_sa_sw_info_record_ctrl.h b/osm/include/opensm/osm_sa_sw_info_record_ctrl.h deleted file mode 100644 index b58654f..0000000 --- a/osm/include/opensm/osm_sa_sw_info_record_ctrl.h +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_sir_rcv_ctrl_t. - * This object represents a controller that receives the IBA SwitchInfo - * attribute from a switch node. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - */ - -#ifndef _OSM_SIR_RCV_CTRL_H_ -#define _OSM_SIR_RCV_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Switch Info Receive Controller -* NAME -* Switch Info Receive Controller -* -* DESCRIPTION -* The Switch Info Receive Controller object encapsulates the information -* needed to receive the SwitchInfo attribute from a switch node. -* -* The Switch Info Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Hal Rosenstock, Voltaire -* -*********/ - -/****s* OpenSM: Switch Info Receive Controller/osm_sir_rcv_ctrl_t -* NAME -* osm_sir_rcv_ctrl_t -* -* DESCRIPTION -* Switch Info Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_sir_rcv_ctrl -{ - osm_sir_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; -} osm_sir_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the Switch Info Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* Switch Info Receive Controller object -* Switch Info Receiver object -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_sir_rcv_ctrl_construct -* NAME -* osm_sir_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a Switch Info Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_sir_rcv_ctrl_construct( - IN osm_sir_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Switch Info Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_sir_rcv_ctrl_init, osm_sir_rcv_ctrl_destroy, -* and osm_sir_rcv_ctrl_is_inited. -* -* Calling osm_sir_rcv_ctrl_construct is a prerequisite to calling any -* other method except osm_sir_rcv_ctrl_init. -* -* SEE ALSO -* Switch Info Receive Controller object, osm_sir_rcv_ctrl_init, -* osm_sir_rcv_ctrl_destroy, osm_sir_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_sir_rcv_ctrl_destroy -* NAME -* osm_sir_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_sir_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_sir_rcv_ctrl_destroy( - IN osm_sir_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Switch Info Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_sir_rcv_ctrl_construct or osm_sir_rcv_ctrl_init. -* -* SEE ALSO -* Switch Info Receive Controller object, osm_sir_rcv_ctrl_construct, -* osm_sir_rcv_ctrl_init -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_sir_rcv_ctrl_init -* NAME -* osm_sir_rcv_ctrl_init -* -* DESCRIPTION -* The osm_sir_rcv_ctrl_init function initializes a -* Switch Info Receive Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t osm_sir_rcv_ctrl_init( - IN osm_sir_rcv_ctrl_t* const p_ctrl, - IN osm_sir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_sir_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_sir_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Switch Info Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Switch Info Receive Controller methods. -* -* SEE ALSO -* Switch Info Receive Controller object, osm_sir_rcv_ctrl_construct, -* osm_sir_rcv_ctrl_destroy, osm_sir_rcv_ctrl_is_inited -*********/ - -/****f* OpenSM: Switch Info Receive Controller/osm_sir_rcv_ctrl_is_inited -* NAME -* osm_sir_rcv_ctrl_is_inited -* -* DESCRIPTION -* Indicates if the object has been initialized with osm_sir_rcv_ctrl_init. -* -* SYNOPSIS -*/ -boolean_t osm_sir_rcv_ctrl_is_inited( - IN const osm_sir_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_sir_rcv_ctrl_t object. -* -* RETURN VALUES -* TRUE if the object was initialized successfully, -* FALSE otherwise. -* -* NOTES -* The osm_sir_rcv_ctrl_construct or osm_sir_rcv_ctrl_init must be -* called before using this function. 
-* -* SEE ALSO -* Switch Info Receive Controller object, osm_sir_rcv_ctrl_construct, -* osm_sir_rcv_ctrl_init -*********/ - -END_C_DECLS - -#endif /* _OSM_SIR_RCV_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sa_vlarb_record.h b/osm/include/opensm/osm_sa_vlarb_record.h index a30ebfd..4aad76f 100644 --- a/osm/include/opensm/osm_sa_vlarb_record.h +++ b/osm/include/opensm/osm_sa_vlarb_record.h @@ -100,12 +100,12 @@ BEGIN_C_DECLS */ typedef struct _osm_vlarb_rec_rcv { - const osm_subn_t *p_subn; - osm_sa_resp_t *p_resp; - osm_mad_pool_t *p_mad_pool; - osm_log_t *p_log; - cl_plock_t *p_lock; - cl_qlock_pool_t pool; + const osm_subn_t *p_subn; + osm_sa_resp_t *p_resp; + osm_mad_pool_t *p_mad_pool; + osm_log_t *p_log; + cl_plock_t *p_lock; + cl_qlock_pool_t pool; } osm_vlarb_rec_rcv_t; /* * FIELDS @@ -254,14 +254,14 @@ osm_vlarb_rec_rcv_init( */ void osm_vlarb_rec_rcv_process( - IN osm_vlarb_rec_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ); + IN void *context, + IN void *data ); /* * PARAMETERS -* p_rcv +* context * [in] Pointer to an osm_vlarb_rec_rcv_t object. * -* p_madw +* data * [in] Pointer to the MAD Wrapper containing the MAD * that contains the VL Arbitration Record Query attribute. * diff --git a/osm/include/opensm/osm_sa_vlarb_record_ctrl.h b/osm/include/opensm/osm_sa_vlarb_record_ctrl.h deleted file mode 100644 index b61960e..0000000 --- a/osm/include/opensm/osm_sa_vlarb_record_ctrl.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_sa_vlarb_rec_rcv_ctrl_t. - * This object represents a controller that receives the IBA VL Arbitration - * record query from SA client. - * This object is part of the OpenSM family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.3 $ - */ - -#ifndef _OSM_VLARB_REC_CTRL_H_ -#define _OSM_VLARB_REC_CTRL_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/VLArbitration Record Receive Controller -* NAME -* VLArbitration Record Receive Controller -* -* DESCRIPTION -* The VLArbitration Record Receive Controller object encapsulates -* the information needed to handle VLArbitration record query from SA client. -* -* The VLArbitration Record Receive Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Eitan Zahavi, Mellanox -* -*********/ - -/****s* OpenSM: VLArbitration Record Receive Controller/osm_vlarb_rec_rcv_ctrl_t -* NAME -* osm_vlarb_rec_rcv_ctrl_t -* -* DESCRIPTION -* VLArbitration Record Receive Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_vlarb_rec_rcv_ctrl -{ - osm_vlarb_rec_rcv_t *p_rcv; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_vlarb_rec_rcv_ctrl_t; -/* -* FIELDS -* p_rcv -* Pointer to the VLArbitration Record Receiver object. -* -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. -* -* SEE ALSO -* VLArbitration Record Receive Controller object -* VLArbitration Record Receiver object -*********/ - -/****f* OpenSM: VLArbitration Record Receive Controller/osm_vlarb_rec_rcv_ctrl_construct -* NAME -* osm_vlarb_rec_rcv_ctrl_construct -* -* DESCRIPTION -* This function constructs a VLArbitration Record Receive Controller object. 
-* -* SYNOPSIS -*/ -void osm_vlarb_rec_rcv_ctrl_construct( - IN osm_vlarb_rec_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a VLArbitration Record Receive Controller -* object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_vlarb_rec_rcv_ctrl_init, osm_vlarb_rec_rcv_ctrl_destroy -* -* Calling osm_vlarb_rec_rcv_ctrl_construct is a prerequisite to calling any other -* method except osm_vlarb_rec_rcv_ctrl_init. -* -* SEE ALSO -* VLArbitration Record Receive Controller object, osm_vlarb_rec_rcv_ctrl_init, -* osm_vlarb_rec_rcv_ctrl_destroy -*********/ - -/****f* OpenSM: VLArbitration Record Receive Controller/osm_vlarb_rec_rcv_ctrl_destroy -* NAME -* osm_vlarb_rec_rcv_ctrl_destroy -* -* DESCRIPTION -* The osm_vlarb_rec_rcv_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void osm_vlarb_rec_rcv_ctrl_destroy( - IN osm_vlarb_rec_rcv_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* VLArbitration Record Receive Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_vlarb_rec_rcv_ctrl_construct or osm_vlarb_rec_rcv_ctrl_init. -* -* SEE ALSO -* VLArbitration Record Receive Controller object, osm_vlarb_rec_rcv_ctrl_construct, -* osm_vlarb_rec_rcv_ctrl_init -*********/ - -/****f* OpenSM: VLArbitration Record Receive Controller/osm_vlarb_rec_rcv_ctrl_init -* NAME -* osm_vlarb_rec_rcv_ctrl_init -* -* DESCRIPTION -* The osm_vlarb_rec_rcv_ctrl_init function initializes a -* VLArbitration Record Receive Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t osm_vlarb_rec_rcv_ctrl_init( - IN osm_vlarb_rec_rcv_ctrl_t* const p_ctrl, - IN osm_vlarb_rec_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_vlarb_rec_rcv_ctrl_t object to initialize. -* -* p_rcv -* [in] Pointer to an osm_vlarb_rec_rcv_t object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the VLArbitration Record Receive Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other VLArbitration Record Receive Controller methods. -* -* SEE ALSO -* VLArbitration Record Receive Controller object, osm_vlarb_rec_rcv_ctrl_construct, -* osm_vlarb_rec_rcv_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_VLARB_REC_CTRL_H_ */ diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am index d9e63d4..de77a97 100644 --- a/osm/opensm/Makefile.am +++ b/osm/opensm/Makefile.am @@ -41,24 +41,24 @@ opensm_SOURCES = main.c osm_console.c osm_db_files.c \ osm_port_info_rcv.c \ osm_remote_sm.c osm_req.c osm_req_ctrl.c \ osm_resp.c osm_sa.c osm_sa_class_port_info.c \ - osm_sa_class_port_info_ctrl.c osm_sa_informinfo.c \ - osm_sa_informinfo_ctrl.c osm_sa_lft_record.c \ - osm_sa_lft_record_ctrl.c osm_sa_mft_record.c \ - osm_sa_mft_record_ctrl.c osm_sa_link_record.c \ - osm_sa_link_record_ctrl.c osm_sa_mad_ctrl.c \ - osm_sa_mcmember_record.c osm_sa_mcmember_record_ctrl.c \ - osm_sa_node_record.c osm_sa_node_record_ctrl.c \ - osm_sa_path_record.c osm_sa_path_record_ctrl.c \ - osm_sa_pkey_record.c osm_sa_pkey_record_ctrl.c \ - osm_sa_portinfo_record.c osm_sa_portinfo_record_ctrl.c \ - osm_sa_guidinfo_record.c osm_sa_guidinfo_record_ctrl.c \ - osm_sa_multipath_record.c osm_sa_multipath_record_ctrl.c \ + osm_sa_informinfo.c \ + osm_sa_lft_record.c \ + osm_sa_mft_record.c \ + osm_sa_link_record.c \ + osm_sa_mad_ctrl.c \ + 
osm_sa_mcmember_record.c \ + osm_sa_node_record.c \ + osm_sa_path_record.c \ + osm_sa_pkey_record.c \ + osm_sa_portinfo_record.c \ + osm_sa_guidinfo_record.c \ + osm_sa_multipath_record.c \ osm_sa_response.c osm_sa_service_record.c \ - osm_sa_service_record_ctrl.c osm_sa_slvl_record.c \ - osm_sa_slvl_record_ctrl.c osm_sa_sminfo_record.c \ - osm_sa_sminfo_record_ctrl.c osm_sa_vlarb_record.c \ - osm_sa_vlarb_record_ctrl.c osm_sa_sw_info_record.c \ - osm_sa_sw_info_record_ctrl.c osm_service.c \ + osm_sa_slvl_record.c \ + osm_sa_sminfo_record.c \ + osm_sa_vlarb_record.c \ + osm_sa_sw_info_record.c \ + osm_service.c \ osm_slvl_map_rcv.c \ osm_sm.c osm_sminfo_rcv.c \ osm_sm_mad_ctrl.c \ diff --git a/osm/opensm/osm_sa.c b/osm/opensm/osm_sa.c index 60dffd7..42a38aa 100644 --- a/osm/opensm/osm_sa.c +++ b/osm/opensm/osm_sa.c @@ -82,58 +82,25 @@ osm_sa_construct( p_sa->state = OSM_SA_STATE_INIT; p_sa->sa_trans_id = OSM_SA_INITIAL_TID_VALUE; - osm_sa_mad_ctrl_construct( &p_sa->mad_ctrl ); osm_sa_resp_construct( &p_sa->resp ); - osm_nr_rcv_construct( &p_sa->nr_rcv); - osm_nr_rcv_ctrl_construct( &p_sa->nr_rcv_ctrl ); - osm_pir_rcv_construct( &p_sa->pir_rcv ); - osm_pir_rcv_ctrl_construct( &p_sa->pir_rcv_ctrl ); - osm_gir_rcv_construct( &p_sa->gir_rcv ); - osm_gir_rcv_ctrl_construct( &p_sa->gir_rcv_ctrl ); - osm_lr_rcv_construct( &p_sa->lr_rcv ); - osm_lr_rcv_ctrl_construct( &p_sa->lr_rcv_ctrl ); - osm_pr_rcv_construct( &p_sa->pr_rcv ); - osm_pr_rcv_ctrl_construct( &p_sa->pr_rcv_ctrl ); - #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) osm_mpr_rcv_construct( &p_sa->mpr_rcv ); - osm_mpr_rcv_ctrl_construct( &p_sa->mpr_rcv_ctrl ); #endif - osm_smir_rcv_construct( &p_sa->smir_rcv ); - osm_smir_ctrl_construct( &p_sa->smir_ctrl ); - osm_mcmr_rcv_construct(&p_sa->mcmr_rcv ); - osm_mcmr_rcv_ctrl_construct(&p_sa->mcmr_rcv_ctlr); - osm_sr_rcv_construct( &p_sa->sr_rcv ); - osm_sr_rcv_ctrl_construct( &p_sa->sr_rcv_ctrl ); - osm_infr_rcv_construct( &p_sa->infr_rcv ); - 
osm_infr_rcv_ctrl_construct( &p_sa->infr_rcv_ctrl ); - osm_vlarb_rec_rcv_construct( &p_sa->vlarb_rec_rcv ); - osm_vlarb_rec_rcv_ctrl_construct( &p_sa->vlarb_rec_rcv_ctrl ); - osm_slvl_rec_rcv_construct( &p_sa->slvl_rec_rcv ); - osm_slvl_rec_rcv_ctrl_construct( &p_sa->slvl_rec_rcv_ctrl ); - osm_pkey_rec_rcv_construct( &p_sa->pkey_rec_rcv ); - osm_pkey_rec_rcv_ctrl_construct( &p_sa->pkey_rec_rcv_ctrl ); - osm_lftr_rcv_construct( &p_sa->lftr_rcv ); - osm_lftr_rcv_ctrl_construct( &p_sa->lftr_rcv_ctrl ); - osm_sir_rcv_construct( &p_sa->sir_rcv ); - osm_sir_rcv_ctrl_construct( &p_sa->sir_rcv_ctrl ); - osm_mftr_rcv_construct( &p_sa->mftr_rcv ); - osm_mftr_rcv_ctrl_construct( &p_sa->mftr_rcv_ctrl ); } /********************************************************************** @@ -149,24 +116,25 @@ osm_sa_shutdown( status = osm_sa_mad_ctrl_unbind( &p_sa->mad_ctrl ); /* remove any registered dispatcher message */ - osm_nr_rcv_ctrl_destroy( &p_sa->nr_rcv_ctrl ); - osm_pir_rcv_ctrl_destroy( &p_sa->pir_rcv_ctrl ); - osm_gir_rcv_ctrl_destroy( &p_sa->gir_rcv_ctrl ); - osm_lr_rcv_ctrl_destroy( &p_sa->lr_rcv_ctrl ); - osm_pr_rcv_ctrl_destroy( &p_sa->pr_rcv_ctrl ); + cl_disp_unregister( p_sa->nr_disp_h ); + cl_disp_unregister( p_sa->pir_disp_h ); + cl_disp_unregister( p_sa->gir_disp_h ); + cl_disp_unregister( p_sa->lr_disp_h ); + cl_disp_unregister( p_sa->pr_disp_h ); #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) - osm_mpr_rcv_ctrl_destroy( &p_sa->mpr_rcv_ctrl ); + cl_disp_unregister( p_sa->mpr_disp_h ); #endif - osm_smir_ctrl_destroy( &p_sa->smir_ctrl ); - osm_mcmr_rcv_ctrl_destroy( &p_sa->mcmr_rcv_ctlr); - osm_sr_rcv_ctrl_destroy( &p_sa->sr_rcv_ctrl ); - osm_infr_rcv_ctrl_destroy( &p_sa->infr_rcv_ctrl ); - osm_vlarb_rec_rcv_ctrl_destroy( &p_sa->vlarb_rec_rcv_ctrl ); - osm_slvl_rec_rcv_ctrl_destroy( &p_sa->slvl_rec_rcv_ctrl ); - osm_pkey_rec_rcv_ctrl_destroy( &p_sa->pkey_rec_rcv_ctrl ); - osm_lftr_rcv_ctrl_destroy( &p_sa->lftr_rcv_ctrl ); - osm_sir_rcv_ctrl_destroy( 
&p_sa->sir_rcv_ctrl ); - osm_mftr_rcv_ctrl_destroy( &p_sa->mftr_rcv_ctrl ); + cl_disp_unregister( p_sa->smir_disp_h ); + cl_disp_unregister( p_sa->mcmr_disp_h); + cl_disp_unregister( p_sa->sr_disp_h ); + cl_disp_unregister( p_sa->infr_disp_h ); + cl_disp_unregister( p_sa->infir_disp_h ); + cl_disp_unregister( p_sa->vlarb_disp_h ); + cl_disp_unregister( p_sa->slvl_disp_h ); + cl_disp_unregister( p_sa->pkey_disp_h ); + cl_disp_unregister( p_sa->lft_disp_h ); + cl_disp_unregister( p_sa->sir_disp_h ); + cl_disp_unregister( p_sa->mft_disp_h ); osm_sa_mad_ctrl_destroy( &p_sa->mad_ctrl ); OSM_LOG_EXIT( p_sa->p_log ); @@ -260,14 +228,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_cpi_rcv_ctrl_init( - &p_sa->cpi_rcv_ctrl, - &p_sa->cpi_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_nr_rcv_init( &p_sa->nr_rcv, &p_sa->resp, @@ -278,14 +238,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_nr_rcv_ctrl_init( - &p_sa->nr_rcv_ctrl, - &p_sa->nr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_pir_rcv_init( &p_sa->pir_rcv, &p_sa->resp, @@ -296,14 +248,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_pir_rcv_ctrl_init( - &p_sa->pir_rcv_ctrl, - &p_sa->pir_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_gir_rcv_init( &p_sa->gir_rcv, &p_sa->resp, @@ -314,14 +258,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_gir_rcv_ctrl_init( - &p_sa->gir_rcv_ctrl, - &p_sa->gir_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_lr_rcv_init( &p_sa->lr_rcv, &p_sa->resp, @@ -332,14 +268,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_lr_rcv_ctrl_init( - &p_sa->lr_rcv_ctrl, - &p_sa->lr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_pr_rcv_init( &p_sa->pr_rcv, &p_sa->resp, @@ -350,14 +278,6 @@ osm_sa_init( if( status != 
IB_SUCCESS ) goto Exit; - status = osm_pr_rcv_ctrl_init( - &p_sa->pr_rcv_ctrl, - &p_sa->pr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) status = osm_mpr_rcv_init( &p_sa->mpr_rcv, @@ -368,14 +288,6 @@ osm_sa_init( p_lock ); if( status != IB_SUCCESS ) goto Exit; - - status = osm_mpr_rcv_ctrl_init( - &p_sa->mpr_rcv_ctrl, - &p_sa->mpr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; #endif status = osm_smir_rcv_init( @@ -389,14 +301,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_smir_ctrl_init( - &p_sa->smir_ctrl, - &p_sa->smir_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_mcmr_rcv_init( p_sm, &p_sa->mcmr_rcv, @@ -408,14 +312,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_mcmr_rcv_ctrl_init( - &p_sa->mcmr_rcv_ctlr, - &p_sa->mcmr_rcv, - p_log, - p_disp); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_sr_rcv_init( &p_sa->sr_rcv, &p_sa->resp, @@ -426,14 +322,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_sr_rcv_ctrl_init( - &p_sa->sr_rcv_ctrl, - &p_sa->sr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_infr_rcv_init( &p_sa->infr_rcv, &p_sa->resp, @@ -444,14 +332,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_infr_rcv_ctrl_init( - &p_sa->infr_rcv_ctrl, - &p_sa->infr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_vlarb_rec_rcv_init( &p_sa->vlarb_rec_rcv, &p_sa->resp, @@ -462,14 +342,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_vlarb_rec_rcv_ctrl_init( - &p_sa->vlarb_rec_rcv_ctrl, - &p_sa->vlarb_rec_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_slvl_rec_rcv_init( &p_sa->slvl_rec_rcv, &p_sa->resp, @@ -480,14 +352,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = 
osm_slvl_rec_rcv_ctrl_init( - &p_sa->slvl_rec_rcv_ctrl, - &p_sa->slvl_rec_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_pkey_rec_rcv_init( &p_sa->pkey_rec_rcv, &p_sa->resp, @@ -498,14 +362,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_pkey_rec_rcv_ctrl_init( - &p_sa->pkey_rec_rcv_ctrl, - &p_sa->pkey_rec_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_lftr_rcv_init( &p_sa->lftr_rcv, &p_sa->resp, @@ -516,14 +372,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_lftr_rcv_ctrl_init( - &p_sa->lftr_rcv_ctrl, - &p_sa->lftr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_sir_rcv_init( &p_sa->sir_rcv, &p_sa->resp, @@ -534,14 +382,6 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_sir_rcv_ctrl_init( - &p_sa->sir_rcv_ctrl, - &p_sa->sir_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_mftr_rcv_init( &p_sa->mftr_rcv, &p_sa->resp, @@ -552,12 +392,99 @@ osm_sa_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_mftr_rcv_ctrl_init( - &p_sa->mftr_rcv_ctrl, - &p_sa->mftr_rcv, - p_log, - p_disp ); - if( status != IB_SUCCESS ) + p_sa->cpi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_CLASS_PORT_INFO, + osm_cpi_rcv_process, &p_sa->cpi_rcv); + if( p_sa->cpi_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->nr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_RECORD, + osm_nr_rcv_process, &p_sa->nr_rcv); + if( p_sa->nr_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->pir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PORTINFO_RECORD, + osm_pir_rcv_process, &p_sa->pir_rcv); + if( p_sa->pir_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->gir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_GUIDINFO_RECORD, + osm_gir_rcv_process, &p_sa->gir_rcv); + if( p_sa->gir_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->lr_disp_h = cl_disp_register(p_disp, 
OSM_MSG_MAD_LINK_RECORD, + osm_lr_rcv_process, &p_sa->lr_rcv); + if( p_sa->lr_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->pr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PATH_RECORD, + osm_pr_rcv_process, &p_sa->pr_rcv); + if( p_sa->pr_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + p_sa->mpr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MULTIPATH_RECORD, + osm_mpr_rcv_process, &p_sa->mpr_rcv); + if( p_sa->mpr_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; +#endif + + p_sa->smir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SMINFO_RECORD, + osm_smir_rcv_process, &p_sa->smir_rcv); + if( p_sa->smir_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->mcmr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MCMEMBER_RECORD, + osm_mcmr_rcv_process, &p_sa->mcmr_rcv); + if( p_sa->mcmr_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->sr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SERVICE_RECORD, + osm_sr_rcv_process, &p_sa->sr_rcv); + if( p_sa->sr_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->infr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_INFORM_INFO, + osm_infr_rcv_process, &p_sa->infr_rcv); + if( p_sa->infr_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->infir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_INFORM_INFO_RECORD, + osm_infir_rcv_process, &p_sa->infr_rcv); + if( p_sa->infir_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->vlarb_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_VL_ARB_RECORD, + osm_vlarb_rec_rcv_process, + &p_sa->vlarb_rec_rcv); + if( p_sa->vlarb_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->slvl_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SLVL_TBL_RECORD, + osm_slvl_rec_rcv_process, + &p_sa->slvl_rec_rcv); + if( p_sa->slvl_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->pkey_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PKEY_TBL_RECORD, + osm_pkey_rec_rcv_process, + &p_sa->pkey_rec_rcv); + if( 
p_sa->pkey_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->lft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_LFT_RECORD, + osm_lftr_rcv_process, &p_sa->lftr_rcv); + if( p_sa->lft_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->sir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SWITCH_INFO_RECORD, + osm_sir_rcv_process, &p_sa->sir_rcv); + if( p_sa->sir_disp_h == CL_DISP_INVALID_HANDLE ) + goto Exit; + + p_sa->mft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MFT_RECORD, + osm_mftr_rcv_process, &p_sa->mftr_rcv); + if( p_sa->mft_disp_h == CL_DISP_INVALID_HANDLE ) goto Exit; Exit: diff --git a/osm/opensm/osm_sa_class_port_info.c b/osm/opensm/osm_sa_class_port_info.c index 84fa016..da107ee 100644 --- a/osm/opensm/osm_sa_class_port_info.c +++ b/osm/opensm/osm_sa_class_port_info.c @@ -242,9 +242,11 @@ __osm_cpi_rcv_respond( **********************************************************************/ void osm_cpi_rcv_process( - IN osm_cpi_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_cpi_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; const ib_path_rec_t* p_pr; const ib_sa_mad_t* p_sa_mad; diff --git a/osm/opensm/osm_sa_class_port_info_ctrl.c b/osm/opensm/osm_sa_class_port_info_ctrl.c deleted file mode 100644 index 9197c0a..0000000 --- a/osm/opensm/osm_sa_class_port_info_ctrl.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_pr_rcv_ctrl_t. - * This object represents the ClassPortInfo request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.3 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_cpi_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_cpi_rcv_process( ((osm_cpi_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_cpi_rcv_ctrl_construct( - IN osm_cpi_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_cpi_rcv_ctrl_destroy( - IN osm_cpi_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_cpi_rcv_ctrl_init( - IN osm_cpi_rcv_ctrl_t* const p_ctrl, - IN osm_cpi_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_cpi_rcv_ctrl_init ); - - osm_cpi_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_CLASS_PORT_INFO, - __osm_cpi_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_cpi_rcv_ctrl_init: ERR 1501: " - "Dispatcher registration failed\n" ); - status = 
IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sa_guidinfo_record.c b/osm/opensm/osm_sa_guidinfo_record.c index 5d7c4ba..10fac3c 100644 --- a/osm/opensm/osm_sa_guidinfo_record.c +++ b/osm/opensm/osm_sa_guidinfo_record.c @@ -392,9 +392,11 @@ __osm_sa_gir_by_comp_mask_cb( **********************************************************************/ void osm_gir_rcv_process( - IN osm_gir_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_gir_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_guidinfo_record_t* p_rcvd_rec; cl_qlist_t rec_list; diff --git a/osm/opensm/osm_sa_guidinfo_record_ctrl.c b/osm/opensm/osm_sa_guidinfo_record_ctrl.c deleted file mode 100644 index df1a230..0000000 --- a/osm/opensm/osm_sa_guidinfo_record_ctrl.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_gir_rcv_ctrl_t. - * This object represents the GUIDInfoRecord request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_gir_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_gir_rcv_process( ((osm_gir_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_gir_rcv_ctrl_construct( - IN osm_gir_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_gir_rcv_ctrl_destroy( - IN osm_gir_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t 
-osm_gir_rcv_ctrl_init( - IN osm_gir_rcv_ctrl_t* const p_ctrl, - IN osm_gir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_gir_rcv_ctrl_init ); - - osm_gir_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_GUIDINFO_RECORD, - __osm_gir_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_gir_rcv_ctrl_init: ERR 5201: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_informinfo.c b/osm/opensm/osm_sa_informinfo.c index e427d5a..340a7f1 100644 --- a/osm/opensm/osm_sa_informinfo.c +++ b/osm/opensm/osm_sa_informinfo.c @@ -858,9 +858,11 @@ osm_infr_rcv_process_set_method( **********************************************************************/ void osm_infr_rcv_process( - IN osm_infr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_infr_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_sa_mad_t *p_sa_mad; OSM_LOG_ENTER( p_rcv->p_log, osm_infr_rcv_process ); @@ -891,9 +893,11 @@ osm_infr_rcv_process( **********************************************************************/ void osm_infir_rcv_process( - IN osm_infr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_infr_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_sa_mad_t *p_sa_mad; OSM_LOG_ENTER( p_rcv->p_log, osm_infr_rcv_process ); diff --git a/osm/opensm/osm_sa_informinfo_ctrl.c b/osm/opensm/osm_sa_informinfo_ctrl.c deleted file mode 100644 index 1637155..0000000 --- a/osm/opensm/osm_sa_informinfo_ctrl.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 
2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Implementation of osm_infr_rcv_ctrl_t. - * This object represents the InformInfo set request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -static void -__osm_infr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_infr_rcv_process( ((osm_infr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -static void -__osm_infir_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_infir_rcv_process( ((osm_infr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_infr_rcv_ctrl_construct( - IN osm_infr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; - p_ctrl->h_disp2 = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_infr_rcv_ctrl_destroy( - IN osm_infr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp2 ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_infr_rcv_ctrl_init( - IN osm_infr_rcv_ctrl_t* const p_ctrl, - IN osm_infr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t 
status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_infr_rcv_ctrl_init ); - - osm_infr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_INFORM_INFO, - __osm_infr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_infr_rcv_ctrl_init: ERR 1701: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - p_ctrl->h_disp2 = cl_disp_register( - p_disp, - OSM_MSG_MAD_INFORM_INFO_RECORD, - __osm_infir_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp2 == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_infr_rcv_ctrl_init: ERR 1702: " - "Dispatcher registration failed\n" ); - cl_disp_unregister( p_ctrl->h_disp ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_lft_record.c b/osm/opensm/osm_sa_lft_record.c index 46bebf2..b6333e7 100644 --- a/osm/opensm/osm_sa_lft_record.c +++ b/osm/opensm/osm_sa_lft_record.c @@ -298,9 +298,11 @@ __osm_lftr_rcv_by_comp_mask( **********************************************************************/ void osm_lftr_rcv_process( - IN osm_lftr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_lftr_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_lft_record_t* p_rcvd_rec; ib_lft_record_t* p_resp_rec; diff --git a/osm/opensm/osm_sa_lft_record_ctrl.c b/osm/opensm/osm_sa_lft_record_ctrl.c deleted file mode 100644 index 250e8e3..0000000 --- a/osm/opensm/osm_sa_lft_record_ctrl.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Implementation of osm_lftr_rcv_ctrl_t. - * This object represents the LinearForwardingTable request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_lftr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_lftr_rcv_process( ((osm_lftr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_lftr_rcv_ctrl_construct( - IN osm_lftr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_lftr_rcv_ctrl_destroy( - IN osm_lftr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_lftr_rcv_ctrl_init( - IN osm_lftr_rcv_ctrl_t* const p_ctrl, - IN osm_lftr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_lftr_rcv_ctrl_init ); - - osm_lftr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_LFT_RECORD, - __osm_lftr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_lftr_rcv_ctrl_init: ERR 4501: " - "Dispatcher registration failed\n" ); - 
status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_link_record.c b/osm/opensm/osm_sa_link_record.c index e9518f4..169e75e 100644 --- a/osm/opensm/osm_sa_link_record.c +++ b/osm/opensm/osm_sa_link_record.c @@ -697,9 +697,11 @@ __osm_lr_rcv_respond( **********************************************************************/ void osm_lr_rcv_process( - IN osm_lr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_lr_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; const ib_link_record_t* p_lr; const ib_sa_mad_t* p_sa_mad; const osm_port_t* p_src_port; diff --git a/osm/opensm/osm_sa_link_record_ctrl.c b/osm/opensm/osm_sa_link_record_ctrl.c deleted file mode 100644 index d95c8e3..0000000 --- a/osm/opensm/osm_sa_link_record_ctrl.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_lr_rcv_ctrl_t. - * This object represents the link record controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_lr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_lr_rcv_process( ((osm_lr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_lr_rcv_ctrl_construct( - IN osm_lr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_lr_rcv_ctrl_destroy( - IN osm_lr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - - -/********************************************************************** - **********************************************************************/ -ib_api_status_t 
-osm_lr_rcv_ctrl_init( - IN osm_lr_rcv_ctrl_t* const p_ctrl, - IN osm_lr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_lr_rcv_ctrl_init ); - - osm_lr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_LINK_RECORD, - __osm_lr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_lr_rcv_ctrl_init: ERR 1901: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - - diff --git a/osm/opensm/osm_sa_mcmember_record.c b/osm/opensm/osm_sa_mcmember_record.c index 819a2da..2c55198 100644 --- a/osm/opensm/osm_sa_mcmember_record.c +++ b/osm/opensm/osm_sa_mcmember_record.c @@ -2276,9 +2276,11 @@ __osm_mcmr_query_mgrp(IN osm_mcmr_recv_t* const p_rcv, **********************************************************************/ void osm_mcmr_rcv_process( - IN osm_mcmr_recv_t* const p_rcv, - const IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_mcmr_recv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_sa_mad_t *p_sa_mad; ib_net16_t sa_status = IB_SA_MAD_STATUS_REQ_INVALID; ib_member_rec_t *p_recvd_mcmember_rec; diff --git a/osm/opensm/osm_sa_mcmember_record_ctrl.c b/osm/opensm/osm_sa_mcmember_record_ctrl.c deleted file mode 100644 index 75f1d7e..0000000 --- a/osm/opensm/osm_sa_mcmember_record_ctrl.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_mcmr_rcv_ctrl_t. - * This object represents the Multicast member record controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_mcmr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_mcmr_rcv_process( ((osm_mcmr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_mcmr_rcv_ctrl_construct( - IN osm_mcmr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_mcmr_rcv_ctrl_destroy( - IN osm_mcmr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_mcmr_rcv_ctrl_init( - IN osm_mcmr_rcv_ctrl_t* const p_ctrl, - IN osm_mcmr_recv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_mcmr_rcv_ctrl_init ); - - osm_mcmr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_MCMEMBER_RECORD, - __osm_mcmr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - 
"osm_mcmr_rcv_ctrl_init: ERR 1C01: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sa_mft_record.c b/osm/opensm/osm_sa_mft_record.c index 3d85739..005c9bd 100644 --- a/osm/opensm/osm_sa_mft_record.c +++ b/osm/opensm/osm_sa_mft_record.c @@ -330,9 +330,11 @@ __osm_mftr_rcv_by_comp_mask( **********************************************************************/ void osm_mftr_rcv_process( - IN osm_mftr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_mftr_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_mft_record_t* p_rcvd_rec; ib_mft_record_t* p_resp_rec; diff --git a/osm/opensm/osm_sa_mft_record_ctrl.c b/osm/opensm/osm_sa_mft_record_ctrl.c deleted file mode 100644 index cf433a9..0000000 --- a/osm/opensm/osm_sa_mft_record_ctrl.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Implementation of osm_mftr_rcv_ctrl_t. - * This object represents the MulticastForwardingTable request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_mftr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_mftr_rcv_process( ((osm_mftr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_mftr_rcv_ctrl_construct( - IN osm_mftr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_mftr_rcv_ctrl_destroy( - IN osm_mftr_rcv_ctrl_t* const p_ctrl 
) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_mftr_rcv_ctrl_init( - IN osm_mftr_rcv_ctrl_t* const p_ctrl, - IN osm_mftr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_mftr_rcv_ctrl_init ); - - osm_mftr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_MFT_RECORD, - __osm_mftr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_mftr_rcv_ctrl_init: ERR 4A01: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 2f61fb8..c268f48 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ b/osm/opensm/osm_sa_multipath_record.c @@ -1554,9 +1554,11 @@ __osm_mpr_rcv_respond( **********************************************************************/ void osm_mpr_rcv_process( - IN osm_mpr_rcv_t* const p_rcv, - IN osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_mpr_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; const ib_multipath_rec_t* p_mpr; const ib_sa_mad_t* p_sa_mad; osm_port_t* requester_port; diff --git a/osm/opensm/osm_sa_multipath_record_ctrl.c b/osm/opensm/osm_sa_multipath_record_ctrl.c deleted file mode 100644 index cf83a07..0000000 --- a/osm/opensm/osm_sa_multipath_record_ctrl.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 
- * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_mpr_rcv_ctrl_t. - * This object represents the MultiPathRecord request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - */ - -#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -static void -__osm_mpr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_mpr_rcv_process( ((osm_mpr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_mpr_rcv_ctrl_construct( - IN osm_mpr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_mpr_rcv_ctrl_destroy( - IN osm_mpr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_mpr_rcv_ctrl_init( - IN osm_mpr_rcv_ctrl_t* const p_ctrl, - IN osm_mpr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_mpr_rcv_ctrl_init ); - - osm_mpr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_MULTIPATH_RECORD, - __osm_mpr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_mpr_rcv_ctrl_init: ERR 4B01: " - 
"Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - -#endif diff --git a/osm/opensm/osm_sa_node_record.c b/osm/opensm/osm_sa_node_record.c index e8ea45a..892582e 100644 --- a/osm/opensm/osm_sa_node_record.c +++ b/osm/opensm/osm_sa_node_record.c @@ -394,9 +394,11 @@ __osm_nr_rcv_by_comp_mask( **********************************************************************/ void osm_nr_rcv_process( - IN osm_nr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_nr_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_node_record_t* p_rcvd_rec; ib_node_record_t* p_resp_rec; diff --git a/osm/opensm/osm_sa_node_record_ctrl.c b/osm/opensm/osm_sa_node_record_ctrl.c deleted file mode 100644 index f101df7..0000000 --- a/osm/opensm/osm_sa_node_record_ctrl.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_nr_rcv_ctrl_t. - * This object represents the Node Record controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_nr_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_nr_rcv_process( ((osm_nr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_nr_rcv_ctrl_construct( - IN osm_nr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_nr_rcv_ctrl_destroy( - IN osm_nr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( 
p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_nr_rcv_ctrl_init( - IN osm_nr_rcv_ctrl_t* const p_ctrl, - IN osm_nr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_nr_rcv_ctrl_init ); - - osm_nr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_NODE_RECORD, - __osm_nr_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_nr_rcv_ctrl_init: ERR 1E01: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index 7707f52..a0dbb07 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -1893,9 +1893,11 @@ __osm_pr_rcv_respond( **********************************************************************/ void osm_pr_rcv_process( - IN osm_pr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_pr_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; const ib_path_rec_t* p_pr; const ib_sa_mad_t* p_sa_mad; const osm_port_t* p_src_port; diff --git a/osm/opensm/osm_sa_path_record_ctrl.c b/osm/opensm/osm_sa_path_record_ctrl.c deleted file mode 100644 index 461b32c..0000000 --- a/osm/opensm/osm_sa_path_record_ctrl.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_pr_rcv_ctrl_t. - * This object represents the PathRecord request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -static void -__osm_pr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_pr_rcv_process( ((osm_pr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_pr_rcv_ctrl_construct( - IN osm_pr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_pr_rcv_ctrl_destroy( - IN osm_pr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_pr_rcv_ctrl_init( - IN osm_pr_rcv_ctrl_t* const p_ctrl, - IN osm_pr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_pr_rcv_ctrl_init ); - - osm_pr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_PATH_RECORD, - __osm_pr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_pr_rcv_ctrl_init: ERR 2001: " - "Dispatcher registration failed\n" ); - status = 
IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sa_pkey_record.c b/osm/opensm/osm_sa_pkey_record.c index 8e56c6c..5eb15df 100644 --- a/osm/opensm/osm_sa_pkey_record.c +++ b/osm/opensm/osm_sa_pkey_record.c @@ -308,9 +308,11 @@ __osm_sa_pkey_by_comp_mask_cb( **********************************************************************/ void osm_pkey_rec_rcv_process( - IN osm_pkey_rec_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_pkey_rec_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_pkey_table_record_t* p_rcvd_rec; const cl_ptr_vector_t* p_tbl; diff --git a/osm/opensm/osm_sa_pkey_record_ctrl.c b/osm/opensm/osm_sa_pkey_record_ctrl.c deleted file mode 100644 index e0d1bb7..0000000 --- a/osm/opensm/osm_sa_pkey_record_ctrl.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_pkey_rec_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_pkey_rec_rcv_process( ((osm_pkey_rec_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_pkey_rec_rcv_ctrl_construct( - IN osm_pkey_rec_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_pkey_rec_rcv_ctrl_destroy( - IN osm_pkey_rec_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_pkey_rec_rcv_ctrl_init( - IN osm_pkey_rec_rcv_ctrl_t* const p_ctrl, - IN osm_pkey_rec_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = 
IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_pkey_rec_rcv_ctrl_init ); - - osm_pkey_rec_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_PKEY_TBL_RECORD, - __osm_pkey_rec_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_ctrl_init: ERR 4701: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sa_portinfo_record.c b/osm/opensm/osm_sa_portinfo_record.c index 300d536..5d9b1b2 100644 --- a/osm/opensm/osm_sa_portinfo_record.c +++ b/osm/opensm/osm_sa_portinfo_record.c @@ -592,9 +592,11 @@ __osm_sa_pir_by_comp_mask_cb( **********************************************************************/ void osm_pir_rcv_process( - IN osm_pir_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_pir_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_portinfo_record_t* p_rcvd_rec; const cl_ptr_vector_t* p_tbl; diff --git a/osm/opensm/osm_sa_portinfo_record_ctrl.c b/osm/opensm/osm_sa_portinfo_record_ctrl.c deleted file mode 100644 index e2a8208..0000000 --- a/osm/opensm/osm_sa_portinfo_record_ctrl.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_pir_rcv_ctrl_t. - * This object represents the PortInfoRecord request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_pir_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_pir_rcv_process( ((osm_pir_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_pir_rcv_ctrl_construct( - IN osm_pir_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_pir_rcv_ctrl_destroy( - IN osm_pir_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_pir_rcv_ctrl_init( - IN osm_pir_rcv_ctrl_t* const p_ctrl, - IN osm_pir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_pir_rcv_ctrl_init ); - - osm_pir_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_PORTINFO_RECORD, - __osm_pir_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_pir_rcv_ctrl_init: ERR 2201: " - "Dispatcher registration failed\n" ); - status = 
IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_service_record.c b/osm/opensm/osm_sa_service_record.c index faa4af7..b23a12d 100644 --- a/osm/opensm/osm_sa_service_record.c +++ b/osm/opensm/osm_sa_service_record.c @@ -1051,9 +1051,11 @@ osm_sr_rcv_process_delete_method( **********************************************************************/ void osm_sr_rcv_process( - IN osm_sr_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *context, + IN void *data ) { + osm_sr_rcv_t *p_rcv = context; + osm_madw_t *p_madw = data; ib_sa_mad_t * p_sa_mad; ib_net16_t sa_status = IB_SA_MAD_STATUS_REQ_INVALID; boolean_t valid; diff --git a/osm/opensm/osm_sa_service_record_ctrl.c b/osm/opensm/osm_sa_service_record_ctrl.c deleted file mode 100644 index 5fad783..0000000 --- a/osm/opensm/osm_sa_service_record_ctrl.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_sr_rcv_ctrl_t. - * This object represents the ServiceRecord request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -static void -__osm_sr_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_sr_rcv_process( ((osm_sr_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_sr_rcv_ctrl_construct( - IN osm_sr_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_sr_rcv_ctrl_destroy( - IN osm_sr_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ 
-ib_api_status_t -osm_sr_rcv_ctrl_init( - IN osm_sr_rcv_ctrl_t* const p_ctrl, - IN osm_sr_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_sr_rcv_ctrl_init ); - - osm_sr_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_SERVICE_RECORD, - __osm_sr_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_sr_rcv_ctrl_init: ERR 2501: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_slvl_record.c b/osm/opensm/osm_sa_slvl_record.c index b9146b0..d831ffd 100644 --- a/osm/opensm/osm_sa_slvl_record.c +++ b/osm/opensm/osm_sa_slvl_record.c @@ -292,9 +292,11 @@ __osm_sa_slvl_by_comp_mask_cb( **********************************************************************/ void osm_slvl_rec_rcv_process( - IN osm_slvl_rec_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_slvl_rec_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_slvl_table_record_t* p_rcvd_rec; const cl_ptr_vector_t* p_tbl; diff --git a/osm/opensm/osm_sa_slvl_record_ctrl.c b/osm/opensm/osm_sa_slvl_record_ctrl.c deleted file mode 100644 index 76fff0e..0000000 --- a/osm/opensm/osm_sa_slvl_record_ctrl.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_slvl_rec_rcv_ctrl_t. - * This object represents the SLtoVL Map Record SA request controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_slvl_rec_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_slvl_rec_rcv_process( ((osm_slvl_rec_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_slvl_rec_rcv_ctrl_construct( - IN osm_slvl_rec_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_slvl_rec_rcv_ctrl_destroy( - IN osm_slvl_rec_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_slvl_rec_rcv_ctrl_init( - IN osm_slvl_rec_rcv_ctrl_t* const p_ctrl, - IN osm_slvl_rec_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_slvl_rec_rcv_ctrl_init ); - - osm_slvl_rec_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_SLVL_TBL_RECORD, - __osm_slvl_rec_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - 
"osm_slvl_rec_rcv_ctrl_init: ERR 2701: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sa_sminfo_record.c b/osm/opensm/osm_sa_sminfo_record.c index 3f3ae27..5e15f52 100644 --- a/osm/opensm/osm_sa_sminfo_record.c +++ b/osm/opensm/osm_sa_sminfo_record.c @@ -257,9 +257,11 @@ __osm_sa_smir_by_comp_mask_cb( **********************************************************************/ void osm_smir_rcv_process( - IN osm_smir_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_smir_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_sminfo_record_t* p_rcvd_rec; const cl_qmap_t* p_tbl; diff --git a/osm/opensm/osm_sa_sminfo_record_ctrl.c b/osm/opensm/osm_sa_sminfo_record_ctrl.c deleted file mode 100644 index 09cc99a..0000000 --- a/osm/opensm/osm_sa_sminfo_record_ctrl.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_smir_ctrl_t. - * This object represents the SMInfo request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_smir_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_smir_rcv_process( ((osm_smir_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_smir_ctrl_construct( - IN osm_smir_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_smir_ctrl_destroy( - IN osm_smir_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( 
p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_smir_ctrl_init( - IN osm_smir_ctrl_t* const p_ctrl, - IN osm_smir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_smir_ctrl_init ); - - osm_smir_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_SMINFO_RECORD, - __osm_smir_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_smir_ctrl_init: ERR 2901: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_sw_info_record.c b/osm/opensm/osm_sa_sw_info_record.c index d60f237..da65864 100644 --- a/osm/opensm/osm_sa_sw_info_record.c +++ b/osm/opensm/osm_sa_sw_info_record.c @@ -328,9 +328,11 @@ Exit: **********************************************************************/ void osm_sir_rcv_process( - IN osm_sir_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_sir_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_switch_info_record_t* p_rcvd_rec; ib_switch_info_record_t* p_resp_rec; diff --git a/osm/opensm/osm_sa_sw_info_record_ctrl.c b/osm/opensm/osm_sa_sw_info_record_ctrl.c deleted file mode 100644 index daf55cc..0000000 --- a/osm/opensm/osm_sa_sw_info_record_ctrl.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Implementation of osm_sir_rcv_ctrl_t. - * This object represents the SwitchInfo Record controller object. - * This object is part of the opensm family of objects. 
- * - * Environment: - * Linux User Mode - * - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_sir_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_sir_rcv_process( ((osm_sir_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_sir_rcv_ctrl_construct( - IN osm_sir_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_sir_rcv_ctrl_destroy( - IN osm_sir_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_sir_rcv_ctrl_init( - IN osm_sir_rcv_ctrl_t* const p_ctrl, - IN osm_sir_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_sir_rcv_ctrl_init ); - - osm_sir_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_SWITCH_INFO_RECORD, - __osm_sir_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_sir_rcv_ctrl_init: ERR 5301: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - 
goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} diff --git a/osm/opensm/osm_sa_vlarb_record.c b/osm/opensm/osm_sa_vlarb_record.c index d7f341e..f0ff957 100644 --- a/osm/opensm/osm_sa_vlarb_record.c +++ b/osm/opensm/osm_sa_vlarb_record.c @@ -312,9 +312,11 @@ __osm_sa_vl_arb_by_comp_mask_cb( **********************************************************************/ void osm_vlarb_rec_rcv_process( - IN osm_vlarb_rec_rcv_t* const p_rcv, - IN const osm_madw_t* const p_madw ) + IN void *ctx, + IN void *data ) { + osm_vlarb_rec_rcv_t *p_rcv = ctx; + osm_madw_t *p_madw = data; const ib_sa_mad_t* p_rcvd_mad; const ib_vl_arb_table_record_t* p_rcvd_rec; const cl_ptr_vector_t* p_tbl; diff --git a/osm/opensm/osm_sa_vlarb_record_ctrl.c b/osm/opensm/osm_sa_vlarb_record_ctrl.c deleted file mode 100644 index b399276..0000000 --- a/osm/opensm/osm_sa_vlarb_record_ctrl.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_vlarb_rec_rcv_ctrl_t. - * This object represents the VL Arbitration Record SA request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_vlarb_rec_rcv_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_vlarb_rec_rcv_process( ((osm_vlarb_rec_rcv_ctrl_t*)context)->p_rcv, - (osm_madw_t*)p_data ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_vlarb_rec_rcv_ctrl_construct( - IN osm_vlarb_rec_rcv_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - -/********************************************************************** - **********************************************************************/ -void -osm_vlarb_rec_rcv_ctrl_destroy( - IN osm_vlarb_rec_rcv_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - 
**********************************************************************/ -ib_api_status_t -osm_vlarb_rec_rcv_ctrl_init( - IN osm_vlarb_rec_rcv_ctrl_t* const p_ctrl, - IN osm_vlarb_rec_rcv_t* const p_rcv, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_vlarb_rec_rcv_ctrl_init ); - - osm_vlarb_rec_rcv_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - p_ctrl->p_rcv = p_rcv; - p_ctrl->p_disp = p_disp; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_MAD_VL_ARB_RECORD, - __osm_vlarb_rec_rcv_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_vlarb_rec_rcv_ctrl_init: ERR 2B01: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c index 4f43a0d..16297c9 100644 --- a/osm/opensm/osm_state_mgr.c +++ b/osm/opensm/osm_state_mgr.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #define SUBNET_LIST_FILENAME "/osm-subnet.lst" -- 1.4.4.2.gfc82d From eitan at sw053.yok.mtl.com Fri Jan 19 21:19:16 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sat, 20 Jan 2007 07:19:16 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-20:normal completion Message-ID: <200701200519.l0K5JGlB021557@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Fri_Jan_19_07:20:45_2007 c556e9 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr 
IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From swise at opengridcomputing.com Sat Jan 20 07:03:43 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Sat, 20 Jan 2007 09:03:43 -0600 Subject: [openib-general] [PATCH ] ofabuild Allow command-line override of $scripts_git. Message-ID: <20070120150343.3308.874.stgit@dell3.ogc.int> Useful when testing changes to the ofsscripts tree. Signed-off-by: Steve Wise --- build_ofa_user.sh | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/build_ofa_user.sh b/build_ofa_user.sh index 52fe3af..a6bef75 100755 --- a/build_ofa_user.sh +++ b/build_ofa_user.sh @@ -28,7 +28,7 @@ mailto="openib-general at openib.org" ullibs="libibverbs libmthca libehca libipathverbs tvflash libibcm libsdp mstflint perftest srptools ipoibtools librdmacm dapl imgen management libcxgb3" # User level git packages -scripts_git="git://git.openfabrics.org/~vlad/ofascripts.git" +scripts_git=${scripts_git:-"git://git.openfabrics.org/~vlad/ofascripts.git"} scripts_branch="master" libibverbs_git="git://git.kernel.org/pub/scm/libs/infiniband/libibverbs.git" libibverbs_branch="master" From hnguyen at linux.vnet.ibm.com Sat Jan 20 07:20:15 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Sat, 20 Jan 2007 16:20:15 +0100 Subject: [openib-general] [PATCH 2.6.20 2/2] ehca: ehca_irq.c: fix mismatched spin_unlock in irq handler In-Reply-To: <200701192251.01888.hnguyen@linux.vnet.ibm.com> References: <200701192251.01888.hnguyen@linux.vnet.ibm.com> Message-ID: 
<200701201620.16097.hnguyen@linux.vnet.ibm.com> hmm, code line too long. please ignore the previous patch. here is the one with correct length of code line. Thanks Nam This is a patch for ehca_irq.c that fixes an improper use of spin_unlock in irq handler. Signed-off-by: Hoang-Nam Nguyen --- ehca_irq.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index e7209af..fd1a5fb 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -440,7 +440,9 @@ void ehca_tasklet_eq(unsigned long data) cq = idr_find(&ehca_cq_idr, token); if (cq == NULL) { - spin_unlock(&ehca_cq_idr_lock); + spin_unlock_irqrestore( + &ehca_cq_idr_lock, + flags); break; } From swise at opengridcomputing.com Sat Jan 20 07:27:28 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Sat, 20 Jan 2007 09:27:28 -0600 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 In-Reply-To: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> References: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> Message-ID: <1169306848.5132.11.camel@linux-q667.site> BTW: "local_sa" is still too long. I reduced it to "lsa" and it worked. On Thu, 2007-01-18 at 14:09 -0800, Sean Hefty wrote: > Can you see if the call to create_singlethread_workqueue() passes in a string > longer than 8 characters? If so, try reducing the size of the string. > > I've committed a change to the local_sa.c file this morning to avoid this sort > of backport issue, if this is indeed what it is. > > - Sean > > >Loading ib_local_sa crashes on sles9sp3 with a BUG() trap. Is this a > >known issue? 
> > > >----------- [cut here ] --------- [please bite here ] --------- > >Kernel BUG at workqueue:308 > >invalid operand: 0000 [1] SMP > >CPU 0 > >Pid: 7309, comm: modprobe Tainted: GF U (2.6.5-7.244-smp SLES9_SP3_BRANCH- > >200512121832250000) > >RIP: 0010:[] {__create_workqueue+33} > >RSP: 0000:000001003f4f7d38 EFLAGS: 00010202 > >RAX: 000000000000000b RBX: ffffffffa02b4e69 RCX: 0000000000000bb8 > >RDX: 0000000000000bb8 RSI: 0000000000000001 RDI: ffffffffa02b4e69 > >RBP: 0000000000000001 R08: 000000000000003d R09: 0000000000000000 > >R10: 00000000000493e0 R11: 0000000000000001 R12: ffffffff803e3fc0 > >R13: 000000000000000a R14: ffffffff803e3fc0 R15: 0000000000000000 > >FS: 0000002a95894b00(0000) GS:ffffffff8057cc00(0000) knlGS:0000000000000000 > >CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > >CR2: 0000000000539ccf CR3: 0000000000101000 CR4: 00000000000006e0 > >Process modprobe (pid: 7309, threadinfo 000001003f4f6000, task > >000001001fcdc9e0) > >Stack: 000001003fe3f350 ffffffff803e4020 ffffffffa02b7180 ffffffff803e3fc0 > > 000000000000000a ffffffff803e3fc0 ffffffffa02b2300 ffffffffa02bd08f > > ffffffff803e4020 ffffffff8015d4dd > >Call Trace:{:ib_local_sa:sa_db_init+143} > >{sys_init_module+6589} > > {generic_file_read+187} > >{:ib_local_sa:sa_db_init+0} > > {vfs_read+244} {sys_read+157} > > {system_call+124} > > > >Code: 0f 0b 17 66 38 80 ff ff ff ff 34 01 66 66 90 48 8b 3d 61 50 > >RIP {__create_workqueue+33} RSP <000001003f4f7d38> > > > > > > > >_______________________________________________ > >openib-general mailing list > >openib-general at openib.org > >http://openib.org/mailman/listinfo/openib-general > > > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From halr at voltaire.com Sat Jan 20 07:27:22 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 20 Jan 2007 10:27:22 -0500 Subject: [openib-general] [PATCH 0/3]: OpenSM: equalize-ignore-guids fixes Message-ID: <1169306799.28635.90387.camel@hal.voltaire.com> OpenSM: 
equalize-ignore-guids fixes This patch series fixes a number of problems in handling the equalize-ignore-guids file. Signed-off-by: Dale Purdy Signed-off-by: Hal Rosenstock From halr at voltaire.com Sat Jan 20 07:31:22 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 20 Jan 2007 10:31:22 -0500 Subject: [openib-general] [PATCH 1/3] OpenSM/port profile: Fix endian for some osm_port_prof routines Message-ID: <1169306844.28635.90389.camel@hal.voltaire.com> OpenSM/port profile: Fix endian for some osm_port_prof routines osm_port_prof_set_ignored_port and osm_port_prof_is_ignored_port take network rather than host order Signed-off-by: Dale Purdy Signed-off-by: Hal Rosenstock diff --git a/osm/include/opensm/osm_port_profile.h b/osm/include/opensm/osm_port_profile.h index 53fcb13..88d4caf 100644 --- a/osm/include/opensm/osm_port_profile.h +++ b/osm/include/opensm/osm_port_profile.h @@ -209,7 +209,7 @@ osm_port_prof_path_count_get( static inline boolean_t osm_port_prof_is_ignored_port( IN const osm_subn_t *p_subn, - IN uint64_t port_guid, + IN ib_net64_t port_guid, IN uint8_t port_num ) { const cl_map_t *p_map = &(p_subn->opt.port_prof_ignore_guids); @@ -251,7 +251,7 @@ osm_port_prof_is_ignored_port( static inline void osm_port_prof_set_ignored_port( IN osm_subn_t *p_subn, - IN uint64_t port_guid, + IN ib_net64_t port_guid, IN uint8_t port_num ) { cl_map_t *p_map = &(p_subn->opt.port_prof_ignore_guids); diff --git a/osm/opensm/osm_ucast_mgr.c b/osm/opensm/osm_ucast_mgr.c index 118c9ef..ded3880 100644 --- a/osm/opensm/osm_ucast_mgr.c +++ b/osm/opensm/osm_ucast_mgr.c @@ -840,7 +840,7 @@ __osm_ucast_mgr_process_port( as in the case of the Mellanox Anafa Internal PCI TCA port */ is_ignored_by_port_prof = - osm_port_prof_is_ignored_port(p_mgr->p_subn, cl_ntoh64(node_guid), port); + osm_port_prof_is_ignored_port(p_mgr->p_subn, node_guid, port); /* We also would ignore this route if the target lid is of a switch From halr at voltaire.com Sat Jan 20 07:31:47 2007 From: halr 
at voltaire.com (Hal Rosenstock) Date: 20 Jan 2007 10:31:47 -0500 Subject: [openib-general] [PATCH 2/3] OpenSM/main.c: Fix endian for GUID in osm_log debug message and add port number to same debug message Message-ID: <1169307048.28635.90553.camel@hal.voltaire.com> OpenSM/main.c: Fix endian for GUID in osm_log debug message and add port number to same debug message Signed-off-by: Dale Purdy Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/main.c b/osm/opensm/main.c index 9186576..9fa297a 100644 --- a/osm/opensm/main.c +++ b/osm/opensm/main.c @@ -486,7 +486,7 @@ parse_ignore_guids_file(IN char *guids_f line_num++; p_c = line; while ( (*p_c == ' ') && (*p_c != '\0')) p_c++; - port_guid = cl_hton64( strtoull( p_c, &p_ec, 16 ) ); + port_guid = strtoull( p_c, &p_ec, 16 ); if (p_ec == p_c) { osm_log( &p_osm->log, OSM_LOG_ERROR, @@ -511,11 +511,11 @@ parse_ignore_guids_file(IN char *guids_f } /* ok insert it */ - osm_port_prof_set_ignored_port(&p_osm->subn, port_guid, port_num); + osm_port_prof_set_ignored_port(&p_osm->subn, cl_hton64( port_guid ), port_num); osm_log( &p_osm->log, OSM_LOG_DEBUG, "parse_ignore_guids_file: " - "Inserted Port: 0x%" PRIx64 " into ignored guids list\n" , - port_guid + "Inserted Port: 0x%" PRIx64 " PortNum: 0x%X into ignored guids list\n" , + port_guid, port_num ); } From halr at voltaire.com Sat Jan 20 07:39:39 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 20 Jan 2007 10:39:39 -0500 Subject: [openib-general] [PATCH 3/3] OpenSM/osm_port_profile.h: Fix reinsertion issue in osm_port_prof_set_ignored_port Message-ID: <1169307095.28635.90555.camel@hal.voltaire.com> OpenSM/osm_port_profile.h: Fix reinsertion issue in osm_port_prof_set_ignored_port In osm_port_prof_set_ignored_port, if key already exists in map, it must be removed prior to reinserting. This is to handle more than one port for the same GUID. 
Signed-off-by: Dale Purdy Signed-off-by: Hal Rosenstock diff --git a/osm/include/opensm/osm_port_profile.h b/osm/include/opensm/osm_port index 88d4caf..952393d 100644 --- a/osm/include/opensm/osm_port_profile.h +++ b/osm/include/opensm/osm_port_profile.h @@ -263,6 +263,7 @@ osm_port_prof_set_ignored_port( if (p_obj != NULL) { value = (size_t)p_obj; + cl_map_remove(p_map, port_guid); } value = value | (1 << port_num); From swise at opengridcomputing.com Sat Jan 20 07:44:26 2007 From: swise at opengridcomputing.com (Steve WIse) Date: Sat, 20 Jan 2007 09:44:26 -0600 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 In-Reply-To: <1169306848.5132.11.camel@linux-q667.site> References: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> <1169306848.5132.11.camel@linux-q667.site> Message-ID: <1169307866.5132.16.camel@linux-q667.site> On Sat, 2007-01-20 at 09:27 -0600, Steve WIse wrote: > BTW: "local_sa" is still too long. I reduced it to "lsa" and it > worked. > I cannot explain this. The BUG_ON() is for (strlen(name) > 10). "local_sa" is 8...9 if you count the NUL at the end. But I hit that BUG_ON() for rhel4u4 and sles9sp3. I reduced the name to "lsa" and it loads fine. ???? > > On Thu, 2007-01-18 at 14:09 -0800, Sean Hefty wrote: > > Can you see if the call to create_singlethread_workqueue() passes in a string > > longer than 8 characters? If so, try reducing the size of the string. > > > > I've committed a change to the local_sa.c file this morning to avoid this sort > > of backport issue, if this is indeed what it is. > > > > - Sean > > > > >Loading ib_local_sa crashes on sles9sp3 with a BUG() trap. Is this a > > >known issue? 
> > > > > >----------- [cut here ] --------- [please bite here ] --------- > > >Kernel BUG at workqueue:308 > > >invalid operand: 0000 [1] SMP > > >CPU 0 > > >Pid: 7309, comm: modprobe Tainted: GF U (2.6.5-7.244-smp SLES9_SP3_BRANCH- > > >200512121832250000) > > >RIP: 0010:[] {__create_workqueue+33} > > >RSP: 0000:000001003f4f7d38 EFLAGS: 00010202 > > >RAX: 000000000000000b RBX: ffffffffa02b4e69 RCX: 0000000000000bb8 > > >RDX: 0000000000000bb8 RSI: 0000000000000001 RDI: ffffffffa02b4e69 > > >RBP: 0000000000000001 R08: 000000000000003d R09: 0000000000000000 > > >R10: 00000000000493e0 R11: 0000000000000001 R12: ffffffff803e3fc0 > > >R13: 000000000000000a R14: ffffffff803e3fc0 R15: 0000000000000000 > > >FS: 0000002a95894b00(0000) GS:ffffffff8057cc00(0000) knlGS:0000000000000000 > > >CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > >CR2: 0000000000539ccf CR3: 0000000000101000 CR4: 00000000000006e0 > > >Process modprobe (pid: 7309, threadinfo 000001003f4f6000, task > > >000001001fcdc9e0) > > >Stack: 000001003fe3f350 ffffffff803e4020 ffffffffa02b7180 ffffffff803e3fc0 > > > 000000000000000a ffffffff803e3fc0 ffffffffa02b2300 ffffffffa02bd08f > > > ffffffff803e4020 ffffffff8015d4dd > > >Call Trace:{:ib_local_sa:sa_db_init+143} > > >{sys_init_module+6589} > > > {generic_file_read+187} > > >{:ib_local_sa:sa_db_init+0} > > > {vfs_read+244} {sys_read+157} > > > {system_call+124} > > > > > >Code: 0f 0b 17 66 38 80 ff ff ff ff 34 01 66 66 90 48 8b 3d 61 50 > > >RIP {__create_workqueue+33} RSP <000001003f4f7d38> > > > > > > > > > > > >_______________________________________________ > > >openib-general mailing list > > >openib-general at openib.org > > >http://openib.org/mailman/listinfo/openib-general > > > > > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, 
please visit http://openib.org/mailman/listinfo/openib-general > From swise at opengridcomputing.com Sat Jan 20 08:41:14 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Sat, 20 Jan 2007 10:41:14 -0600 Subject: [openib-general] abi_version file not created on SLES9SP3 or RHEL4U4 In-Reply-To: References: Message-ID: <1169311274.31297.1.camel@stevo-desktop> Your mailer garbled this patch. Can you please email me this as an attachment, or make sure your email client doesn't do line wrapping? Thanks, Steve. On Fri, 2007-01-19 at 11:06 -0800, Woodruff, Robert J wrote: > Steve wrote, > >Sean, > > >I'm testing the latest ofed_1_2 code on RHEL4U4 and SLES9SP3. After > >loading the rdma_ucm module, there isn't a abi_version file installed > >in /sys/class/misc/rdma_cm. There is no error logged from the ucma so > I > >_think_ it created the file ok in ucma_init(). > > >Have you seen this? > > I ran into the same problem when backporting Sean's latest > multicast+sa_cache > branches to EL4. > > This backport patch can be applied to sean's latest code to > allow it to work on EL4. 
> I also had to make other fixes to get Sean latest code base to work on > EL4 > My complete backport patch set is located in my directory on the open > fabrics > server under, > /home/woody/ofa_patches_and_backports/trunk/kernel/backports/EL4-U4 > > > diff -Naurp linux-2.6.9/drivers/infiniband/core/ucma.c > linux-2.6.9-openib-drivers-git011107-fixups/drivers/infiniband/core/ucma > .c > --- linux-2.6.9/drivers/infiniband/core/ucma.c 2007-01-11 > 15:54:57.000000000 -0800 > +++ > linux-2.6.9-openib-drivers-git011107-fixups/drivers/infiniband/core/ucma > .c 2007-01-12 13:19:15.000000000 -0800 > @@ -1034,13 +1034,13 @@ static struct miscdevice ucma_misc = { > .fops = &ucma_fops, > }; > > -static ssize_t show_abi_version(struct device *dev, > - struct device_attribute *attr, > - char *buf) > +static struct class *ucma_class; > +static ssize_t show_abi_version(struct class *class_dev, char *buf) > { > - return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); > + return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); > } > -static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); > +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); > + > > static int __init ucma_init(void) > { > @@ -1050,22 +1050,28 @@ static int __init ucma_init(void) > if (ret) > return ret; > > - ret = device_create_file(ucma_misc.this_device, > &dev_attr_abi_version); > - if (ret) { > - printk(KERN_ERR "rdma_ucm: couldn't create abi_version > attr\n"); > - goto err; > - } > - return 0; > + ucma_class = class_create(THIS_MODULE, "infiniband_ucma"); > + if (IS_ERR(ucma_class)) { > + printk(KERN_ERR "rdma_ucm: couldn't create class > infiniband_ucma\n"); > + goto err; > + } > + > + ret = class_create_file(ucma_class, &class_attr_abi_version); > + if (ret) { > + printk(KERN_ERR "user_verbs: couldn't create > abi_version attribute\n"); > + goto err; > + } > + > + return 0; > err: > - misc_deregister(&ucma_misc); > - return ret; > + misc_deregister(&ucma_misc); > + return ret; > } > > 
+ > static void __exit ucma_cleanup(void) > { > - device_remove_file(ucma_misc.this_device, > &dev_attr_abi_version); > misc_deregister(&ucma_misc); > - idr_destroy(&ctx_idr); > } > > module_init(ucma_init); From marciaalvesribeiro at click21.com.br Sat Jan 20 11:04:11 2007 From: marciaalvesribeiro at click21.com.br (Marcia Ribeiro) Date: Sat, 20 Jan 2007 16:04:11 -0300 Subject: Video Aulas, cursos em vídeo. Aprenda a fazer tudo com video cursos Message-ID: <20070120190302.EC73A3B0006@sentry-two.sandia.gov> Video Aulas para download, cursos em vídeo. Vários assuntos. Aprenda a fazer sushi, curso de Yoga em vídeo, video aula de sedução, video curso de massagem sensual: http://www.gueb.de/videocursosbrasil Video cursos de informática, ginástica, artes marciais. Tudo em cursos para download. Vídeos com entrega imediata. Aulas práticas e demonstrativas. Plataforma para e learning. Ensino a distancia. Treinamentos, cursos, congressos, seminarios, video digital com transmissão ao vivo, aulas. http://www.gueb.de/videocursosbrasil Comercializa vídeos de aulas de música, incluindo guitarra, baixo, bateria, flauta, gaita, pandeiro, canto, dança e harmonia. 
Encontre no nosso site cursos de Várias áreas para sua escolha e aprimoramento: http://www.gueb.de/videocursosbrasil From eitan at sw053.yok.mtl.com Sat Jan 20 21:21:19 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sun, 21 Jan 2007 07:21:19 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-21:normal completion Message-ID: <200701210521.l0L5LJsd028750@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Sat_Jan_20_10:09:52_2007 25aa01 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From vlad at mellanox.co.il Sat Jan 20 23:28:29 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 21 Jan 2007 09:28:29 +0200 Subject: [openib-general] [PATCH ] ofabuild Allow command-line override of $scripts_git. In-Reply-To: <20070120150343.3308.874.stgit@dell3.ogc.int> References: <20070120150343.3308.874.stgit@dell3.ogc.int> Message-ID: <1169364510.23809.89.camel@swlab50.yok.mtl.com> Thanks, applied. Regards, Vladimir On Sat, 2007-01-20 at 09:03 -0600, Steve Wise wrote: > Useful when testing changes to the ofsscripts tree. 
> > Signed-off-by: Steve Wise From erezz at voltaire.com Sat Jan 20 23:33:35 2007 From: erezz at voltaire.com (Erez Zilber) Date: Sun, 21 Jan 2007 09:33:35 +0200 Subject: [openib-general] [PATCH 1/1] IB/iser: return error code when PDUs may not be sent In-Reply-To: References: <45A0CB32.1060301@voltaire.com> Message-ID: <45B3174F.6090703@voltaire.com> Roland Dreier wrote: > Thanks, queued for 2.6.20. (And thanks for letting me know that this > patch should go for 2.6.20) > Roland, What do I need to do in order to insert this fix into the 2.6.19 stable version (never done that before)? Thanks, Erez From kliteyn at dev.mellanox.co.il Sun Jan 21 00:46:42 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 21 Jan 2007 10:46:42 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <20070118000120.GO21043@sashak.voltaire.com> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> Message-ID: <45B32872.4070006@dev.mellanox.co.il> Hi Sasha. Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: >> Hi Hal >> >> The following series of six patches implements QoS policy file parser: >> >> 1. QoS parser Lex file >> 2. QoS parser Lex-generated c file >> 3. QoS parser grammar (Yacc) file >> 4. QoS parser Yacc-generated grammar c and h file >> 5. QoS parser header file that defines parse tree data structures >> 6. Changes in makefiles and configure.in file for compiling QoS parser files > > Is there any description of proposed format and functionality? The parser is based on QoS RFC sent by Eitan in May 2006, with a few minor modifications. You can find the RFC here: http://openib.org/pipermail/openib-general/2006-May/022336.html > Also what about using human readable formats? To me the xml-like format in the RFC looks pretty readable. It has very limited number of keywords (tags), so it's easy to follow and/or to modify. 
-- Yevgeny > Sasha > From mst at mellanox.co.il Sun Jan 21 01:10:54 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 21 Jan 2007 11:10:54 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B32872.4070006@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> Message-ID: <20070121091054.GA28254@mellanox.co.il> > > Also what about using human readable formats? > > To me the xml-like format in the RFC looks pretty readable. > It has very limited number of keywords (tags), so it's easy > to follow and/or to modify. BTW, an interesting approach to take would be relax-ng http://openib.org/pipermail/openib-general/2006-May/022336.html which supports both XML and non-XML formats. -- MST From kliteyn at dev.mellanox.co.il Sun Jan 21 01:17:44 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 21 Jan 2007 11:17:44 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <20070118195340.GB23783@sashak.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> Message-ID: <45B32FB8.40700@dev.mellanox.co.il> Sasha Khapyorsky wrote: > On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: >> As for the mailing list it's openib-windows at openib.org. 
You can access >> it here: http://openib.org/mailman/listinfo/openib-windows > > I found only references to svn://windows.openib.org, where > 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | > head -n 40' shows: > > ------------------------------------------------------------------------ > r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line > > Set property svn:keywords "id" on all repository > ------------------------------------------------------------------------ > r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line > > [OPENSM] When running as a service, if all ports are down, use the first port. > ------------------------------------------------------------------------ > r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines > > [OPENSM] When trying to set to INIT the remote port of the given physical port > in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no > check whether the physical port in null (e.g., if it's disconnected). > ------------------------------------------------------------------------ > r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line > > [opensm] Base service status on results that were received from opensm log messages. > ------------------------------------------------------------------------ > r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line > > [OPENSM] missed fix for OPENSM logging to System Event Log > ------------------------------------------------------------------------ > r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines > > [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! > 2. bugfix: added message file for correct logging to System Event Log. > 3. bugfix: wrong passing parameters in server mode; > 4. 
bugfix: error in table of parameters > > ------------------------------------------------------------------------ > r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line > > [opensm] Fix a trivial build break > ------------------------------------------------------------------------ > r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines > > if the guid2lid is corrupted, don't exit when running with -y option > (don't exit on fatal) - just ignore the file > > > > Seems that development there was stopped in Aug 2006, and it doesn't > have recent Win port patches. Am I looking in the wrong place? You were looking in the right place. It appears that I didn't describe the development process correctly. I think this repository is updated with stable OSM versions, after the code is tested. If you need more details, I think it's better for you to ask windows folks directly, since as we see, my knowledge in this area is very limited. -- Yevgeny > Sasha > From kliteyn at dev.mellanox.co.il Sun Jan 21 01:24:05 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 21 Jan 2007 11:24:05 +0200 Subject: [openib-general] [Fwd: Re: win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes]] Message-ID: <45B33135.4010606@dev.mellanox.co.il> Tzachi, Yossi, please join the thread. What do you think about distributing a copy of the pthread DLL with opensm? -- Yevgeny. -------- Original Message -------- Subject: Re: win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] Date: Fri, 19 Jan 2007 00:20:32 +0200 From: Sasha Khapyorsky To: Michael S. Tsirkin CC: Yevgeny Kliteynik , OPENIB References: <20070118194403.GA23783 at sashak.voltaire.com> <20070118215023.GP9890 at mellanox.co.il> On 23:50 Thu 18 Jan , Michael S. Tsirkin wrote: > > Quoting Sasha Khapyorsky : > > Subject: Re: win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] > > > > On 07:00 Thu 18 Jan , Michael S. 
Tsirkin wrote: > > > > What about pure opensource - http://sourceware.org/pthreads-win32/? It > > > > is licensed under LGPL, I see on the net many positive reports about > > > > stability and usability. > > > > > > I used it to do a windows port of linux complib at some point and opensm > > > seemed to work fine with it. What it was lacking at that point was > > > support for 64 bit applications, and for some reason (which is > > > still unclear to me) there was a strong desire to run opensm in 64 bit mode. > > > Seems to have been fixed now, BTW. > > > > So this seems to be good option for OpenSM on Windows. Right? > > No idea. Distributing a copy of the pthread DLL with opensm does not > look like a problem. But is it worth it? Sure, it makes windows porting much more transparent and let us to use standard *nix stuff w/out #ifndef WIN32. Other (generic) benefit is that posix is more standard and powerful than wrappers like complib. Sasha From ogerlitz at voltaire.com Sun Jan 21 03:51:17 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 21 Jan 2007 13:51:17 +0200 (IST) Subject: [openib-general] failure to use libibverbs clone Message-ID: Roland, Using a fresh clone of libibverbs, libmthca and a kernel based on 2.6.20-rc3 (clone of Sean's rdma-dev git tree at open fabrics) I am getting errors such as # LD_LIBRARY_PATH=/usr/local/rdmacm/lib /usr/local/rdmacm/bin/ibv_devinfo libibverbs: Warning: couldn't open config directory '/usr/local/rdmacm/etc/libibverbs.d'. libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 No IB devices found the strace traces follow, the system is very much operative (eg with IPoIB) Or. 
execve("/usr/local/rdmacm/bin/ibv_devinfo", ["/usr/local/rdmacm/bin/ibv_devinfo"], [/* 70 vars */]) = 0 uname({sys="Linux", node="dill", ...}) = 0 brk(0) = 0x503000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aeba2f6f000 open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/local/rdmacm/lib/tls/x86_64/libibverbs.so.2", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/usr/local/rdmacm/lib/tls/x86_64", 0x7fff07b4e0f0) = -1 ENOENT (No such file or directory) open("/usr/local/rdmacm/lib/tls/libibverbs.so.2", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/usr/local/rdmacm/lib/tls", 0x7fff07b4e0f0) = -1 ENOENT (No such file or directory) open("/usr/local/rdmacm/lib/x86_64/libibverbs.so.2", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/usr/local/rdmacm/lib/x86_64", 0x7fff07b4e0f0) = -1 ENOENT (No such file or directory) open("/usr/local/rdmacm/lib/libibverbs.so.2", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\'\0"..., 640) = 640 fstat(3, {st_mode=S_IFREG|0755, st_size=164431, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aeba2f70000 mmap(NULL, 1085352, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x2aeba3071000 madvise(0x2aeba3071000, 1085352, MADV_SEQUENTIAL|0x1) = 0 mprotect(0x2aeba3079000, 1052584, PROT_NONE) = 0 mmap(0x2aeba3171000, 36864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x2aeba3171000 close(3) = 0 open("/usr/local/rdmacm/lib/libpthread.so.0", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/local/rdmacm/lib/libpthread.so.0", O_RDONLY) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=130091, ...}) = 0 mmap(NULL, 130091, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2aeba317a000 close(3) = 0 open("/lib64/tls/libpthread.so.0", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340X\0\0"..., 640) = 
640 fstat(3, {st_mode=S_IFREG|0755, st_size=99188, ...}) = 0 mmap(NULL, 1129880, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x2aeba319a000 madvise(0x2aeba319a000, 1129880, MADV_SEQUENTIAL|0x1) = 0 mprotect(0x2aeba31a8000, 1072536, PROT_NONE) = 0 mmap(0x2aeba329a000, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x2aeba329a000 mmap(0x2aeba32aa000, 15768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2aeba32aa000 close(3) = 0 open("/usr/local/rdmacm/lib/libdl.so.2", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/local/rdmacm/lib/libdl.so.2", O_RDONLY) = -1 ENOENT (No such file or directory) open("/lib64/libdl.so.2", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\36\0"..., 640) = 640 fstat(3, {st_mode=S_IFREG|0755, st_size=16807, ...}) = 0 mmap(NULL, 1058904, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x2aeba32ae000 madvise(0x2aeba32ae000, 1058904, MADV_SEQUENTIAL|0x1) = 0 mprotect(0x2aeba32b1000, 1046616, PROT_NONE) = 0 mmap(0x2aeba33ae000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x2aeba33ae000 close(3) = 0 open("/usr/local/rdmacm/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/local/rdmacm/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) open("/lib64/tls/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\313\1\0"..., 640) = 640 lseek(3, 624, SEEK_SET) = 624 read(3, "\4\0\0\0\20\0\0\0\1\0\0\0GNU\0\0\0\0\0\2\0\0\0\6\0\0\0"..., 32) = 32 fstat(3, {st_mode=S_IFREG|0755, st_size=1401317, ...}) = 0 mmap(NULL, 2235432, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x2aeba33b1000 madvise(0x2aeba33b1000, 2235432, MADV_SEQUENTIAL|0x1) = 0 mprotect(0x2aeba34b7000, 1162280, PROT_NONE) = 0 mmap(0x2aeba35b1000, 122880, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x100000) = 0x2aeba35b1000 mmap(0x2aeba35cf000, 15400, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2aeba35cf000 close(3) = 
0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aeba35d3000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aeba35d4000 arch_prctl(0x1002, 0x2aeba35d3900) = 0 munmap(0x2aeba317a000, 130091) = 0 SYS_218(0x2aeba35d3990, 0x7fff07b4ecc8, 0x2aeba35d3900, 0xffffffffffffffff, 0xffffffff00000000) = 8760 rt_sigaction(SIGRTMIN, {0x2aeba319f840, [], SA_SIGINFO|0x4000000}, NULL, 8) = 0 rt_sigprocmask(SIG_UNBLOCK, [RTMIN], NULL, 8) = 0 getrlimit(0x3, 0x7fff07b4eb50) = 0 _sysctl({{CTL_KERN, KERN_VERSION}, 2, 0x7fff07b4e950, 35, (nil), 0}) = 0 getuid() = 0 geteuid() = 0 brk(0) = 0x503000 brk(0x524000) = 0x524000 brk(0) = 0x524000 open("/sys/class/infiniband_verbs/abi_version", O_RDONLY) = 3 read(3, "6\n", 8) = 2 close(3) = 0 open("/usr/local/rdmacm/etc/libibverbs.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = -1 ENOENT (No such file or directory) write(2, "libibverbs: Warning: couldn\'t op"..., 90libibverbs: Warning: couldn't open config directory '/usr/local/rdmacm/etc/libibverbs.d'. 
) = 90 open("/sys/class/infiniband_verbs", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 3 fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0 fcntl(3, F_SETFD, FD_CLOEXEC) = 0 getdents(3, /* 4 entries */, 4096) = 112 stat("/sys/class/infiniband_verbs/uverbs0", {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0 open("/sys/class/infiniband_verbs/uverbs0/ibdev", O_RDONLY) = 4 read(4, "mthca0\n", 64) = 7 close(4) = 0 open("/sys/class/infiniband_verbs/uverbs0/abi_version", O_RDONLY) = 4 read(4, "1\n", 8) = 2 close(4) = 0 stat("/sys/class/infiniband_verbs/abi_version", {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 getdents(3, /* 0 entries */, 4096) = 0 close(3) = 0 futex(0x2aeba33b0854, FUTEX_WAKE, 2147483647) = 0 getuid() = 0 geteuid() = 0 write(2, "libibverbs: Warning: no userspac"..., 103libibverbs: Warning: no userspace device-specific driver found for /sys/class/infiniband_verbs/uverbs0 ) = 103 write(2, "No IB devices found\n", 20No IB devices found ) = 20 exit_group(-1) = ? From mst at mellanox.co.il Sun Jan 21 03:56:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 21 Jan 2007 13:56:48 +0200 Subject: [openib-general] failure to use libibverbs clone In-Reply-To: References: Message-ID: <20070121115648.GH28254@mellanox.co.il> > libibverbs: Warning: couldn't open config directory '/usr/local/rdmacm/etc/libibverbs.d'. Well, do you have /usr/local/rdmacm/etc/libibverbs.d? -- MST From ogerlitz at voltaire.com Sun Jan 21 04:12:08 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 21 Jan 2007 14:12:08 +0200 Subject: [openib-general] failure to use libibverbs clone In-Reply-To: <20070121115648.GH28254@mellanox.co.il> References: <20070121115648.GH28254@mellanox.co.il> Message-ID: <45B35898.4020108@voltaire.com> Michael S. Tsirkin wrote: >> libibverbs: Warning: couldn't open config directory '/usr/local/rdmacm/etc/libibverbs.d'. > > Well, do you have /usr/local/rdmacm/etc/libibverbs.d? no, who should create it? 
doing $ make install under libibverbs does not do it. I have manually created an empty directory and then this warning went away but the other one and the failure to find devices stayed. Or. From ogerlitz at voltaire.com Sun Jan 21 04:15:55 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 21 Jan 2007 14:15:55 +0200 Subject: [openib-general] OFED ipoib_8111_to_2_6_16.patch In-Reply-To: <20070118152626.GA4298@mellanox.co.il> References: <45AF7FE6.60200@voltaire.com> <20070118152626.GA4298@mellanox.co.il> Message-ID: <45B3597B.7060404@voltaire.com> Michael S. Tsirkin wrote: >> OK, thanks for the info. The context here is the bonding support. We had >> an issue with distro (eg RH4 U3, SLES10) kernels that was not reproduced >> with upstream kernels and it seems to be related to the change you have >> pushed to 2.6.17. I will let you know if we need more clarifications. > Was the issue triggered at ipoib module unload? no, it's an issue related to the bonding design and the two-layer nature of the ipoib neighbouring scheme: struct neighbour "pointing" to struct ipoib_neigh etc. We are still investigating it, hope to know more by tomorrow. Or. From mst at mellanox.co.il Sun Jan 21 04:22:51 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 21 Jan 2007 14:22:51 +0200 Subject: [openib-general] failure to use libibverbs clone In-Reply-To: <45B35898.4020108@voltaire.com> References: <20070121115648.GH28254@mellanox.co.il> <45B35898.4020108@voltaire.com> Message-ID: <20070121122251.GK28254@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: failure to use libibverbs clone > > >> libibverbs: Warning: couldn't open config directory '/usr/local/rdmacm/etc/libibverbs.d'. > > > > Well, do you have /usr/local/rdmacm/etc/libibverbs.d? > > no, who should create it? doing $ make install under libibverbs does not > do it. I have manually created an empty directory and then this warning > went away but the other one and the failure to find devices stayed.
It's created when you install a low level driver (e.g. libmthca). -- MST From ogerlitz at voltaire.com Sun Jan 21 05:06:34 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 21 Jan 2007 15:06:34 +0200 (IST) Subject: [openib-general] failure to use libibverbs clone In-Reply-To: References: Message-ID: install libmthca does not seem to create the etc directory Or. dill:/usr/src/libmthca # make install make[1]: Entering directory `/usr/src/libmthca' test -z "/usr/local/rdmacm/lib" || mkdir -p -- . "/usr/local/rdmacm/lib" test -z "" || mkdir -p -- . "" test -z "/usr/local/rdmacm/lib/infiniband" || mkdir -p -- . "/usr/local/rdmacm/lib/infiniband" /bin/sh ./libtool --mode=install /usr/bin/install -c 'src/mthca.la' '/usr/local/rdmacm/lib/infiniband/mthca.la' /usr/bin/install -c src/.libs/mthca.so /usr/local/rdmacm/lib/infiniband/mthca.so /usr/bin/install -c src/.libs/mthca.lai /usr/local/rdmacm/lib/infiniband/mthca.la /usr/bin/install -c src/.libs/mthca.a /usr/local/rdmacm/lib/infiniband/mthca.a ranlib /usr/local/rdmacm/lib/infiniband/mthca.a chmod 644 /usr/local/rdmacm/lib/infiniband/mthca.a PATH="$PATH:/sbin" ldconfig -n /usr/local/rdmacm/lib/infiniband ---------------------------------------------------------------------- Libraries have been installed in: /usr/local/rdmacm/lib/infiniband If you ever happen to want to link against installed libraries in a given directory, LIBDIR, you must either use libtool, and specify the full pathname of the library, or use the `-LLIBDIR' flag during linking and do at least one of the following: - add LIBDIR to the `LD_LIBRARY_PATH' environment variable during execution - add LIBDIR to the `LD_RUN_PATH' environment variable during linking - use the `-Wl,--rpath -Wl,LIBDIR' linker flag - have your system administrator add LIBDIR to `/etc/ld.so.conf' See any operating system documentation about shared libraries for more information, such as the ld(1) and ld.so(8) manual pages. 
---------------------------------------------------------------------- make[1]: Leaving directory `/usr/src/libmthca' From mst at mellanox.co.il Sun Jan 21 05:16:02 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 21 Jan 2007 15:16:02 +0200 Subject: [openib-general] failure to use libibverbs clone In-Reply-To: References: Message-ID: <20070121131602.GO28254@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: failure to use libibverbs clone > > install libmthca does not seem to create the etc directory > > Or. > > > dill:/usr/src/libmthca # make install > make[1]: Entering directory `/usr/src/libmthca' > test -z "/usr/local/rdmacm/lib" || mkdir -p -- . "/usr/local/rdmacm/lib" > test -z "" || mkdir -p -- . "" > test -z "/usr/local/rdmacm/lib/infiniband" || mkdir -p -- . > "/usr/local/rdmacm/lib/infiniband" > /bin/sh ./libtool --mode=install /usr/bin/install -c 'src/mthca.la' > '/usr/local/rdmacm/lib/infiniband/mthca.la' > /usr/bin/install -c src/.libs/mthca.so > /usr/local/rdmacm/lib/infiniband/mthca.so > /usr/bin/install -c src/.libs/mthca.lai > /usr/local/rdmacm/lib/infiniband/mthca.la > /usr/bin/install -c src/.libs/mthca.a > /usr/local/rdmacm/lib/infiniband/mthca.a > ranlib /usr/local/rdmacm/lib/infiniband/mthca.a > chmod 644 /usr/local/rdmacm/lib/infiniband/mthca.a > PATH="$PATH:/sbin" ldconfig -n /usr/local/rdmacm/lib/infiniband > ---------------------------------------------------------------------- > Libraries have been installed in: > /usr/local/rdmacm/lib/infiniband This is your problem right here. You have configured mthca against an old libibverbs installation, so it puts libraries in the wrong places. 
-- MST From tziporet at mellanox.co.il Sun Jan 21 05:47:47 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Sun, 21 Jan 2007 15:47:47 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build Message-ID: <45B36F03.9070201@mellanox.co.il> Hi Sean and Arlin, There were several mails regarding the libraries you maintain. These are the relevant branches under your git trees (the branches we use now are starred). ~shefty/librdmacm.git master rdma_ucm-abi3* multicast ~ardavis/dapl.git: rdma_ucm* master Can you reply with which branch to use in our daily ofed 1.2 builds? Thanks, Tziporet From kliteyn at dev.mellanox.co.il Sun Jan 21 06:27:47 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 21 Jan 2007 16:27:47 +0200 Subject: [openib-general] [PATCH] osm: bug in PathRecord.HopLimit access function Message-ID: <45B37863.5040109@dev.mellanox.co.il> Hi Hal Fixing bug in PathRecord.HopLimit access function. Signed-off-by: Yevgeny Kliteynik --- osm/include/iba/ib_types.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 26727df..22f7f62 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -3052,7 +3052,7 @@ static inline uint8_t OSM_API ib_path_rec_hop_limit( IN const ib_path_rec_t* const p_rec ) { - return( (uint8_t)(p_rec->hop_flow_raw & 0x000000FF ) ); + return( (uint8_t)( cl_ntoh32(p_rec->hop_flow_raw) & 0x000000FF ) ); } /* * PARAMETERS -- 1.4.4.1.GIT From rdreier at cisco.com Sun Jan 21 10:09:46 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 21 Jan 2007 10:09:46 -0800 Subject: [openib-general] [PATCH 1/1] IB/iser: return error code when PDUs may not be sent In-Reply-To: <45B3174F.6090703@voltaire.com> (Erez Zilber's message of "Sun, 21 Jan 2007 09:33:35 +0200") References: <45A0CB32.1060301@voltaire.com> <45B3174F.6090703@voltaire.com> Message-ID: > What do I need to do in order to insert
this fix into the 2.6.19 stable > version (never done that before)? Send the patch to stable at kernel.org From rdreier at cisco.com Sun Jan 21 14:06:09 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 21 Jan 2007 14:06:09 -0800 Subject: [openib-general] [PATCH 2.6.20 1/2] ehca: ehca_cq.c: fix unproper use of yield within spinlock context In-Reply-To: <200701192250.10765.hnguyen@linux.vnet.ibm.com> (Hoang-Nam Nguyen's message of "Fri, 19 Jan 2007 22:50:10 +0100") References: <200701192250.10765.hnguyen@linux.vnet.ibm.com> Message-ID: Very minor but > Signed-off-by Hoang-Nam Nguyen should be Signed-off-by: Hoang-Nam Nguyen (':' after the "-by") Anyway, queued for 2.6.20, thanks. - R. From eitan at sw053.yok.mtl.com Sun Jan 21 21:23:49 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Mon, 22 Jan 2007 07:23:49 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-22:normal completion Message-ID: <200701220523.l0M5NnrF013209@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Sat_Jan_20_10:09:52_2007 25aa01 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=409 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo 9 Pkey IS3-128.topo 
Failures: 1 Pkey IS3-128.topo From mst at mellanox.co.il Sun Jan 21 22:23:22 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 08:23:22 +0200 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 In-Reply-To: <1169307866.5132.16.camel@linux-q667.site> References: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> <1169306848.5132.11.camel@linux-q667.site> <1169307866.5132.16.camel@linux-q667.site> Message-ID: <20070122062322.GB17366@mellanox.co.il> > Quoting Steve WIse : > Subject: Re: ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 > > On Sat, 2007-01-20 at 09:27 -0600, Steve WIse wrote: > > BTW: "local_sa" is still too long. I reduced it to "lsa" and it > > worked. > > > > I cannot explain this. The BUG_ON() is for (strlen(name) > 10). > "local_sa" is 8...9 if you count the NUL at the end. > > But I hit that BUG_ON() for rhel4u4 and sles9sp3. I reduced the name to > "lsa" and it loads fine. Seems to work fine here. Pls re-check your setup. -- MST From mst at mellanox.co.il Sun Jan 21 22:26:17 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 08:26:17 +0200 Subject: [openib-general] bugzilla email gateway In-Reply-To: References: <296ACF11-7596-4D9D-8F9A-BB0A19C89939@cisco.com> <20070111130125.GL1759@mellanox.co.il> Message-ID: <20070122062617.GC17366@mellanox.co.il> Jeff, Michael, when can this be addressed? We are nearing kick-off date for the 1.2 testing phase, and this is becoming urgent as people will start reporting issues in bugzilla. Quoting Jeff Squyres : Subject: Re: bugzilla email gateway Michael will have to answer that. My assumption is that it's going to be broken for now. We took the approach of moving everything else first, and then moving all mail-related services second. Perhaps that was a mistake. :-\ Depending on how much longer it takes to migrate all the rest of the mail services, this problem could go away soon anyway. 
On Jan 11, 2007, at 8:01 AM, Michael S. Tsirkin wrote: >> Quoting Jeff Squyres : >>> Is the bugzilla email gateway functional in bugzilla? >> >> None of the e-mail for openfabrics or openib have been moved to the >> new server; we're still sorting out DNS issues. > > So, can you make bugzilla email gateway work with old e-mail > addresses, > or is this broken for now? > > -- > MST -- Jeff Squyres Server Virtualization Business Unit Cisco Systems _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From mst at mellanox.co.il Mon Jan 22 01:04:14 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 11:04:14 +0200 Subject: [openib-general] [PATCH] IB/core: fix ib_umad alignment fault on ia64 In-Reply-To: <20070118230744.GR12386@obsidianresearch.com> References: <20070118230744.GR12386@obsidianresearch.com> Message-ID: <20070122090414.GB26176@mellanox.co.il> From: Jason Gunthorpe Make the untyped data region in ib_user_mad u64 aligned so that casting ib_user_mad data to structs with u64s in them is safe on 64 bit platforms. Note this does not affect the ABI - ib_user_mad format stays the same, the only effect this has is in how ib_user_mad is placed within larger kernel-only structures. Signed-off-by: Jason Gunthorpe Acked-by: Michael S. Tsirkin --- Works fine for me, and while this mainly removes an annoying message from kernel log on ia64, I think this is safe enough to be 2.6.20 material. Further, compiler has right to assume a structure with a 64 bit field is naturally aligned on a 64 bit platform, so current code might thinkably trigger incorrect code to be generated with e.g. memcpy (never observed this in practice though). Roland, could you queue this please? 
diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h index 44537aa..d66b15e 100644 --- a/include/rdma/ib_user_mad.h +++ b/include/rdma/ib_user_mad.h @@ -98,7 +98,7 @@ struct ib_user_mad_hdr { */ struct ib_user_mad { struct ib_user_mad_hdr hdr; - __u8 data[0]; + __u64 data[0]; }; /** -- MST From ogerlitz at voltaire.com Mon Jan 22 01:25:43 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 22 Jan 2007 11:25:43 +0200 (IST) Subject: [openib-general] failure to use libibverbs clone In-Reply-To: References: Message-ID: I have configured libmthca against the install of libibverbs (/usr/local/rdmacm), the configure output is below. Or. dill:/usr/src/libmthca # ./configure --prefix=/usr/local/rdmacm CFLAGS=-I/usr/local/rdmacm/include \ LDFLAGS=-L/usr/local/rdmacm/lib LD_LIBRARY_PATH=/usr/local/rdmacm/lib checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for gawk... gawk checking whether make sets $(MAKE)... yes checking build system type... x86_64-suse-linux checking host system type... x86_64-suse-linux checking for style of include used by make... GNU checking for gcc... gcc checking for C compiler default output file name... a.out checking whether the C compiler works... yes checking whether we are cross compiling... no checking for suffix of executables... checking for suffix of object files... o checking whether we are using the GNU C compiler... yes checking whether gcc accepts -g... yes checking for gcc option to accept ANSI C... none needed checking dependency style of gcc... gcc3 checking for a sed that does not truncate output... /usr/bin/sed checking for egrep... grep -E checking for ld used by gcc... /usr/x86_64-suse-linux/bin/ld checking if the linker (/usr/x86_64-suse-linux/bin/ld) is GNU ld... yes checking for /usr/x86_64-suse-linux/bin/ld option to reload object files... -r checking for BSD-compatible nm... /usr/bin/nm -B checking whether ln -s works... 
yes checking how to recognise dependent libraries... pass_all checking how to run the C preprocessor... gcc -E checking for ANSI C header files... yes checking for sys/types.h... yes checking for sys/stat.h... yes checking for stdlib.h... yes checking for string.h... yes checking for memory.h... yes checking for strings.h... yes checking for inttypes.h... yes checking for stdint.h... yes checking for unistd.h... yes checking dlfcn.h usability... yes checking dlfcn.h presence... yes checking for dlfcn.h... yes checking for g++... g++ checking whether we are using the GNU C++ compiler... yes checking whether g++ accepts -g... yes checking dependency style of g++... gcc3 checking how to run the C++ preprocessor... g++ -E checking for g77... g77 checking whether we are using the GNU Fortran 77 compiler... yes checking whether g77 accepts -g... yes checking the maximum length of command line arguments... 32768 checking command to parse /usr/bin/nm -B output from gcc object... ok checking for objdir... .libs checking for ar... ar checking for ranlib... ranlib checking for strip... strip checking if gcc static flag works... yes checking if gcc supports -fno-rtti -fno-exceptions... no checking for gcc option to produce PIC... -fPIC checking if gcc PIC flag -fPIC works... yes checking if gcc supports -c -o file.o... yes checking whether the gcc linker (/usr/x86_64-suse-linux/bin/ld -m elf_x86_64) supports shared libraries... yes checking whether -lc should be explicitly linked in... no checking dynamic linker characteristics... GNU/Linux ld.so checking how to hardcode library paths into programs... immediate checking whether stripping libraries is possible... yes checking if libtool supports shared libraries... yes checking whether to build shared libraries... yes checking whether to build static libraries... yes configure: creating libtool appending configuration tag "CXX" to libtool checking for ld used by g++... 
/usr/x86_64-suse-linux/bin/ld -m elf_x86_64 checking if the linker (/usr/x86_64-suse-linux/bin/ld -m elf_x86_64) is GNU ld... yes checking whether the g++ linker (/usr/x86_64-suse-linux/bin/ld -m elf_x86_64) supports shared libraries... yes checking for g++ option to produce PIC... -fPIC checking if g++ PIC flag -fPIC works... yes checking if g++ supports -c -o file.o... yes checking whether the g++ linker (/usr/x86_64-suse-linux/bin/ld -m elf_x86_64) supports shared libraries... yes checking dynamic linker characteristics... GNU/Linux ld.so checking how to hardcode library paths into programs... immediate checking whether stripping libraries is possible... yes appending configuration tag "F77" to libtool checking if libtool supports shared libraries... yes checking whether to build shared libraries... yes checking whether to build static libraries... yes checking for g77 option to produce PIC... -fPIC checking if g77 PIC flag -fPIC works... yes checking if g77 supports -c -o file.o... yes checking whether the g77 linker (/usr/x86_64-suse-linux/bin/ld -m elf_x86_64) supports shared libraries... yes checking dynamic linker characteristics... GNU/Linux ld.so checking how to hardcode library paths into programs... immediate checking whether stripping libraries is possible... yes checking for gcc... (cached) gcc checking whether we are using the GNU C compiler... (cached) yes checking whether gcc accepts -g... (cached) yes checking for gcc option to accept ANSI C... (cached) none needed checking dependency style of gcc... (cached) gcc3 checking for ibv_get_device_list in -libverbs... yes checking infiniband/driver.h usability... yes checking infiniband/driver.h presence... no configure: WARNING: infiniband/driver.h: accepted by the compiler, rejected by the preprocessor! configure: WARNING: infiniband/driver.h: proceeding with the compiler's result checking for infiniband/driver.h... yes checking for ANSI C header files... 
(cached) yes checking valgrind/memcheck.h usability... no checking valgrind/memcheck.h presence... no checking for valgrind/memcheck.h... no checking for an ANSI C-conforming const... yes checking for long... yes checking size of long... 8 checking for ibv_read_sysfs_file... yes checking for ibv_dontfork_range... yes checking for ibv_dofork_range... yes checking for ibv_register_driver... yes checking whether ld accepts --version-script... configure: creating ./config.status config.status: creating Makefile config.status: creating libmthca.spec config.status: creating config.h config.status: config.h is unchanged config.status: executing depfiles commands From mst at mellanox.co.il Mon Jan 22 01:50:44 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 11:50:44 +0200 Subject: [openib-general] failure to use libibverbs clone In-Reply-To: References: Message-ID: <20070122095044.GE26176@mellanox.co.il> > Quoting Or Gerlitz : > Subject: Re: failure to use libibverbs clone > > I have configured libmthca against the install of libibverbs (/usr/local/rdmacm), > the configure output is below. > > Or. > > dill:/usr/src/libmthca # ./configure --prefix=/usr/local/rdmacm CFLAGS=-I/usr/local/rdmacm/include \ > LDFLAGS=-L/usr/local/rdmacm/lib LD_LIBRARY_PATH=/usr/local/rdmacm/lib Works for me. Look at how configure defines HAVE_IBV_DEVICE_LIBRARY_EXTENSION, this is what goes wrong for you. > configure: WARNING: infiniband/driver.h: accepted by the compiler, rejected by the preprocessor! > configure: WARNING: infiniband/driver.h: proceeding with the compiler's result Try also checking why do you see this. -- MST From mst at mellanox.co.il Mon Jan 22 02:29:52 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 22 Jan 2007 12:29:52 +0200 Subject: [openib-general] ofa_1_2_kernel 20070122-0200 daily build status In-Reply-To: <20070122102053.A4E43E603A6@openfabrics.org> References: <20070122102053.A4E43E603A6@openfabrics.org> Message-ID: <20070122102952.GG26176@mellanox.co.il> Jeff, Michael, Vlad's nightly build mail is being sent from openfabrics.org. I get this for several days now, but this still does not get forwarded by openib-general. Why? Could this be fixed please? Quoting Vladimir Sokolovsky : Subject: ofa_1_2_kernel 20070122-0200 daily build status This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.14 Passed on powerpc with linux-2.6.18 Passed on x86_64 with linux-2.6.18 Passed on powerpc with linux-2.6.17 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.16 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.17 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.13 
Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Failed: -- MST From ogerlitz at voltaire.com Mon Jan 22 05:11:47 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 22 Jan 2007 15:11:47 +0200 (IST) Subject: [openib-general] [PATCH] rdma/cma: remove per multicast group qkey usage Message-ID: Sean, Please see the cleanup below, also I see now that librdmacm has two functions to init a qp: ucma_init_ud_qp for UD QPs and ucma_init_ib_qp for RC QPs, where the rdmacm kernel code only has ucma_init_ib_qp, I guess something here is missing (does it only need to set the QKEY in the UD QP, or also to modify it to RTR and RTS? let me know and I can send a patch). a cleanup on the RDMA CM UD code: remove per group qkey usage for the join flow as this is impossible to achieve in practice with the same UD QP attached to multiple groups Signed-off-by: Or Gerlitz Index: rdma-dev/drivers/infiniband/core/cma.c =================================================================== --- rdma-dev.orig/drivers/infiniband/core/cma.c 2007-01-21 12:08:06.000000000 +0200 +++ rdma-dev/drivers/infiniband/core/cma.c 2007-01-21 12:11:16.000000000 +0200 @@ -2434,7 +2434,6 @@ static int cma_join_ib_multicast(struct ib_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; - rec.qkey = sin->sin_addr.s_addr; comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | From ogerlitz at voltaire.com Mon Jan 22 05:17:22 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 22 Jan 2007 15:17:22 +0200 (IST) Subject: [openib-general] [RFC/PATCH] rdma/cma: use the ipoib broadcast group qkey Message-ID: Sean, Please let me know what you think - my intention is to have the group type affect only whether or not to set the rdmacm signature
byte on the mgid and as for the qkey, just make the ipoib broadcast group qkey being used instread a qkey defined by the rdma cm. The patch is not completed yet in the sense that the qkey associated with the rdma cm kernel id should be exported to user space (on the client side it would be on the addr resolve event flow and on the server side on the conn req event flow) to be set by librdmacm into the user UD QP on the time rdma_create_qp is called. change the kernel rdma cm use the ipoib broadcast group qkey instead a qkey of its own. Signed-off-by: Or Gerlitz Index: rdma-dev/drivers/infiniband/core/cma.c =================================================================== --- rdma-dev.orig/drivers/infiniband/core/cma.c 2007-01-21 12:11:16.000000000 +0200 +++ rdma-dev/drivers/infiniband/core/cma.c 2007-01-22 14:05:22.000000000 +0200 @@ -136,6 +136,7 @@ struct rdma_id_private { u32 seq_num; u32 qp_num; u8 srq; + u32 qkey; }; struct cma_multicast { @@ -884,6 +885,21 @@ out: return ret; } +static int get_broadcast_group_qkey(struct rdma_id_private *id_priv) +{ + struct ib_sa_mcmember_rec rec; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int ret; + + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, + &rec.mgid, &rec); + if (ret) + return -EINVAL; + id_priv->qkey = rec.qkey; + return 0; +} + static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event) { @@ -1021,6 +1037,10 @@ static int cma_req_handler(struct ib_cm_ if (ret) goto release_conn_id; + ret = get_broadcast_group_qkey(conn_id); + if (ret) + goto release_conn_id; + conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; @@ -1600,6 +1620,7 @@ static void addr_handler(int status, str { struct rdma_id_private *id_priv = context; struct rdma_cm_event event; + int ret; memset(&event, 0, sizeof event); atomic_inc(&id_priv->dev_remove); @@ -1626,6 
+1647,11 @@ static void addr_handler(int status, str } else { memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); + ret = get_broadcast_group_qkey(id_priv); + if (ret) { + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = ret; + } event.event = RDMA_CM_EVENT_ADDR_RESOLVED; } @@ -1936,7 +1962,9 @@ static int cma_sidr_rep_handler(struct i event.status = ib_event->param.sidr_rep_rcvd.status; break; } - if (rep->qkey != RDMA_UD_QKEY) { + if (rep->qkey != id_priv->qkey) { + printk(KERN_WARNING "qkey mismatch %.8x client qkey %.8x\n", + rep->qkey, id_priv->qkey); event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; break; @@ -2231,7 +2259,7 @@ static int cma_send_sidr_rep(struct rdma rep.status = status; if (status == IB_SIDR_SUCCESS) { rep.qp_num = id_priv->qp_num; - rep.qkey = RDMA_UD_QKEY; + rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; From jlentini at netapp.com Mon Jan 22 05:51:35 2007 From: jlentini at netapp.com (James Lentini) Date: Mon, 22 Jan 2007 08:51:35 -0500 (EST) Subject: [openib-general] [PATCH] uDAPL - rdma_ucm branch: add changes to support rr/init exchange In-Reply-To: <000001c73c23$86a59930$4297070a@amr.corp.intel.com> References: <000001c73c23$86a59930$4297070a@amr.corp.intel.com> Message-ID: Looks good Arlin. On Fri, 19 Jan 2007, Arlin Davis wrote: > Some uDAPL changes to support exchanging and validation of the device responder_resources and the > initiator_depth during connection establishment. 
> > Signed-off by: Arlin Davis ardavis at ichips.intel.com > > > diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c > old mode 100644 > new mode 100755 > index 0f24244..8bdd0eb > --- a/dapl/openib_cma/dapl_ib_cm.c > +++ b/dapl/openib_cma/dapl_ib_cm.c > @@ -259,6 +259,18 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, > new_conn->sp = conn->sp; > new_conn->hca = conn->hca; > > + /* Get requesters connect data, setup for accept */ > + new_conn->params.responder_resources = > + DAPL_MIN(event->param.conn.initiator_depth, > + conn->hca->ib_trans.max_rdma_rd_in); > + new_conn->params.initiator_depth = > + DAPL_MIN(event->param.conn.responder_resources, > + conn->hca->ib_trans.max_rdma_rd_out); > + > + new_conn->params.flow_control = event->param.conn.flow_control; > + new_conn->params.rnr_retry_count = event->param.conn.rnr_retry_count; > + new_conn->params.retry_count = event->param.conn.retry_count; > + > /* save private data */ > if (event->param.conn.private_data_len) { > dapl_os_memcpy(new_conn->p_data, > @@ -279,7 +291,8 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, > event->param.conn.private_data, event->param.conn.private_data_len); > > dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: " > - "REQ: IP SRC %x PORT %d DST %x PORT %d\n", > + "REQ: IP SRC %x PORT %d DST %x PORT %d " > + "rr %d init %d\n", > ntohl(((struct sockaddr_in *) > &ipaddr->src_addr)->sin_addr.s_addr), > ntohs(((struct sockaddr_in *) > @@ -287,7 +300,9 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, > ntohl(((struct sockaddr_in *) > &ipaddr->dst_addr)->sin_addr.s_addr), > ntohs(((struct sockaddr_in *) > - &ipaddr->dst_addr)->sin_port)); > + &ipaddr->dst_addr)->sin_port), > + new_conn->params.responder_resources, > + new_conn->params.initiator_depth); > } > return new_conn; > } > @@ -556,8 +571,8 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle, > > /* Setup QP/CM parameters and private data in cm_id 
*/ > (void)dapl_os_memzero(&conn->params, sizeof(conn->params)); > - conn->params.responder_resources = IB_TARGET_MAX; > - conn->params.initiator_depth = IB_INITIATOR_DEPTH; > + conn->params.responder_resources = conn->hca->ib_trans.max_rdma_rd_in; > + conn->params.initiator_depth = conn->hca->ib_trans.max_rdma_rd_out; > conn->params.flow_control = 1; > conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT; > conn->params.retry_count = IB_RC_RETRY_COUNT; > @@ -814,7 +829,6 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, > struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle; > int ret; > DAT_RETURN dat_status; > - struct rdma_conn_param conn_params; > > dapl_dbg_log(DAPL_DBG_TYPE_CM, > " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n", > @@ -867,16 +881,10 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, > ep_ptr->qp_handle = cr_conn; > ep_ptr->cm_handle = cr_conn; > cr_conn->ep = ep_ptr; > + cr_conn->params.private_data = p_data; > + cr_conn->params.private_data_len = p_size; > > - memset(&conn_params, 0, sizeof(conn_params)); > - conn_params.private_data = p_data; > - conn_params.private_data_len = p_size; > - conn_params.responder_resources = IB_TARGET_MAX; > - conn_params.initiator_depth = IB_INITIATOR_DEPTH; > - conn_params.flow_control = 1; > - conn_params.rnr_retry_count = IB_RNR_RETRY_COUNT; > - > - ret = rdma_accept(cr_conn->cm_id, &conn_params); > + ret = rdma_accept(cr_conn->cm_id, &cr_conn->params); > if (ret) { > dapl_dbg_log(DAPL_DBG_TYPE_ERR," accept: ERROR %d\n", ret); > dat_status = dapl_convert_errno(ret, "accept"); > diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c > old mode 100644 > new mode 100755 > index 6bb35f6..0606312 > --- a/dapl/openib_cma/dapl_ib_util.c > +++ b/dapl/openib_cma/dapl_ib_util.c > @@ -469,6 +469,9 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA *hca_ptr, > ia_attr->num_vendor_attr = 0; > ia_attr->vendor_attr = NULL; > ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge; > + /* 
save rd_atom for peer validation during connect requests */ > + hca_ptr->ib_trans.max_rdma_rd_in = dev_attr.max_qp_rd_atom; > + hca_ptr->ib_trans.max_rdma_rd_out = dev_attr.max_qp_rd_atom; > > dapl_dbg_log(DAPL_DBG_TYPE_UTIL, > " query_hca: (ver=%x) ep %d ep_q %d evd %d evd_q %d\n", > diff --git a/dapl/openib_cma/dapl_ib_util.h b/dapl/openib_cma/dapl_ib_util.h > old mode 100644 > new mode 100755 > index 1fadb08..457d26b > --- a/dapl/openib_cma/dapl_ib_util.h > +++ b/dapl/openib_cma/dapl_ib_util.h > @@ -73,10 +73,7 @@ typedef ib_hca_handle_t dapl_ibal_ca_t; > #define IB_ARP_RETRY_COUNT 15 /* 60 sec total */ > #define IB_ROUTE_TIMEOUT 4000 /* 4 sec */ > #define IB_ROUTE_RETRY_COUNT 15 /* 60 sec total */ > -#define IB_REQ_MRA_TIMEOUT 27 /* a little over 9 minutes */ > #define IB_MAX_AT_RETRY 3 > -#define IB_TARGET_MAX 4 /* max_qp_ous_rd_atom */ > -#define IB_INITIATOR_DEPTH 4 /* max_qp_init_rd_atom */ > > typedef enum { > IB_CME_CONNECTED, > @@ -272,6 +269,9 @@ typedef struct _ib_hca_transport > ib_async_qp_handler_t async_qp_error; > uint8_t max_cm_timeout; > uint8_t max_cm_retries; > + /* device attributes */ > + int max_rdma_rd_in; > + int max_rdma_rd_out; > > } ib_hca_transport_t; > > From halr at voltaire.com Mon Jan 22 06:08:19 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 22 Jan 2007 09:08:19 -0500 Subject: [openib-general] [PATCH] OpenSM/osm_ucast_updn.c: In __updn_bfs_by_node, no longer check against max_lid Message-ID: <1169474711.28635.268739.camel@hal.voltaire.com> OpenSM/osm_ucast_updn.c: In __updn_bfs_by_node, no longer check against max_lid Eliminate __updn_bfs_by_node: ERR AA09: Invalid lid for switch by no longer checking CA/router LID for being within range of 1 to max lid of neighbor switch. This is because most of the destination lids (root_lid) have not been populated yet in the forwarding tables (so max_sw_lid is small) and we are only now creating them. 
So this test doesn't make any sense any more since the generation of the LFTs has been relaxed. Signed-off-by: Dale Purdy Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/osm_ucast_updn.c b/osm/opensm/osm_ucast_updn.c index 5476b26..0142f98 100644 --- a/osm/opensm/osm_ucast_updn.c +++ b/osm/opensm/osm_ucast_updn.c @@ -233,7 +233,7 @@ __updn_bfs_by_node( uint8_t pn, pn_rem; osm_physp_t *p_physp, *p_remote_physp; cl_list_t *p_currList, *p_nextList; - uint16_t root_lid, max_sw_lid; + uint16_t root_lid; updn_next_step_t *p_updn_switch, *p_tmp; updn_switch_dir_t next_dir, current_dir; osm_log_t *p_log = &p_updn->p_osm->log; @@ -303,30 +303,13 @@ __updn_bfs_by_node( else { p_self_node = p_remote_physp->p_node->sw; - max_sw_lid = osm_switch_get_max_lid_ho(p_self_node); - if ((1 <= root_lid) && (root_lid <= max_sw_lid)) - /* Update its Min Hop Table */ - { - /* NOTE : Check if there is a function which prints the Min Hop Table */ - osm_log( p_log, OSM_LOG_DEBUG, - "__updn_bfs_by_node: " - "Update Min Hop Table of GUID 0x%" PRIx64 "\n", - cl_ntoh64(p_remote_physp->port_guid) ); - osm_switch_set_hops(p_self_node, root_lid, - p_remote_physp->port_num, 1); - - } - else - { - osm_log( p_log, OSM_LOG_ERROR, - "__updn_bfs_by_node: ERR AA09: " - "Invalid lid 0x%x for switch 0x%" PRIx64 " max lid 0x%x\n", - root_lid, - cl_ntoh64(p_self_node->p_node->node_info.port_guid), - max_sw_lid ); - OSM_LOG_EXIT( p_log ); - return 1; - } + /* Update its Min Hop Table */ + /* NOTE : Check if there is a function which prints the Min Hop Table */ + osm_log( p_log, OSM_LOG_DEBUG, + "__updn_bfs_by_node: " + "Update Min Hop Table of GUID 0x%" PRIx64 "\n", + cl_ntoh64(p_remote_physp->port_guid) ); + osm_switch_set_hops(p_self_node, root_lid, p_remote_physp->port_num, 1); } } } From halr at voltaire.com Mon Jan 22 06:11:59 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 22 Jan 2007 09:11:59 -0500 Subject: [openib-general] [PATCH] osm: bug in PathRecord.HopLimit access function 
In-Reply-To: <45B37863.5040109@dev.mellanox.co.il> References: <45B37863.5040109@dev.mellanox.co.il> Message-ID: <1169474974.28635.268825.camel@hal.voltaire.com> On Sun, 2007-01-21 at 09:27, Yevgeny Kliteynik wrote: > Hi Hal > > Fixing bug in PathRecord.HopLimit access function. > > Signed-off-by: Yevgeny Kliteynik Thanks. Applied. -- Hal From mst at mellanox.co.il Mon Jan 22 06:41:46 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 16:41:46 +0200 Subject: [openib-general] [PATCH RFC 0 of 5] QoS support Message-ID: <20070122144146.GB7261@mellanox.co.il> The following (lightly tested) RFC patch series implements QoS support according to the proposed LWG2 QoS Annex. This is *not* intended for upstream merge, but I do intend to put it in OFED as a technology preview. Some notes: - Module parameters (off by default) were added to control QoS support. This is a safety measure, due to the fact QoS is not in spec yet. These will have to go before upstream submission. - Once enabled, all CMA based ULPs will get Service ID set in their path query requests. - To set priority, a new rdma_set_priority call was added. It should be possible to implement it on top of iWarp as well (IB QoS is explicitly defined to match DSCP semantics), so I think this belongs in the common header. For now this value is ignored on iWarp HCA. -- MST From mst at mellanox.co.il Mon Jan 22 06:43:06 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 16:43:06 +0200 Subject: [openib-general] [PATCH RFC 1 of 5] IB/sa: QoS support In-Reply-To: <20070122144146.GB7261@mellanox.co.il> References: <20070122144146.GB7261@mellanox.co.il> Message-ID: <20070122144306.GC7261@mellanox.co.il> QoS support according to the proposed LWG2 QoS Annex. Signed-off-by: Michael S. 
Tsirkin --- Index: linux-2.6/drivers/infiniband/core/sa_query.c =================================================================== --- linux-2.6.orig/drivers/infiniband/core/sa_query.c +++ linux-2.6/drivers/infiniband/core/sa_query.c @@ -122,14 +122,10 @@ static u32 tid; .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { - { RESERVED, - .offset_words = 0, - .offset_bits = 0, - .size_bits = 32 }, - { RESERVED, - .offset_words = 1, + { PATH_REC_FIELD(service_id), + .offset_words = 2, .offset_bits = 0, - .size_bits = 32 }, + .size_bits = 64 }, { PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, @@ -178,7 +174,7 @@ static const struct ib_field path_rec_ta .offset_words = 12, .offset_bits = 16, .size_bits = 16 }, - { RESERVED, + { PATH_REC_FIELD(priority), .offset_words = 13, .offset_bits = 0, .size_bits = 12 }, Index: linux-2.6/include/rdma/ib_sa.h =================================================================== --- linux-2.6.orig/include/rdma/ib_sa.h +++ linux-2.6/include/rdma/ib_sa.h @@ -109,8 +109,9 @@ enum ib_sa_selector { * Reserved rows are indicated with comments to help maintainability. 
*/ -/* reserved: 0 */ -/* reserved: 1 */ +#define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) | \ + IB_SA_COMP_MASK( 1)) + #define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) #define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) #define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) @@ -123,7 +124,7 @@ enum ib_sa_selector { #define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) #define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) #define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) -/* reserved: 14 */ +#define IB_SA_PATH_REC_PRIORITY IB_SA_COMP_MASK(14) #define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) #define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) #define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) @@ -134,8 +135,7 @@ enum ib_sa_selector { #define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) struct ib_sa_path_rec { - /* reserved */ - /* reserved */ + __be64 service_id; union ib_gid dgid; union ib_gid sgid; __be16 dlid; @@ -148,7 +148,7 @@ struct ib_sa_path_rec { int reversible; u8 numb_path; __be16 pkey; - /* reserved */ + __be16 priority; u8 sl; u8 mtu_selector; u8 mtu; -- MST From mst at mellanox.co.il Mon Jan 22 06:45:00 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 16:45:00 +0200 Subject: [openib-general] [PATCH RFC 2 of 5] IB/cma: QoS support Message-ID: <20070122144500.GD7261@mellanox.co.il> QoS support according to the proposed LWG2 QoS Annex: pass sevice ID in SA query, add option to set priority. Signed-off-by: Michael S. 
Tsirkin --- Index: linux-2.6/drivers/infiniband/core/cma.c =================================================================== --- linux-2.6.orig/drivers/infiniband/core/cma.c +++ linux-2.6/drivers/infiniband/core/cma.c @@ -53,6 +53,10 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 +static int cma_qos_support = 0; +module_param_named(qos_support, cma_qos_support, int, 0644); +MODULE_PARM_DESC(qos_support, "Enable QoS support if > 0"); + static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -135,6 +139,7 @@ struct rdma_id_private { u32 seq_num; u32 qp_num; u8 srq; + u8 priority; }; struct cma_work { @@ -1397,7 +1402,24 @@ static int cma_query_ib_route(struct rdm path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); path_rec.numb_path = 1; - id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, + if (cma_qos_support) { + ib_sa_comp_mask m = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH; + struct sockaddr *sockaddr = &id_priv->id.route.addr.dst_addr; + __be64 svc_id = cma_get_service_id(id_priv->id.ps, sockaddr); + + path_rec.service_id = svc_id; + m |= IB_SA_PATH_REC_SERVICE_ID; + if (id_priv->priority) { + path_rec.priority = __cpu_to_be16(id_priv->priority); + m |= IB_SA_PATH_REC_PRIORITY; + } + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, + id_priv->id.port_num, &path_rec, m, + timeout_ms, GFP_KERNEL, + cma_query_handler, work, &id_priv->query); + } else + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, @@ -1464,6 +1486,19 @@ err1: return ret; } +int rdma_set_priority(struct rdma_cm_id *id, u8 priority) +{ + struct rdma_id_private *id_priv; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, 
CMA_ADDR_RESOLVED)) + return -EINVAL; + + id_priv->priority = priority; + return 0; +} +EXPORT_SYMBOL(rdma_set_priority); + int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths) { Index: linux-2.6/include/rdma/rdma_cm.h =================================================================== --- linux-2.6.orig/include/rdma/rdma_cm.h +++ linux-2.6/include/rdma/rdma_cm.h @@ -177,6 +177,15 @@ int rdma_resolve_addr(struct rdma_cm_id struct sockaddr *dst_addr, int timeout_ms); /** + * rdma_set_priority - Set traffice class for connection. + * Must be set before rdma_resolve_route is called. + * + * @id: RDMA identifier. + * @priority: priority value to use. 0 is the default (wildcard) value. + */ +int rdma_set_priority(struct rdma_cm_id *id, u8 priority); + +/** * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier * into route information needed to establish a connection. * -- MST From mst at mellanox.co.il Mon Jan 22 06:46:04 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 16:46:04 +0200 Subject: [openib-general] [PATCH RFC 3 of 5] IB/srp: QoS support Message-ID: <20070122144604.GE7261@mellanox.co.il> QoS support according to the proposed LWG2 QoS Annex: pass sevice ID in SA query Signed-off-by: Michael S. 
Tsirkin --- Index: linux-2.6/drivers/infiniband/ulp/srp/ib_srp.c =================================================================== --- linux-2.6.orig/drivers/infiniband/ulp/srp/ib_srp.c +++ linux-2.6/drivers/infiniband/ulp/srp/ib_srp.c @@ -62,6 +62,11 @@ MODULE_DESCRIPTION("InfiniBand SCSI RDMA "v" DRV_VERSION " (" DRV_RELDATE ")"); MODULE_LICENSE("Dual BSD/GPL"); +static int srp_qos_support; + +module_param_named(qos_support, srp_qos_support, int, 0644); +MODULE_PARM_DESC(qos_support, "Enable QoS support if > 0."); + static int srp_sg_tablesize = SRP_DEF_SG_TABLESIZE; static int srp_max_iu_len; @@ -267,7 +272,23 @@ static int srp_lookup_path(struct srp_ta init_completion(&target->done); - target->path_query_id = ib_sa_path_rec_get(&srp_sa_client, + if (srp_qos_support) { + ib_sa_comp_mask m = IB_SA_PATH_REC_DGID | + IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_PKEY; + target->path.service_id = target->service_id; + m |= IB_SA_PATH_REC_SERVICE_ID; + target->path_query_id = ib_sa_path_rec_get(&srp_sa_client, + target->srp_host->dev->dev, + target->srp_host->port, + &target->path, m, + SRP_PATH_REC_TIMEOUT_MS, + GFP_KERNEL, + srp_path_rec_completion, + target, &target->path_query); + } else + target->path_query_id = ib_sa_path_rec_get(&srp_sa_client, target->srp_host->dev->dev, target->srp_host->port, &target->path, -- MST From cap at nsc.liu.se Mon Jan 22 06:48:53 2007 From: cap at nsc.liu.se (Peter Kjellstrom) Date: Mon, 22 Jan 2007 15:48:53 +0100 Subject: [openib-general] OFED embedded in CentOS 4.4 doesn't work In-Reply-To: References: Message-ID: <200701221548.53321.cap@nsc.liu.se> On Friday 19 January 2007 06:30, zhang chao wrote: > Hi, openib maillist: > > I have a cluster installed CentOS 4.4 -- which embedded the OFED > packages(under /usr/ofed directory). All infiniband drivers and libraries > have been installed and I have configured IPoIB, it also works well. The > OFED version is 1.0 -- maybe. 
AFAIK, It's not quite ofed-1.0 but close to it. We tested this exact stack and we had no problems with IPoIB and ScaMPI (never did test mvapich on it). Either way I think the general recommendation is to install OFED-1.1 which contains alot of fixes for problems present in 1.0. /Peter -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available URL: From mst at mellanox.co.il Mon Jan 22 06:50:21 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 16:50:21 +0200 Subject: [openib-general] [PATCH RFC 4 of 5] IB/mthca: QoS support Message-ID: <20070122145021.GF7261@mellanox.co.il> encode SL in sched_queue field to improve hardware QoS guarantees for connected QPs. Signed-off-by: Michael S. Tsirkin --- Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_qp.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_qp.c +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_qp.c @@ -49,6 +49,10 @@ #include "mthca_memfree.h" #include "mthca_wqe.h" +static int mthca_qos_support = 0; +module_param_named(qos_support, mthca_qos_support, int, 0644); +MODULE_PARM_DESC(qos_support, "Enable QoS support if > 0"); + enum { MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, MTHCA_ACK_REQ_FREQ = 10, @@ -694,6 +698,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, goto out_mailbox; qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); + if (mthca_qos_support) { + u8 sl = attr->ah_attr.sl; + u8 sched_queue = (sl & 0x8) | (sl & (~(sl >> 1)) & 0x4) | + ((sl >> 1) & (sl >> 2) & 0x2) | ((sl >> 1) & 0x1); + + if (mthca_is_memfree(dev)) { + qp_context->rlkey_arbel_sched_queue |= sched_queue; + } else { + qp_context->tavor_sched_queue |= sched_queue; + } + qp_param->opt_param_mask |= + cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE); + } } if (attr_mask & IB_QP_TIMEOUT) { -- MST From mst at mellanox.co.il 
Mon Jan 22 06:50:48 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 16:50:48 +0200 Subject: [openib-general] [PATCH RFC 5 of 5] IB/sdp: QoS support Message-ID: <20070122145047.GG7261@mellanox.co.il> QoS support according to the proposed LWG2 QoS Annex: pass socket priority to CMA Signed-off-by: Michael S. Tsirkin --- SDP is not upstream, this patch is posted for completeness, to demonstrate how the new rdma_set_priority call might be used. diff --git a/drivers/infiniband/ulp/sdp/sdp_cma.c b/drivers/infiniband/ulp/sdp/sdp_cma.c index 59c8052..50c4e38 100644 --- a/drivers/infiniband/ulp/sdp/sdp_cma.c +++ b/drivers/infiniband/ulp/sdp/sdp_cma.c @@ -47,6 +47,11 @@ #include "sdp_socket.h" #include "sdp.h" +static int sdp_qos_support; + +module_param_named(qos_support, sdp_qos_support, int, 0644); +MODULE_PARM_DESC(qos_support, "Enable QoS support if > 0."); + union cma_ip_addr { struct in6_addr ip6; struct { @@ -363,6 +368,12 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n"); + if (sdp_qos_support) { + rc = rdma_set_priority(id, sk->sk_priority); + if (rc) + sdp_warn(sk, "rdma_set_priority failed: %d\n", rc); + } + rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT); break; case RDMA_CM_EVENT_ADDR_ERROR: -- MST From tziporet at mellanox.co.il Mon Jan 22 07:26:21 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 22 Jan 2007 17:26:21 +0200 Subject: [openib-general] resolving sending mails from OFA new server Message-ID: <45B4D79D.3030909@mellanox.co.il> Hi Johann, Vlad arranged daily build mails so everybody can be notified on compilation status. However there are technical problems that prevent us from sending these mails that are related to the DNS See thread: http://openib.org/pipermail/openib-general/2007-January/031831.html Note that bugzilla update mails are not sent from the same reason. 
Since we going to have the code freeze and alpha soon (end of next week) it will be very important to fix these isses Thanks, Tziporet From jsquyres at cisco.com Mon Jan 22 07:29:00 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 10:29:00 -0500 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <45B4D79D.3030909@mellanox.co.il> References: <45B4D79D.3030909@mellanox.co.il> Message-ID: Michael Lee from Sandia is working on the mail migration issues; I don't know what his timeframe is. On Jan 22, 2007, at 10:26 AM, Tziporet Koren wrote: > Hi Johann, > Vlad arranged daily build mails so everybody can be notified on > compilation status. > However there are technical problems that prevent us from sending > these mails that are related to the DNS > See thread: http://openib.org/pipermail/openib-general/2007-January/ > 031831.html > > Note that bugzilla update mails are not sent from the same reason. > > Since we going to have the code freeze and alpha soon (end of next > week) it will be very important to fix these isses > > Thanks, > Tziporet -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From kliteyn at dev.mellanox.co.il Mon Jan 22 08:05:22 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 22 Jan 2007 18:05:22 +0200 Subject: [openib-general] [PATCH] osm: QoS: added qos class and service id to the path record Message-ID: <45B4E0C2.8060102@dev.mellanox.co.il> Hi Hal QoS patch: added qos class and service id to the path record Signed-off-by: Yevgeny Kliteynik --- osm/include/iba/ib_types.h | 149 +++++++++++++++++++++++++++++++--- osm/opensm/osm_helper.c | 8 +- osm/opensm/osm_sa_multipath_record.c | 2 +- osm/opensm/osm_sa_path_record.c | 5 +- osm/osmtest/osmtest.c | 2 +- 5 files changed, 147 insertions(+), 19 deletions(-) diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 22f7f62..7762ed2 100644 --- a/osm/include/iba/ib_types.h +++ 
b/osm/include/iba/ib_types.h @@ -1700,6 +1700,28 @@ ib_class_is_rmpp( #define IB_SMINFO_STATE_MASTER 3 /**********/ +/****d* IBA Base: Constants/IB_PATH_REC_SL_MASK +* NAME +* IB_PATH_REC_SL_MASK +* +* DESCRIPTION +* Mask for the sl field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_SL_MASK 0xF + +/****d* IBA Base: Constants/IB_PATH_REC_QOS_CLASS_MASK +* NAME +* IB_PATH_REC_QOS_CLASS_MASK +* +* DESCRIPTION +* Mask for the QoS class field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_QOS_CLASS_MASK 0xFFF0 + /****d* IBA Base: Constants/IB_PATH_REC_SELECTOR_MASK * NAME * IB_PATH_REC_SELECTOR_MASK @@ -2314,7 +2336,7 @@ ib_gid_get_guid( #include typedef struct _ib_path_rec { - uint8_t resv0[8]; + ib_net64_t service_id; ib_gid_t dgid; ib_gid_t sgid; ib_net16_t dlid; @@ -2323,7 +2345,7 @@ typedef struct _ib_path_rec uint8_t tclass; uint8_t num_path; ib_net16_t pkey; - ib_net16_t sl; + ib_net16_t qos_class_sl; uint8_t mtu; uint8_t rate; uint8_t pkt_life; @@ -2363,11 +2385,8 @@ typedef struct _ib_path_rec * pkey * Partition key (P_Key) to use on this path. * -* resv1 -* Reserved byte. -* -* sl -* Service level to use on this path. +* qos_class_sl +* QoS class and service level to use on this path. 
* * mtu * MTU and MTU selector fields to use on this path @@ -2388,6 +2407,7 @@ typedef struct _ib_path_rec *********/ /* Path Record Component Masks */ +#define IB_PR_COMPMASK_SERVICEID (CL_HTON64(((uint64_t)1)<<1)) #define IB_PR_COMPMASK_DGID (CL_HTON64(((uint64_t)1)<<2)) #define IB_PR_COMPMASK_SGID (CL_HTON64(((uint64_t)1)<<3)) #define IB_PR_COMPMASK_DLID (CL_HTON64(((uint64_t)1)<<4)) @@ -2400,7 +2420,7 @@ typedef struct _ib_path_rec #define IB_PR_COMPMASK_REVERSIBLE (CL_HTON64(((uint64_t)1)<<11)) #define IB_PR_COMPMASK_NUMBPATH (CL_HTON64(((uint64_t)1)<<12)) #define IB_PR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<13)) -#define IB_PR_COMPMASK_RESV1 (CL_HTON64(((uint64_t)1)<<14)) +#define IB_PR_COMPMASK_QOS_CLASS (CL_HTON64(((uint64_t)1)<<14)) #define IB_PR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<15)) #define IB_PR_COMPMASK_MTUSELEC (CL_HTON64(((uint64_t)1)<<16)) #define IB_PR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<17)) @@ -2658,6 +2678,7 @@ ib_path_rec_init_local( IN ib_net16_t slid, IN uint8_t num_path, IN ib_net16_t pkey, + IN uint16_t qos_class, IN uint8_t sl, IN uint8_t mtu_selector, IN uint8_t mtu, @@ -2673,8 +2694,8 @@ ib_path_rec_init_local( p_rec->slid = slid; p_rec->num_path = num_path; p_rec->pkey = pkey; - /* Lower 4 bits of path rec's SL are reserved. */ - p_rec->sl = cl_ntoh16( sl ); + p_rec->qos_class_sl = cl_hton16( (sl & IB_PATH_REC_SL_MASK) | + (qos_class << 4) ); p_rec->mtu = (uint8_t)((mtu & IB_PATH_REC_BASE_MASK) | (uint8_t)(mtu_selector << 6)); p_rec->rate = (uint8_t)((rate & IB_PATH_REC_BASE_MASK) | @@ -2686,8 +2707,8 @@ ib_path_rec_init_local( /* Clear global routing fields for local path records */ p_rec->hop_flow_raw = 0; p_rec->tclass = 0; + p_rec->service_id = 0; - *((uint64_t*)p_rec->resv0) = 0; *((uint32_t*)p_rec->resv2) = 0; *((uint16_t*)p_rec->resv2 + 2) = 0; } @@ -2716,6 +2737,9 @@ ib_path_rec_init_local( * pkey * [in] Partition key (P_Key) to use on this path. * +* qos_class +* [in] QoS class to use on this path. 
Lower 12-bits are valid. +* * sl * [in] Service level to use on this path. Lower 4-bits are valid. * @@ -2779,6 +2803,41 @@ ib_path_rec_num_path( * ib_path_rec_t *********/ +/****f* IBA Base: Types/ib_path_rec_set_sl +* NAME +* ib_path_rec_set_sl +* +* DESCRIPTION +* Set path service level. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_sl( + IN ib_path_rec_t* const p_rec, + IN const uint8_t sl ) +{ + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & + IB_PATH_REC_QOS_CLASS_MASK ) | + ( sl & IB_PATH_REC_SL_MASK) ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* sl +* [in] Service level to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + /****f* IBA Base: Types/ib_path_rec_sl * NAME * ib_path_rec_sl @@ -2792,7 +2851,7 @@ static inline uint8_t OSM_API ib_path_rec_sl( IN const ib_path_rec_t* const p_rec ) { - return( (uint8_t)((cl_ntoh16( p_rec->sl )) & 0xF) ); + return( (uint8_t)((cl_ntoh16( p_rec->qos_class_sl )) & IB_PATH_REC_SL_MASK) ); } /* * PARAMETERS @@ -2808,6 +2867,72 @@ ib_path_rec_sl( * ib_path_rec_t *********/ +/****f* IBA Base: Types/ib_path_rec_set_qos_class +* NAME +* ib_path_rec_set_qos_class +* +* DESCRIPTION +* Set path QoS class. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_qos_class( + IN ib_path_rec_t* const p_rec, + IN const uint16_t qos_class ) +{ + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & + IB_PATH_REC_QOS_CLASS_MASK ) | + ( qos_class << 4) ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* qos_class +* [in] QoS class to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_qos_class +* NAME +* ib_path_rec_qos_class +* +* DESCRIPTION +* Get QoS class. 
+* +* SYNOPSIS +*/ +static inline uint16_t OSM_API +ib_path_rec_qos_class( + IN const ib_path_rec_t* const p_rec ) +{ + return( (uint16_t)( cl_ntoh16( p_rec->qos_class_sl ) & + IB_PATH_REC_QOS_CLASS_MASK ) >> 4 ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* QoS class of the path record. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + + /****f* IBA Base: Types/ib_path_rec_mtu * NAME * ib_path_rec_mtu diff --git a/osm/opensm/osm_helper.c b/osm/opensm/osm_helper.c index 2ef8e38..e0b5aef 100644 --- a/osm/opensm/osm_helper.c +++ b/osm/opensm/osm_helper.c @@ -1095,7 +1095,7 @@ osm_dump_path_record( { osm_log( p_log, log_level, "PathRecord dump:\n" - "\t\t\t\tresv0...................0x%016" PRIx64 "\n" + "\t\t\t\tservice_id..............0x%016" PRIx64 "\n" "\t\t\t\tdgid....................0x%016" PRIx64 " : " "0x%016" PRIx64 "\n" "\t\t\t\tsgid....................0x%016" PRIx64 " : " @@ -1106,6 +1106,7 @@ osm_dump_path_record( "\t\t\t\ttclass..................0x%X\n" "\t\t\t\tnum_path_revers.........0x%X\n" "\t\t\t\tpkey....................0x%X\n" + "\t\t\t\tqos_class...............0x%X\n" "\t\t\t\tsl......................0x%X\n" "\t\t\t\tmtu.....................0x%X\n" "\t\t\t\trate....................0x%X\n" @@ -1114,7 +1115,7 @@ osm_dump_path_record( "\t\t\t\tresv2...................0x%X\n" "\t\t\t\tresv3...................0x%X\n" "", - *(uint64_t*)p_pr->resv0, + cl_ntoh64(p_pr->service_id), cl_ntoh64( p_pr->dgid.unicast.prefix ), cl_ntoh64( p_pr->dgid.unicast.interface_id ), cl_ntoh64( p_pr->sgid.unicast.prefix ), @@ -1125,7 +1126,8 @@ osm_dump_path_record( p_pr->tclass, p_pr->num_path, cl_ntoh16( p_pr->pkey ), - cl_ntoh16( p_pr->sl ), + ib_path_rec_qos_class(p_pr), + ib_path_rec_sl(p_pr), p_pr->mtu, p_pr->rate, p_pr->pkt_life, diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 2f61fb8..5ec0006 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ 
b/osm/opensm/osm_sa_multipath_record.c @@ -759,7 +759,7 @@ __osm_mpr_rcv_build_pr( p_pr->hop_flow_raw &= cl_hton32(1<<31); p_pr->pkey = p_parms->pkey; - p_pr->sl = cl_hton16( p_parms->sl ); + ib_path_rec_set_sl(p_pr, p_parms->sl); p_pr->mtu = (uint8_t)( p_parms->mtu | 0x80 ); p_pr->rate = (uint8_t)( p_parms->rate | 0x80 ); diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index 7707f52..5a43912 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -774,7 +774,8 @@ __osm_pr_rcv_build_pr( #endif p_pr->pkey = p_parms->pkey; - p_pr->sl = cl_hton16(p_parms->sl); + ib_path_rec_set_qos_class(p_pr,0); + ib_path_rec_set_sl(p_pr,p_parms->sl); p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80); p_pr->rate = (uint8_t)(p_parms->rate | 0x80); @@ -2051,7 +2052,7 @@ osm_pr_rcv_process( /* SL, Hop Limit, and Flow Label */ ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, &sl, &flow_label, &hop_limit ); - p_pr_item->path_rec.sl = cl_hton16( sl ); + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); #ifndef ROUTER_EXP p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | (flow_label << 8); diff --git a/osm/osmtest/osmtest.c b/osm/osmtest/osmtest.c index b9e3bf7..c42b037 100644 --- a/osm/osmtest/osmtest.c +++ b/osm/osmtest/osmtest.c @@ -1982,7 +1982,7 @@ osmtest_write_path_info( IN osmtest_t * cl_ntoh64( p_rec->sgid.unicast.interface_id ), cl_ntoh16( p_rec->dlid ), cl_ntoh16( p_rec->slid ), cl_ntoh32( p_rec->hop_flow_raw ), p_rec->tclass, - p_rec->num_path, cl_ntoh16( p_rec->pkey ), p_rec->sl, + p_rec->num_path, cl_ntoh16( p_rec->pkey ), ib_path_rec_sl(p_rec), p_rec->mtu, p_rec->rate, p_rec->pkt_life, p_rec->preference ); -- 1.4.4.1.GIT From mshefty at ichips.intel.com Mon Jan 22 09:12:59 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 09:12:59 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B36F03.9070201@mellanox.co.il> 
References: <45B36F03.9070201@mellanox.co.il> Message-ID: <45B4F09B.4050407@ichips.intel.com> > ~shefty/librdmacm.git > master > rdma_ucm-abi3* This goes with the 2.6.20 kernel. > multicast This goes with the multicast branch of my rdma-dev git tree. IMO, OFED should determine which features they want and pull in the appropriate branch. I know that Voltaire would like the multicast feature, but require a couple of changes to the code before its usable for them. - Sean From tziporet at mellanox.co.il Mon Jan 22 09:38:27 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 22 Jan 2007 19:38:27 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B4F09B.4050407@ichips.intel.com> References: <45B36F03.9070201@mellanox.co.il> <45B4F09B.4050407@ichips.intel.com> Message-ID: <45B4F693.9050207@mellanox.co.il> Sean Hefty wrote: >> multicast >> > > This goes with the multicast branch of my rdma-dev git tree. > > IMO, OFED should determine which features they want and pull in the appropriate > branch. I know that Voltaire would like the multicast feature, but require a > couple of changes to the code before its usable for them. > > > Moni/Or Can you update us regarding multicast feature status and testing Thanks, Tziporet From mst at mellanox.co.il Mon Jan 22 09:40:06 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 19:40:06 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B4F09B.4050407@ichips.intel.com> References: <45B36F03.9070201@mellanox.co.il> <45B4F09B.4050407@ichips.intel.com> Message-ID: <20070122174006.GB19017@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: librdmacm and udapl: Which git branch to use in ofed_1_2 build > > > ~shefty/librdmacm.git > > master > > rdma_ucm-abi3* > > This goes with the 2.6.20 kernel. > > > multicast > > This goes with the multicast branch of my rdma-dev git tree. 
> > IMO, OFED should determine which features they want and pull in the appropriate > branch. I know that Voltaire would like the multicast feature, but require a > couple of changes to the code before its usable for them. Sean, this is not a decision we as OFED maintainers need/can take, this is up to you as maintainer to decide whether features planned for OFED (such as multicast) can be in librdmacm release 1.0 and can be ready by feature freeze. If there is a conflict, please raise the flag, and we'll try to reach consensus. Where will you branch release 1.0 from? Since we agreed OFED will be based on release 1.0 of librdmacm, this is what OFED shall take. -- MST From arlin.r.davis at intel.com Mon Jan 22 09:50:25 2007 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 22 Jan 2007 09:50:25 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B36F03.9070201@mellanox.co.il> Message-ID: <000001c73e4d$cc46bba0$4297070a@amr.corp.intel.com> >~ardavis/dapl.git: > rdma_ucm* > master > >Can you reply which branch to use in our daily ofed 1.2 builds. for dapl use rdma_ucm branch From sashak at voltaire.com Mon Jan 22 09:59:52 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 22 Jan 2007 19:59:52 +0200 Subject: [openib-general] [PATCH] osm: QoS: added qos class and service id to the path record In-Reply-To: <45B4E0C2.8060102@dev.mellanox.co.il> References: <45B4E0C2.8060102@dev.mellanox.co.il> Message-ID: <20070122175952.GG23783@sashak.voltaire.com> Hi Yevgeny, On 18:05 Mon 22 Jan , Yevgeny Kliteynik wrote: > Hi Hal > > QoS patch: added qos class and service id to the path record > > Signed-off-by: Yevgeny Kliteynik Couple of comments...
> --- > osm/include/iba/ib_types.h | 149 +++++++++++++++++++++++++++++++--- > osm/opensm/osm_helper.c | 8 +- > osm/opensm/osm_sa_multipath_record.c | 2 +- > osm/opensm/osm_sa_path_record.c | 5 +- > osm/osmtest/osmtest.c | 2 +- > 5 files changed, 147 insertions(+), 19 deletions(-) > > diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h > index 22f7f62..7762ed2 100644 > --- a/osm/include/iba/ib_types.h > +++ b/osm/include/iba/ib_types.h > @@ -1700,6 +1700,28 @@ ib_class_is_rmpp( > #define IB_SMINFO_STATE_MASTER 3 > /**********/ > > +/****d* IBA Base: Constants/IB_PATH_REC_SL_MASK > +* NAME > +* IB_PATH_REC_SL_MASK > +* > +* DESCRIPTION > +* Mask for the sl field for path record > +* > +* SOURCE > +*/ > +#define IB_PATH_REC_SL_MASK 0xF > + > +/****d* IBA Base: Constants/IB_PATH_REC_QOS_CLASS_MASK > +* NAME > +* IB_PATH_REC_QOS_CLASS_MASK > +* > +* DESCRIPTION > +* Mask for the QoS class field for path record > +* > +* SOURCE > +*/ > +#define IB_PATH_REC_QOS_CLASS_MASK 0xFFF0 > + > /****d* IBA Base: Constants/IB_PATH_REC_SELECTOR_MASK > * NAME > * IB_PATH_REC_SELECTOR_MASK > @@ -2314,7 +2336,7 @@ ib_gid_get_guid( > #include > typedef struct _ib_path_rec > { > - uint8_t resv0[8]; > + ib_net64_t service_id; > ib_gid_t dgid; > ib_gid_t sgid; > ib_net16_t dlid; > @@ -2323,7 +2345,7 @@ typedef struct _ib_path_rec > uint8_t tclass; > uint8_t num_path; > ib_net16_t pkey; > - ib_net16_t sl; > + ib_net16_t qos_class_sl; > uint8_t mtu; > uint8_t rate; > uint8_t pkt_life; > @@ -2363,11 +2385,8 @@ typedef struct _ib_path_rec > * pkey > * Partition key (P_Key) to use on this path. > * > -* resv1 > -* Reserved byte. > -* > -* sl > -* Service level to use on this path. > +* qos_class_sl > +* QoS class and service level to use on this path. 
> * > * mtu > * MTU and MTU selector fields to use on this path > @@ -2388,6 +2407,7 @@ typedef struct _ib_path_rec > *********/ > > /* Path Record Component Masks */ > +#define IB_PR_COMPMASK_SERVICEID (CL_HTON64(((uint64_t)1)<<1)) > #define IB_PR_COMPMASK_DGID (CL_HTON64(((uint64_t)1)<<2)) > #define IB_PR_COMPMASK_SGID (CL_HTON64(((uint64_t)1)<<3)) > #define IB_PR_COMPMASK_DLID (CL_HTON64(((uint64_t)1)<<4)) > @@ -2400,7 +2420,7 @@ typedef struct _ib_path_rec > #define IB_PR_COMPMASK_REVERSIBLE (CL_HTON64(((uint64_t)1)<<11)) > #define IB_PR_COMPMASK_NUMBPATH (CL_HTON64(((uint64_t)1)<<12)) > #define IB_PR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<13)) > -#define IB_PR_COMPMASK_RESV1 (CL_HTON64(((uint64_t)1)<<14)) > +#define IB_PR_COMPMASK_QOS_CLASS (CL_HTON64(((uint64_t)1)<<14)) > #define IB_PR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<15)) > #define IB_PR_COMPMASK_MTUSELEC (CL_HTON64(((uint64_t)1)<<16)) > #define IB_PR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<17)) > @@ -2658,6 +2678,7 @@ ib_path_rec_init_local( > IN ib_net16_t slid, > IN uint8_t num_path, > IN ib_net16_t pkey, > + IN uint16_t qos_class, > IN uint8_t sl, > IN uint8_t mtu_selector, > IN uint8_t mtu, > @@ -2673,8 +2694,8 @@ ib_path_rec_init_local( > p_rec->slid = slid; > p_rec->num_path = num_path; > p_rec->pkey = pkey; > - /* Lower 4 bits of path rec's SL are reserved. 
*/ > - p_rec->sl = cl_ntoh16( sl ); > + p_rec->qos_class_sl = cl_hton16( (sl & IB_PATH_REC_SL_MASK) | > + (qos_class << 4) ); > p_rec->mtu = (uint8_t)((mtu & IB_PATH_REC_BASE_MASK) | > (uint8_t)(mtu_selector << 6)); > p_rec->rate = (uint8_t)((rate & IB_PATH_REC_BASE_MASK) | > @@ -2686,8 +2707,8 @@ ib_path_rec_init_local( > /* Clear global routing fields for local path records */ > p_rec->hop_flow_raw = 0; > p_rec->tclass = 0; > + p_rec->service_id = 0; > > - *((uint64_t*)p_rec->resv0) = 0; > *((uint32_t*)p_rec->resv2) = 0; > *((uint16_t*)p_rec->resv2 + 2) = 0; > } > @@ -2716,6 +2737,9 @@ ib_path_rec_init_local( > * pkey > * [in] Partition key (P_Key) to use on this path. > * > +* qos_class > +* [in] QoS class to use on this path. Lower 12-bits are valid. > +* > * sl > * [in] Service level to use on this path. Lower 4-bits are valid. > * > @@ -2779,6 +2803,41 @@ ib_path_rec_num_path( > * ib_path_rec_t > *********/ > > +/****f* IBA Base: Types/ib_path_rec_set_sl > +* NAME > +* ib_path_rec_set_sl > +* > +* DESCRIPTION > +* Set path service level. > +* > +* SYNOPSIS > +*/ > +static inline void OSM_API > +ib_path_rec_set_sl( > + IN ib_path_rec_t* const p_rec, > + IN const uint8_t sl ) > +{ > + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & > + IB_PATH_REC_QOS_CLASS_MASK ) | > + ( sl & IB_PATH_REC_SL_MASK) ); > +} > +/* > +* PARAMETERS > +* p_rec > +* [in] Pointer to the path record object. > +* > +* sl > +* [in] Service level to set. 
> +* > +* RETURN VALUES > +* None > +* > +* NOTES > +* > +* SEE ALSO > +* ib_path_rec_t > +*********/ > + > /****f* IBA Base: Types/ib_path_rec_sl > * NAME > * ib_path_rec_sl > @@ -2792,7 +2851,7 @@ static inline uint8_t OSM_API > ib_path_rec_sl( > IN const ib_path_rec_t* const p_rec ) > { > - return( (uint8_t)((cl_ntoh16( p_rec->sl )) & 0xF) ); > + return( (uint8_t)((cl_ntoh16( p_rec->qos_class_sl )) & IB_PATH_REC_SL_MASK) ); > } > /* > * PARAMETERS > @@ -2808,6 +2867,72 @@ ib_path_rec_sl( > * ib_path_rec_t > *********/ > > +/****f* IBA Base: Types/ib_path_rec_set_qos_class > +* NAME > +* ib_path_rec_set_qos_class > +* > +* DESCRIPTION > +* Set path QoS class. > +* > +* SYNOPSIS > +*/ > +static inline void OSM_API > +ib_path_rec_set_qos_class( > + IN ib_path_rec_t* const p_rec, > + IN const uint16_t qos_class ) > +{ > + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & > + IB_PATH_REC_QOS_CLASS_MASK ) | > + ( qos_class << 4) ); > +} IB_PATH_REC_QOS_CLASS_MASK is 0xfff0, so this will clear sl component. > +/* > +* PARAMETERS > +* p_rec > +* [in] Pointer to the path record object. > +* > +* qos_class > +* [in] QoS class to set. > +* > +* RETURN VALUES > +* None > +* > +* NOTES > +* > +* SEE ALSO > +* ib_path_rec_t > +*********/ > + > +/****f* IBA Base: Types/ib_path_rec_qos_class > +* NAME > +* ib_path_rec_qos_class > +* > +* DESCRIPTION > +* Get QoS class. > +* > +* SYNOPSIS > +*/ > +static inline uint16_t OSM_API > +ib_path_rec_qos_class( > + IN const ib_path_rec_t* const p_rec ) > +{ > + return( (uint16_t)( cl_ntoh16( p_rec->qos_class_sl ) & Why (uint16_t) casting is needed? > + IB_PATH_REC_QOS_CLASS_MASK ) >> 4 ); > +} &IB_PATH_REC_QOS_CLASS_MASK is not needed - follow >> 4 drops lower bits. Sasha > +/* > +* PARAMETERS > +* p_rec > +* [in] Pointer to the path record object. > +* > +* RETURN VALUES > +* QoS class of the path record. 
> +* > +* NOTES > +* > +* SEE ALSO > +* ib_path_rec_t > +*********/ > + > + > /****f* IBA Base: Types/ib_path_rec_mtu > * NAME > * ib_path_rec_mtu > diff --git a/osm/opensm/osm_helper.c b/osm/opensm/osm_helper.c > index 2ef8e38..e0b5aef 100644 > --- a/osm/opensm/osm_helper.c > +++ b/osm/opensm/osm_helper.c > @@ -1095,7 +1095,7 @@ osm_dump_path_record( > { > osm_log( p_log, log_level, > "PathRecord dump:\n" > - "\t\t\t\tresv0...................0x%016" PRIx64 "\n" > + "\t\t\t\tservice_id..............0x%016" PRIx64 "\n" > "\t\t\t\tdgid....................0x%016" PRIx64 " : " > "0x%016" PRIx64 "\n" > "\t\t\t\tsgid....................0x%016" PRIx64 " : " > @@ -1106,6 +1106,7 @@ osm_dump_path_record( > "\t\t\t\ttclass..................0x%X\n" > "\t\t\t\tnum_path_revers.........0x%X\n" > "\t\t\t\tpkey....................0x%X\n" > + "\t\t\t\tqos_class...............0x%X\n" > "\t\t\t\tsl......................0x%X\n" > "\t\t\t\tmtu.....................0x%X\n" > "\t\t\t\trate....................0x%X\n" > @@ -1114,7 +1115,7 @@ osm_dump_path_record( > "\t\t\t\tresv2...................0x%X\n" > "\t\t\t\tresv3...................0x%X\n" > "", > - *(uint64_t*)p_pr->resv0, > + cl_ntoh64(p_pr->service_id), > cl_ntoh64( p_pr->dgid.unicast.prefix ), > cl_ntoh64( p_pr->dgid.unicast.interface_id ), > cl_ntoh64( p_pr->sgid.unicast.prefix ), > @@ -1125,7 +1126,8 @@ osm_dump_path_record( > p_pr->tclass, > p_pr->num_path, > cl_ntoh16( p_pr->pkey ), > - cl_ntoh16( p_pr->sl ), > + ib_path_rec_qos_class(p_pr), > + ib_path_rec_sl(p_pr), > p_pr->mtu, > p_pr->rate, > p_pr->pkt_life, > diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c > index 2f61fb8..5ec0006 100644 > --- a/osm/opensm/osm_sa_multipath_record.c > +++ b/osm/opensm/osm_sa_multipath_record.c > @@ -759,7 +759,7 @@ __osm_mpr_rcv_build_pr( > p_pr->hop_flow_raw &= cl_hton32(1<<31); > > p_pr->pkey = p_parms->pkey; > - p_pr->sl = cl_hton16( p_parms->sl ); > + ib_path_rec_set_sl(p_pr, p_parms->sl); > 
p_pr->mtu = (uint8_t)( p_parms->mtu | 0x80 ); > p_pr->rate = (uint8_t)( p_parms->rate | 0x80 ); > > diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c > index 7707f52..5a43912 100644 > --- a/osm/opensm/osm_sa_path_record.c > +++ b/osm/opensm/osm_sa_path_record.c > @@ -774,7 +774,8 @@ __osm_pr_rcv_build_pr( > #endif > > p_pr->pkey = p_parms->pkey; > - p_pr->sl = cl_hton16(p_parms->sl); > + ib_path_rec_set_qos_class(p_pr,0); > + ib_path_rec_set_sl(p_pr,p_parms->sl); > p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80); > p_pr->rate = (uint8_t)(p_parms->rate | 0x80); > > @@ -2051,7 +2052,7 @@ osm_pr_rcv_process( > /* SL, Hop Limit, and Flow Label */ > ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, > &sl, &flow_label, &hop_limit ); > - p_pr_item->path_rec.sl = cl_hton16( sl ); > + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); > #ifndef ROUTER_EXP > p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | > (flow_label << 8); > diff --git a/osm/osmtest/osmtest.c b/osm/osmtest/osmtest.c > index b9e3bf7..c42b037 100644 > --- a/osm/osmtest/osmtest.c > +++ b/osm/osmtest/osmtest.c > @@ -1982,7 +1982,7 @@ osmtest_write_path_info( IN osmtest_t * > cl_ntoh64( p_rec->sgid.unicast.interface_id ), > cl_ntoh16( p_rec->dlid ), cl_ntoh16( p_rec->slid ), > cl_ntoh32( p_rec->hop_flow_raw ), p_rec->tclass, > - p_rec->num_path, cl_ntoh16( p_rec->pkey ), p_rec->sl, > + p_rec->num_path, cl_ntoh16( p_rec->pkey ), ib_path_rec_sl(p_rec), > p_rec->mtu, p_rec->rate, p_rec->pkt_life, > p_rec->preference ); > > -- > 1.4.4.1.GIT > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From mshefty at ichips.intel.com Mon Jan 22 09:52:09 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 09:52:09 -0800 Subject: [openib-general] [PATCH RFC 1 of 
5] IB/sa: QoS support In-Reply-To: <20070122144306.GC7261@mellanox.co.il> References: <20070122144146.GB7261@mellanox.co.il> <20070122144306.GC7261@mellanox.co.il> Message-ID: <45B4F9C9.6040603@ichips.intel.com> > static const struct ib_field path_rec_table[] = { > - { RESERVED, > - .offset_words = 0, > - .offset_bits = 0, > - .size_bits = 32 }, > - { RESERVED, > - .offset_words = 1, > + { PATH_REC_FIELD(service_id), > + .offset_words = 2, I haven't read the proposed annex, but I would have expected this to be offset_words = 0. Otherwise, it seems that the service_id would fall over the dgid. > .offset_bits = 0, > - .size_bits = 32 }, > + .size_bits = 64 }, > { PATH_REC_FIELD(dgid), > .offset_words = 2, > .offset_bits = 0, > @@ -178,7 +174,7 @@ static const struct ib_field path_rec_ta > .offset_words = 12, > .offset_bits = 16, > .size_bits = 16 }, > - { RESERVED, > + { PATH_REC_FIELD(priority), > .offset_words = 13, > .offset_bits = 0, > .size_bits = 12 }, > Index: linux-2.6/include/rdma/ib_sa.h > =================================================================== > --- linux-2.6.orig/include/rdma/ib_sa.h > +++ linux-2.6/include/rdma/ib_sa.h > @@ -109,8 +109,9 @@ enum ib_sa_selector { > * Reserved rows are indicated with comments to help maintainability. > */ > > -/* reserved: 0 */ > -/* reserved: 1 */ > +#define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) | \ > + IB_SA_COMP_MASK( 1)) > + Does the annex redefine how component mask works, or does it make path records have a special case? (I'll try to look at the annex; it's just that this definition isn't obvious to me.) - Sean From mst at mellanox.co.il Mon Jan 22 09:57:59 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 22 Jan 2007 19:57:59 +0200 Subject: [openib-general] [PATCH RFC 1 of 5] IB/sa: QoS support In-Reply-To: <45B4F9C9.6040603@ichips.intel.com> References: <45B4F9C9.6040603@ichips.intel.com> Message-ID: <20070122175759.GC19017@mellanox.co.il> > > Index: linux-2.6/include/rdma/ib_sa.h > > =================================================================== > > --- linux-2.6.orig/include/rdma/ib_sa.h > > +++ linux-2.6/include/rdma/ib_sa.h > > @@ -109,8 +109,9 @@ enum ib_sa_selector { > > * Reserved rows are indicated with comments to help maintainability. > > */ > > > > -/* reserved: 0 */ > > -/* reserved: 1 */ > > +#define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) | \ > > + IB_SA_COMP_MASK( 1)) > > + > > Does the annex redefine how component mask works, or does it make path records > have a special case? (I'll try to look at the annex; it's just that this > definition isn't obvious to me.) Service ID is a special case - it has 2 comp masks that must always go together. 
-- MST From mshefty at ichips.intel.com Mon Jan 22 09:59:13 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 09:59:13 -0800 Subject: [openib-general] [PATCH RFC 2 of 5] IB/cma: QoS support In-Reply-To: <20070122144500.GD7261@mellanox.co.il> References: <20070122144500.GD7261@mellanox.co.il> Message-ID: <45B4FB71.1020509@ichips.intel.com> > - id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, > + if (cma_qos_support) { > + ib_sa_comp_mask m = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | > + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH; > + struct sockaddr *sockaddr = &id_priv->id.route.addr.dst_addr; > + __be64 svc_id = cma_get_service_id(id_priv->id.ps, sockaddr); > + > + path_rec.service_id = svc_id; > + m |= IB_SA_PATH_REC_SERVICE_ID; > + if (id_priv->priority) { > + path_rec.priority = __cpu_to_be16(id_priv->priority); > + m |= IB_SA_PATH_REC_PRIORITY; > + } > + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, > + id_priv->id.port_num, &path_rec, m, > + timeout_ms, GFP_KERNEL, > + cma_query_handler, work, &id_priv->query); > + } else > + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, > id_priv->id.port_num, &path_rec, > IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | > IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, Minor comment that we might be able to restructure this slightly to have a single call to ib_sa_path_rec_get(). What I'm not as sure about is separating the call to set the priority from resolving the route. My first impression was that this was a good idea, but it may give the impression that the priority can change after route resolution. Does anyone else have any thoughts on this? - Sean From mst at mellanox.co.il Mon Jan 22 10:22:27 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 22 Jan 2007 20:22:27 +0200 Subject: [openib-general] [PATCH RFC 2 of 5] IB/cma: QoS support In-Reply-To: <45B4FB71.1020509@ichips.intel.com> References: <20070122144500.GD7261@mellanox.co.il> <45B4FB71.1020509@ichips.intel.com> Message-ID: <20070122182227.GD19017@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: [PATCH RFC 2 of 5] IB/cma: QoS support > > > - id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, > > + if (cma_qos_support) { > > + ib_sa_comp_mask m = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | > > + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH; > > + struct sockaddr *sockaddr = &id_priv->id.route.addr.dst_addr; > > + __be64 svc_id = cma_get_service_id(id_priv->id.ps, sockaddr); > > + > > + path_rec.service_id = svc_id; > > + m |= IB_SA_PATH_REC_SERVICE_ID; > > + if (id_priv->priority) { > > + path_rec.priority = __cpu_to_be16(id_priv->priority); > > + m |= IB_SA_PATH_REC_PRIORITY; > > + } > > + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, > > + id_priv->id.port_num, &path_rec, m, > > + timeout_ms, GFP_KERNEL, > > + cma_query_handler, work, &id_priv->query); > > + } else > > + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, > > id_priv->id.port_num, &path_rec, > > IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | > > IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, > > Minor comment that we might be able to restructure this slightly to have a > single call to ib_sa_path_rec_get(). Of course. We don't need the module option upstream either. This is just for technical preview purposes. -- MST From mst at mellanox.co.il Mon Jan 22 10:28:44 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 22 Jan 2007 20:28:44 +0200 Subject: [openib-general] [PATCH RFC 1 of 5] IB/sa: QoS support In-Reply-To: <45B4F9C9.6040603@ichips.intel.com> References: <20070122144146.GB7261@mellanox.co.il> <20070122144306.GC7261@mellanox.co.il> <45B4F9C9.6040603@ichips.intel.com> Message-ID: <20070122182844.GE19017@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: [PATCH RFC 1 of 5] IB/sa: QoS support > > > static const struct ib_field path_rec_table[] = { > > - { RESERVED, > > - .offset_words = 0, > > - .offset_bits = 0, > > - .size_bits = 32 }, > > - { RESERVED, > > - .offset_words = 1, > > + { PATH_REC_FIELD(service_id), > > + .offset_words = 2, > > I haven't read the proposed annex, but I would have expected this to be > offset_words = 0. Otherwise, it seems that the service_id would fall over the dgid. Thanks, good catch. -- MST From mst at mellanox.co.il Mon Jan 22 10:41:55 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 20:41:55 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <20070122174006.GB19017@mellanox.co.il> References: <45B36F03.9070201@mellanox.co.il> <45B4F09B.4050407@ichips.intel.com> <20070122174006.GB19017@mellanox.co.il> Message-ID: <20070122184155.GB26634@mellanox.co.il> > Where will you branch release 1.0 from? Since we agreed OFED will be based > on release 1.0 of librdmacm, this is what OFED shall take. Sean, could you answer this please? -- MST From sashak at voltaire.com Mon Jan 22 11:02:49 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 22 Jan 2007 21:02:49 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B32872.4070006@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> Message-ID: <20070122190249.GI23783@sashak.voltaire.com> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: > Hi Sasha. 
> > Sasha Khapyorsky wrote: > > Hi Yevgeny, > > > > On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >> Hi Hal > >> > >> The following series of six patches implements QoS policy file parser: > >> > >> 1. QoS parser Lex file > >> 2. QoS parser Lex-generated c file > >> 3. QoS parser grammar (Yacc) file > >> 4. QoS parser Yacc-generated grammar c and h file > >> 5. QoS parser header file that defines parse tree data structures > >> 6. Changes in makefiles and configure.in file for compiling QoS parser files > > > > Is there any description of proposed format and functionality? > > The parser is based on QoS RFC sent by Eitan in May 2006, with a few > minor modifications. You can find the RFC here: > http://openib.org/pipermail/openib-general/2006-May/022336.html This was an RFC and a couple of issues were discussed then. Now you are at the implementation phase, and an exact format description would be desirable. For example, what are the "few minor modifications"? > > Also what about using human readable formats? > > To me the xml-like format in the RFC looks pretty readable. > It has very limited number of keywords (tags), so it's easy > to follow and/or to modify. That is your opinion; not everybody will agree with it (AFAIR this was also discussed during the RFC). I would not care, but I don't know of any example of really successful use of XML for configuration purposes (especially where advanced graphical config editors/viewers were not used). Do you know of one? Sasha From mshefty at ichips.intel.com Mon Jan 22 11:05:36 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 11:05:36 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <20070122184155.GB26634@mellanox.co.il> References: <45B36F03.9070201@mellanox.co.il> <45B4F09B.4050407@ichips.intel.com> <20070122174006.GB19017@mellanox.co.il> <20070122184155.GB26634@mellanox.co.il> Message-ID: <45B50B00.4020705@ichips.intel.com> Michael S.
Tsirkin wrote: >>Where will you branch release 1.0 from? Since we agreed OFED will be based >>on release 1.0 of librdmacm, this is what OFED shall take. > > Sean, could you answer this please? This depends on the status of the multicast changes by feature freeze, and which kernel OFED wants to ship. Since multicast support has been requested, I would like to get this in. However, the kernel multicast changes are not in 2.6.20. What's OFED's stance on kernel patches on this? This is different than the local_sa, which is mostly self contained, since the multicast patches affect ipoib. For now, current branch (rdma_ucm-abi3) that OFED is pulling is fine. On a side note, I'm not sure that the release should be called 1.0. When OFED 1.1 shipped the librdmacm, there was not a release number assigned, and my understanding is that the distributors assigned one. Can we clarify how we want to deal with this? - Sean From mst at mellanox.co.il Mon Jan 22 11:15:58 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 21:15:58 +0200 Subject: [openib-general] [PATCH] IB/SRP add need_reset In-Reply-To: <45ACEE9E.108@dev.mellanox.co.il> References: <45ACEE9E.108@dev.mellanox.co.il> Message-ID: <20070122191558.GB1105@mellanox.co.il> > Quoting ishai : > Subject: [PATCH] IB/SRP add need_reset > > > When there is a call to send_tsk_mgmt it posts a send and waits for 5 seconds > to get a response. > When the QP is in an error state it is obvious that there will be no response > so it is quite useless to wait. > This timeout causes SRP to wait a long time to reconnect. (Each abort and > each reset_device call send_tsk_mgmt that waits for the timeout). > The following patch solves this problem by identifying the failure > and returning an immediate error code. > > Signed-off-by: Ishai Rabinovitz > --- > Hi Roland, > > This is an old patch. We thought at first that the timeout is because there > is a bug in low level driver. 
After rechecking it, I found that the bug is > internal to SRP. Roland, could you look at this again please? This addresses a real issue seen by Ishai, and his explanation for why this helps sounds convincing. -- MST From mst at mellanox.co.il Mon Jan 22 11:30:15 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 21:30:15 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B50B00.4020705@ichips.intel.com> References: <45B50B00.4020705@ichips.intel.com> Message-ID: <20070122193015.GC1105@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: librdmacm and udapl: Which git branch to use in ofed_1_2 build > > Michael S. Tsirkin wrote: > >>Where will you branch release 1.0 from? Since we agreed OFED will be based > >>on release 1.0 of librdmacm, this is what OFED shall take. > > > > Sean, could you answer this please? > > This depends on the status of the multicast changes by feature freeze, and which > kernel OFED wants to ship. Since multicast support has been requested, I would > like to get this in. However, the kernel multicast changes are not in 2.6.20. > What's OFED's stance on kernel patches on this? This is different than the > local_sa, which is mostly self contained, since the multicast patches affect ipoib. Yes, but everyone agreed to have this patch in (that is - several people want it and no one objects). As far as Mellanox testing goes, crashes we saw with it were resolved. Again, I am just trying to avoid duplicating work, so I am waiting for you to tell me when to take the code and put in OFED. > For now, current branch (rdma_ucm-abi3) that OFED is pulling is fine. OK. Note however we won't be able to switch after feature freeze, so whatever it will be in OFED needs to be decided by then. > On a side note, I'm not sure that the release should be called 1.0. 
When OFED > 1.1 shipped the librdmacm, there was not a release number assigned, and my > understanding is that the distributors assigned one. Can we clarify how we want > to deal with this? I think we can just ignore this. OFED 1.1 shipped a pre-release and named it (I think) 1.0, but I am not sure this means we have to worry about collisions too much - this is always a problem with distributing pre-releases. And note how once kernel part is updated an old librdmacm lib will break anyway even if you do not override it. -- MST From mshefty at ichips.intel.com Mon Jan 22 11:36:47 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 11:36:47 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <20070122193015.GC1105@mellanox.co.il> References: <45B50B00.4020705@ichips.intel.com> <20070122193015.GC1105@mellanox.co.il> Message-ID: <45B5124F.60907@ichips.intel.com> > Yes, but everyone agreed to have this patch in (that is - several > people want it and no one objects). As far as Mellanox testing goes, crashes we > saw with it were resolved. > > Again, I am just trying to avoid duplicating work, so I am waiting for > you to tell me when to take the code and put in OFED. There will be additional changes to the multicast branch, which I'm hoping to complete this week. If you want to begin pulling in the following branches, wider testing of the multicast code can begin sooner: rdma-dev.git - multicast-sa_cache branch would be easiest for you librdmacm.git - master would be easiest >>On a side note, I'm not sure that the release should be called 1.0. When OFED >>1.1 shipped the librdmacm, there was not a release number assigned, and my >>understanding is that the distributors assigned one. Can we clarify how we want >>to deal with this? > > I think we can just ignore this. That's easy enough to do. 
- Sean From halr at voltaire.com Mon Jan 22 12:05:03 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 22 Jan 2007 15:05:03 -0500 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B32872.4070006@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> Message-ID: <1169496259.23046.10310.camel@hal.voltaire.com> Hi Yevgeny, On Sun, 2007-01-21 at 03:46, Yevgeny Kliteynik wrote: > Hi Sasha. > > Sasha Khapyorsky wrote: > > Hi Yevgeny, > > > > On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >> Hi Hal > >> > >> The following series of six patches implements QoS policy file parser: > >> > >> 1. QoS parser Lex file > >> 2. QoS parser Lex-generated c file > >> 3. QoS parser grammar (Yacc) file > >> 4. QoS parser Yacc-generated grammar c and h file > >> 5. QoS parser header file that defines parse tree data structures > >> 6. Changes in makefiles and configure.in file for compiling QoS parser files > > > > Is there any description of proposed format and functionality? > > The parser is based on QoS RFC sent by Eitan in May 2006, with a few > minor modifications. You can find the RFC here: > http://openib.org/pipermail/openib-general/2006-May/022336.html > > > Also what about using human readable formats? > > To me the xml-like format in the RFC looks pretty readable. > It has very limited number of keywords (tags), so it's easy > to follow and/or to modify. Putting aside the issue of plain text versus XML file formats for a moment, can an example of the XML format be supplied ? What are the tags used and their relationships ? I don't think there's been a discussion on this yet. Also, why were lex and yacc chosen to be used rather than some open source XML parser (already written in C) ? I also have some questions about the patches but I'll wait to see more of the bigger picture here. 
-- Hal > -- Yevgeny > > > Sasha > > From ogerlitz at voltaire.com Mon Jan 22 12:11:05 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 22 Jan 2007 22:11:05 +0200 (IST) Subject: [openib-general] [RFC/PATCH v2] rdma/cma: use the ipoib broadcast group qkey In-Reply-To: References: Message-ID: Modify the kernel rdma cm use the ipoib broadcast group qkey instead a qkey of its own for its UD IDs/QPs. For RDMA_PS_UDP ID, the qkey is stored in struct rdma_id_private and delivered also in ADDR_RESOLVED and CONNECT_REQUEST events. The user space library learns the qkey from these events and use them when it is called to create UD QP. Signed-off-by: Or Gerlitz Index: rdma-dev/drivers/infiniband/core/cma.c =================================================================== --- rdma-dev.orig/drivers/infiniband/core/cma.c 2007-01-21 12:11:16.000000000 +0200 +++ rdma-dev/drivers/infiniband/core/cma.c 2007-01-22 21:52:30.000000000 +0200 @@ -136,6 +136,7 @@ struct rdma_id_private { u32 seq_num; u32 qp_num; u8 srq; + u32 qkey; }; struct cma_multicast { @@ -884,6 +885,21 @@ out: return ret; } +static int get_broadcast_group_qkey(struct rdma_id_private *id_priv) +{ + struct ib_sa_mcmember_rec rec; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int ret; + + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, + &rec.mgid, &rec); + if (ret) + return -EINVAL; + id_priv->qkey = rec.qkey; + return 0; +} + static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event) { @@ -1020,7 +1036,14 @@ static int cma_req_handler(struct ib_cm_ mutex_unlock(&lock); if (ret) goto release_conn_id; - + + if (conn_id->id.ps == RDMA_PS_UDP) { + ret = get_broadcast_group_qkey(conn_id); + if (ret) + goto release_conn_id; + event.param.ud.qkey = conn_id->qkey; + } + conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; @@ -1600,6 +1623,7 @@ 
static void addr_handler(int status, str { struct rdma_id_private *id_priv = context; struct rdma_cm_event event; + int ret; memset(&event, 0, sizeof event); atomic_inc(&id_priv->dev_remove); @@ -1627,6 +1651,14 @@ static void addr_handler(int status, str memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + if (id_priv->id.ps == RDMA_PS_UDP) { + ret = get_broadcast_group_qkey(id_priv); + if (ret) { + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = ret; + } else + event.param.ud.qkey = id_priv->qkey; + } } if (id_priv->id.event_handler(&id_priv->id, &event)) { @@ -1936,7 +1968,9 @@ static int cma_sidr_rep_handler(struct i event.status = ib_event->param.sidr_rep_rcvd.status; break; } - if (rep->qkey != RDMA_UD_QKEY) { + if (rep->qkey != id_priv->qkey) { + printk(KERN_WARNING "qkey mismatch %.8x client qkey %.8x\n", + rep->qkey, id_priv->qkey); event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; break; @@ -2231,7 +2265,7 @@ static int cma_send_sidr_rep(struct rdma rep.status = status; if (status == IB_SIDR_SUCCESS) { rep.qp_num = id_priv->qp_num; - rep.qkey = RDMA_UD_QKEY; + rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; Index: rdma-dev/include/rdma/rdma_cm_ib.h =================================================================== --- rdma-dev.orig/include/rdma/rdma_cm_ib.h 2007-01-18 13:43:37.000000000 +0200 +++ rdma-dev/include/rdma/rdma_cm_ib.h 2007-01-22 21:59:34.000000000 +0200 @@ -44,7 +44,4 @@ int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths); -/* Global qkey for UD QPs and multicast groups. 
*/ -#define RDMA_UD_QKEY 0x01234567 - #endif /* RDMA_CM_IB_H */ From ogerlitz at voltaire.com Mon Jan 22 12:15:26 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 22 Jan 2007 22:15:26 +0200 (IST) Subject: [openib-general] [RFC/PATCH] librdmacm: use the ipoib broadcast group qkey In-Reply-To: References: Message-ID: Modify librdmacm use a qkey for its UD IDs/QPs delivered to it by the rdma cm kernel code instead the a hard coded RDMA_UD_QKEY. For RDMA_PS_UDP ID, the qkey is provided by the kernel in ADDR_RESOLVED and CONNECT_REQUEST events and is stored by the library in struct cma_id_private. Later the library use the qkey when it is called to create a UD QP. Signed-off-by: Or Gerlitz Index: librdmacm/src/cma.c =================================================================== --- librdmacm.orig/src/cma.c 2007-01-22 21:21:37.000000000 +0200 +++ librdmacm/src/cma.c 2007-01-22 21:57:13.000000000 +0200 @@ -116,6 +116,7 @@ struct cma_id_private { pthread_mutex_t mut; uint32_t handle; struct cma_multicast *mc_list; + uint32_t qkey; }; struct cma_multicast { @@ -687,7 +688,7 @@ static int ucma_init_ud_qp(struct cma_id qp_attr.port_num = id_priv->id.port_num; qp_attr.qp_state = IBV_QPS_INIT; - qp_attr.qkey = RDMA_UD_QKEY; + qp_attr.qkey = id_priv->qkey; ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY); if (ret) @@ -1169,6 +1170,7 @@ int rdma_get_cm_event(struct rdma_event_ struct ucma_abi_get_event *cmd; struct cma_event *evt; void *msg; + struct cma_id_private *id_priv; int ret, size; ret = cma_dev_cnt ? 
0 : ucma_init(); @@ -1199,6 +1201,9 @@ retry: evt->event.status = ucma_query_route(&evt->id_priv->id); if (evt->event.status) evt->event.event = RDMA_CM_EVENT_ADDR_ERROR; + else if (evt->id_priv->id.ps == RDMA_PS_UDP) { + evt->id_priv->qkey = resp->param.ud.qkey; + } break; case RDMA_CM_EVENT_ROUTE_RESOLVED: evt->id_priv = (void *) (uintptr_t) resp->uid; @@ -1211,12 +1216,16 @@ retry: evt->id_priv = (void *) (uintptr_t) resp->uid; if (evt->id_priv->id.ps == RDMA_PS_TCP) ucma_copy_conn_event(evt, &resp->param.conn); - else + else ucma_copy_ud_event(evt, &resp->param.ud); ret = ucma_process_conn_req(evt, resp->id); if (ret) goto retry; + if (evt->id_priv->id.ps == RDMA_PS_UDP) { + id_priv = container_of(evt->event.id, struct cma_id_private, id); + id_priv->qkey = resp->param.ud.qkey; + } break; case RDMA_CM_EVENT_CONNECT_RESPONSE: evt->id_priv = (void *) (uintptr_t) resp->uid; Index: librdmacm/examples/udaddy.c =================================================================== --- librdmacm.orig/examples/udaddy.c 2007-01-22 21:19:52.000000000 +0200 +++ librdmacm/examples/udaddy.c 2007-01-22 22:02:07.000000000 +0200 @@ -415,6 +415,13 @@ static void destroy_nodes(void) free(test.nodes); } +/* + * Global qkey value for all UD QPs and multicast groups created via the + * RDMA CM. + * XXX FIXME - enhance test to not assume a pre defined qkey + */ +#define RDMA_UD_QKEY 0x01234567 + static void create_reply_ah(struct cmatest_node *node, struct ibv_wc *wc) { node->ah = ibv_create_ah_from_wc(node->pd, wc, node->mem, Index: librdmacm/include/rdma/rdma_cma.h =================================================================== --- librdmacm.orig/include/rdma/rdma_cma.h 2007-01-22 21:56:13.000000000 +0200 +++ librdmacm/include/rdma/rdma_cma.h 2007-01-22 21:56:32.000000000 +0200 @@ -65,12 +65,6 @@ enum rdma_port_space { RDMA_PS_UDP = 0x0111, }; -/* - * Global qkey value for all UD QPs and multicast groups created via the - * RDMA CM. 
- */ -#define RDMA_UD_QKEY 0x01234567 - struct ib_addr { union ibv_gid sgid; union ibv_gid dgid; From mst at mellanox.co.il Mon Jan 22 12:22:10 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 22:22:10 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B5124F.60907@ichips.intel.com> References: <45B5124F.60907@ichips.intel.com> Message-ID: <20070122202210.GE1105@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: librdmacm and udapl: Which git branch to use in ofed_1_2 build > > > Yes, but everyone agreed to have this patch in (that is - several > > people want it and no one objects). As far as Mellanox testing goes, crashes we > > saw with it were resolved. > > > > Again, I am just trying to avoid duplicating work, so I am waiting for > > you to tell me when to take the code and put in OFED. > > There will be additional changes to the multicast branch, which I'm hoping to > complete this week. If you want to begin pulling in the following branches, > wider testing of the multicast code can begin sooner: > > rdma-dev.git - multicast-sa_cache branch would be easiest for you This is just multicast + sa cache, right? OK, so I'll take it from there, and you let me know if/when to update. > librdmacm.git - master would be easiest OK, we can switch to master. Then DAPL would need to be updated, right? Arlin? -- MST From ogerlitz at voltaire.com Mon Jan 22 12:23:20 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 22 Jan 2007 22:23:20 +0200 (IST) Subject: [openib-general] [RFC/PATCH] librdmacm: use the ipoib broadcast group qkey In-Reply-To: References: Message-ID: Sean, Using the two patches udaddy works fine except for the packets sent by the passive side which are filtered out by the active side HCA/QP. 
This is b/c the passive side of this --test-- is not really doing RDMA CM UD qp and qkey resolution but rather uses the imm data to "exchange" (below) the active side qp and hard coded qkey. I think that in real life librdmacm apps this sort of design is much less expected, and the passive side would also initiate qp/qkey/sidr exchange. I need to think on this point a little bit to see if my design can be changed a little to allow for this sort of simplification. +/* + * Global qkey value for all UD QPs and multicast groups created via the + * RDMA CM. + * XXX FIXME - enhance test to not assume a pre defined qkey + */ +#define RDMA_UD_QKEY 0x01234567 + +static void create_reply_ah(struct cmatest_node *node, struct ibv_wc *wc) +{ + node->ah = ibv_create_ah_from_wc(node->pd, wc, node->mem, + node->cma_id->port_num); + node->remote_qpn = ntohl(wc->imm_data); + node->remote_qkey = RDMA_UD_QKEY; +} Or. From jsquyres at cisco.com Mon Jan 22 12:28:31 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 15:28:31 -0500 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <45B4D79D.3030909@mellanox.co.il> References: <45B4D79D.3030909@mellanox.co.il> Message-ID: <2BAEB20B-2C21-4365-8796-467E0CA58460@cisco.com> Vlad/Michael -- Can you try again? The issue should be resolved now. On Jan 22, 2007, at 10:26 AM, Tziporet Koren wrote: > Hi Johann, > Vlad arranged daily build mails so everybody can be notified on > compilation status. > However there are technical problems that prevent us from sending > these mails that are related to the DNS > See thread: http://openib.org/pipermail/openib-general/2007-January/ > 031831.html > > Note that bugzilla update mails are not sent from the same reason. 
> > Since we going to have the code freeze and alpha soon (end of next > week) it will be very important to fix these isses > > Thanks, > Tziporet -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From sean.hefty at intel.com Mon Jan 22 12:30:39 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 12:30:39 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <20070122202210.GE1105@mellanox.co.il> Message-ID: <000101c73e64$2e710900$8698070a@amr.corp.intel.com> >> rdma-dev.git - multicast-sa_cache branch would be easiest for you > >This is just multicast + sa cache, right? >OK, so I'll take it from there, and you let me know if/when to update. Yes - this is just the merge of the multicast and sa_cache branches. >> librdmacm.git - master would be easiest > >OK, we can switch to master. Then DAPL would need to be updated, right? >Arlin? I think DAPL stays with rdma_ucm, but Arlin can confirm. - Sean From changquing.tang at hp.com Mon Jan 22 12:50:07 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Mon, 22 Jan 2007 20:50:07 -0000 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: Message-ID: <349DCDA352EACF42A0C49FA6DCEA840321B1E1@G3W0634.americas.hpqcorp.net> Roland: when RDMA is used, a message is transferred from card A (in node A) to card B (in node B), card B delivers the message to to user buffer, and sends ACK to card A, but ACK is lost due to switch fail. So process on node A get fail for this transfer, but process on node B check the memory and get the message(success). If send/recv(SRQ) is used, is it possible that process on node A get failure, but process on node B successfully get the message ? 
--CQ > -----Original Message----- > From: Roland Dreier [mailto:rdreier at cisco.com] > Sent: Tuesday, January 16, 2007 4:54 PM > To: Tang, Changqing > Cc: Tziporet Koren; EWG; OPENIB > Subject: Re: [openib-general] Minutes for January 15, 2007 > teleconference about OFED 1.2 development progress toward code freeze > > > We need this compatible feature very much so I hope you > double confirm > this. Can app build with 1.0 work on 1.1 ? > (if we only use the basic > feature). > > It's not a matter of which features you use. But yes, my > goal is to make apps compiled against libibverbs 1.0 still > work with libibverbs 1.1. I'm not positive it is possible > since I haven't fully implemented it, but that is what I am > trying to achieve. > From rdreier at cisco.com Mon Jan 22 12:54:23 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 22 Jan 2007 12:54:23 -0800 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <349DCDA352EACF42A0C49FA6DCEA840321B1E1@G3W0634.americas.hpqcorp.net> (Changqing Tang's message of "Mon, 22 Jan 2007 20:50:07 -0000") References: <349DCDA352EACF42A0C49FA6DCEA840321B1E1@G3W0634.americas.hpqcorp.net> Message-ID: > when RDMA is used, a message is transferred from card A (in node > A) to card B (in node B), card B delivers the message to the user buffer, > and sends ACK to card A, but ACK is lost due to switch fail. So process > on node A get fail for this transfer, but process on node B check the > memory and get the message(success). > > If send/recv(SRQ) is used, is it possible that process on node A > get failure, but process on node B successfully get the message ? Yes, of course, for exactly the same reason you describe above (lost ACK). - R.
From caitlinb at broadcom.com Mon Jan 22 13:03:03 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Mon, 22 Jan 2007 13:03:03 -0800 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <349DCDA352EACF42A0C49FA6DCEA840321B1E1@G3W0634.americas.hpqcorp.net> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1FBC2B5@NT-SJCA-0751.brcm.ad.broadcom.com> openib-general-bounces at openib.org wrote: > Roland: > when RDMA is used, a message is transferred from card A (in node > A) to card B (in node B), card B delivers the message to the > user buffer, and sends ACK to card A, but ACK is lost due to > switch fail. So process on node A get fail for this transfer, > but process on node B check the memory and get the message(success). > > If send/recv(SRQ) is used, is it possible that process > on node A get failure, but process on node B successfully get > the message ? > Yes, which is why the slight differences in IB vs. iWARP ack semantics really aren't that important. The above scenario can happen in both. If this is a problem, an application layer ack is ultimately the only solution.
From halr at voltaire.com Mon Jan 22 12:41:02 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 22 Jan 2007 15:41:02 -0500 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45B32FB8.40700@dev.mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> <45B32FB8.40700@dev.mellanox.co.il> Message-ID: <1169498411.23046.12190.camel@hal.voltaire.com> Hi Yevgeny, On Sun, 2007-01-21 at 04:17, Yevgeny Kliteynik wrote: > Sasha Khapyorsky wrote: > > On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: > >> As for the mailing list it's openib-windows at openib.org. You can access > >> it here: http://openib.org/mailman/listinfo/openib-windows > > > > I found only references to svn://windows.openib.org, where > > 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | > > head -n 40' shows: > > > > ------------------------------------------------------------------------ > > r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line > > > > Set property svn:keywords "id" on all repository > > ------------------------------------------------------------------------ > > r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line > > > > [OPENSM] When running as a service, if all ports are down, use the first port. > > ------------------------------------------------------------------------ > > r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines > > > > [OPENSM] When trying to set to INIT the remote port of the given physical port > > in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no > > check whether the physical port in null (e.g., if it's disconnected). 
> > ------------------------------------------------------------------------ > > r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line > > > > [opensm] Base service status on results that were received from opensm log messages. > > ------------------------------------------------------------------------ > > r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line > > > > [OPENSM] missed fix for OPENSM logging to System Event Log > > ------------------------------------------------------------------------ > > r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines > > > > [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! > > 2. bugfix: added message file for correct logging to System Event Log. > > 3. bugfix: wrong passing parameters in server mode; > > 4. bugfix: error in table of parameters > > > > ------------------------------------------------------------------------ > > r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line > > > > [opensm] Fix a trivial build break > > ------------------------------------------------------------------------ > > r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines > > > > if the guid2lid is corrupted, don't exit when running with -y option > > (don't exit on fatal) - just ignore the file > > > > > > > > Seems that development there was stopped in Aug 2006, and it doesn't > > have recent Win port patches. Am I looking in the wrong place? > > You were looking in the right place. It appears that I didn't describe > the development process correctly. I think this repository is updated > with stable OSM versions, after the code is tested. Any idea on when the next version is expected ? -- Hal > If you need more details, I think it's better for you to ask windows folks > directly, since as we see, my knowledge in this area is very limited. 
> > -- Yevgeny > > > Sasha > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Mon Jan 22 13:14:06 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 22 Jan 2007 13:14:06 -0800 Subject: [openib-general] [PATCH 2.6.20 2/2] ehca: ehca_irq.c: fix mismatched spin_unlock in irq handler In-Reply-To: <200701201620.16097.hnguyen@linux.vnet.ibm.com> (Hoang-Nam Nguyen's message of "Sat, 20 Jan 2007 16:20:15 +0100") References: <200701192251.01888.hnguyen@linux.vnet.ibm.com> <200701201620.16097.hnguyen@linux.vnet.ibm.com> Message-ID: Thanks, queued for 2.6.20 (although I left the line a little longer). I think the right thing to do is restructure this function so you don't have lines starting 6 tab stops to the right. - R. From HNGUYEN at de.ibm.com Mon Jan 22 13:33:25 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Mon, 22 Jan 2007 16:33:25 -0500 Subject: [openib-general] [PATCH 2.6.20 2/2] ehca: ehca_irq.c: fix mismatched spin_unlock in irq handler In-Reply-To: Message-ID: > I think the right thing to do is restructure this function so you > don't have lines starting 6 tab stops to the right. Yes. Have reworked this irq handler and still struggling with testing. When we feel comfortable with the test result, we'll post it here (soon) and hopefully it is easier to read. Regards Nam From mst at mellanox.co.il Mon Jan 22 13:37:47 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 22 Jan 2007 23:37:47 +0200 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <2BAEB20B-2C21-4365-8796-467E0CA58460@cisco.com> References: <45B4D79D.3030909@mellanox.co.il> <2BAEB20B-2C21-4365-8796-467E0CA58460@cisco.com> Message-ID: <20070122213747.GI1105@mellanox.co.il> Vlad will test outgoing mail in the morning. Is bugzilla mailgateway functioning as well? At which address? Quoting Jeff Squyres : Subject: Re: resolving sending mails from OFA new server Vlad/Michael -- Can you try again? The issue should be resolved now. On Jan 22, 2007, at 10:26 AM, Tziporet Koren wrote: > Hi Johann, > Vlad arranged daily build mails so everybody can be notified on > compilation status. > However there are technical problems that prevent us from sending > these mails that are related to the DNS > See thread: http://openib.org/pipermail/openib-general/2007-January/ > 031831.html > > Note that bugzilla update mails are not sent from the same reason. > > Since we going to have the code freeze and alpha soon (end of next > week) it will be very important to fix these isses > > Thanks, > Tziporet -- Jeff Squyres Server Virtualization Business Unit Cisco Systems _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From jsquyres at cisco.com Mon Jan 22 13:40:04 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 16:40:04 -0500 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <20070122213747.GI1105@mellanox.co.il> References: <45B4D79D.3030909@mellanox.co.il> <2BAEB20B-2C21-4365-8796-467E0CA58460@cisco.com> <20070122213747.GI1105@mellanox.co.il> Message-ID: <05A0FEF3-38E0-4CA4-AE92-7A4DFF1B7C31@cisco.com> On Jan 22, 2007, at 4:37 PM, Michael S. 
Tsirkin wrote: > Vlad will test outgoing mail in the morning. > Is bugzilla mailgateway functioning as well? > At which address? I don't know anything about the bugzilla interface -- who set it up? What exactly do you need? -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mst at mellanox.co.il Mon Jan 22 13:43:17 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 23:43:17 +0200 Subject: [openib-general] .openfabrics.org names In-Reply-To: <18010248-A970-470B-B92C-592E16820CBA@cisco.com> References: <18010248-A970-470B-B92C-592E16820CBA@cisco.com> Message-ID: <20070122214317.GC10025@mellanox.co.il> > Quoting Jeff Squyres : > Subject: .openfabrics.org names > > The name "staging.openfabrics.org" was really intended to be > temporary until the old openfabrics.org was taken offline and > replaced with the new one. BTW, SSL certificate was purchased only for staging.openfabrics.org. And, that certificate has expired. So when you visit bugs.openfabrics.org, you can not see anything, actually: firefox throws 1000 warnings about security. I think we need an updated SSL cert, and for proper name. Who's handling these things? -- MST From jsquyres at cisco.com Mon Jan 22 13:44:19 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 16:44:19 -0500 Subject: [openib-general] .openfabrics.org names In-Reply-To: <20070122214317.GC10025@mellanox.co.il> References: <18010248-A970-470B-B92C-592E16820CBA@cisco.com> <20070122214317.GC10025@mellanox.co.il> Message-ID: On Jan 22, 2007, at 4:43 PM, Michael S. Tsirkin wrote: >> The name "staging.openfabrics.org" was really intended to be >> temporary until the old openfabrics.org was taken offline and >> replaced with the new one. > > BTW, SSL certificate was purchased only for staging.openfabrics.org. > And, that certificate has expired. FWIW, I think it was just a self-signed cert. It wasn't actually purchased.
> So when you visit bugs.openfabrics.org, you can not see anything, > actually: firefox throws 1000 warnings about security. That's a lot of warnings. :-) > I think we need an updated SSL cert, and for proper name. > Who's handling these things? Who handles the OFA money? -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From halr at voltaire.com Mon Jan 22 13:43:51 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 22 Jan 2007 16:43:51 -0500 Subject: [openib-general] OpenSM/osm_helper.c: Change output format of DR path from hex to decimal port numbers Message-ID: <1169502206.23046.15898.camel@hal.voltaire.com> OpenSM/osm_helper.c: In osm_dump_dr_smp, osm_dump_dr_path, and osm_dump_smp_dr_path, change output format of DR path from hex to decimal port numbers to make compatible with OpenIB diagnostic tools Signed-off-by: Hal Rosenstock diff --git a/osm/opensm/ChangeLog b/osm/opensm/ChangeLog index 145d1c5..2c6564c 100644 --- a/osm/opensm/ChangeLog +++ b/osm/opensm/ChangeLog @@ -1,3 +1,7 @@ +2007-01-22 Hal Rosenstock + + * osm_helper.c: Change DR path format from [%X] to %d, + 2007-01-08 Sasha Khapyorsky * osm_log.c: Add osm_log_reopen_file API diff --git a/osm/opensm/osm_helper.c b/osm/opensm/osm_helper.c index 74713b4..50a3413 100644 --- a/osm/opensm/osm_helper.c +++ b/osm/opensm/osm_helper.c @@ -1966,7 +1966,10 @@ osm_dump_dr_smp( for( i = 0; i <= p_smp->hop_count; i++ ) { - sprintf( line, "[%X]", p_smp->initial_path[i] ); + if (i == 0) + sprintf( line, "%d", p_smp->initial_path[i] ); + else + sprintf( line, ",%d", p_smp->initial_path[i] ); strcat( buf, line ); } @@ -1974,7 +1977,10 @@ osm_dump_dr_smp( for( i = 0; i <= p_smp->hop_count; i++ ) { - sprintf( line, "[%X]", p_smp->return_path[i] ); + if (i == 0) + sprintf( line, "%d", p_smp->return_path[i] ); + else + sprintf( line, ",%d", p_smp->return_path[i] ); strcat( buf, line ); } @@ -2114,7 +2120,10 @@ osm_dump_dr_path( for( i = 0; i <= p_path->hop_count; i++ ) { - sprintf( line, "[%X]", 
p_path->path[i] ); + if (i == 0) + sprintf( line, "%d", p_path->path[i] ); + else + sprintf( line, ",%d", p_path->path[i] ); strcat( buf, line ); } osm_log( p_log, log_level, "%s\n", buf ); @@ -2141,7 +2150,10 @@ osm_dump_smp_dr_path( for( i = 0; i <= p_smp->hop_count; i++ ) { - sprintf( line, "[%X]", p_smp->initial_path[i] ); + if (i == 0) + sprintf( line, "%d", p_smp->initial_path[i] ); + else + sprintf( line, ",%d", p_smp->initial_path[i] ); strcat( buf, line ); } @@ -2149,7 +2161,10 @@ osm_dump_smp_dr_path( for( i = 0; i <= p_smp->hop_count; i++ ) { - sprintf( line, "[%X]", p_smp->return_path[i] ); + if (i == 0) + sprintf( line, "%d", p_smp->return_path[i] ); + else + sprintf( line, ",%d", p_smp->return_path[i] ); strcat( buf, line ); } From mst at mellanox.co.il Mon Jan 22 13:54:24 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 22 Jan 2007 23:54:24 +0200 Subject: [openib-general] .openfabrics.org names In-Reply-To: References: Message-ID: <20070122215424.GD10025@mellanox.co.il> > Quoting Jeff Squyres : > Subject: Re: .openfabrics.org names > > On Jan 22, 2007, at 4:43 PM, Michael S. Tsirkin wrote: > > >> The name "staging.openfabrics.org" was really intended to be > >> temporary until the old openfabrics.org was taken offline and > >> replaced with the new one. > > > > BTW, SSL certificate was purchased only for staging,openfabrics.org. > > And, that certificate has expired. > > FWIW, I think it was just a self-signed cert. It wasn't actually > purchased. Maybe someone can do this for bugs.openfabrics.org for now? -- MST From mst at mellanox.co.il Mon Jan 22 13:57:35 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 22 Jan 2007 23:57:35 +0200 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <05A0FEF3-38E0-4CA4-AE92-7A4DFF1B7C31@cisco.com> References: <05A0FEF3-38E0-4CA4-AE92-7A4DFF1B7C31@cisco.com> Message-ID: <20070122215735.GF10025@mellanox.co.il> > Quoting Jeff Squyres : > Subject: Re: resolving sending mails from OFA new server > > On Jan 22, 2007, at 4:37 PM, Michael S. Tsirkin wrote: > > > Vlad will test outgoing mail in the morning. > > Is bugzilla mailgateway functioning as well? > > At which address? > > I don't know anything about the bugzilla interface -- who set it up? > What exactly do you need? Mail coming in for bugzilla-daemon address with subject [Bug XXX] to get filed under Bug XXX in bugzilla. I think for this it just needs to be piped into some script possibly contrib/bug_email_append.pl, or something like this. So you can take bugzilla mail and just reply to it from MUA. E.g. kernel.org has this I think. -- MST From mst at mellanox.co.il Mon Jan 22 14:02:31 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 00:02:31 +0200 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <2BAEB20B-2C21-4365-8796-467E0CA58460@cisco.com> References: <45B4D79D.3030909@mellanox.co.il> <2BAEB20B-2C21-4365-8796-467E0CA58460@cisco.com> Message-ID: <20070122220231.GG10025@mellanox.co.il> > Quoting Jeff Squyres : > Subject: Re: resolving sending mails from OFA new server > > Vlad/Michael -- > > Can you try again? The issue should be resolved now. Outgoing mail seems to work. Vlad'll check more tomorrow. 
-- MST From jsquyres at cisco.com Mon Jan 22 14:10:37 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 17:10:37 -0500 Subject: [openib-general] .openfabrics.org names In-Reply-To: <20070122215424.GD10025@mellanox.co.il> References: <20070122215424.GD10025@mellanox.co.il> Message-ID: <5986E26E-9CC6-4AFA-8296-D21D20467125@cisco.com> On Jan 22, 2007, at 4:54 PM, Michael S. Tsirkin wrote: >>> BTW, SSL certificate was purchased only for staging,openfabrics.org. >>> And, that certificate has expired. >> >> FWIW, I think it was just a self-signed cert. It wasn't actually >> purchased. > > Maybe someone can do this for bugs.openfabrics.org for now? Are you asking for a self-signed cert on bugs.openfabrics.org? Sure, that should be do-able. Michael -- could you do that? Thanks! -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From jsquyres at cisco.com Mon Jan 22 14:10:55 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 17:10:55 -0500 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <20070122220231.GG10025@mellanox.co.il> References: <45B4D79D.3030909@mellanox.co.il> <2BAEB20B-2C21-4365-8796-467E0CA58460@cisco.com> <20070122220231.GG10025@mellanox.co.il> Message-ID: Great; thanks. On Jan 22, 2007, at 5:02 PM, Michael S. Tsirkin wrote: >> Quoting Jeff Squyres : >> Subject: Re: resolving sending mails from OFA new server >> >> Vlad/Michael -- >> >> Can you try again? The issue should be resolved now. > > Outgoing mail seems to work. > Vlad'll check more tomorrow. 
> > -- > MST -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mshefty at ichips.intel.com Mon Jan 22 14:21:38 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 14:21:38 -0800 Subject: [openib-general] [RFC/PATCH v2] rdma/cma: use the ipoib broadcast group qkey In-Reply-To: References: Message-ID: <45B538F2.6020202@ichips.intel.com> Or Gerlitz wrote: > Modify the kernel rdma cm use the ipoib broadcast group qkey instead a qkey > of its own for its UD IDs/QPs. For RDMA_PS_UDP ID, the qkey is stored in > struct rdma_id_private and delivered also in ADDR_RESOLVED and > CONNECT_REQUEST events. The user space library learns the qkey from these > events and use them when it is called to create UD QP. Overall, I think this is a reasonable approach. I would just like the framework to provide a way to restrict any userspace application from joining an ipoib multicast group. What do you think of the idea of creating a new port space specific to ipoib, similar to what's provided for SDP? For example, add: enum rdma_port_space { RDMA_PS_SDP = 0x0001, + RDMA_PS_IPOIB = 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, The qkey/MGID would adjust based on the port space, which is specified as part of rdma_create_id(). Use of RDMA_PS_IPOIB could then be restricted using a check similar to that used for port assignment (see cma_use_port() - capable(CAP_NET_BIND_SERVICE)). 
- Sean From Brian.Cain at ge.com Mon Jan 22 14:22:18 2007 From: Brian.Cain at ge.com (Cain, Brian (GE Healthcare)) Date: Mon, 22 Jan 2007 17:22:18 -0500 Subject: [openib-general] .openfabrics.org names In-Reply-To: <5986E26E-9CC6-4AFA-8296-D21D20467125@cisco.com> References: <20070122215424.GD10025@mellanox.co.il> <5986E26E-9CC6-4AFA-8296-D21D20467125@cisco.com> Message-ID: <2376B63A5AF8564F8A2A2D76BC6DB033020F7753@CINMLVEM11.e2k.ad.ge.com> > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Jeff Squyres > Sent: Monday, January 22, 2007 4:11 PM > To: Michael S. Tsirkin > Cc: openib; Michael Paichi Lee > Subject: Re: [openib-general] .openfabrics.org names > > On Jan 22, 2007, at 4:54 PM, Michael S. Tsirkin wrote: > > >>> BTW, SSL certificate was purchased only for > staging,openfabrics.org. > >>> And, that certificate has expired. > >> > >> FWIW, I think it was just a self-signed cert. It wasn't actually > >> purchased. > > > > Maybe someone can do this for bugs.openfabrics.org for now? > > Are you asking for a self-signed cert on bugs.openfabrics.org? > > Sure, that should be do-able. Michael -- could you do that? Might I recommend a cert signed by CACert (http://www.cacert.org/)? It's no more expensive than self signed and easier to trust. -Brian From mst at mellanox.co.il Mon Jan 22 14:27:10 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 00:27:10 +0200 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <3D84A59A1AD3584DA02AEAD240E8863F03951FE2@ES22SNLNT.srn.sandia.gov> References: <3D84A59A1AD3584DA02AEAD240E8863F03951FE2@ES22SNLNT.srn.sandia.gov> Message-ID: <20070122222710.GK10025@mellanox.co.il> But, which address do I send it to? bugzilla-daemon at lists.openfabrics.org bounced. Quoting Lee, Michael Paichi : Subject: RE: resolving sending mails from OFA new server Michael, This should be working again right now. 
The change that fixed outbound e-mail from vlad at ssh.openfabrics.org to openib-general at openib.org should have fixed this problem as well. Michael -----Original Message----- From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] Sent: Monday, January 22, 2007 1:58 PM To: Jeff Squyres Cc: Tziporet Koren; Johann George; OPENIB; Lee, Michael Paichi Subject: Re: resolving sending mails from OFA new server > Quoting Jeff Squyres : > Subject: Re: resolving sending mails from OFA new server > > On Jan 22, 2007, at 4:37 PM, Michael S. Tsirkin wrote: > > > Vlad will test outgoing mail in the morning. > > Is bugzilla mailgateway functioning as well? > > At which address? > > I don't know anything about the bugzilla interface -- who set it up? > What exactly do you need? Mail coming in for bugzilla-daemon address with subject [Bug XXX] to get filed under Bug XXX in bugzilla. I think for this it just needs to be piped into some script possibly contrib/bug_email_append.pl, or something like this. So you can take bugzilla mail and just reply to it from MUA. E.g. kernel.org has this I think. -- MST -- MST From jsquyres at cisco.com Mon Jan 22 14:29:35 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 17:29:35 -0500 Subject: [openib-general] .openfabrics.org names In-Reply-To: <2376B63A5AF8564F8A2A2D76BC6DB033020F7753@CINMLVEM11.e2k.ad.ge.com> References: <20070122215424.GD10025@mellanox.co.il> <5986E26E-9CC6-4AFA-8296-D21D20467125@cisco.com> <2376B63A5AF8564F8A2A2D76BC6DB033020F7753@CINMLVEM11.e2k.ad.ge.com> Message-ID: On Jan 22, 2007, at 5:22 PM, Cain, Brian ((GE Healthcare)) wrote: >> Are you asking for a self-signed cert on bugs.openfabrics.org? >> >> Sure, that should be do-able. Michael -- could you do that? > > Might I recommend a cert signed by CACert (http://www.cacert.org/)? > It's no more expensive than self signed and easier to trust. My $0.02 (and then I'm out of this conversation :-) ): 1. CACert has no degree of trust. 
If anyone can get them for free, then you have no guarantees about anything. You get SSL, but you can't trust it. 2. If we have no money to spend on certificates, it's not too difficult to create our own root CA and sign all of our certs from it. Hence, it's still just one cert to import into your browser. 3. If we have money to spend on certificates, then we should spend it and get ones signed by verisign or someone that already has a CA in popular browsers. -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mst at mellanox.co.il Mon Jan 22 14:34:51 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 00:34:51 +0200 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <20070122222710.GK10025@mellanox.co.il> References: <3D84A59A1AD3584DA02AEAD240E8863F03951FE2@ES22SNLNT.srn.sandia.gov> <20070122222710.GK10025@mellanox.co.il> Message-ID: <20070122223451.GL10025@mellanox.co.il> Tried bugzilla-daemon at openfabrics.org, doesn't work for me either. Quoting r. Michael S. Tsirkin : Subject: Re: resolving sending mails from OFA new server But, which address do I send it to? bugzilla-daemon at lists.openfabrics.org bounced. Quoting Lee, Michael Paichi : Subject: RE: resolving sending mails from OFA new server Michael, This should be working again right now. The change that fixed outbound e-mail from vlad at ssh.openfabrics.org to openib-general at openib.org should have fixed this problem as well. Michael -----Original Message----- From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] Sent: Monday, January 22, 2007 1:58 PM To: Jeff Squyres Cc: Tziporet Koren; Johann George; OPENIB; Lee, Michael Paichi Subject: Re: resolving sending mails from OFA new server > Quoting Jeff Squyres : > Subject: Re: resolving sending mails from OFA new server > > On Jan 22, 2007, at 4:37 PM, Michael S. Tsirkin wrote: > > > Vlad will test outgoing mail in the morning. > > Is bugzilla mailgateway functioning as well? 
> > At which address? > > I don't know anything about the bugzilla interface -- who set it up? > What exactly do you need? Mail coming in for bugzilla-daemon address with subject [Bug XXX] to get filed under Bug XXX in bugzilla. I think for this it just needs to be piped into some script possibly contrib/bug_email_append.pl, or something like this. So you can take bugzilla mail and just reply to it from MUA. E.g. kernel.org has this I think. -- MST -- MST _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- MST From mplee at sandia.gov Mon Jan 22 14:45:21 2007 From: mplee at sandia.gov (Lee, Michael Paichi) Date: Mon, 22 Jan 2007 15:45:21 -0700 Subject: [openib-general] resolving sending mails from OFA new server References: <3D84A59A1AD3584DA02AEAD240E8863F03951FE2@ES22SNLNT.srn.sandia.gov> <20070122222710.GK10025@mellanox.co.il> Message-ID: <3D84A59A1AD3584DA02AEAD240E8863F0395201A@ES22SNLNT.srn.sandia.gov> Michael, I'm somewhat confused. What are you trying to send...to who? My understanding of the problem was that notification emails from bugzilla (which up until now has been bugzilla-daemon at openib.org) to openib-general at openib.org stopped working. The notification problem should be fixed now...changes to buzilla should appear on openib-general at openib.org. These e-mails most likely will come from bugzilla-daemon at lists.openfabrics.org, given that bugzilla now resides on that server. I know that e-mails to bugzilla-daemon at lists.openfabrics.org currently bounce because of an "user unknown in local recipient table," but does that matter right now...do you (or anybody on the list) actually send e-mails to the bugzilla-daemon user? If there's a need, let me know and I'll try to correct the problem. Michael -----Original Message----- From: Michael S. 
Tsirkin [mailto:mst at mellanox.co.il] Sent: Monday, January 22, 2007 2:27 PM To: Lee, Michael Paichi Cc: Jeff Squyres; openib-general at openib.org Subject: Re: resolving sending mails from OFA new server But, which address do I send it to? bugzilla-daemon at lists.openfabrics.org bounced. Quoting Lee, Michael Paichi : Subject: RE: resolving sending mails from OFA new server Michael, This should be working again right now. The change that fixed outbound e-mail from vlad at ssh.openfabrics.org to openib-general at openib.org should have fixed this problem as well. Michael -----Original Message----- From: Michael S. Tsirkin [mailto:mst at mellanox.co.il] Sent: Monday, January 22, 2007 1:58 PM To: Jeff Squyres Cc: Tziporet Koren; Johann George; OPENIB; Lee, Michael Paichi Subject: Re: resolving sending mails from OFA new server > Quoting Jeff Squyres : > Subject: Re: resolving sending mails from OFA new server > > On Jan 22, 2007, at 4:37 PM, Michael S. Tsirkin wrote: > > > Vlad will test outgoing mail in the morning. > > Is bugzilla mailgateway functioning as well? > > At which address? > > I don't know anything about the bugzilla interface -- who set it up? > What exactly do you need? Mail coming in for bugzilla-daemon address with subject [Bug XXX] to get filed under Bug XXX in bugzilla. I think for this it just needs to be piped into some script possibly contrib/bug_email_append.pl, or something like this. So you can take bugzilla mail and just reply to it from MUA. E.g. kernel.org has this I think. -- MST -- MST From jsquyres at cisco.com Mon Jan 22 14:55:06 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 17:55:06 -0500 Subject: [openib-general] Mailing lists Message-ID: <461EE498-11B4-448F-9469-D66EC2720195@cisco.com> All -- As you can see from the mails from Michaels Lee and Tsirkin, we're working on moving e-mail to the new server. We had in mind some changes to propose: 1. 
Rename the openib-general list to be general at lists.openfabrics.org. 2. Rename the openfabrics-ewg list to be ewg at lists.openfabrics.org. 3. ...similarly rename all other lists to remove the now-redundant openib-* and/or openfabrics-* prefixes. --> For items 1-3, mail aliases can be put in place so that people's current addressbooks won't break (i.e., if you mail openib- general at openib.org, it'll still get to the general at lists.openfabrics.org list), but the "real" list names will be the shorter names, etc. So mailman URLs will change, "from" addresses will change, etc. 4. Have a general commits at lists.openfabrics.org list that will get mails of all SVN commits and commit messages from anyone who chooses to have their git commit mails sent there. 5. Have a bugs at lists.openfabrics.org list that will get *all* bugzilla activity. These are just ideas, and not from the guys who are doing OFA development (i.e., Michael Lee and myself). So your input would be welcome here... Comments? -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mst at mellanox.co.il Mon Jan 22 14:59:09 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 00:59:09 +0200 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <3D84A59A1AD3584DA02AEAD240E8863F0395201A@ES22SNLNT.srn.sandia.gov> References: <3D84A59A1AD3584DA02AEAD240E8863F0395201A@ES22SNLNT.srn.sandia.gov> Message-ID: <20070122225909.GN10025@mellanox.co.il> > > Quoting Lee, Michael Paichi : > Subject: RE: resolving sending mails from OFA new server > > Michael, > > I'm somewhat confused. What are you trying to send...to who? My > understanding of the problem was that notification emails from bugzilla > (which up until now has been bugzilla-daemon at openib.org) to > openib-general at openib.org stopped working. The notification problem > should be fixed now...changes to buzilla should appear on > openib-general at openib.org. 
These e-mails most likely will come from > bugzilla-daemon at lists.openfabrics.org, given that bugzilla now resides > on that server. I know that e-mails to > bugzilla-daemon at lists.openfabrics.org currently bounce because of an > "user unknown in local recipient table," but does that matter right > now...do you (or anybody on the list) actually send e-mails to the > bugzilla-daemon user? If there's a need, let me know and I'll try to > correct the problem. > Michael, what I'm trying to use is the bugzilla email gateway. Explanation from my previous mail: What it's supposed to do is to file mail coming in for bugzilla-daemon address with subject [Bug XXX] under Bug XXX in bugzilla. I think for this to work mail to bugzilla-daemon just needs to be piped into some script possibly contrib/bug_email_append.pl, or something like this. So you can take bugzilla mail and just reply to it from MUA. E.g. kernel.org bugzilla setup has this I think. -- MST From mst at mellanox.co.il Mon Jan 22 15:03:38 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 01:03:38 +0200 Subject: [openib-general] Mailing lists In-Reply-To: <461EE498-11B4-448F-9469-D66EC2720195@cisco.com> References: <461EE498-11B4-448F-9469-D66EC2720195@cisco.com> Message-ID: <20070122230338.GO10025@mellanox.co.il> > Quoting Jeff Squyres : > Subject: Mailing lists > > All -- > > As you can see from the mails from Michaels Lee and Tsirkin, we're > working on moving e-mail to the new server. We had in mind some > changes to propose: > > 1. Rename the openib-general list to be general at lists.openfabrics.org. > > 2. Rename the openfabrics-ewg list to be ewg at lists.openfabrics.org. > > 3. ...similarly rename all other lists to remove the now-redundant > openib-* and/or openfabrics-* prefixes. 
> > --> For items 1-3, mail aliases can be put in place so that people's > current addressbooks won't break (i.e., if you mail openib- > general at openib.org, it'll still get to the > general at lists.openfabrics.org list), but the "real" list names will > be the shorter names, etc. So mailman URLs will change, "from" > addresses will change, etc. Personally, I don't really care much, either way. Is it really worth the time? > 4. Have a general commits at lists.openfabrics.org list that will get > mails of all SVN commits and commit messages from anyone who chooses > to have their git commit mails sent there. > > 5. Have a bugs at lists.openfabrics.org list that will get *all* > bugzilla activity. These two sound quite useful. -- MST From arlin.r.davis at intel.com Mon Jan 22 15:36:41 2007 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 22 Jan 2007 15:36:41 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <000101c73e64$2e710900$8698070a@amr.corp.intel.com> Message-ID: <000101c73e7e$2bc54b20$4297070a@amr.corp.intel.com> >>OK, we can switch to master. Then DAPL would need to be updated, right? >>Arlin? > >I think DAPL stays with rdma_ucm, but Arlin can confirm. I created a 1.2 uDAPL branch to use with ofed_1_2 builds. From changquing.tang at hp.com Mon Jan 22 16:06:40 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 23 Jan 2007 00:06:40 -0000 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: Message-ID: <349DCDA352EACF42A0C49FA6DCEA84032998E2@G3W0634.americas.hpqcorp.net> > > when RDMA is used, a message is transferred from card A (in node > > A) to card B (in node B), card B delivers the message to > to user buffer, > and sends ACK to card A, but ACK is lost > due to switch fail. 
So process > on node A get fail for this > transfer, but process on node B check the > memory and get > the message(success). > > > > If send/recv(SRQ) is used, is it possible that process on node A > > get failure, but process on node B successfully get the message ? > > Yes, of course, for exactly the same reason you describe > above (lost ACK). Thanks. So it is NOT possible that sender gets success, but receiver gets failure, right ? Also another question, when ibv_post_send() is called, we only specify IBV_SEND_SIGNALED once in a while to improve performance because completion event is not generated for other messages. But when there is an error on the QP connection, we hope to get event report for all messages. Does the driver/libibverbs support this ? --CQ > > - R. > From jsquyres at cisco.com Mon Jan 22 16:10:11 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 22 Jan 2007 19:10:11 -0500 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <20070122225909.GN10025@mellanox.co.il> References: <3D84A59A1AD3584DA02AEAD240E8863F0395201A@ES22SNLNT.srn.sandia.gov> <20070122225909.GN10025@mellanox.co.il> Message-ID: <928677AD-4A85-461E-97E7-44C16D58D04D@cisco.com> On Jan 22, 2007, at 5:59 PM, Michael S. Tsirkin wrote: > Michael, what I'm trying to use is the bugzilla email gateway. Ok. This is new functionality that I don't think we had on the old server, right? It'll be easier to get all the other lists migrated and working first rather than put in a bunch of hackarounds for new functionality before various DNS records and whatnot are switched, only to have to take them down once the electronic resources are finalized. 
-- Jeff Squyres Server Virtualization Business Unit Cisco Systems From caitlinb at broadcom.com Mon Jan 22 16:22:18 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Mon, 22 Jan 2007 16:22:18 -0800 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <349DCDA352EACF42A0C49FA6DCEA84032998E2@G3W0634.americas.hpqcorp.net> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1FBC308@NT-SJCA-0751.brcm.ad.broadcom.com> openib-general-bounces at openib.org wrote: > > > when RDMA is used, a message is transferred > from card A (in node >> > A) to card B (in node B), card B delivers the message to to user >> buffer, > and sends ACK to card A, but ACK is lost due to switch >> fail. So process > on node A get fail for this transfer, but process >> on node B check the > memory and get the message(success). > >> > If send/recv(SRQ) is used, is it possible that process on node A >> > get failure, but process on node B successfully get the message ? >> >> Yes, of course, for exactly the same reason you describe above (lost >> ACK). > > Thanks. So it is NOT possible that sender gets success, but > receiver gets failure, right ? > The fact that the message was acknowledged by card B at best means that the message is fully in the receiver's memory. Card B cannot tell the Sender that the receiver lived long enough to process or even notice the message. The only thing that the Sender should infer from a send completion is that the connection is still alive and that it no longer needs to maintain its copy of the message. It also knows that the Receiver will not receive any later message before it receives this one. If there is an application specific reason to know more then it needs to rely on an application layer message from the receiver. For most applications, such a response is a natural part of the application layer protocol anyway. 
From rdreier at cisco.com Mon Jan 22 20:34:33 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 22 Jan 2007 20:34:33 -0800 Subject: [openib-general] [PATCH] IB/ipoib_cm: reduce MTU on connected->datagram mode change In-Reply-To: <20070117160047.GA10889@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 17 Jan 2007 18:00:47 +0200") References: <20070117160047.GA10889@mellanox.co.il> Message-ID: Thanks, rolled into my ipoib-cm branch From rdreier at cisco.com Mon Jan 22 20:42:48 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 22 Jan 2007 20:42:48 -0800 Subject: [openib-general] Minutes for January 15, 2007 teleconference about OFED 1.2 development progress toward code freeze In-Reply-To: <349DCDA352EACF42A0C49FA6DCEA84032998E2@G3W0634.americas.hpqcorp.net> (Changqing Tang's message of "Tue, 23 Jan 2007 00:06:40 -0000") References: <349DCDA352EACF42A0C49FA6DCEA84032998E2@G3W0634.americas.hpqcorp.net> Message-ID: > Thanks. So it is NOT possible that sender gets success, but receiver > gets failure, right ? It's probably better to read the spec to get clarity on these corner cases, but it wouldn't surprise me if there were a scenario where a receiver sends an ACK but then fails to complete a request successfully, maybe because a bus error occurs while writing the data to memory or something like that. > Also another question, when it_post_send() is called, we only specify > IBV_SEND_SIGNALED once a while to improve performance because completion > event is not generated for other messages. But when there is an error on > the QP connection, we hope to get event report for all messages. Does > the driver/libibverbs support this ? Yes, libibverbs and all drivers should follow the IB spec and always generate a completion for all requests that complete with an unsuccessful status. - R. 
From or.gerlitz at gmail.com Mon Jan 22 20:42:54 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Tue, 23 Jan 2007 06:42:54 +0200 Subject: [openib-general] [RFC/PATCH v2] rdma/cma: use the ipoib broadcast group qkey In-Reply-To: <45B538F2.6020202@ichips.intel.com> References: <45B538F2.6020202@ichips.intel.com> Message-ID: <15ddcffd0701222042r13bbfed1ub3d1cb2a117afac2@mail.gmail.com> On 1/23/07, Sean Hefty wrote: > Or Gerlitz wrote: > > Modify the kernel rdma cm use the ipoib broadcast group qkey instead a qkey > > of its own for its UD IDs/QPs. For RDMA_PS_UDP ID, the qkey is stored in > > struct rdma_id_private and delivered also in ADDR_RESOLVED and > > CONNECT_REQUEST events. The user space library learns the qkey from these > > events and use them when it is called to create UD QP. > > Overall, I think this is a reasonable approach. I would just like the framework > to provide a way to restrict any userspace application from joining an ipoib > multicast group. What do you think of the idea of creating a new port space > specific to ipoib, similar to what's provided for SDP? Basically, I am positive to this, under the assumption that it will be possible for --non-- root user space application to create RDMA_PS_IPOIB IDs and use them as I would have been doing with RDMA_PS_UDP IDs. > For example, add: > enum rdma_port_space { > RDMA_PS_SDP = 0x0001, > + RDMA_PS_IPOIB = 0x0002, > RDMA_PS_TCP = 0x0106, > RDMA_PS_UDP = 0x0111, > > The qkey/MGID would adjust based on the port space, which is specified as part > of rdma_create_id(). OK > of rdma_create_id(). Use of RDMA_PS_IPOIB could then be restricted using a > check similar to that used for port assignment (see cma_use_port() - > capable(CAP_NET_BIND_SERVICE)). I don't want to lose a day, so if you don't mind, I would ask you for a crash course here, I don't think I fully understand the following lines from cma_use_port() ... 
1753 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; 1754 snum = ntohs(sin->sin_port); 1755 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 1756 return -EACCES; what would be the equivalent check for RDMA_PS_IPOIB? and would this check be done only on rdma_create_id time? Or. From mst at mellanox.co.il Mon Jan 22 21:05:51 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 07:05:51 +0200 Subject: [openib-general] resolving sending mails from OFA new server In-Reply-To: <928677AD-4A85-461E-97E7-44C16D58D04D@cisco.com> References: <3D84A59A1AD3584DA02AEAD240E8863F0395201A@ES22SNLNT.srn.sandia.gov> <20070122225909.GN10025@mellanox.co.il> <928677AD-4A85-461E-97E7-44C16D58D04D@cisco.com> Message-ID: <20070123050503.GT10025@mellanox.co.il> > Quoting Jeff Squyres : > Subject: Re: resolving sending mails from OFA new server > > On Jan 22, 2007, at 5:59 PM, Michael S. Tsirkin wrote: > > > Michael, what I'm trying to use is the bugzilla email gateway. > > Ok. This is new functionality that I don't think we had on the old > server, right? Yes. 
-- MST From eitan at sw053.yok.mtl.com Mon Jan 22 21:26:17 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Tue, 23 Jan 2007 07:26:17 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-23:normal completion Message-ID: <200701230526.l0N5QHBt004706@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_22_09:01:01_2007 2db2f5 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From krkumar2 at in.ibm.com Mon Jan 22 21:34:58 2007 From: krkumar2 at in.ibm.com (Krishna Kumar) Date: Tue, 23 Jan 2007 11:04:58 +0530 Subject: [openib-general] [PATCH] libibverbs: should not access NULL pointer. 
Message-ID: <20070123053458.8302.67181.sendpatchset@localhost.localdomain> diff -ruNp org/libibverbs/src/device.c new/libibverbs/src/device.c --- org/libibverbs/src/device.c 2007-01-16 09:11:33.000000000 +0530 +++ new/libibverbs/src/device.c 2007-01-23 10:57:46.000000000 +0530 @@ -63,8 +63,10 @@ struct ibv_device **ibv_get_device_list( num_devices = ibverbs_init(&device_list); l = calloc(num_devices + 1, sizeof (struct ibv_device *)); - for (i = 0; i < num_devices; ++i) - l[i] = device_list[i]; + if (l) { + for (i = 0; i < num_devices; ++i) + l[i] = device_list[i]; + } pthread_mutex_unlock(&device_list_lock); From mst at mellanox.co.il Mon Jan 22 21:41:44 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 07:41:44 +0200 Subject: [openib-general] [PATCH] libibverbs: should not access NULL pointer. In-Reply-To: <20070123053458.8302.67181.sendpatchset@localhost.localdomain> References: <20070123053458.8302.67181.sendpatchset@localhost.localdomain> Message-ID: <20070123054144.GU10025@mellanox.co.il> > Quoting Krishna Kumar : > Subject: [PATCH] libibverbs: should not access NULL pointer. > > diff -ruNp org/libibverbs/src/device.c new/libibverbs/src/device.c > --- org/libibverbs/src/device.c 2007-01-16 09:11:33.000000000 +0530 > +++ new/libibverbs/src/device.c 2007-01-23 10:57:46.000000000 +0530 > @@ -63,8 +63,10 @@ struct ibv_device **ibv_get_device_list( > num_devices = ibverbs_init(&device_list); > > l = calloc(num_devices + 1, sizeof (struct ibv_device *)); BTW, Roland, l = calloc(num_devices + 1, sizeof *l); would be a bit prettier, would it not? -- MST From rdreier at cisco.com Mon Jan 22 21:49:17 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 22 Jan 2007 21:49:17 -0800 Subject: [openib-general] [PATCH] libibverbs: should not access NULL pointer. 
In-Reply-To: <20070123053458.8302.67181.sendpatchset@localhost.localdomain> (Krishna Kumar's message of "Tue, 23 Jan 2007 11:04:58 +0530") References: <20070123053458.8302.67181.sendpatchset@localhost.localdomain> Message-ID: I already fixed this in commit 789728f7, right? From mst at mellanox.co.il Mon Jan 22 22:13:42 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 08:13:42 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B5124F.60907@ichips.intel.com> References: <45B5124F.60907@ichips.intel.com> Message-ID: <20070123061342.GX10025@mellanox.co.il> > rdma-dev.git - multicast-sa_cache branch would be easiest for you Could you please rebase that to 2.6.20-rc5? -- MST From dotanb at dev.mellanox.co.il Mon Jan 22 22:49:14 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Tue, 23 Jan 2007 08:49:14 +0200 (IST) Subject: [openib-general] [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events In-Reply-To: <1169026741.20182.3.camel@mtls05.yok.mtl.com> References: <1169026741.20182.3.camel@mtls05.yok.mtl.com> Message-ID: <4421.212.72.208.162.1169534954.squirrel@dev.mellanox.co.il> > Added resource cleaning before end of pingpong tests + ack to CQ events. Roland, did you have the chance to check this patch? 
thanks Dotan From kliteyn at dev.mellanox.co.il Mon Jan 22 23:07:34 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 23 Jan 2007 09:07:34 +0200 Subject: [openib-general] [PATCH] osm: QoS: added qos class and service id to the path record In-Reply-To: <20070122175952.GG23783@sashak.voltaire.com> References: <45B4E0C2.8060102@dev.mellanox.co.il> <20070122175952.GG23783@sashak.voltaire.com> Message-ID: <45B5B436.7040506@dev.mellanox.co.il> Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 18:05 Mon 22 Jan , Yevgeny Kliteynik wrote: >> Hi Hal >> >> QoS patch: added qos class and service id to the path record >> >> Signed-off-by: Yevgeny Kliteynik > > Couple of comments... > >> --- >> osm/include/iba/ib_types.h | 149 +++++++++++++++++++++++++++++++--- >> osm/opensm/osm_helper.c | 8 +- >> osm/opensm/osm_sa_multipath_record.c | 2 +- >> osm/opensm/osm_sa_path_record.c | 5 +- >> osm/osmtest/osmtest.c | 2 +- >> 5 files changed, 147 insertions(+), 19 deletions(-) >> >> diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h >> index 22f7f62..7762ed2 100644 >> --- a/osm/include/iba/ib_types.h >> +++ b/osm/include/iba/ib_types.h >> @@ -1700,6 +1700,28 @@ ib_class_is_rmpp( >> #define IB_SMINFO_STATE_MASTER 3 >> /**********/ >> >> +/****d* IBA Base: Constants/IB_PATH_REC_SL_MASK >> +* NAME >> +* IB_PATH_REC_SL_MASK >> +* >> +* DESCRIPTION >> +* Mask for the sl field for path record >> +* >> +* SOURCE >> +*/ >> +#define IB_PATH_REC_SL_MASK 0xF >> + >> +/****d* IBA Base: Constants/IB_PATH_REC_QOS_CLASS_MASK >> +* NAME >> +* IB_PATH_REC_QOS_CLASS_MASK >> +* >> +* DESCRIPTION >> +* Mask for the QoS class field for path record >> +* >> +* SOURCE >> +*/ >> +#define IB_PATH_REC_QOS_CLASS_MASK 0xFFF0 >> + >> /****d* IBA Base: Constants/IB_PATH_REC_SELECTOR_MASK >> * NAME >> * IB_PATH_REC_SELECTOR_MASK >> @@ -2314,7 +2336,7 @@ ib_gid_get_guid( >> #include >> typedef struct _ib_path_rec >> { >> - uint8_t resv0[8]; >> + ib_net64_t service_id; >> ib_gid_t dgid; 
>> ib_gid_t sgid; >> ib_net16_t dlid; >> @@ -2323,7 +2345,7 @@ typedef struct _ib_path_rec >> uint8_t tclass; >> uint8_t num_path; >> ib_net16_t pkey; >> - ib_net16_t sl; >> + ib_net16_t qos_class_sl; >> uint8_t mtu; >> uint8_t rate; >> uint8_t pkt_life; >> @@ -2363,11 +2385,8 @@ typedef struct _ib_path_rec >> * pkey >> * Partition key (P_Key) to use on this path. >> * >> -* resv1 >> -* Reserved byte. >> -* >> -* sl >> -* Service level to use on this path. >> +* qos_class_sl >> +* QoS class and service level to use on this path. >> * >> * mtu >> * MTU and MTU selector fields to use on this path >> @@ -2388,6 +2407,7 @@ typedef struct _ib_path_rec >> *********/ >> >> /* Path Record Component Masks */ >> +#define IB_PR_COMPMASK_SERVICEID (CL_HTON64(((uint64_t)1)<<1)) >> #define IB_PR_COMPMASK_DGID (CL_HTON64(((uint64_t)1)<<2)) >> #define IB_PR_COMPMASK_SGID (CL_HTON64(((uint64_t)1)<<3)) >> #define IB_PR_COMPMASK_DLID (CL_HTON64(((uint64_t)1)<<4)) >> @@ -2400,7 +2420,7 @@ typedef struct _ib_path_rec >> #define IB_PR_COMPMASK_REVERSIBLE (CL_HTON64(((uint64_t)1)<<11)) >> #define IB_PR_COMPMASK_NUMBPATH (CL_HTON64(((uint64_t)1)<<12)) >> #define IB_PR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<13)) >> -#define IB_PR_COMPMASK_RESV1 (CL_HTON64(((uint64_t)1)<<14)) >> +#define IB_PR_COMPMASK_QOS_CLASS (CL_HTON64(((uint64_t)1)<<14)) >> #define IB_PR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<15)) >> #define IB_PR_COMPMASK_MTUSELEC (CL_HTON64(((uint64_t)1)<<16)) >> #define IB_PR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<17)) >> @@ -2658,6 +2678,7 @@ ib_path_rec_init_local( >> IN ib_net16_t slid, >> IN uint8_t num_path, >> IN ib_net16_t pkey, >> + IN uint16_t qos_class, >> IN uint8_t sl, >> IN uint8_t mtu_selector, >> IN uint8_t mtu, >> @@ -2673,8 +2694,8 @@ ib_path_rec_init_local( >> p_rec->slid = slid; >> p_rec->num_path = num_path; >> p_rec->pkey = pkey; >> - /* Lower 4 bits of path rec's SL are reserved. 
*/ >> - p_rec->sl = cl_ntoh16( sl ); >> + p_rec->qos_class_sl = cl_hton16( (sl & IB_PATH_REC_SL_MASK) | >> + (qos_class << 4) ); >> p_rec->mtu = (uint8_t)((mtu & IB_PATH_REC_BASE_MASK) | >> (uint8_t)(mtu_selector << 6)); >> p_rec->rate = (uint8_t)((rate & IB_PATH_REC_BASE_MASK) | >> @@ -2686,8 +2707,8 @@ ib_path_rec_init_local( >> /* Clear global routing fields for local path records */ >> p_rec->hop_flow_raw = 0; >> p_rec->tclass = 0; >> + p_rec->service_id = 0; >> >> - *((uint64_t*)p_rec->resv0) = 0; >> *((uint32_t*)p_rec->resv2) = 0; >> *((uint16_t*)p_rec->resv2 + 2) = 0; >> } >> @@ -2716,6 +2737,9 @@ ib_path_rec_init_local( >> * pkey >> * [in] Partition key (P_Key) to use on this path. >> * >> +* qos_class >> +* [in] QoS class to use on this path. Lower 12-bits are valid. >> +* >> * sl >> * [in] Service level to use on this path. Lower 4-bits are valid. >> * >> @@ -2779,6 +2803,41 @@ ib_path_rec_num_path( >> * ib_path_rec_t >> *********/ >> >> +/****f* IBA Base: Types/ib_path_rec_set_sl >> +* NAME >> +* ib_path_rec_set_sl >> +* >> +* DESCRIPTION >> +* Set path service level. >> +* >> +* SYNOPSIS >> +*/ >> +static inline void OSM_API >> +ib_path_rec_set_sl( >> + IN ib_path_rec_t* const p_rec, >> + IN const uint8_t sl ) >> +{ >> + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & >> + IB_PATH_REC_QOS_CLASS_MASK ) | >> + ( sl & IB_PATH_REC_SL_MASK) ); >> +} >> +/* >> +* PARAMETERS >> +* p_rec >> +* [in] Pointer to the path record object. >> +* >> +* sl >> +* [in] Service level to set. 
>> +* >> +* RETURN VALUES >> +* None >> +* >> +* NOTES >> +* >> +* SEE ALSO >> +* ib_path_rec_t >> +*********/ >> + >> /****f* IBA Base: Types/ib_path_rec_sl >> * NAME >> * ib_path_rec_sl >> @@ -2792,7 +2851,7 @@ static inline uint8_t OSM_API >> ib_path_rec_sl( >> IN const ib_path_rec_t* const p_rec ) >> { >> - return( (uint8_t)((cl_ntoh16( p_rec->sl )) & 0xF) ); >> + return( (uint8_t)((cl_ntoh16( p_rec->qos_class_sl )) & IB_PATH_REC_SL_MASK) ); >> } >> /* >> * PARAMETERS >> @@ -2808,6 +2867,72 @@ ib_path_rec_sl( >> * ib_path_rec_t >> *********/ >> >> +/****f* IBA Base: Types/ib_path_rec_set_qos_class >> +* NAME >> +* ib_path_rec_set_qos_class >> +* >> +* DESCRIPTION >> +* Set path QoS class. >> +* >> +* SYNOPSIS >> +*/ >> +static inline void OSM_API >> +ib_path_rec_set_qos_class( >> + IN ib_path_rec_t* const p_rec, >> + IN const uint16_t qos_class ) >> +{ >> + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & >> + IB_PATH_REC_QOS_CLASS_MASK ) | >> + ( qos_class << 4) ); >> +} > > IB_PATH_REC_QOS_CLASS_MASK is 0xfff0, so this will clear sl component. Right, this is a bug. Good thing you spotted it. It should be IB_PATH_REC_SL_MASK instead of IB_PATH_REC_QOS_CLASS_MASK. >> +/* >> +* PARAMETERS >> +* p_rec >> +* [in] Pointer to the path record object. >> +* >> +* qos_class >> +* [in] QoS class to set. >> +* >> +* RETURN VALUES >> +* None >> +* >> +* NOTES >> +* >> +* SEE ALSO >> +* ib_path_rec_t >> +*********/ >> + >> +/****f* IBA Base: Types/ib_path_rec_qos_class >> +* NAME >> +* ib_path_rec_qos_class >> +* >> +* DESCRIPTION >> +* Get QoS class. >> +* >> +* SYNOPSIS >> +*/ >> +static inline uint16_t OSM_API >> +ib_path_rec_qos_class( >> + IN const ib_path_rec_t* const p_rec ) >> +{ >> + return( (uint16_t)( cl_ntoh16( p_rec->qos_class_sl ) & > > Why (uint16_t) casting is needed? > >> + IB_PATH_REC_QOS_CLASS_MASK ) >> 4 ); >> +} > > &IB_PATH_REC_QOS_CLASS_MASK is not needed - follow >> 4 drops lower bits. Right again (though not a bug this time). 
Instead of this: return( (uint16_t)( cl_ntoh16( p_rec->qos_class_sl ) & IB_PATH_REC_QOS_CLASS_MASK ) >> 4 ); there can be simply this: return( cl_ntoh16( p_rec->qos_class_sl ) >> 4 ); Hal, should I resubmit the patch? Thanks. -- Yevgeny > > Sasha > >> +/* >> +* PARAMETERS >> +* p_rec >> +* [in] Pointer to the path record object. >> +* >> +* RETURN VALUES >> +* QoS class of the path record. >> +* >> +* NOTES >> +* >> +* SEE ALSO >> +* ib_path_rec_t >> +*********/ >> + >> + >> /****f* IBA Base: Types/ib_path_rec_mtu >> * NAME >> * ib_path_rec_mtu >> diff --git a/osm/opensm/osm_helper.c b/osm/opensm/osm_helper.c >> index 2ef8e38..e0b5aef 100644 >> --- a/osm/opensm/osm_helper.c >> +++ b/osm/opensm/osm_helper.c >> @@ -1095,7 +1095,7 @@ osm_dump_path_record( >> { >> osm_log( p_log, log_level, >> "PathRecord dump:\n" >> - "\t\t\t\tresv0...................0x%016" PRIx64 "\n" >> + "\t\t\t\tservice_id..............0x%016" PRIx64 "\n" >> "\t\t\t\tdgid....................0x%016" PRIx64 " : " >> "0x%016" PRIx64 "\n" >> "\t\t\t\tsgid....................0x%016" PRIx64 " : " >> @@ -1106,6 +1106,7 @@ osm_dump_path_record( >> "\t\t\t\ttclass..................0x%X\n" >> "\t\t\t\tnum_path_revers.........0x%X\n" >> "\t\t\t\tpkey....................0x%X\n" >> + "\t\t\t\tqos_class...............0x%X\n" >> "\t\t\t\tsl......................0x%X\n" >> "\t\t\t\tmtu.....................0x%X\n" >> "\t\t\t\trate....................0x%X\n" >> @@ -1114,7 +1115,7 @@ osm_dump_path_record( >> "\t\t\t\tresv2...................0x%X\n" >> "\t\t\t\tresv3...................0x%X\n" >> "", >> - *(uint64_t*)p_pr->resv0, >> + cl_ntoh64(p_pr->service_id), >> cl_ntoh64( p_pr->dgid.unicast.prefix ), >> cl_ntoh64( p_pr->dgid.unicast.interface_id ), >> cl_ntoh64( p_pr->sgid.unicast.prefix ), >> @@ -1125,7 +1126,8 @@ osm_dump_path_record( >> p_pr->tclass, >> p_pr->num_path, >> cl_ntoh16( p_pr->pkey ), >> - cl_ntoh16( p_pr->sl ), >> + ib_path_rec_qos_class(p_pr), >> + ib_path_rec_sl(p_pr), >> p_pr->mtu, >> 
p_pr->rate, >> p_pr->pkt_life, >> diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c >> index 2f61fb8..5ec0006 100644 >> --- a/osm/opensm/osm_sa_multipath_record.c >> +++ b/osm/opensm/osm_sa_multipath_record.c >> @@ -759,7 +759,7 @@ __osm_mpr_rcv_build_pr( >> p_pr->hop_flow_raw &= cl_hton32(1<<31); >> >> p_pr->pkey = p_parms->pkey; >> - p_pr->sl = cl_hton16( p_parms->sl ); >> + ib_path_rec_set_sl(p_pr, p_parms->sl); >> p_pr->mtu = (uint8_t)( p_parms->mtu | 0x80 ); >> p_pr->rate = (uint8_t)( p_parms->rate | 0x80 ); >> >> diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c >> index 7707f52..5a43912 100644 >> --- a/osm/opensm/osm_sa_path_record.c >> +++ b/osm/opensm/osm_sa_path_record.c >> @@ -774,7 +774,8 @@ __osm_pr_rcv_build_pr( >> #endif >> >> p_pr->pkey = p_parms->pkey; >> - p_pr->sl = cl_hton16(p_parms->sl); >> + ib_path_rec_set_qos_class(p_pr,0); >> + ib_path_rec_set_sl(p_pr,p_parms->sl); >> p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80); >> p_pr->rate = (uint8_t)(p_parms->rate | 0x80); >> >> @@ -2051,7 +2052,7 @@ osm_pr_rcv_process( >> /* SL, Hop Limit, and Flow Label */ >> ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, >> &sl, &flow_label, &hop_limit ); >> - p_pr_item->path_rec.sl = cl_hton16( sl ); >> + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); >> #ifndef ROUTER_EXP >> p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | >> (flow_label << 8); >> diff --git a/osm/osmtest/osmtest.c b/osm/osmtest/osmtest.c >> index b9e3bf7..c42b037 100644 >> --- a/osm/osmtest/osmtest.c >> +++ b/osm/osmtest/osmtest.c >> @@ -1982,7 +1982,7 @@ osmtest_write_path_info( IN osmtest_t * >> cl_ntoh64( p_rec->sgid.unicast.interface_id ), >> cl_ntoh16( p_rec->dlid ), cl_ntoh16( p_rec->slid ), >> cl_ntoh32( p_rec->hop_flow_raw ), p_rec->tclass, >> - p_rec->num_path, cl_ntoh16( p_rec->pkey ), p_rec->sl, >> + p_rec->num_path, cl_ntoh16( p_rec->pkey ), ib_path_rec_sl(p_rec), >> p_rec->mtu, p_rec->rate, 
p_rec->pkt_life, >> p_rec->preference ); >> >> -- >> 1.4.4.1.GIT >> >> >> >> _______________________________________________ >> openib-general mailing list >> openib-general at openib.org >> http://openib.org/mailman/listinfo/openib-general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >> > From sean.hefty at intel.com Mon Jan 22 23:18:02 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 22 Jan 2007 23:18:02 -0800 Subject: [openib-general] [RFC/PATCH v2] rdma/cma: use the ipoib broadcast group qkey In-Reply-To: <15ddcffd0701222042r13bbfed1ub3d1cb2a117afac2@mail.gmail.com> Message-ID: <000001c73ebe$9edfe490$f0cc180a@amr.corp.intel.com> >Basically, I am positive to this, under the assumption that it will be >possible for --non-- root user space application to create >RDMA_PS_IPOIB IDs and use them as i would have been doing with >RDMA_PS_UDP IDs. After more consideration, I think this is the correct approach. I've already started working on a patch for this that I should have done but by the end of the week (hopefully tomorrow). I'm not overly concerned with immediately implementing how the ipoib port space would be restricted, just as long as it can be added before merging the code upstream. >1753 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; >1754 snum = ntohs(sin->sin_port); >1755 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) >1756 return -EACCES; > >what would be the equivalent check for RDMA_PS_IPOIB? and would this >check be done only on rdma_create_id time? This checks prevents applications from trying to use port numbers below 1024 without unless they possess the net bind service capability. 
A similar check could just be: if (ps == RDMA_PS_IPOIB && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; - Sean From thomas.bub at thomson.net Mon Jan 22 23:35:22 2007 From: thomas.bub at thomson.net (Bub Thomas) Date: Tue, 23 Jan 2007 08:35:22 +0100 Subject: [openib-general] How to get NFS with RDMA on OFED-1.1? Message-ID: <29C41D06A093C1449B2650D43A833E4907E6CE@WDTSSMAIL02.eu.thmulti.com> I know this might end up as an RTFM but before reading the wrong manual I'd rather ask.... We are currently using OFED-1.1 on SLES-10 doing RDMA via libibverbs and now like to utilize NFS with RDMA over IB for another project. Any help, RTFM etc. welcome Thanks Thomas Bub ............................................................ Thomas Bub Grass Valley Germany GmbH Brunnenweg 9 64331 Weiterstadt, Germany Tel: +49 6150 104 147 Fax: +49 6150 104 656 Email: Thomas.Bub at thomson.net www.GrassValley.com ............................................................ -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Mon Jan 22 23:50:20 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 09:50:20 +0200 Subject: [openib-general] pls build from /mswg2/work/mst/ofed_1_2 In-Reply-To: <1169538304.3731.40.camel@swlab50.yok.mtl.com> References: <1169538304.3731.40.camel@swlab50.yok.mtl.com> Message-ID: <20070123075020.GZ10025@mellanox.co.il> > Quoting Vladimir Sokolovsky : > Passed OK, I added I pushed multicast patch to ofed 1.2 and this was pushed out to staging. Sean, please let me know when multicast or sa cache branches update so I can redo this. Vlad, let's start taking master from librdmacm and 1.2 branch from DAPL as Sean/arlin indicated. -- Michael S. Tsirkin - Staff Engineer, Mellanox Technologies Ltd.
error compiling committee.c: too many arguments to function From ogerlitz at voltaire.com Mon Jan 22 23:51:59 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 09:51:59 +0200 Subject: [openib-general] [RFC/PATCH v2] rdma/cma: use the ipoib broadcast group qkey In-Reply-To: <000001c73ebe$9edfe490$f0cc180a@amr.corp.intel.com> References: <000001c73ebe$9edfe490$f0cc180a@amr.corp.intel.com> Message-ID: <45B5BE9F.1040005@voltaire.com> Sean Hefty wrote: > After more consideration, I think this is the correct approach. I've already > started working on a patch for this that I should have done but by the end of > the week (hopefully tomorrow). > This checks prevents applications from trying to use port numbers below 1024 > without unless they possess the net bind service capability. A similar check > could just be: > > if (ps == RDMA_PS_IPOIB && !capable(CAP_NET_BIND_SERVICE)) > return -EACCES; OK, lets see i got it: your suggestion is that only if the process has the net bind service capability it would be able to create RDMA_PS_IPOIB IDs. How do processes get a possession of this capability(). Talking here, I understand that there are issues with Linux capability()-ies , specifically capabilities are not passed through execve() see "understanding Linux capabilities brokenness" @ http://lkml.org/lkml/2005/8/8/248 This means capabilities are practically not usable for "non root processes". Or. 
From ogerlitz at voltaire.com Tue Jan 23 00:09:08 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 10:09:08 +0200 Subject: [openib-general] rdma/cma: use the ipoib broadcast group qkey - linux capabilities In-Reply-To: <45B5BE9F.1040005@voltaire.com> References: <000001c73ebe$9edfe490$f0cc180a@amr.corp.intel.com> <45B5BE9F.1040005@voltaire.com> Message-ID: <45B5C2A4.1000404@voltaire.com> Or Gerlitz wrote: >> This checks prevents applications from trying to use port numbers below 1024 >> without unless they possess the net bind service capability. A similar check >> could just be: >> >> if (ps == RDMA_PS_IPOIB && !capable(CAP_NET_BIND_SERVICE)) >> return -EACCES; > > OK, lets see i got it: your suggestion is that only if the process has > the net bind service capability it would be able to create RDMA_PS_IPOIB > IDs. How do processes get a possession of this capability(). > > Talking here, I understand that there are issues with Linux > capability()-ies , specifically capabilities are not passed through > execve() see "understanding Linux capabilities brokenness" @ > http://lkml.org/lkml/2005/8/8/248 > > This means capabilities are practically not usable for "non root processes". I have now got a pointer to this more recent LKML discussion where a patch was suggested to solve the problem "patch to make Linux capabilities into something useful (v 0.3.1)" @ http://lkml.org/lkml/2006/9/5/246 This means that unless someone proves that capabilities are not broken, we will allow (eg under some mod param) non-root apps to create RDMA_PS_IPOIB IDs, OK? Or. 
From vlad at lists.openfabrics.org Tue Jan 23 04:06:18 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Tue, 23 Jan 2007 04:06:18 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070123-0346 daily build status Message-ID: <20070123120618.5B29AE603BB@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.18 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.17 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.16 Passed on x86_64 with linux-2.6.18 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.15 Failed: From mst at mellanox.co.il 
Tue Jan 23 04:10:51 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 14:10:51 +0200 Subject: [openib-general] ofa_1_2_kernel 20070123-0346 daily build status In-Reply-To: <20070123120618.5B29AE603BB@openfabrics.org> References: <20070123120618.5B29AE603BB@openfabrics.org> Message-ID: <20070123121051.GM13536@mellanox.co.il> > Quoting vlad at lists.openfabrics.org : > Subject: ofa_1_2_kernel 20070123-0346 daily build status > > This email was generated automatically, please do not reply OK, mail to openib-general seems to be finally working. -- MST From jlentini at netapp.com Tue Jan 23 06:52:43 2007 From: jlentini at netapp.com (James Lentini) Date: Tue, 23 Jan 2007 09:52:43 -0500 (EST) Subject: [openib-general] How to get NFS with RDMA on OFED-1.1? In-Reply-To: <29C41D06A093C1449B2650D43A833E4907E6CE@WDTSSMAIL02.eu.thmulti.com> References: <29C41D06A093C1449B2650D43A833E4907E6CE@WDTSSMAIL02.eu.thmulti.com> Message-ID: On Tue, 23 Jan 2007, Bub Thomas wrote: > I know this might end up as an RTFM but before reading the wrong manual > I'd rather ask.... > We are currently using OFED-1.1 on SLES-10 doing RDMA via libibverbs and > now like to utilize NFS with RDMA over IB for another project. > Any help, RTFM etc. welcome > Thanks > Thomas Bub > > ............................................................ > Thomas Bub > Grass Valley Germany GmbH > Brunnenweg 9 > 64331 Weiterstadt, Germany > Tel: +49 6150 104 147 > Fax: +49 6150 104 656 > Email: Thomas.Bub at thomson.net > www.GrassValley.com > ............................................................ The Linux NFS-RDMA client and server are available in the downloads section of the SourceForge NFS-RDMA project. See: http://sourceforge.net/projects/nfs-rdma Our latest release is for kernel 2.6.18. Please let us know if you have any more questions.
james From rdreier at cisco.com Tue Jan 23 07:06:22 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 23 Jan 2007 07:06:22 -0800 Subject: [openib-general] [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events In-Reply-To: <1169026741.20182.3.camel@mtls05.yok.mtl.com> (Dotan Barak's message of "Wed, 17 Jan 2007 11:39:01 +0200") References: <1169026741.20182.3.camel@mtls05.yok.mtl.com> Message-ID: Thanks, applied after s/num_of_events/num_cq_events/ > --- gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 10:58:02.000000000 +0200 > +++ gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 11:09:59.000000000 +0200 BTW, can you make sure your patches apply to the libibverbs tree with "patch -p1" (or better yet, with "git apply --whitespace=error-all")? I had to edit the patch headers to apply this to my git tree. - R. From rdreier at cisco.com Tue Jan 23 07:10:07 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 23 Jan 2007 07:10:07 -0800 Subject: [openib-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will pick up three small fixes: Hoang-Nam Nguyen (2): IB/ehca: Fix improper use of yield() with spinlock held IB/ehca: Fix mismatched spin_unlock in irq handler Ishai Rabinovitz (1): IB/srp: Check match_strdup() return drivers/infiniband/hw/ehca/ehca_cq.c | 5 ++++- drivers/infiniband/hw/ehca/ehca_irq.c | 3 ++- drivers/infiniband/ulp/srp/ib_srp.c | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 93995b6..6074c89 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ 
b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -344,8 +344,11 @@ int ehca_destroy_cq(struct ib_cq *cq) unsigned long flags; spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_callbacks) + while (my_cq->nr_callbacks) { + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); yield(); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + } idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index e7209af..c069be8 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -440,7 +440,8 @@ void ehca_tasklet_eq(unsigned long data) cq = idr_find(&ehca_cq_idr, token); if (cq == NULL) { - spin_unlock(&ehca_cq_idr_lock); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); break; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index cdecbf5..72611fd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1621,18 +1621,30 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) switch (token) { case SRP_OPT_ID_EXT: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_IOC_GUID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_DGID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } if (strlen(p) != 32) { printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); kfree(p); @@ -1656,6 +1668,10 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_SERVICE_ID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; @@ -1693,6 +1709,10 @@ static int 
srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_INITIATOR_EXT: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; From rdreier at cisco.com Tue Jan 23 07:17:33 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 23 Jan 2007 07:17:33 -0800 Subject: [openib-general] [PATCH] IB/SRP add need_reset In-Reply-To: <45ACEE9E.108@dev.mellanox.co.il> ( ishai@dev.mellanox.co.il's message of "Tue, 16 Jan 2007 17:26:22 +0200") References: <45ACEE9E.108@dev.mellanox.co.il> Message-ID: Thanks. I queued this for 2.6.21, since I think it's too late for this in the 2.6.20 cycle. From swise at opengridcomputing.com Tue Jan 23 07:33:18 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 23 Jan 2007 09:33:18 -0600 Subject: [openib-general] SLES9SP3 and the RDMA-CM Message-ID: <1169566398.942.17.camel@stevo-desktop> Has anyone tested the RDMA CM on SLES9SP3? ip_dev_find() is returning the loopback dev ptr for lookups on the local ip address of the RNIC or ipoib device. This causes problems. Anybody seen this? Thanks, Steve. From ogerlitz at voltaire.com Tue Jan 23 07:38:07 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 17:38:07 +0200 (IST) Subject: [openib-general] [RFC/PATCH v3] rdma/cma: add RDMA_PS_IPOIB port space In-Reply-To: References: Message-ID: Add to the RDMA CM an IPoIB port space (RDMA_PS_IPOIB) whose semantics are similar to those of RDMA_PS_UDP where RDMA_PS_IPOIB IDs allow for inter operability with IPoIB on some traffic patterns. For RDMA_PS_UDP and RDMA_PS_IPOIB IDs, the qkey is stored in struct rdma_id_private and delivered also in ADDR_RESOLVED and CONNECT_REQUEST events. The user space library learns the qkey from these events and use them when it is called to create UD QP. 
The IB UD qkey used by RDMA_PS_IPOIB IDs is that of the related ipoib broadcast group where the qkey used by RDMA_PS_UDP IDs is hard defined "rdma cm qkey". Creation of RDMA_PS_IPOIB IDs by proceeses is controlled by the linux kernel capabilities subsystem. Signed-off-by: Or Gerlitz Index: rdma-dev/drivers/infiniband/core/cma.c =================================================================== --- rdma-dev.orig/drivers/infiniband/core/cma.c 2007-01-21 12:11:16.000000000 +0200 +++ rdma-dev/drivers/infiniband/core/cma.c 2007-01-23 15:45:52.000000000 +0200 @@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq; static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); +static DEFINE_IDR(ipoib_ps); struct cma_device { struct list_head list; @@ -136,6 +137,7 @@ struct rdma_id_private { u32 seq_num; u32 qp_num; u8 srq; + u32 qkey; }; struct cma_multicast { @@ -323,6 +325,10 @@ struct rdma_cm_id *rdma_create_id(rdma_c { struct rdma_id_private *id_priv; + /* XXX - work around this till capabilities work fine for non root users */ + if (ps == RDMA_PS_IPOIB && !capable(CAP_NET_BROADCAST)) + return ERR_PTR(-EACCES); + id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); if (!id_priv) return ERR_PTR(-ENOMEM); @@ -884,6 +890,31 @@ out: return ret; } +static int cma_set_qkey(struct rdma_id_private *id_priv, struct rdma_cm_event *event) +{ + struct ib_sa_mcmember_rec rec; + struct rdma_dev_addr *dev_addr; + int ret; + + if (id_priv->id.ps == RDMA_PS_IPOIB) { + dev_addr = &id_priv->id.route.addr.dev_addr; + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, + &rec.mgid, &rec); + if (ret) + return -EINVAL; + id_priv->qkey = rec.qkey; + event->param.ud.qkey = rec.qkey; + } + + if (id_priv->id.ps == RDMA_PS_UDP) { + id_priv->qkey = RDMA_UD_QKEY; + event->param.ud.qkey = RDMA_UD_QKEY; + } + + return 0; +} + static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event 
*ib_event) { @@ -999,7 +1030,7 @@ static int cma_req_handler(struct ib_cm_ memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - if (listen_id->id.ps == RDMA_PS_UDP) { + if (listen_id->id.ps == RDMA_PS_UDP || listen_id->id.ps == RDMA_PS_IPOIB) { conn_id = cma_new_udp_id(&listen_id->id, ib_event); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = @@ -1020,7 +1051,11 @@ static int cma_req_handler(struct ib_cm_ mutex_unlock(&lock); if (ret) goto release_conn_id; - + + ret = cma_set_qkey(conn_id, &event); + if (ret) + goto release_conn_id; + conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; @@ -1600,6 +1635,7 @@ static void addr_handler(int status, str { struct rdma_id_private *id_priv = context; struct rdma_cm_event event; + int ret; memset(&event, 0, sizeof event); atomic_inc(&id_priv->dev_remove); @@ -1627,6 +1663,11 @@ static void addr_handler(int status, str memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + ret = cma_set_qkey(id_priv, &event); + if (ret) { + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = ret; + } } if (id_priv->id.event_handler(&id_priv->id, &event)) { @@ -1822,6 +1863,9 @@ static int cma_get_port(struct rdma_id_p case RDMA_PS_UDP: ps = &udp_ps; break; + case RDMA_PS_IPOIB: + ps = &ipoib_ps; + break; default: return -EPROTONOSUPPORT; } @@ -1936,7 +1980,9 @@ static int cma_sidr_rep_handler(struct i event.status = ib_event->param.sidr_rep_rcvd.status; break; } - if (rep->qkey != RDMA_UD_QKEY) { + if (rep->qkey != id_priv->qkey) { + printk(KERN_WARNING "qkey mismatch %.8x client qkey %.8x\n", + rep->qkey, id_priv->qkey); event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; break; @@ -2135,7 +2181,7 @@ int rdma_connect(struct rdma_cm_id *id, switch (rdma_node_get_transport(id->device->node_type)) { 
case RDMA_TRANSPORT_IB: - if (id->ps == RDMA_PS_UDP) + if (id->ps == RDMA_PS_UDP || id->ps == RDMA_PS_IPOIB) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); @@ -2231,7 +2277,7 @@ static int cma_send_sidr_rep(struct rdma rep.status = status; if (status == IB_SIDR_SUCCESS) { rep.qp_num = id_priv->qp_num; - rep.qkey = RDMA_UD_QKEY; + rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; @@ -2255,7 +2301,7 @@ int rdma_accept(struct rdma_cm_id *id, s switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->ps == RDMA_PS_UDP) + if (id->ps == RDMA_PS_UDP || id->ps == RDMA_PS_IPOIB) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->private_data, conn_param->private_data_len); @@ -2316,7 +2362,7 @@ int rdma_reject(struct rdma_cm_id *id, c switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->ps == RDMA_PS_UDP) + if (id->ps == RDMA_PS_UDP || id->ps == RDMA_PS_IPOIB) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, private_data, private_data_len); else Index: rdma-dev/include/rdma/rdma_cm.h =================================================================== --- rdma-dev.orig/include/rdma/rdma_cm.h 2007-01-18 13:46:06.000000000 +0200 +++ rdma-dev/include/rdma/rdma_cm.h 2007-01-23 12:35:35.000000000 +0200 @@ -58,6 +58,7 @@ enum rdma_cm_event_type { enum rdma_port_space { RDMA_PS_SDP = 0x0001, + RDMA_PS_IPOIB = 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, RDMA_PS_SCTP = 0x0183 Index: rdma-dev/drivers/infiniband/core/ucma.c =================================================================== --- rdma-dev.orig/drivers/infiniband/core/ucma.c 2007-01-18 13:46:06.000000000 +0200 +++ rdma-dev/drivers/infiniband/core/ucma.c 2007-01-23 15:45:05.000000000 +0200 @@ -257,7 +257,7 @@ static int ucma_event_handler(struct rdm ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; 
uevent->resp.status = event->status; - if (cm_id->ps == RDMA_PS_UDP) + if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, From ogerlitz at voltaire.com Tue Jan 23 07:39:37 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 17:39:37 +0200 (IST) Subject: [openib-general] [RFC/PATCH] rdma/cma: port rdma_cm multicast code to the UDP/IPOIB port space framework In-Reply-To: References: Message-ID: Allow rdma_cm/ipoib multicast interoperability for RDMA_PS_IPOIB IDs. This is implemented by having the rdma cm use the --same-- qkey and multicast gid used by ipoib, where for RDMA_PS_UDP IDs the rdma cm uses a qkey of its own and adds a signature byte to the multicast gid. Signed-off-by: Or Gerlitz Index: rdma-dev/drivers/infiniband/core/cma.c =================================================================== --- rdma-dev.orig/drivers/infiniband/core/cma.c 2007-01-23 15:56:01.000000000 +0200 +++ rdma-dev/drivers/infiniband/core/cma.c 2007-01-23 15:56:23.000000000 +0200 @@ -2473,7 +2473,10 @@ static int cma_join_ib_multicast(struct return ret; ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); - mc_map[7] = 0x01; /* Use RDMA CM signature */ + if (id_priv->id.ps == RDMA_PS_UDP) { + rec.qkey = RDMA_UD_QKEY; /* Use RDMA CM QKEY */ + mc_map[7] = 0x01; /* Use RDMA CM signature */ + } mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); From ogerlitz at voltaire.com Tue Jan 23 07:40:54 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 17:40:54 +0200 (IST) Subject: [openib-general] [RFC/PATCH v2] librdmacm: add RDMA_PS_IPOIB port space In-Reply-To: References: Message-ID: Add to librdmacm an IPoIB port space (RDMA_PS_IPOIB) whose semantics are similar to those of RDMA_PS_UDP, where RDMA_PS_IPOIB IDs allow for interoperability with IPoIB on some traffic patterns. 
For RDMA_PS_UDP and RDMA_PS_IPOIB IDs, the qkey is provided by the kernel in ADDR_RESOLVED and CONNECT_REQUEST events and is stored by the library in struct cma_id_private. Later the library use the qkey when it is called to create a UD QP. The udaddy test program was enhanced to work in either of the port spaces. Signed-off-by: Or Gerlitz Index: librdmacm/src/cma.c =================================================================== --- librdmacm.orig/src/cma.c 2007-01-22 21:21:37.000000000 +0200 +++ librdmacm/src/cma.c 2007-01-23 13:57:48.000000000 +0200 @@ -116,6 +116,7 @@ struct cma_id_private { pthread_mutex_t mut; uint32_t handle; struct cma_multicast *mc_list; + uint32_t qkey; }; struct cma_multicast { @@ -687,7 +688,7 @@ static int ucma_init_ud_qp(struct cma_id qp_attr.port_num = id_priv->id.port_num; qp_attr.qp_state = IBV_QPS_INIT; - qp_attr.qkey = RDMA_UD_QKEY; + qp_attr.qkey = id_priv->qkey; ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY); if (ret) @@ -718,7 +719,7 @@ int rdma_create_qp(struct rdma_cm_id *id if (!qp) return -ENOMEM; - if (id->ps == RDMA_PS_UDP) + if (id->ps == RDMA_PS_UDP || id->ps == RDMA_PS_IPOIB) ret = ucma_init_ud_qp(id_priv, qp); else ret = ucma_init_ib_qp(id_priv, qp); @@ -809,7 +810,7 @@ int rdma_accept(struct rdma_cm_id *id, s void *msg; int ret, size; - if (id->ps != RDMA_PS_UDP) { + if (id->ps != RDMA_PS_UDP && id->ps != RDMA_PS_IPOIB) { ret = ucma_modify_qp_rtr(id); if (ret) return ret; @@ -1169,6 +1170,7 @@ int rdma_get_cm_event(struct rdma_event_ struct ucma_abi_get_event *cmd; struct cma_event *evt; void *msg; + struct cma_id_private *id_priv; int ret, size; ret = cma_dev_cnt ? 
0 : ucma_init(); @@ -1197,8 +1199,11 @@ retry: evt->id_priv = (void *) (uintptr_t) resp->uid; evt->event.id = &evt->id_priv->id; evt->event.status = ucma_query_route(&evt->id_priv->id); + id_priv = evt->id_priv; if (evt->event.status) evt->event.event = RDMA_CM_EVENT_ADDR_ERROR; + else if (id_priv->id.ps == RDMA_PS_UDP || id_priv->id.ps == RDMA_PS_IPOIB) + id_priv->qkey = resp->param.ud.qkey; break; case RDMA_CM_EVENT_ROUTE_RESOLVED: evt->id_priv = (void *) (uintptr_t) resp->uid; @@ -1211,12 +1216,16 @@ retry: evt->id_priv = (void *) (uintptr_t) resp->uid; if (evt->id_priv->id.ps == RDMA_PS_TCP) ucma_copy_conn_event(evt, &resp->param.conn); - else + else ucma_copy_ud_event(evt, &resp->param.ud); ret = ucma_process_conn_req(evt, resp->id); if (ret) goto retry; + + id_priv = container_of(evt->event.id, struct cma_id_private, id); + if (id_priv->id.ps == RDMA_PS_UDP || id_priv->id.ps == RDMA_PS_IPOIB) + id_priv->qkey = resp->param.ud.qkey; break; case RDMA_CM_EVENT_CONNECT_RESPONSE: evt->id_priv = (void *) (uintptr_t) resp->uid; @@ -1233,7 +1242,8 @@ retry: case RDMA_CM_EVENT_ESTABLISHED: evt->id_priv = (void *) (uintptr_t) resp->uid; evt->event.id = &evt->id_priv->id; - if (evt->id_priv->id.ps == RDMA_PS_UDP) { + id_priv = evt->id_priv; + if (id_priv->id.ps == RDMA_PS_UDP || id_priv->id.ps == RDMA_PS_IPOIB) { ucma_copy_ud_event(evt, &resp->param.ud); break; } Index: librdmacm/examples/udaddy.c =================================================================== --- librdmacm.orig/examples/udaddy.c 2007-01-22 21:19:52.000000000 +0200 +++ librdmacm/examples/udaddy.c 2007-01-23 15:50:48.000000000 +0200 @@ -76,6 +76,7 @@ static int message_size = 100; static int message_count = 10; static char *dst_addr; static char *src_addr; +static enum rdma_port_space port_space = RDMA_PS_UDP; static int create_message(struct cmatest_node *node) { @@ -253,7 +254,7 @@ err: return ret; } -static int connect_handler(struct rdma_cm_id *cma_id) +static int connect_handler(struct rdma_cm_id 
*cma_id, struct rdma_cm_event *event) { struct cmatest_node *node; struct rdma_conn_param conn_param; @@ -272,6 +273,7 @@ static int connect_handler(struct rdma_c if (ret) goto err2; + node->remote_qkey = event->param.ud.qkey; ret = post_recvs(node); if (ret) goto err2; @@ -327,7 +329,7 @@ static int cma_handler(struct rdma_cm_id ret = route_handler(cma_id->context); break; case RDMA_CM_EVENT_CONNECT_REQUEST: - ret = connect_handler(cma_id); + ret = connect_handler(cma_id, event); break; case RDMA_CM_EVENT_ESTABLISHED: ret = resolved_handler(cma_id->context, event); @@ -393,7 +395,7 @@ static int alloc_nodes(void) if (dst_addr) { ret = rdma_create_id(test.channel, &test.nodes[i].cma_id, - &test.nodes[i], RDMA_PS_UDP); + &test.nodes[i], port_space); if (ret) goto err; } @@ -420,7 +422,7 @@ static void create_reply_ah(struct cmate node->ah = ibv_create_ah_from_wc(node->pd, wc, node->mem, node->cma_id->port_num); node->remote_qpn = ntohl(wc->imm_data); - node->remote_qkey = RDMA_UD_QKEY; + /* passive sets node->remote_qkey during CONNECT_REQUEST event processing */ } static int poll_cqs(void) @@ -489,7 +491,7 @@ static int run_server(void) int i, ret; printf("udaddy: starting server\n"); - ret = rdma_create_id(test.channel, &listen_id, &test, RDMA_PS_UDP); + ret = rdma_create_id(test.channel, &listen_id, &test, port_space); if (ret) { printf("udaddy: listen request failed\n"); return ret; @@ -595,7 +597,7 @@ int main(int argc, char **argv) { int op, ret; - while ((op = getopt(argc, argv, "s:b:c:C:S:")) != -1) { + while ((op = getopt(argc, argv, "s:b:c:C:S:p:")) != -1) { switch (op) { case 's': dst_addr = optarg; @@ -612,6 +614,9 @@ int main(int argc, char **argv) case 'S': message_size = atoi(optarg); break; + case 'p': + port_space = strtol(optarg, NULL, 0); + break; default: printf("usage: %s\n", argv[0]); printf("\t[-s server_address]\n"); @@ -619,6 +624,7 @@ int main(int argc, char **argv) printf("\t[-c connections]\n"); printf("\t[-C message_count]\n"); 
printf("\t[-S message_size]\n"); + printf("\t[-p port space - %#x for UDP %#x for IPoIB]\n",RDMA_PS_UDP,RDMA_PS_IPOIB); exit(1); } } Index: librdmacm/include/rdma/rdma_cma.h =================================================================== --- librdmacm.orig/include/rdma/rdma_cma.h 2007-01-22 21:56:13.000000000 +0200 +++ librdmacm/include/rdma/rdma_cma.h 2007-01-23 13:48:30.000000000 +0200 @@ -61,16 +61,11 @@ enum rdma_cm_event_type { }; enum rdma_port_space { + RDMA_PS_IPOIB = 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, }; -/* - * Global qkey value for all UD QPs and multicast groups created via the - * RDMA CM. - */ -#define RDMA_UD_QKEY 0x01234567 - struct ib_addr { union ibv_gid sgid; union ibv_gid dgid; From ogerlitz at voltaire.com Tue Jan 23 07:43:02 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 17:43:02 +0200 (IST) Subject: [openib-general] [RFC/PATCH] librdmacm: modify multicast code for RDMA_PS_IPOIB port space In-Reply-To: References: Message-ID: Enhance the mckey test program to work in either of the port spaces. 
Signed-off-by: Or Gerlitz Index: librdmacm/examples/mckey.c =================================================================== --- librdmacm.orig/examples/mckey.c 2007-01-23 16:52:16.000000000 +0200 +++ librdmacm/examples/mckey.c 2007-01-23 17:02:26.000000000 +0200 @@ -78,6 +78,7 @@ static int message_count = 10; static int is_sender; static char *dst_addr; static char *src_addr; +static enum rdma_port_space port_space = RDMA_PS_UDP; static int create_message(struct cmatest_node *node) { @@ -328,7 +329,7 @@ static int alloc_nodes(void) for (i = 0; i < connections; i++) { test.nodes[i].id = i; ret = rdma_create_id(test.channel, &test.nodes[i].cma_id, - &test.nodes[i], RDMA_PS_UDP); + &test.nodes[i], port_space); if (ret) goto err; } @@ -472,7 +473,7 @@ int main(int argc, char **argv) { int op, ret; - while ((op = getopt(argc, argv, "m:sb:c:C:S:")) != -1) { + while ((op = getopt(argc, argv, "m:sb:c:C:S:p:")) != -1) { switch (op) { case 'm': dst_addr = optarg; @@ -492,6 +493,9 @@ int main(int argc, char **argv) case 'S': message_size = atoi(optarg); break; + case 'p': + port_space = strtol(optarg, NULL, 0); + break; default: printf("usage: %s\n", argv[0]); printf("\t-m multicast_address\n"); @@ -500,6 +504,7 @@ int main(int argc, char **argv) printf("\t[-c connections]\n"); printf("\t[-C message_count]\n"); printf("\t[-S message_size]\n"); + printf("\t[-p port space - %#x for UDP %#x for IPoIB]\n",RDMA_PS_UDP,RDMA_PS_IPOIB); exit(1); } } From ogerlitz at voltaire.com Tue Jan 23 07:46:23 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 17:46:23 +0200 (IST) Subject: [openib-general] [RFC/PATCH] librdmacm: modify multicast code for RDMA_PS_IPOIB port space In-Reply-To: References: Message-ID: Sean, with one host being both the client and the server, mckey does not work for me even without the IPOIB PS changes, it used to work between two hosts with the below patch that forces the sender to generate and poll completions on its TX packets. 
please let me know how its going with mckey on your system, i am going to test the librdmacm patches i have just sent with our mcast app. Or. Index: librdmacm/examples/mckey.c =================================================================== --- librdmacm.orig/examples/mckey.c 2007-01-23 16:24:19.000000000 +0200 +++ librdmacm/examples/mckey.c 2007-01-23 16:50:13.000000000 +0200 @@ -452,10 +453,14 @@ static int run(void) if (is_sender) { printf("initiating data transfers\n"); for (i = 0; i < connections; i++) { - ret = post_sends(&test.nodes[i], 0); + ret = post_sends(&test.nodes[i], IBV_SEND_SIGNALED); if (ret) goto out; - } + } + printf("polling data transfers completion\n"); + ret = poll_cqs(); + if (ret) + goto out; } else { printf("receiving data transfers\n"); ret = poll_cqs(); From ogerlitz at voltaire.com Tue Jan 23 08:04:11 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 23 Jan 2007 18:04:11 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B4F693.9050207@mellanox.co.il> References: <45B36F03.9070201@mellanox.co.il> <45B4F09B.4050407@ichips.intel.com> <45B4F693.9050207@mellanox.co.il> Message-ID: <45B631FB.7010306@voltaire.com> Tziporet Koren wrote: > Sean Hefty wrote: >>> multicast >>> >> >> This goes with the multicast branch of my rdma-dev git tree. >> >> IMO, OFED should determine which features they want and pull in the >> appropriate branch. I know that Voltaire would like the multicast >> feature, but require a couple of changes to the code before its usable >> for them. > Moni/Or > Can you update us regarding multicast feature status and testing I am working with Sean over the list on the changes needed to the multicast code needed for interoperability with IPoIB, it seems to converge and the code should be ready by the end of this week to be merged. Sean owns this and would do the push into OFED and upstream. 
I am doing testing all the time over my systems, but my code bases are either upstream or something i have set on top of OFED 1.1, i don't have an OFED 1.2 env yet. Or. From kliteyn at dev.mellanox.co.il Tue Jan 23 08:20:12 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 23 Jan 2007 18:20:12 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <1169498411.23046.12190.camel@hal.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> <45B32FB8.40700@dev.mellanox.co.il> <1169498411.23046.12190.camel@hal.voltaire.com> Message-ID: <45B635BC.2010304@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi Yevgeny, > > On Sun, 2007-01-21 at 04:17, Yevgeny Kliteynik wrote: >> Sasha Khapyorsky wrote: >>> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: >>>> As for the mailing list it's openib-windows at openib.org. You can access >>>> it here: http://openib.org/mailman/listinfo/openib-windows >>> I found only references to svn://windows.openib.org, where >>> 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | >>> head -n 40' shows: >>> >>> ------------------------------------------------------------------------ >>> r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line >>> >>> Set property svn:keywords "id" on all repository >>> ------------------------------------------------------------------------ >>> r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line >>> >>> [OPENSM] When running as a service, if all ports are down, use the first port. 
>>> ------------------------------------------------------------------------ >>> r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines >>> >>> [OPENSM] When trying to set to INIT the remote port of the given physical port >>> in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no >>> check whether the physical port in null (e.g., if it's disconnected). >>> ------------------------------------------------------------------------ >>> r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line >>> >>> [opensm] Base service status on results that were received from opensm log messages. >>> ------------------------------------------------------------------------ >>> r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line >>> >>> [OPENSM] missed fix for OPENSM logging to System Event Log >>> ------------------------------------------------------------------------ >>> r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines >>> >>> [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! >>> 2. bugfix: added message file for correct logging to System Event Log. >>> 3. bugfix: wrong passing parameters in server mode; >>> 4. bugfix: error in table of parameters >>> >>> ------------------------------------------------------------------------ >>> r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line >>> >>> [opensm] Fix a trivial build break >>> ------------------------------------------------------------------------ >>> r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines >>> >>> if the guid2lid is corrupted, don't exit when running with -y option >>> (don't exit on fatal) - just ignore the file >>> >>> >>> >>> Seems that development there was stopped in Aug 2006, and it doesn't >>> have recent Win port patches. Am I looking in the wrong place? >> You were looking in the right place. 
It appears that I didn't describe >> the development process correctly. I think this repository is updated >> with stable OSM versions, after the code is tested. > > Any idea on when the next version is expected ? The SVN will be updated in a couple of days. -- Yevgeny > -- Hal > >> If you need more details, I think it's better for you to ask windows folks >> directly, since as we see, my knowledge in this area is very limited. >> >> -- Yevgeny >> >>> Sasha >>> >> _______________________________________________ >> openib-general mailing list >> openib-general at openib.org >> http://openib.org/mailman/listinfo/openib-general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >> > From kliteyn at dev.mellanox.co.il Tue Jan 23 08:21:59 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 23 Jan 2007 18:21:59 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <20070122190249.GI23783@sashak.voltaire.com> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <20070122190249.GI23783@sashak.voltaire.com> Message-ID: <45B63627.3050606@dev.mellanox.co.il> Hi Sasha, Sasha Khapyorsky wrote: > On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: >> Hi Sasha. >> >> Sasha Khapyorsky wrote: >>> Hi Yevgeny, >>> >>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: >>>> Hi Hal >>>> >>>> The following series of six patches implements QoS policy file parser: >>>> >>>> 1. QoS parser Lex file >>>> 2. QoS parser Lex-generated c file >>>> 3. QoS parser grammar (Yacc) file >>>> 4. QoS parser Yacc-generated grammar c and h file >>>> 5. QoS parser header file that defines parse tree data structures >>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files >>> Is there any description of proposed format and functionality? >> The parser is based on QoS RFC sent by Eitan in May 2006, with a few >> minor modifications. 
You can find the RFC here: >> http://openib.org/pipermail/openib-general/2006-May/022336.html > > This was RFC and couple of issues were discussed then. Now you are about > implementation phase and exact format description would be desired. For > example what "few minor modifications" are? I'll prepare an example file with explanations. -- Yevgeny >>> Also what about using human readable formats? >> To me the xml-like format in the RFC looks pretty readable. >> It has very limited number of keywords (tags), so it's easy >> to follow and/or to modify. > > It is your opinion, not everybody will agree with it (AFAIR this was > discussed too during RFC). > > I would not be care, but I don't know any example of really successful > XML using for configuration purposes (especially where advanced graphical > config editors/viewers were not used). Do you know? > > Sasha > From halr at voltaire.com Tue Jan 23 08:26:02 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 23 Jan 2007 11:26:02 -0500 Subject: [openib-general] [PATCH 0/2] opensm: _ctrl layer cleanup In-Reply-To: <20070120025446.GG8193@sashak.voltaire.com> References: <20070120025446.GG8193@sashak.voltaire.com> Message-ID: <1169569503.29183.10129.camel@hal.voltaire.com> Hi Sasha, On Fri, 2007-01-19 at 21:54, Sasha Khapyorsky wrote: > Hi Hal, > > I found that opensm mad processing _ctrl layer is non-functional, OTOH > it costs us 11k lines of code and 17k in run-time. So there are two > patches which clean this up (for SM and SA related MADs). > > Any real objections against this? This seems like a worthwhile simplification to me. 
-- Hal > Sasha From swise at opengridcomputing.com Tue Jan 23 08:55:16 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 23 Jan 2007 10:55:16 -0600 Subject: [openib-general] ib_local_sa workqueue creation crash with ofed 1.2 + sles9sp3 In-Reply-To: <1169307866.5132.16.camel@linux-q667.site> References: <000001c73b4d$532babc0$ff0da8c0@amr.corp.intel.com> <1169306848.5132.11.camel@linux-q667.site> <1169307866.5132.16.camel@linux-q667.site> Message-ID: <1169571316.4313.14.camel@stevo-desktop> I guess I was all wet...the latest code works with the name "local_sa"... Sorry for the noise... Steve. On Sat, 2007-01-20 at 09:44 -0600, Steve WIse wrote: > On Sat, 2007-01-20 at 09:27 -0600, Steve WIse wrote: > > BTW: "local_sa" is still too long. I reduced it to "lsa" and it > > worked. > > > > I cannot explain this. The BUG_ON() is for (strlen(name) > 10). > "local_sa" is 8...9 if you count the NUL at the end. > > But I hit that BUG_ON() for rhel4u4 and sles9sp3. I reduced the name to > "lsa" and it loads fine. > > ???? > > > > > > > On Thu, 2007-01-18 at 14:09 -0800, Sean Hefty wrote: > > > Can you see if the call to create_singlethread_workqueue() passes in a string > > > longer than 8 characters? If so, try reducing the size of the string. > > > > > > I've committed a change to the local_sa.c file this morning to avoid this sort > > > of backport issue, if this is indeed what it is. > > > > > > - Sean > > > > > > >Loading ib_local_sa crashes on sles9sp3 with a BUG() trap. Is this a > > > >known issue? 
> > > > > > > >----------- [cut here ] --------- [please bite here ] --------- > > > >Kernel BUG at workqueue:308 > > > >invalid operand: 0000 [1] SMP > > > >CPU 0 > > > >Pid: 7309, comm: modprobe Tainted: GF U (2.6.5-7.244-smp SLES9_SP3_BRANCH- > > > >200512121832250000) > > > >RIP: 0010:[] {__create_workqueue+33} > > > >RSP: 0000:000001003f4f7d38 EFLAGS: 00010202 > > > >RAX: 000000000000000b RBX: ffffffffa02b4e69 RCX: 0000000000000bb8 > > > >RDX: 0000000000000bb8 RSI: 0000000000000001 RDI: ffffffffa02b4e69 > > > >RBP: 0000000000000001 R08: 000000000000003d R09: 0000000000000000 > > > >R10: 00000000000493e0 R11: 0000000000000001 R12: ffffffff803e3fc0 > > > >R13: 000000000000000a R14: ffffffff803e3fc0 R15: 0000000000000000 > > > >FS: 0000002a95894b00(0000) GS:ffffffff8057cc00(0000) knlGS:0000000000000000 > > > >CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > > >CR2: 0000000000539ccf CR3: 0000000000101000 CR4: 00000000000006e0 > > > >Process modprobe (pid: 7309, threadinfo 000001003f4f6000, task > > > >000001001fcdc9e0) > > > >Stack: 000001003fe3f350 ffffffff803e4020 ffffffffa02b7180 ffffffff803e3fc0 > > > > 000000000000000a ffffffff803e3fc0 ffffffffa02b2300 ffffffffa02bd08f > > > > ffffffff803e4020 ffffffff8015d4dd > > > >Call Trace:{:ib_local_sa:sa_db_init+143} > > > >{sys_init_module+6589} > > > > {generic_file_read+187} > > > >{:ib_local_sa:sa_db_init+0} > > > > {vfs_read+244} {sys_read+157} > > > > {system_call+124} > > > > > > > >Code: 0f 0b 17 66 38 80 ff ff ff ff 34 01 66 66 90 48 8b 3d 61 50 > > > >RIP {__create_workqueue+33} RSP <000001003f4f7d38> > > > > > > > > > > > > > > > >_______________________________________________ > > > >openib-general mailing list > > > >openib-general at openib.org > > > >http://openib.org/mailman/listinfo/openib-general > > > > > > > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > > > > _______________________________________________ > > openib-general mailing list > > openib-general 
at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From halr at voltaire.com Tue Jan 23 08:50:48 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 23 Jan 2007 11:50:48 -0500 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45B635BC.2010304@dev.mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> <45B32FB8.40700@dev.mellanox.co.il> <1169498411.23046.12190.camel@hal.voltaire.com> <45B635BC.2010304@dev.mellanox.co.il> Message-ID: <1169570982.29183.11110.camel@hal.voltaire.com> Hi again Yevgeny, On Tue, 2007-01-23 at 11:20, Yevgeny Kliteynik wrote: > Hi Hal, > > Hal Rosenstock wrote: > > Hi Yevgeny, > > > > On Sun, 2007-01-21 at 04:17, Yevgeny Kliteynik wrote: > >> Sasha Khapyorsky wrote: > >>> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: > >>>> As for the mailing list it's openib-windows at openib.org. 
You can access > >>>> it here: http://openib.org/mailman/listinfo/openib-windows > >>> I found only references to svn://windows.openib.org, where > >>> 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | > >>> head -n 40' shows: > >>> > >>> ------------------------------------------------------------------------ > >>> r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line > >>> > >>> Set property svn:keywords "id" on all repository > >>> ------------------------------------------------------------------------ > >>> r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line > >>> > >>> [OPENSM] When running as a service, if all ports are down, use the first port. > >>> ------------------------------------------------------------------------ > >>> r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines > >>> > >>> [OPENSM] When trying to set to INIT the remote port of the given physical port > >>> in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no > >>> check whether the physical port in null (e.g., if it's disconnected). > >>> ------------------------------------------------------------------------ > >>> r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line > >>> > >>> [opensm] Base service status on results that were received from opensm log messages. > >>> ------------------------------------------------------------------------ > >>> r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line > >>> > >>> [OPENSM] missed fix for OPENSM logging to System Event Log > >>> ------------------------------------------------------------------------ > >>> r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines > >>> > >>> [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! > >>> 2. bugfix: added message file for correct logging to System Event Log. > >>> 3. 
bugfix: wrong passing parameters in server mode; > >>> 4. bugfix: error in table of parameters > >>> > >>> ------------------------------------------------------------------------ > >>> r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line > >>> > >>> [opensm] Fix a trivial build break > >>> ------------------------------------------------------------------------ > >>> r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines > >>> > >>> if the guid2lid is corrupted, don't exit when running with -y option > >>> (don't exit on fatal) - just ignore the file > >>> > >>> > >>> > >>> Seems that development there was stopped in Aug 2006, and it doesn't > >>> have recent Win port patches. Am I looking in the wrong place? > >> You were looking in the right place. It appears that I didn't describe > >> the development process correctly. I think this repository is updated > >> with stable OSM versions, after the code is tested. > > > > Any idea on when the next version is expected ? > > The SVN will be updated in a couple of days. Glad to hear it. To what OpenSM version will it correspond ? Will it be based on OFED 1.1 or beyond ? What OpenIB svn or git commit does it correspond to ? Thanks. -- Hal > > -- Yevgeny > > > -- Hal > > > >> If you need more details, I think it's better for you to ask windows folks > >> directly, since as we see, my knowledge in this area is very limited. 
> >> > >> -- Yevgeny > >> > >>> Sasha > >>> > >> _______________________________________________ > >> openib-general mailing list > >> openib-general at openib.org > >> http://openib.org/mailman/listinfo/openib-general > >> > >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >> > > From dotanb at dev.mellanox.co.il Tue Jan 23 09:06:54 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Tue, 23 Jan 2007 19:06:54 +0200 (IST) Subject: [openib-general] [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events In-Reply-To: References: <1169026741.20182.3.camel@mtls05.yok.mtl.com> Message-ID: <3196.212.72.208.162.1169572014.squirrel@dev.mellanox.co.il> > Thanks, applied after s/num_of_events/num_cq_events/ > > > --- > gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 > 10:58:02.000000000 +0200 > > +++ > gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 > 11:09:59.000000000 +0200 > > BTW, can you make sure your patches apply to the libibverbs tree with > "patch -p1" (or better yet, with "git apply --whitespace=error-all")? > I had to edit the patch headers to apply this to my git tree. > > - R. > I tried to apply the patch locally with -p1, and everything was fine .... but anyway, next time i will check it using the git apply. thanks Dotan From monis at voltaire.com Tue Jan 23 09:08:58 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 23 Jan 2007 19:08:58 +0200 Subject: [openib-general] Add bonding suuport to OFED Message-ID: <45B6412A.6020207@voltaire.com> Originally, bonding is a High Availability solution for Ethernet network interfaces. It is a module that implements a virtual network device (not bounded to hardware) and enslaves "real" devices. Bonding device controls its slaves according to the bonding policy and the slave's health. I am adding a bonding device which is good for IPoIB interfaces. 
Feel free to install it and send comments. You just have to build the source RPM, rebuild it and install the binary. For now, I have tested the module under RH4-UP3 and SLES10 with OFED-1.1. HOW TO BUILD THE SOURCE RPM =========================== git clone git://staging.openfabrics.org/~monis/ofed-bond-pkg.git mydir cd mydir/ ./build_rpm.sh ./build_rpm.sh OR ./build_rpm.sh --git-url After installing the binary RPM read the instructions in /usr/local/ofed/docs/ib-bonding.txt Note: Using ib-bonding requires applying a patch for IPoIB and replacing ib_ipoib.ko. Please find the patch in the following message. Please also note that the patch should be applied after ipoib_8111_to_2_6_16.patch. - MoniS From monis at voltaire.com Tue Jan 23 09:15:30 2007 From: monis at voltaire.com (Moni Shoua) Date: Tue, 23 Jan 2007 19:15:30 +0200 Subject: [openib-general] [PATCH] IB/ipoib: Add field dev to struct ipoib_neigh In-Reply-To: <45B6412A.6020207@voltaire.com> References: <45B6412A.6020207@voltaire.com> Message-ID: <45B642B2.5060007@voltaire.com> IPoIB uses a two layer neighboring scheme, such that for each struct neighbour whose device is an ipoib one, there is a struct ipoib_neigh buddy which is created on demand at the tx flow by an ipoib_neigh_alloc(skb->dst->neighbour) call. When using the bonding driver, neighbours are created by the net stack on behalf of the bonding (master) device. On the tx flow the bonding code gets an skb such that skb->dev points to the master device, it changes this skb to point to the slave device and calls the slave hard_start_xmit function. Combining these two flows, there is a hole if some code at ipoib (ipoib_neigh_destructor) assumes that for each struct neighbour it gets, n->dev is an ipoib device so for example netdev_priv(n->dev) would be of type struct ipoib_dev_priv. To fix it, this patch adds a dev field to struct ipoib_neigh which is used instead of the struct neighbour dev one. 
Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz ipoib.h | 3 ++- ipoib_main.c | 22 +++++++++++----------- ipoib_multicast.c | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) --- Index: openib-1.1/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- openib-1.1.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-01-10 17:53:02.744225722 +0200 +++ openib-1.1/drivers/infiniband/ulp/ipoib/ipoib.h 2007-01-10 17:55:04.121544018 +0200 @@ -218,6 +218,7 @@ struct ipoib_neigh { struct sk_buff_head queue; struct neighbour *neighbour; + struct net_device *dev; struct list_head all_neigh_list; struct list_head list; @@ -235,7 +236,7 @@ static inline struct ipoib_neigh **to_ip INFINIBAND_ALEN, sizeof(void *)); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh); +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,struct net_device *dev); void ipoib_neigh_free(struct ipoib_neigh *neigh); extern struct workqueue_struct *ipoib_workqueue; Index: openib-1.1/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- openib-1.1.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-01-10 17:53:02.717230544 +0200 +++ openib-1.1/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-01-10 17:58:55.531209253 +0200 @@ -516,7 +516,7 @@ static void neigh_add_path(struct sk_buf struct ipoib_path *path; struct ipoib_neigh *neigh; - neigh = ipoib_neigh_alloc(skb->dst->neighbour); + neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (!neigh) { ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -799,7 +799,7 @@ static void ipoib_set_mcast_list(struct static void ipoib_neigh_destructor(struct neighbour *n) { struct ipoib_neigh *neigh; - struct ipoib_dev_priv *priv = netdev_priv(n->dev); + struct ipoib_dev_priv *priv; unsigned long flags; struct ipoib_ah *ah = NULL; @@ -808,12 +808,14 @@ static void ipoib_neigh_destructor(struc list_for_each_entry(tn, 
&ipoib_all_neigh_list, all_neigh_list) if (tn->neighbour == n) { nn = tn; + neigh = *to_ipoib_neigh(n); break; } spin_unlock(&ipoib_all_neigh_list_lock); - if (!nn) + if (!nn || !neigh) return; + priv = netdev_priv(neigh->dev); ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n", be32_to_cpup((__be32 *) n->ha), @@ -821,13 +823,9 @@ static void ipoib_neigh_destructor(struc spin_lock_irqsave(&priv->lock, flags); - neigh = *to_ipoib_neigh(n); - if (neigh) { - if (neigh->ah) - ah = neigh->ah; - list_del(&neigh->list); - ipoib_neigh_free(neigh); - } + ah = neigh->ah; + list_del(&neigh->list); + ipoib_neigh_free(neigh); spin_unlock_irqrestore(&priv->lock, flags); @@ -835,7 +833,8 @@ static void ipoib_neigh_destructor(struc ipoib_put_ah(ah); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, + struct net_device *dev) { struct ipoib_neigh *neigh; @@ -849,6 +848,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(st spin_lock(&ipoib_all_neigh_list_lock); list_add_tail(&neigh->all_neigh_list, &ipoib_all_neigh_list); neigh->neighbour->ops->destructor = ipoib_neigh_destructor; + neigh->dev = dev; spin_unlock(&ipoib_all_neigh_list_lock); return neigh; Index: openib-1.1/drivers/infiniband/ulp/ipoib/ipoib_multicast.c =================================================================== --- openib-1.1.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-01-10 17:53:01.077523451 +0200 +++ openib-1.1/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-01-10 17:58:04.808269239 +0200 @@ -770,7 +770,7 @@ out: if (skb->dst && skb->dst->neighbour && !*to_ipoib_neigh(skb->dst->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour); + struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (neigh) { kref_get(&mcast->ah->ref); From mshefty at ichips.intel.com Tue Jan 23 09:20:21 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 23 Jan 
2007 09:20:21 -0800 Subject: [openib-general] [RFC/PATCH v3] rdma/cma: add RDMA_PS_IPOIB port space In-Reply-To: References: Message-ID: <45B643D5.7010104@ichips.intel.com> Or Gerlitz wrote: > Add to the RDMA CM an IPoIB port space (RDMA_PS_IPOIB) whose semantics are similar > to those of RDMA_PS_UDP where RDMA_PS_IPOIB IDs allow for inter operability with > IPoIB on some traffic patterns. I think we want to limit the ipoib port space to join/leave multicast groups. Would there be any reason to support listen/connect/accept (i.e. SIDR) operations? The udp port space is also intended to support any rdma device, eventually. Ipoib should be restricted to IB devices only. - Sean From rdreier at cisco.com Tue Jan 23 09:21:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 23 Jan 2007 09:21:30 -0800 Subject: [openib-general] [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events In-Reply-To: <3196.212.72.208.162.1169572014.squirrel@dev.mellanox.co.il> (dotanb@dev.mellanox.co.il's message of "Tue, 23 Jan 2007 19:06:54 +0200 (IST)") References: <1169026741.20182.3.camel@mtls05.yok.mtl.com> <3196.212.72.208.162.1169572014.squirrel@dev.mellanox.co.il> Message-ID: > I tried to apply the patch locally with -p1, and everything was fine .... I don't see how that could work... the patch headers have pathnames like gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c in them, so stripping just one directory would leave something like src/userspace/libibverbs/examples/rc_pingpong.c, which couldn't possibly apply in my libibverbs directory. 
From mshefty at ichips.intel.com Tue Jan 23 09:31:35 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 09:31:35 -0800 Subject: [openib-general] [RFC/PATCH v3] rdma/cma: add RDMA_PS_IPOIB port space In-Reply-To: <45B643D5.7010104@ichips.intel.com> References: <45B643D5.7010104@ichips.intel.com> Message-ID: <45B64677.5090701@ichips.intel.com> > I think we want to limit the ipoib port space to join/leave multicast groups. > Would there be any reason to support listen/connect/accept (i.e. SIDR) operations? I was thinking of SIDR, but what about connected mode ipoib? This could make the ipoib port space interesting, or require breaking it into two separate port spaces, or... I'm only going to worry about multicast for now, unless there's a reason to consider other use. - Sean From rdreier at cisco.com Tue Jan 23 09:37:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 23 Jan 2007 09:37:58 -0800 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: <20070119185849.GT12386@obsidianresearch.com> (Jason Gunthorpe's message of "Fri, 19 Jan 2007 11:58:49 -0700") References: <20070118230744.GR12386@obsidianresearch.com> <20070119131024.GD28197@mellanox.co.il> <20070119185849.GT12386@obsidianresearch.com> Message-ID: Thanks, I queued this for 2.6.21. I think the tradeoff between risk and return is not a good one for 2.6.20 -- I'm irrationally afraid this is going to break the user-kernel ABI in some screwy case, and I think the impact of the problem itself is not that great (annoying kernel messages for the tiny group of people sending RMPP messages from userspace on ia64). From mst at mellanox.co.il Tue Jan 23 10:23:52 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 23 Jan 2007 20:23:52 +0200 Subject: [openib-general] [PATCH] IB/ipoib: Add field dev to struct ipoib_neigh In-Reply-To: <45B642B2.5060007@voltaire.com> References: <45B6412A.6020207@voltaire.com> <45B642B2.5060007@voltaire.com> Message-ID: <20070123182352.GF10394@mellanox.co.il> > Quoting Moni Shoua : > Subject: [PATCH] IB/ipoib: Add field dev to struct ipoib_neigh > > IPoIB uses a two layer neighboring scheme, such that for each struct neighbour > whose device is an ipoib one, there is a struct ipoib_neigh buddy which is > created on demand at the tx flow by an ipoib_neigh_alloc(skb->dst->neighbour) > call. > > When using the bonding driver, neighbours are created by the net stack on behalf > of the bonding (master) device. On the tx flow the bonding code gets an skb such > that skb->dev points to the master device, it changes this skb to point on the > slave device and calls the slave hard_start_xmit function. > > Combing these two flows, there is a hole if some code at ipoib > (ipoib_neigh_destructor) assumes that for each struct neighbour it gets, n->dev > is an ipoib device so for example netdev_priv(n->dev) would be of type struct > ipoib_dev_priv. > > To fix it, this patch adds a dev field to struct ipoib_neigh which is used > instead of the struct neighbour dev one. > > Signed-off-by: Moni Shoua > Signed-off-by: Or Gerlitz Just to clarify - you previously mentionned you saw problems with 2.6.16 backport. Is this an issue you see with 2.6.20 as well? Also - in your approach, what prevents the device from going away while there are still ipoib_neigh objects around? Also - if neigh does not point to ipoib device, our neigh destructor won't be called for it, will it? What will clean the ipoib neigh then? -- MST From mst at mellanox.co.il Tue Jan 23 10:34:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 23 Jan 2007 20:34:30 +0200 Subject: [openib-general] [RFC/PATCH v3] rdma/cma: add RDMA_PS_IPOIB port space In-Reply-To: <45B64677.5090701@ichips.intel.com> References: <45B643D5.7010104@ichips.intel.com> <45B64677.5090701@ichips.intel.com> Message-ID: <20070123183430.GI10394@mellanox.co.il> > I was thinking of SIDR, but what about connected mode ipoib? Note IPoIB CM does not follow the SDP/CMA model for listening on multiple Service IDs at all. There's a single well known Service ID and that is all. -- MST From mst at mellanox.co.il Tue Jan 23 10:29:55 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 20:29:55 +0200 Subject: [openib-general] [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events In-Reply-To: References: <1169026741.20182.3.camel@mtls05.yok.mtl.com> Message-ID: <20070123182955.GH10394@mellanox.co.il> > Subject: Re: [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events > > Thanks, applied after s/num_of_events/num_cq_events/ > > > --- gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 10:58:02.000000000 +0200 > > +++ gen2_devel_user/src/userspace/libibverbs/examples/rc_pingpong.c 2007-01-17 11:09:59.000000000 +0200 This one looks like -p4. > > BTW, can you make sure your patches apply to the libibverbs tree with > "patch -p1" (or better yet, with "git apply --whitespace=error-all")? > I had to edit the patch headers to apply this to my git tree. That's a bit extreme. git-apply gets the -pNUM flag, this should be sufficient. -- MST From mst at mellanox.co.il Tue Jan 23 10:28:38 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Tue, 23 Jan 2007 20:28:38 +0200 Subject: [openib-general] [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events In-Reply-To: References: <1169026741.20182.3.camel@mtls05.yok.mtl.com> <3196.212.72.208.162.1169572014.squirrel@dev.mellanox.co.il> Message-ID: <20070123182838.GG10394@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [libibverbs/examples] [PATCH] Added resource cleaning before end of pingpong tests + ack to CQ events > > > I tried to apply the patch locally with -p1, and everything was fine .... > > I don't see how that could work... the patch headers have pathnames > like gen2_devel_user.orig/src/userspace/libibverbs/examples/rc_pingpong.c > in them, so stripping just one directory would leave something like > src/userspace/libibverbs/examples/rc_pingpong.c, which couldn't > possibly apply in my libibverbs directory. I think Dotan's working in an OFED source tree, that's the directory structure it has. So if you stay in directory where OFED unpacked the sources, you see src/userspace/libibverbs/examples/rc_pingpong.c, and this is the format OFED uses for patches (otherwise we'd need some other way to figure out which package to apply the patch to). -- MST From mst at mellanox.co.il Tue Jan 23 10:43:14 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 20:43:14 +0200 Subject: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 In-Reply-To: References: Message-ID: <20070123184314.GJ10394@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [openib-general] [PATCH] IB/core - ib_umad can cause address alignment fault on ia64 > > Thanks, I queued this for 2.6.21. 
I think the tradeoff between risk > and return is not a good one for 2.6.20 -- I'm irrationally afraid > this is going to break the user-kernel ABI in some screwy case, and I > think the impact of the problem itself is not that great (annoying > kernel messages for the tiny group of people sending RMPP messages > from userspace on ia64). OK. I've put it in OFED so it'll see some testing meanwhile. -- MST From nimrodg at mellanox.com Tue Jan 23 10:56:02 2007 From: nimrodg at mellanox.com (Nimrod Gindi) Date: Tue, 23 Jan 2007 10:56:02 -0800 Subject: [openib-general] OFED release testing Task force meeting minutes Message-ID: <1E3DCD1C63492545881FACB6063A57C1BDCF10@mtiexch01.mti.com> Meeting took place on Thursday - Jan. 18th, 2007 8:30AM (PST) Agenda: 1. Review report summary (sent from Amit K.- Mellanox) and agree of common structure to be used 2. Open discussion Attending companies: Mellanox, NetEffect, Qlogic, Voltaire, SystemFabricWorks Discussion Items and Action Items: 1) Reviewed the report different spread sheets and fields 2) Suggestions made to add the followings: a. Test descriptions b. Tested matrix 3) Agreed Action Items: a. AI 1: Amit K (Mellanox) and Moni L (Voltaire) - work on tests being reported on 2nd level report. b. AI 2: Jeremy B (Qlogic) Work on forming a matrix to report the tested environments. We agreed to review the above via e-mails before the next meeting. Follow-up meeting will be scheduled for 31st January 2007 8:30am PDT=11am EDT=6pm Israel. Nimrod Gindi Mellanox Technologies Ltd. mail : nimrodg at mellanox.com Cell : +1-408-750-4801 Office: +1-347-342-0011 Fax : +1-212-987-0275 -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From mshefty at ichips.intel.com Tue Jan 23 11:44:47 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 11:44:47 -0800 Subject: [openib-general] [RFC/PATCH] librdmacm: use the ipoib broadcast group qkey In-Reply-To: References: Message-ID: <45B665AF.3040806@ichips.intel.com> Or Gerlitz wrote: > Modify librdmacm use a qkey for its UD IDs/QPs delivered to it by the > rdma cm kernel code instead the a hard coded RDMA_UD_QKEY. For RDMA_PS_UDP > ID, the qkey is provided by the kernel in ADDR_RESOLVED and CONNECT_REQUEST > events and is stored by the library in struct cma_id_private. Later > the library use the qkey when it is called to create a UD QP. Reading the code, the following sequence does not work: rdma_join_multicast then rdma_create_qp. The QP does not automatically attach to previously joined multicast groups on creation. (This is a problem with the existing code.) The reverse sequence (create then join) does work. I'm adding the ipoib support to the librdmacm now, but we will still need a separate patch to fix attaching to joined multicast groups when the QP is created. - Sean From tziporet at mellanox.co.il Tue Jan 23 12:11:26 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 23 Jan 2007 22:11:26 +0200 Subject: [openib-general] Sonoma developers conference - can we move it to after OFED 1.2 release? Message-ID: <45B66BEE.4040402@mellanox.co.il> Hi All, I just saw in the openfabrics web page this new message: ***UPDATE*** The OpenFabrics Alliance will host the third annual Developer's Workshop at the Lodge in Sonoma - March 11-14, 2007. Click here for details. I asked before to delay this workshop since we will be in the middle of the OFED 1.2 release. I also think it will be much better to have the workshop *after *the release since it will enable us to understand what went good, and what need to be improved in the process. 
I understand that from pricing point of view we can change the dates, without any extra fee. Can we do this change? Thanks, Tziporet -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Tue Jan 23 12:19:34 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Tue, 23 Jan 2007 22:19:34 +0200 Subject: [openib-general] [RFC/PATCH] librdmacm: use the ipoib broadcast group qkey In-Reply-To: <45B665AF.3040806@ichips.intel.com> References: <45B665AF.3040806@ichips.intel.com> Message-ID: <20070123201934.GD20101@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: [RFC/PATCH] librdmacm: use the ipoib broadcast group qkey > > Or Gerlitz wrote: > > Modify librdmacm use a qkey for its UD IDs/QPs delivered to it by the > > rdma cm kernel code instead the a hard coded RDMA_UD_QKEY. For RDMA_PS_UDP > > ID, the qkey is provided by the kernel in ADDR_RESOLVED and CONNECT_REQUEST > > events and is stored by the library in struct cma_id_private. Later > > the library use the qkey when it is called to create a UD QP. > > Reading the code, the following sequence does not work: rdma_join_multicast then > rdma_create_qp. The QP does not automatically attach to previously joined > multicast groups on creation. (This is a problem with the existing code.) The > reverse sequence (create then join) does work. I'm adding the ipoib support to > the librdmacm now, but we will still need a separate patch to fix attaching to > joined multicast groups when the QP is created. Maybe just ask user to always call rdma_join_multicast after rdma_create_qp? Joins are now properly reference counted, so it shouldn't be a problem to repeat this any number of times. Right? 
-- MST From sean.hefty at intel.com Tue Jan 23 12:32:52 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 12:32:52 -0800 Subject: [openib-general] [RFC/PATCH] librdmacm: use the ipoib broadcast group qkey In-Reply-To: <20070123201934.GD20101@mellanox.co.il> Message-ID: <000101c73f2d$a8655580$16d0180a@amr.corp.intel.com> >Maybe just ask user to always call rdma_join_multicast after rdma_create_qp? >Joins are now properly reference counted, so it shouldn't be a problem >to repeat this any number of times. Right? This is the solution for now, and it should work fine. I don't think it would be hard to support creating the QP after joining if someone ever came up with the need, but it doesn't seem like a priority at the moment. - Sean From mshefty at ichips.intel.com Tue Jan 23 13:01:00 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 13:01:00 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <20070123061342.GX10025@mellanox.co.il> References: <45B5124F.60907@ichips.intel.com> <20070123061342.GX10025@mellanox.co.il> Message-ID: <45B6778C.5010803@ichips.intel.com> > Could you please rebase that to 2.6.20-rc5? Yes - but I probably won't get to this until tomorrow. From sean.hefty at intel.com Tue Jan 23 13:16:46 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 13:16:46 -0800 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups Message-ID: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> Add to the rdma_cm an IPOIB port space that allows interoperability with IPoIB multicast traffic. Use of the RDMA_PS_IPOIB is limited to multicast join/leave. Rename the RDMA_UD_QKEY to RDMA_UDP_QKEY to signify that the qkey is only used with the RDMA_PS_UDP port space. Signed-off-by: Sean Hefty --- This patch differs from those posted by Or by limiting the ipoib port space to multicast traffic only. 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5cbef6a..6797e54 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq; static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); +static DEFINE_IDR(ipoib_ps); struct cma_device { struct list_head list; @@ -1822,6 +1823,9 @@ static int cma_get_port(struct rdma_id_private *id_priv) case RDMA_PS_UDP: ps = &udp_ps; break; + case RDMA_PS_IPOIB: + ps = &ipoib_ps; + break; default: return -EPROTONOSUPPORT; } @@ -1936,7 +1940,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = ib_event->param.sidr_rep_rcvd.status; break; } - if (rep->qkey != RDMA_UD_QKEY) { + if (rep->qkey != RDMA_UDP_QKEY) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; break; @@ -2231,7 +2235,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, rep.status = status; if (status == IB_SIDR_SUCCESS) { rep.qp_num = id_priv->qp_num; - rep.qkey = RDMA_UD_QKEY; + rep.qkey = RDMA_UDP_QKEY; } rep.private_data = private_data; rep.private_data_len = private_data_len; @@ -2426,7 +2430,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, return ret; ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); - mc_map[7] = 0x01; /* Use RDMA CM signature */ + if (id_priv->id.ps == RDMA_PS_UDP) { + mc_map[7] = 0x01; /* Use RDMA CM signature */ + rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + } mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); @@ -2434,7 +2441,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, ib_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; - rec.qkey = cpu_to_be32(RDMA_UD_QKEY); comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | @@ -2646,6 +2652,7 @@ static 
void cma_cleanup(void) idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); idr_destroy(&udp_ps); + idr_destroy(&ipoib_ps); } module_init(cma_init); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6b43672..c7a4de7 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -257,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; - if (cm_id->ps == RDMA_PS_UDP) + if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 2f96f33..2d6a770 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -58,6 +58,7 @@ enum rdma_cm_event_type { enum rdma_port_space { RDMA_PS_SDP = 0x0001, + RDMA_PS_IPOIB= 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, RDMA_PS_SCTP = 0x0183 diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h index 9b176df..950424b 100644 --- a/include/rdma/rdma_cm_ib.h +++ b/include/rdma/rdma_cm_ib.h @@ -44,7 +44,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths); -/* Global qkey for UD QPs and multicast groups. */ -#define RDMA_UD_QKEY 0x01234567 +/* Global qkey for UDP QPs and multicast groups. */ +#define RDMA_UDP_QKEY 0x01234567 #endif /* RDMA_CM_IB_H */ From sean.hefty at intel.com Tue Jan 23 13:21:06 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 13:21:06 -0800 Subject: [openib-general] [PATCH 2/2] librdmacm: add support to join IPOIB multicast groups In-Reply-To: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> Message-ID: <000301c73f34$6560f210$16d0180a@amr.corp.intel.com> Add to the librdmacm an IPOIB port space that allows interoperability with IPoIB multicast traffic. 
Use of the RDMA_PS_IPOIB is limited to multicast join/leave. Rename the RDMA_UD_QKEY to RDMA_UDP_QKEY to signify that the qkey is only used with the RDMA_PS_UDP port space. Update mckey to allow testing the RDMA_PS_IPOIB. Signed-off-by: Sean Hefty --- I will commit the mckey changes separately, since that patch is from Or. diff --git a/examples/mckey.c b/examples/mckey.c index 39f77d7..d4f83b8 100644 --- a/examples/mckey.c +++ b/examples/mckey.c @@ -78,6 +78,7 @@ static int message_count = 10; static int is_sender; static char *dst_addr; static char *src_addr; +static enum rdma_port_space port_space = RDMA_PS_UDP; static int create_message(struct cmatest_node *node) { @@ -328,7 +329,7 @@ static int alloc_nodes(void) for (i = 0; i < connections; i++) { test.nodes[i].id = i; ret = rdma_create_id(test.channel, &test.nodes[i].cma_id, - &test.nodes[i], RDMA_PS_UDP); + &test.nodes[i], port_space); if (ret) goto err; } @@ -478,7 +479,7 @@ int main(int argc, char **argv) { int op, ret; - while ((op = getopt(argc, argv, "m:sb:c:C:S:")) != -1) { + while ((op = getopt(argc, argv, "m:sb:c:C:S:p:")) != -1) { switch (op) { case 'm': dst_addr = optarg; @@ -498,6 +499,9 @@ int main(int argc, char **argv) case 'S': message_size = atoi(optarg); break; + case 'p': + port_space = strtol(optarg, NULL, 0); + break; default: printf("usage: %s\n", argv[0]); printf("\t-m multicast_address\n"); @@ -506,6 +510,8 @@ int main(int argc, char **argv) printf("\t[-c connections]\n"); printf("\t[-C message_count]\n"); printf("\t[-S message_size]\n"); + printf("\t[-p port_space - %#x for UDP (default), " + "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB); exit(1); } } diff --git a/examples/udaddy.c b/examples/udaddy.c index ab9ace6..153e39c 100644 --- a/examples/udaddy.c +++ b/examples/udaddy.c @@ -420,7 +420,7 @@ static void create_reply_ah(struct cmatest_node *node, struct ibv_wc *wc) node->ah = ibv_create_ah_from_wc(node->pd, wc, node->mem, node->cma_id->port_num); node->remote_qpn = 
ntohl(wc->imm_data); - node->remote_qkey = RDMA_UD_QKEY; + node->remote_qkey = RDMA_UDP_QKEY; } static int poll_cqs(void) diff --git a/include/rdma/rdma_cma.h b/include/rdma/rdma_cma.h index 88a25b2..e30f8cd 100644 --- a/include/rdma/rdma_cma.h +++ b/include/rdma/rdma_cma.h @@ -61,15 +61,16 @@ enum rdma_cm_event_type { }; enum rdma_port_space { + RDMA_PS_IPOIB= 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, }; /* - * Global qkey value for all UD QPs and multicast groups created via the + * Global qkey value for UDP QPs and multicast groups created via the * RDMA CM. */ -#define RDMA_UD_QKEY 0x01234567 +#define RDMA_UDP_QKEY 0x01234567 struct ib_addr { union ibv_gid sgid; diff --git a/src/cma.c b/src/cma.c index 7ab685b..6a0d076 100644 --- a/src/cma.c +++ b/src/cma.c @@ -698,7 +698,7 @@ static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) qp_attr.port_num = id_priv->id.port_num; qp_attr.qp_state = IBV_QPS_INIT; - qp_attr.qkey = RDMA_UD_QKEY; + qp_attr.qkey = RDMA_UDP_QKEY; /* Will override PS_IPOIB on join */ ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY); if (ret) @@ -729,7 +729,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, if (!qp) return -ENOMEM; - if (id->ps == RDMA_PS_UDP) + if (id->ps == RDMA_PS_UDP || id->ps == RDMA_PS_IPOIB) ret = ucma_init_ud_qp(id_priv, qp); else ret = ucma_init_ib_qp(id_priv, qp); @@ -1136,14 +1136,25 @@ static int ucma_process_establish(struct rdma_cm_id *id) static int ucma_process_join(struct cma_event *evt) { + int ret; + evt->mc->mgid = evt->event.param.ud.ah_attr.grh.dgid; evt->mc->mlid = evt->event.param.ud.ah_attr.dlid; - if (evt->id_priv->id.qp) - return ibv_attach_mcast(evt->id_priv->id.qp, - &evt->mc->mgid, evt->mc->mlid); - else + if (!evt->id_priv->id.qp) return 0; + + if (evt->id_priv->id.ps == RDMA_PS_IPOIB) { + struct ibv_qp_attr qp_attr; + + qp_attr.qkey = evt->event.param.ud.qkey; + ret = ibv_modify_qp(evt->id_priv->id.qp, 
&qp_attr, IBV_QP_QKEY); + if (ret) + return ret; + } + + return ibv_attach_mcast(evt->id_priv->id.qp, &evt->mc->mgid, + evt->mc->mlid); } static void ucma_copy_conn_event(struct cma_event *event, From swise at opengridcomputing.com Tue Jan 23 13:35:45 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 23 Jan 2007 15:35:45 -0600 Subject: [openib-general] [PATCH RFC ] ofed_1_2 simulate neighbour update events by snooping ARP packets Message-ID: <1169588145.26256.12.camel@stevo-desktop> OFED/iWARP Developers, Here is a proposal for supporting the minimum required neighbour update event notifications needed for iwarp devices on the older kernels supported by ofed. This patch is a request for comments. Please review. If you think it looks ok, then I'll provide patches to all the various backports. Steve 2.6.17 backport: simulate neighbour update events by snooping ARP packets Needed to support iWARP devices on backported kernels. This also allows using the current drivers/infiniband/core/addr.c which requires netevents as well. This patch rearranges things a bit: - add the new file in the kernel_addons/backport dir for the ARP snooping / netevent callout code. This file is called rdma_netevents.c. - modify the kernel_patches/backports/2.6.17/linux_stuff* patch to include rdma_netevents.c _and_ the netevent.c file into its own module called rdma_ne - remove the backport patch to revert addr.c to snoop ARP packets. 
Signed-off-by: Steve Wise --- .../backport/2.6.17/include/src/rdma_netevents.c | 91 +++++++++++++++++++++++ .../2.6.17/addr_1_netevents_revert_to_2_6_17.patch | 76 ------------------- .../backport/2.6.17/linux_stuff_to_2_6_17.patch | 13 ++- 3 files changed, 99 insertions(+), 81 deletions(-) diff --git a/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c b/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c new file mode 100644 index 0000000..1e9422f --- /dev/null +++ b/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2007 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2007 Chelsio Communications, Inc. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + * + */ + +/* + * Simulate neighbour update netevents by snooping ARP packets. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include + +MODULE_AUTHOR("Steve Wise"); +MODULE_DESCRIPTION("Netevent Notification Module"); +MODULE_LICENSE("Dual BSD/GPL"); + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *dev2) +{ + struct arphdr *arp_hdr; + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if (op == ARPOP_REQUEST || op == ARPOP_REPLY) { + arp_ptr = (u8 *)(arp_hdr + 1); /* skip fixed-size arp header */ + arp_ptr += skb->dev->addr_len; /* skip src ha */ + memcpy(&gw, arp_ptr, 4); /* pull the SPA */ + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) { + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + } + } + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; + +static int init(void) +{ + dev_add_pack(&arp); + return 0; +} + +static void cleanup(void) +{ + dev_remove_pack(&arp); +} + +module_init(init); +module_exit(cleanup); diff --git a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - diff --git a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch index eb2285f..af7e814 100644 --- a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch +++ b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch @@ -5,20 +5,23 @@ index 0000000..58cf933 +++ 
b/drivers/infiniband/core/genalloc.c @@ -0,0 +1 @@ +#include "src/genalloc.c" -diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c +diff --git a/drivers/infiniband/core/rdma_netevents.c b/drivers/infiniband/core/rdma_netevents.c new file mode 100644 index 0000000..58cf933 --- /dev/null -+++ b/drivers/infiniband/core/netevent.c -@@ -0,0 +1 @@ ++++ b/drivers/infiniband/core/rdma_netevents.c +@@ -0,0 +1,2 @@ +#include "src/netevent.c" ++#include "src/rdma_netevents.c" diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 50fb1cd..456bfd0 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile -@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o +@@ -30,3 +30,7 @@ ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ uverbs_marshall.o + -+ib_core-y += genalloc.o netevent.o ++infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) += rdma_ne.o ++rdma_ne-y := rdma_netevents.o ++ib_core-y += genalloc.o From sean.hefty at intel.com Tue Jan 23 15:00:09 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 15:00:09 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <45B4DA10.3080400@indiana.edu> Message-ID: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> Posting to openib-general list... >RDMA CM has multicast of course, though it seems no means of preventing >address collisions (to me, that means two separate MPI jobs using the >same multicast address). I know that part of the new multicast support >you had developed a few months ago was the ability to specify a '0' >MGID/MLID to indicate that an unused multicast address should be used >and returned. > >How hard would it be to add this functionality to RDMA CM? I looked into this, and it seems doable. I hacked the kernel rdma_cm to join a multicast group with an mgid of 0, and it seemed to work as far as I could test it without more extensive changes. 
(My test didn't actually transfer data, but the join succeeded, the MGID/MLID was exported to userspace, and different applications joined different groups.) What would be needed is a way for the user to indicate that they need a unique address. An obvious way to accomplish this is for the user to specify an IP address of 0.0.0.0 when calling rdma_join_multicast(). The user would first need to bind to a specific device by calling rdma_bind_addr() with a local IP address. If more than one group is joined this way, then rdma_leave_multicast() would need some way to distinguish between the different groups joined by a single user. (rdma_leave_multicast takes the IP address of the group to leave.) Providing a "port number" with the sockaddr would work. The port number would need to match when joining/leaving, but is not part of the multicast address, essentially making it a join index specified by the user. Your code would look something like this: rdma_bind_addr(local IP address) rdma_join_multicast(0.0.0.0, port 0) <- exchange group info out of band rdma_join_multicast(0.0.0.0, port 1) <- exchange group info out of band send data to a lot of nodes at once rdma_leave_multicast(0.0.0.0, port 0) rdma_leave_multicast(0.0.0.0, port 1) If this sounds like it would work for you, let me know, and I can create a patch to test this idea more. - Sean From hnguyen at linux.vnet.ibm.com Tue Jan 23 15:09:56 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Wed, 24 Jan 2007 00:09:56 +0100 Subject: [openib-general] [PATCH 2.6.21 0/4] ehca: remove do_mmap() and some bug fixes Message-ID: <200701240009.57364.hnguyen@linux.vnet.ibm.com> Hello Roland! Here is a patch set for ehca as a result of previous discussions and comments: 1. fix improper use of yield within spinlock context 2. fix mismatched use of spin_unlock in irq handler 3. remove do_mmap() 4. 
remove obsolete prototypes PS: I've sent the first two recently for 2.6.20, but adding here for completeness for 2.6.21. Thanks Nam From hnguyen at linux.vnet.ibm.com Tue Jan 23 15:10:36 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Wed, 24 Jan 2007 00:10:36 +0100 Subject: [openib-general] [PATCH 2.6.21 1/4] ehca: fix improper use of yield with spinlock held Message-ID: <200701240010.37540.hnguyen@linux.vnet.ibm.com> Here is a patch for ehca_cq.c that fixes improper use of yield with spinlock held. Thanks Nam Signed-off-by: Hoang-Nam Nguyen --- ehca_cq.c | 5 ++++- 1 files changed, 4 insertions(+), 1 deletion(-) diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-19 19:40:32.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-20 00:15:34.000000000 +0100 @@ -344,8 +344,11 @@ int ehca_destroy_cq(struct ib_cq *cq) unsigned long flags; spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_callbacks) + while (my_cq->nr_callbacks) { + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); yield(); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + } idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); From hnguyen at linux.vnet.ibm.com Tue Jan 23 15:12:30 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Wed, 24 Jan 2007 00:12:30 +0100 Subject: [openib-general] [PATCH 2.6.21 2/4] ehca: fix mismatched use of spin_unlock in irq handler Message-ID: <200701240012.31621.hnguyen@linux.vnet.ibm.com> Here is a patch for ehca_irq.c that fixes mismatched use of spin_unlock in irq handler. 
Thanks Nam Signed-off-by: Hoang-Nam Nguyen --- ehca_irq.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletion(-) diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-19 19:40:32.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-23 22:38:02.000000000 +0100 @@ -440,7 +440,8 @@ void ehca_tasklet_eq(unsigned long data) cq = idr_find(&ehca_cq_idr, token); if (cq == NULL) { - spin_unlock(&ehca_cq_idr_lock); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); break; } From hnguyen at linux.vnet.ibm.com Tue Jan 23 15:13:35 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Wed, 24 Jan 2007 00:13:35 +0100 Subject: [openib-general] [PATCH 2.6.21 3/4] ehca: remove do_mmap() Message-ID: <200701240013.37135.hnguyen@linux.vnet.ibm.com> This patch removes do_mmap() from ehca: - Call remap_pfn_range() for hardware register block - Use vm_insert_page() to register memory allocated for completion queues and queue pairs - The actual mmap() call/trigger is now controlled by user space, ie. 
libehca Thanks Nam Signed-off-by: Hoang-Nam Nguyen --- ehca_classes.h | 15 +- ehca_cq.c | 65 ++------- ehca_iverbs.h | 8 - ehca_main.c | 6 ehca_qp.c | 78 ++--------- ehca_uverbs.c | 395 +++++++++++++++++++++++---------------------------------- 6 files changed, 204 insertions(+), 363 deletions(-) diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h 2007-01-20 00:19:10.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h 2007-01-20 00:21:21.000000000 +0100 @@ -119,13 +119,14 @@ struct ehca_qp { struct ipz_qp_handle ipz_qp_handle; struct ehca_pfqp pf; struct ib_qp_init_attr init_attr; - u64 uspace_squeue; - u64 uspace_rqueue; - u64 uspace_fwh; struct ehca_cq *send_cq; struct ehca_cq *recv_cq; unsigned int sqerr_purgeflag; struct hlist_node list_entries; + /* mmap counter for resources mapped into user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; }; /* must be power of 2 */ @@ -142,13 +143,14 @@ struct ehca_cq { struct ipz_cq_handle ipz_cq_handle; struct ehca_pfcq pf; spinlock_t cb_lock; - u64 uspace_queue; - u64 uspace_fwh; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; u32 nr_callbacks; spinlock_t task_lock; u32 ownpid; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; }; enum ehca_mr_flag { @@ -283,7 +285,6 @@ extern int ehca_port_act_time; extern int ehca_use_hp_mr; struct ipzu_queue_resp { - u64 queue; /* points to first queue entry */ u32 qe_size; /* queue entry size */ u32 act_nr_of_sg; u32 queue_length; /* queue length allocated in bytes */ @@ -296,7 +297,6 @@ struct ehca_create_cq_resp { u32 cq_number; u32 token; struct ipzu_queue_resp ipz_queue; - struct h_galpas galpas; }; struct ehca_create_qp_resp { @@ -309,7 +309,6 @@ struct ehca_create_qp_resp { u32 dummy; /* padding for 8 byte alignment */ 
struct ipzu_queue_resp ipz_squeue; struct ipzu_queue_resp ipz_rqueue; - struct h_galpas galpas; }; struct ehca_alloc_cq_parms { diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-20 00:19:10.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-20 00:21:21.000000000 +0100 @@ -267,7 +267,6 @@ struct ib_cq *ehca_create_cq(struct ib_d if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; - struct vm_area_struct *vma; memset(&resp, 0, sizeof(resp)); resp.cq_number = my_cq->cq_number; resp.token = my_cq->token; @@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_d resp.ipz_queue.queue_length = ipz_queue->queue_length; resp.ipz_queue.pagesize = ipz_queue->pagesize; resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000, - ipz_queue->queue_length, - (void**)&resp.ipz_queue.queue, - &vma); - if (ret) { - ehca_err(device, "Could not mmap queue pages"); - cq = ERR_PTR(ret); - goto create_cq_exit4; - } - my_cq->uspace_queue = resp.ipz_queue.queue; - resp.galpas = my_cq->galpas; - ret = ehca_mmap_register(my_cq->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(device, "Could not mmap fw_handle"); - cq = ERR_PTR(ret); - goto create_cq_exit5; - } - my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); - goto create_cq_exit6; + goto create_cq_exit4; } } return cq; -create_cq_exit6: - ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - -create_cq_exit5: - ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length); - create_cq_exit4: ipz_queue_dtor(&my_cq->ipz_queue); @@ -333,7 +306,6 @@ create_cq_exit1: int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; - int ret; 
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int cq_num = my_cq->cq_number; struct ib_device *device = cq->device; @@ -343,6 +315,20 @@ int ehca_destroy_cq(struct ib_cq *cq) u32 cur_pid = current->tgid; unsigned long flags; + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + if (my_cq->ownpid != cur_pid) { + ehca_err(device, "Invalid caller pid=%x ownpid=%x " + "cq_num=%x", + cur_pid, my_cq->ownpid, my_cq->cq_number); + return -EINVAL; + } + } + spin_lock_irqsave(&ehca_cq_idr_lock, flags); while (my_cq->nr_callbacks) { spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); @@ -353,25 +339,6 @@ int ehca_destroy_cq(struct ib_cq *cq) idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { - ehca_err(device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_cq->ownpid); - return -EINVAL; - } - - /* un-mmap if vma alloc */ - if (my_cq->uspace_queue ) { - ret = ehca_munmap(my_cq->uspace_queue, - my_cq->ipz_queue.queue_length); - if (ret) - ehca_err(device, "Could not munmap queue ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(device, "Could not munmap fwh ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - } - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ @@ -400,7 +367,7 @@ int ehca_resize_cq(struct ib_cq *cq, int struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); u32 cur_pid = current->tgid; - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + if (cq->uobject && my_cq->ownpid != cur_pid) { ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x", cur_pid, my_cq->ownpid); return -EINVAL; diff -Nurp 
infiniband_orig/drivers/infiniband/hw/ehca/ehca_iverbs.h infiniband_work/drivers/infiniband/hw/ehca/ehca_iverbs.h --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_iverbs.h 2007-01-20 00:19:10.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_iverbs.h 2007-01-20 00:21:21.000000000 +0100 @@ -171,14 +171,6 @@ int ehca_mmap(struct ib_ucontext *contex void ehca_poll_eqs(unsigned long data); -int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped, - struct vm_area_struct **vma); - -int ehca_mmap_register(u64 physical,void **mapped, - struct vm_area_struct **vma); - -int ehca_munmap(unsigned long addr, size_t len); - #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_main.c infiniband_work/drivers/infiniband/hw/ehca/ehca_main.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_main.c 2007-01-20 00:19:10.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_main.c 2007-01-20 00:21:21.000000000 +0100 @@ -52,7 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0019"); +MODULE_VERSION("SVNEHCA_0020"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -288,7 +288,7 @@ int ehca_init_device(struct ehca_shca *s strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 5; + shca->ib_device.uverbs_abi_ver = 6; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -790,7 +790,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0019)\n"); + "(Rel.: SVNEHCA_0020)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); diff -Nurp 
infiniband_orig/drivers/infiniband/hw/ehca/ehca_qp.c infiniband_work/drivers/infiniband/hw/ehca/ehca_qp.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_qp.c 2007-01-20 00:19:10.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_qp.c 2007-01-20 00:21:21.000000000 +0100 @@ -637,7 +637,6 @@ struct ib_qp *ehca_create_qp(struct ib_p struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; struct ehca_create_qp_resp resp; - struct vm_area_struct * vma; memset(&resp, 0, sizeof(resp)); resp.qp_num = my_qp->real_qp_num; @@ -651,59 +650,21 @@ struct ib_qp *ehca_create_qp(struct ib_p resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000, - ipz_rqueue->queue_length, - (void**)&resp.ipz_rqueue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap rqueue pages"); - goto create_qp_exit3; - } - my_qp->uspace_rqueue = resp.ipz_rqueue.queue; /* squeue properties */ resp.ipz_squeue.qe_size = ipz_squeue->qe_size; resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; resp.ipz_squeue.queue_length = ipz_squeue->queue_length; resp.ipz_squeue.pagesize = ipz_squeue->pagesize; resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000, - ipz_squeue->queue_length, - (void**)&resp.ipz_squeue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap squeue pages"); - goto create_qp_exit4; - } - my_qp->uspace_squeue = resp.ipz_squeue.queue; - /* fw_handle */ - resp.galpas = my_qp->galpas; - ret = ehca_mmap_register(my_qp->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap fw_handle"); - goto create_qp_exit5; - } - my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; - if 
(ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit6; + goto create_qp_exit3; } } return &my_qp->ib_qp; -create_qp_exit6: - ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - -create_qp_exit5: - ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length); - -create_qp_exit4: - ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length); - create_qp_exit3: ipz_queue_dtor(&my_qp->ipz_rqueue); ipz_queue_dtor(&my_qp->ipz_squeue); @@ -931,7 +892,7 @@ static int internal_modify_qp(struct ib_ my_qp->qp_type == IB_QPT_SMI) && statetrans == IB_QPST_SQE2RTS) { /* mark next free wqe if kernel */ - if (my_qp->uspace_squeue == 0) { + if (!ibqp->uobject) { struct ehca_wqe *wqe; /* lock send queue */ spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); @@ -1417,11 +1378,18 @@ int ehca_destroy_qp(struct ib_qp *ibqp) enum ib_qp_type qp_type; unsigned long flags; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; + if (ibqp->uobject) { + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(ibqp->device, "Resources still referenced in " + "user space qp_num=%x", ibqp->qp_num); + return -EINVAL; + } + if (my_pd->ownpid != cur_pid) { + ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } } if (my_qp->send_cq) { @@ -1439,24 +1407,6 @@ int ehca_destroy_qp(struct ib_qp *ibqp) idr_remove(&ehca_qp_idr, my_qp->token); spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); - /* un-mmap if vma alloc */ - if (my_qp->uspace_rqueue) { - ret = ehca_munmap(my_qp->uspace_rqueue, - my_qp->ipz_rqueue.queue_length); - if (ret) - ehca_err(ibqp->device, "Could not munmap rqueue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_squeue, - my_qp->ipz_squeue.queue_length); - if 
(ret) - ehca_err(ibqp->device, "Could not munmap squeue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x", - qp_num); - } - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_uverbs.c infiniband_work/drivers/infiniband/hw/ehca/ehca_uverbs.c --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_uverbs.c 2007-01-20 00:19:10.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_uverbs.c 2007-01-20 00:21:21.000000000 +0100 @@ -68,105 +68,183 @@ int ehca_dealloc_ucontext(struct ib_ucon return 0; } -struct page *ehca_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static void ehca_mm_open(struct vm_area_struct *vma) { - struct page *mypage = NULL; - u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; - u32 idr_handle = fileoffset >> 32; - u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... 
*/ - u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ - u32 cur_pid = current->tgid; - unsigned long flags; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ehca_pd *pd; - u64 offset; - void *vaddr; + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} - switch (q_type) { - case 1: /* CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq = idr_find(&ehca_cq_idr, idr_handle); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); +static void ehca_mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} - /* make sure this mmap really belongs to the authorized user */ - if (!cq) { - ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; +static struct vm_operations_struct vm_ops = { + .open = ehca_mm_open, + .close = ehca_mm_close, +}; + +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) +{ + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical); + /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, + vsize, vma->vm_page_prot); + if (unlikely(ret)) { + 
ehca_gen_err("remap_pfn_range() failed ret=%x", ret); + return -ENOMEM; + } + + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, + u32 *mm_count) +{ + int ret; + u64 start, ofs; + struct page *page; + + vma->vm_flags |= VM_RESERVED; + start = vma->vm_start; + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); + page = virt_to_page(virt_addr); + ret = vm_insert_page(vma, start, page); + if (unlikely(ret)) { + ehca_gen_err("vm_insert_page() failed rc=%x", ret); + return ret; } + start += PAGE_SIZE; + } + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; - if (cq->ownpid != cur_pid) { + return 0; +} + +static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); + ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); + if (unlikely(ret)) { ehca_err(cq->ib_cq.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, cq->ownpid); - return NOPAGE_SIGBUS; + "ehca_mmap_fw() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } + break; - if (rsrc_type == 2) { - ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&cq->ipz_queue, offset); - ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); + case 2: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); + ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_queue() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } break; - case 2: /* QP */ - spin_lock_irqsave(&ehca_qp_idr_lock, flags); - qp = idr_find(&ehca_qp_idr, 
idr_handle); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + default: + ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", + rsrc_type, cq->cq_number); + return -EINVAL; + } - /* make sure this mmap really belongs to the authorized user */ - if (!qp) { - ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; + return 0; +} + +static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); + ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return -ENOMEM; } + break; - pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); - if (pd->ownpid != cur_pid) { + case 2: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + if (unlikely(ret)) { ehca_err(qp->ib_qp.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, pd->ownpid); - return NOPAGE_SIGBUS; + "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } + break; - if (rsrc_type == 2) { /* rqueue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); - } else if (rsrc_type == 3) { /* squeue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); + case 3: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, 
&qp->ipz_squeue, &qp->mm_count_squeue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } break; default: - ehca_gen_err("bad queue type %x", q_type); - return NOPAGE_SIGBUS; - } - - if (!mypage) { - ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; + ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", + rsrc_type, qp->ib_qp.qp_num); + return -EINVAL; } - get_page(mypage); - return mypage; + return 0; } -static struct vm_operations_struct ehcau_vm_ops = { - .nopage = ehca_nopage, -}; - int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; @@ -175,7 +253,6 @@ int ehca_mmap(struct ib_ucontext *contex u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ u32 cur_pid = current->tgid; u32 ret; - u64 vsize, physical; unsigned long flags; struct ehca_cq *cq; struct ehca_qp *qp; @@ -201,44 +278,12 @@ int ehca_mmap(struct ib_ucontext *contex if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq); - vma->vm_flags |= VM_RESERVED; - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(cq->ib_cq.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = cq->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; - - ehca_dbg(cq->ib_cq.device, - "vsize=%lx physical=%lx", vsize, physical); - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (ret) { - ehca_err(cq->ib_cq.device, - "remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - break; - - case 2: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq); - vma->vm_flags |= 
VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_cq(vma, cq, rsrc_type); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_cq() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } break; @@ -262,50 +307,12 @@ int ehca_mmap(struct ib_ucontext *contex if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp); - vma->vm_flags |= VM_RESERVED; - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(qp->ib_qp.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = qp->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; - - ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx", - vsize, physical); - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (ret) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - break; - - case 2: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp); - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - case 3: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp); - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_qp(vma, qp, rsrc_type); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_qp() failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } break; @@ -316,77 +323,3 @@ int ehca_mmap(struct ib_ucontext *contex return 0; } - -int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped, - struct 
vm_area_struct **vma) -{ - down_write(&current->mm->mmap_sem); - *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - foffset); - up_write(&current->mm->mmap_sem); - if (!(*mapped)) { - ehca_gen_err("couldn't mmap foffset=%lx length=%lx", - foffset, length); - return -EINVAL; - } - - *vma = find_vma(current->mm, (u64)*mapped); - if (!(*vma)) { - down_write(&current->mm->mmap_sem); - do_munmap(current->mm, 0, length); - up_write(&current->mm->mmap_sem); - ehca_gen_err("couldn't find vma queue=%p", *mapped); - return -EINVAL; - } - (*vma)->vm_flags |= VM_RESERVED; - (*vma)->vm_ops = &ehcau_vm_ops; - - return 0; -} - -int ehca_mmap_register(u64 physical, void **mapped, - struct vm_area_struct **vma) -{ - int ret; - unsigned long vsize; - /* ehca hw supports only 4k page */ - ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma); - if (ret) { - ehca_gen_err("could'nt mmap physical=%lx", physical); - return ret; - } - - (*vma)->vm_flags |= VM_RESERVED; - vsize = (*vma)->vm_end - (*vma)->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", - (*vma)->vm_end - (*vma)->vm_start); - return -EINVAL; - } - - (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot); - (*vma)->vm_flags |= VM_IO | VM_RESERVED; - - ret = remap_pfn_range((*vma), (*vma)->vm_start, - physical >> PAGE_SHIFT, vsize, - (*vma)->vm_page_prot); - if (ret) { - ehca_gen_err("remap_pfn_range() failed ret=%x", ret); - return -ENOMEM; - } - - return 0; - -} - -int ehca_munmap(unsigned long addr, size_t len) { - int ret = 0; - struct mm_struct *mm = current->mm; - if (mm) { - down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); - up_write(&mm->mmap_sem); - } - return ret; -} From hnguyen at linux.vnet.ibm.com Tue Jan 23 15:14:18 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Wed, 24 Jan 2007 00:14:18 +0100 Subject: [openib-general] [PATCH 2.6.21 4/4] ehca: remove obsolete prototypes Message-ID: <200701240014.19439.hnguyen@linux.vnet.ibm.com> Here 
is a patch for ehca_classes.h that removes obsolete prototypes. Thanks Nam Signed-off-by: Hoang-Nam Nguyen --- ehca_classes.h | 14 -------------- 1 files changed, 14 deletions(-) diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h --- infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h 2007-01-20 00:21:21.000000000 +0100 +++ infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h 2007-01-20 00:23:46.000000000 +0100 @@ -250,20 +250,6 @@ struct ehca_ucontext { struct ib_ucontext ib_ucontext; }; -struct ehca_module *ehca_module_new(void); - -int ehca_module_delete(struct ehca_module *me); - -int ehca_eq_ctor(struct ehca_eq *eq); - -int ehca_eq_dtor(struct ehca_eq *eq); - -struct ehca_shca *ehca_shca_new(void); - -int ehca_shca_delete(struct ehca_shca *me); - -struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor); - int ehca_init_pd_cache(void); void ehca_cleanup_pd_cache(void); int ehca_init_cq_cache(void); From jeremy.brown at qlogic.com Tue Jan 23 15:53:57 2007 From: jeremy.brown at qlogic.com (Jeremy Brown) Date: Tue, 23 Jan 2007 15:53:57 -0800 Subject: [openib-general] OFED release testing Task force meeting minutes In-Reply-To: <1E3DCD1C63492545881FACB6063A57C1BDCF10@mtiexch01.mti.com> References: <1E3DCD1C63492545881FACB6063A57C1BDCF10@mtiexch01.mti.com> Message-ID: <1169596437.25781.1.camel@citrine.pathscale.com> On Tue, 2007-01-23 at 10:56 -0800, Nimrod Gindi wrote: > Follow-up meeting will be scheduled for 31st January 2007 8:30am > PDT=11am EDT=6pm Israel. Just a double-check: Shouldn't this read "8:30 am PDT = 11:30 am EDT = 6:30 pm Israel?" I want to make sure I don't show up half an hour late. 
:) Jeremy From halr at voltaire.com Tue Jan 23 16:20:49 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 23 Jan 2007 19:20:49 -0500 Subject: [openib-general] [PATCH 1/2] opensm: cleanup sm related _ctrl layer In-Reply-To: <20070120025651.GH8193@sashak.voltaire.com> References: <20070120025446.GG8193@sashak.voltaire.com> <20070120025651.GH8193@sashak.voltaire.com> Message-ID: <1169597971.29183.33690.camel@hal.voltaire.com> On Fri, 2007-01-19 at 21:56, Sasha Khapyorsky wrote: > This cleanups sm related _ctrl layer. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From halr at voltaire.com Tue Jan 23 16:25:28 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 23 Jan 2007 19:25:28 -0500 Subject: [openib-general] [PATCH 2/2] opensm: cleanup sa related _ctrl layer In-Reply-To: <20070120025748.GI8193@sashak.voltaire.com> References: <20070120025446.GG8193@sashak.voltaire.com> <20070120025748.GI8193@sashak.voltaire.com> Message-ID: <1169598227.29183.33858.camel@hal.voltaire.com> On Fri, 2007-01-19 at 21:57, Sasha Khapyorsky wrote: > Cleanup SA related *_ctrl layer. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From nimrodg at mellanox.com Tue Jan 23 16:35:53 2007 From: nimrodg at mellanox.com (Nimrod Gindi) Date: Tue, 23 Jan 2007 16:35:53 -0800 Subject: [openib-general] OFED release testing Task force meeting minutes Message-ID: <1E3DCD1C63492545881FACB6063A57C1AF87ED@mtiexch01.mti.com> You're correct - it is 8:30 PDT Nimrod Gindi Mellanox Technologies Ltd. 
mail: nimrodg at mellanox.com Cellular: +1-408-750-4801 Office: +1-347-342-0011 Fax: +1-212-987-0275 ----- Original Message ----- From: Jeremy Brown To: Nimrod Gindi Cc: Dotan Barak; Amit Krig; meder at de.ibm.com ; karun.sharma at qlogic.com ; minich at ornl.gov ; yosefe at voltaire.com ; monil at voltaire.com ; sweitzen at cisco.com ; ggrundstrom at NetEffect.com ; Chieng Etta ; openfabrics-ewg at openib.org ; openib-general at openib.org Sent: Tue Jan 23 15:53:57 2007 Subject: Re: OFED release testing Task force meeting minutes On Tue, 2007-01-23 at 10:56 -0800, Nimrod Gindi wrote: > Follow-up meeting will be scheduled for 31st January 2007 8:30am > PDT=11am EDT=6pm Israel. Just a double-check: Shouldn't this read "8:30 am PDT = 11:30 am EDT = 6:30 pm Israel?" I want to make sure I don't show up half an hour late. :) Jeremy -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Tue Jan 23 16:38:59 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 23 Jan 2007 16:38:59 -0800 Subject: [openib-general] [PATCH 2/2 vex branch] IB/VNIC Fix failover delay issue In-Reply-To: <4578425A.27226.250CE6A4@ramachandra.kuchimanchi.qlogic.com> (Ramachandra K.'s message of "Thu, 07 Dec 2006 16:33:30 +0530") References: <4578425A.27226.250CE6A4@ramachandra.kuchimanchi.qlogic.com> Message-ID: thanks, I (finally) rolled these into my vex branch. From rdreier at cisco.com Tue Jan 23 16:56:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 23 Jan 2007 16:56:04 -0800 Subject: [openib-general] [RFT] [PATCH] Add ABI compatibility for apps linked against libibverbs 1.0 Message-ID: Here is a patch that attempts to make it possible to use libibverbs 1.1 (ie the libibverbs.git master branch) to run applications compiled and linked against libibverbs 1.0. I would appreciate it if people who have such applications could test this out.
In particular, I would like to know the results of the following: - build your application as normal against a libibverbs 1.0 tree (either from OFED <= 1.1, or the "stable" branch of libibverbs.git) - get the libibverbs.git tree: git clone git://git.kernel.org/pub/scm/libs/infiniband/libibverbs.git libibverbs.git - apply the patch below patch -p1 < [this email] - build and install the libibverbs.git tree - build and install the low-level driver(s) (libmthca, libipathverbs, libehca, etc) you want to test, using your libibverbs.git tree - run the binary of your application built against libibverbs 1.0 with the library from your libibverbs.git build (LD_LIBRARY_PATH may be useful; please use ldd to make sure your application is really picking up the new libibverbs library rather than the old one, or else you're not really testing the ABI compatibility) - reply to this email and report bugs or (perhaps) success I've tested the trivial examples in the libibverbs package (ibv_xxx_pingpong tests, ibv_devinfo, etc) but I'm sure that someone has an app using something not covered there... Thanks! Roland --- Add a compatibility layer that allows applications (but not low-level drivers) linked against libibverbs 1.0 to work with libibverbs 1.1. This is done by using Linux's versioned symbol linking support: the native libibverbs entry points are given IBVERBS_1.1 versions, and compatibility wrappers for entry points from libibverbs 1.0 are created with an IBVERBS_1.0 version (to match what libibverbs 1.0 exported). In essence these wrappers create compatible proxies for every structure returned to the application (struct ibv_device, ibv_context, ibv_pd, etc), and map between the proxy and the real object when the application calls into libibverbs. This code is mostly straightforward, with a few complications in handling async events, because the pointers in event structures must be translated back to proxy structures when they are returned to the application.
There are a few further wrinkles because the calls to data path functions (poll CQ, post send, etc) are actually inline functions that call directly into the context ops, so the context ops proxy structure must actually contain pointers to compatibility wrappers for these functions as well. This may have some performance impact but it seems the overhead is unavoidable. Signed-off-by: Roland Dreier --- Makefile.am | 6 +- configure.in | 8 + src/compat-1_0.c | 876 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/device.c | 28 +- src/ibverbs.h | 14 + src/libibverbs.map | 23 +- src/verbs.c | 106 ++++--- 7 files changed, 1001 insertions(+), 60 deletions(-) diff --git a/Makefile.am b/Makefile.am index 35f4468..4c7ce9b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -8,9 +8,9 @@ src_libibverbs_la_CFLAGS = $(AM_CFLAGS) -DIBV_CONFIG_DIR=\"$(sysconfdir)/libibve libibverbs_version_script = @LIBIBVERBS_VERSION_SCRIPT@ -src_libibverbs_la_SOURCES = src/cmd.c src/device.c src/init.c src/marshall.c \ - src/memory.c src/sysfs.c src/verbs.c -src_libibverbs_la_LDFLAGS = -version-info 2 -export-dynamic \ +src_libibverbs_la_SOURCES = src/cmd.c src/compat-1_0.c src/device.c src/init.c \ + src/marshall.c src/memory.c src/sysfs.c src/verbs.c +src_libibverbs_la_LDFLAGS = -version-info 1 -export-dynamic \ $(libibverbs_version_script) src_libibverbs_la_DEPENDENCIES = $(srcdir)/src/libibverbs.map diff --git a/configure.in b/configure.in index d98867f..7e7d448 100644 --- a/configure.in +++ b/configure.in @@ -50,5 +50,13 @@ AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, fi]) AC_SUBST(LIBIBVERBS_VERSION_SCRIPT) +AC_CACHE_CHECK(for .symver assembler support, ac_cv_asm_symver_support, + [AC_TRY_COMPILE(, [asm("symbol:\n.symverx symbol, api at ABI\n");], + ac_cv_asm_symver_support=yes, + ac_cv_asm_symver_support=no)]) +if test $ac_cv_asm_symver_support = yes; then + AC_DEFINE([HAVE_SYMVER_SUPPORT], 1, [assembler has .symver support]) +fi + AC_CONFIG_FILES([Makefile 
libibverbs.spec]) AC_OUTPUT diff --git a/src/compat-1_0.c b/src/compat-1_0.c new file mode 100644 index 0000000..bfbf6a9 --- /dev/null +++ b/src/compat-1_0.c @@ -0,0 +1,876 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include + +#include "ibverbs.h" + +struct ibv_pd_1_0 { + struct ibv_context_1_0 *context; + uint32_t handle; + + struct ibv_pd *real_pd; +}; + +struct ibv_mr_1_0 { + struct ibv_context_1_0 *context; + struct ibv_pd_1_0 *pd; + uint32_t handle; + uint32_t lkey; + uint32_t rkey; + + struct ibv_mr *real_mr; +}; + +struct ibv_srq_1_0 { + struct ibv_context_1_0 *context; + void *srq_context; + struct ibv_pd_1_0 *pd; + uint32_t handle; + + pthread_mutex_t mutex; + pthread_cond_t cond; + uint32_t events_completed; + + struct ibv_srq *real_srq; +}; + +struct ibv_qp_init_attr_1_0 { + void *qp_context; + struct ibv_cq_1_0 *send_cq; + struct ibv_cq_1_0 *recv_cq; + struct ibv_srq_1_0 *srq; + struct ibv_qp_cap cap; + enum ibv_qp_type qp_type; + int sq_sig_all; +}; + +struct ibv_send_wr_1_0 { + struct ibv_send_wr_1_0 *next; + uint64_t wr_id; + struct ibv_sge *sg_list; + int num_sge; + enum ibv_wr_opcode opcode; + enum ibv_send_flags send_flags; + uint32_t imm_data; /* in network byte order */ + union { + struct { + uint64_t remote_addr; + uint32_t rkey; + } rdma; + struct { + uint64_t remote_addr; + uint64_t compare_add; + uint64_t swap; + uint32_t rkey; + } atomic; + struct { + struct ibv_ah_1_0 *ah; + uint32_t remote_qpn; + uint32_t remote_qkey; + } ud; + } wr; +}; + +struct ibv_recv_wr_1_0 { + struct ibv_recv_wr_1_0 *next; + uint64_t wr_id; + struct ibv_sge *sg_list; + int num_sge; +}; + +struct ibv_qp_1_0 { + struct ibv_context_1_0 *context; + void *qp_context; + struct ibv_pd_1_0 *pd; + struct ibv_cq_1_0 *send_cq; + struct ibv_cq_1_0 *recv_cq; + struct ibv_srq_1_0 *srq; + uint32_t handle; + uint32_t qp_num; + enum ibv_qp_state state; + enum ibv_qp_type qp_type; + + pthread_mutex_t mutex; + pthread_cond_t cond; + uint32_t events_completed; + + struct ibv_qp *real_qp; +}; + +struct ibv_cq_1_0 { + struct ibv_context_1_0 *context; + void *cq_context; + uint32_t handle; + 
int cqe; + + pthread_mutex_t mutex; + pthread_cond_t cond; + uint32_t comp_events_completed; + uint32_t async_events_completed; + + struct ibv_cq *real_cq; +}; + +struct ibv_ah_1_0 { + struct ibv_context_1_0 *context; + struct ibv_pd_1_0 *pd; + uint32_t handle; + + struct ibv_ah *real_ah; +}; + +struct ibv_device_1_0 { + void *obsolete_sysfs_dev; + void *obsolete_sysfs_ibdev; + struct ibv_device *real_device; /* was obsolete driver member */ + struct ibv_device_ops ops; +}; + +struct ibv_context_ops_1_0 { + int (*query_device)(struct ibv_context *context, + struct ibv_device_attr *device_attr); + int (*query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); + struct ibv_pd * (*alloc_pd)(struct ibv_context *context); + int (*dealloc_pd)(struct ibv_pd *pd); + struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, + enum ibv_access_flags access); + int (*dereg_mr)(struct ibv_mr *mr); + struct ibv_cq * (*create_cq)(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector); + int (*poll_cq)(struct ibv_cq_1_0 *cq, int num_entries, + struct ibv_wc *wc); + int (*req_notify_cq)(struct ibv_cq_1_0 *cq, + int solicited_only); + void (*cq_event)(struct ibv_cq *cq); + int (*resize_cq)(struct ibv_cq *cq, int cqe); + int (*destroy_cq)(struct ibv_cq *cq); + struct ibv_srq * (*create_srq)(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr); + int (*modify_srq)(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask); + int (*query_srq)(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr); + int (*destroy_srq)(struct ibv_srq *srq); + int (*post_srq_recv)(struct ibv_srq_1_0 *srq, + struct ibv_recv_wr_1_0 *recv_wr, + struct ibv_recv_wr_1_0 **bad_recv_wr); + struct ibv_qp * (*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); + int (*query_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct 
ibv_qp_init_attr *init_attr); + int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask); + int (*destroy_qp)(struct ibv_qp *qp); + int (*post_send)(struct ibv_qp_1_0 *qp, + struct ibv_send_wr_1_0 *wr, + struct ibv_send_wr_1_0 **bad_wr); + int (*post_recv)(struct ibv_qp_1_0 *qp, + struct ibv_recv_wr_1_0 *wr, + struct ibv_recv_wr_1_0 **bad_wr); + struct ibv_ah * (*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr); + int (*destroy_ah)(struct ibv_ah *ah); + int (*attach_mcast)(struct ibv_qp *qp, union ibv_gid *gid, + uint16_t lid); + int (*detach_mcast)(struct ibv_qp *qp, union ibv_gid *gid, + uint16_t lid); +}; + +struct ibv_context_1_0 { + struct ibv_device_1_0 *device; + struct ibv_context_ops_1_0 ops; + int cmd_fd; + int async_fd; + int num_comp_vectors; + + struct ibv_context *real_context; /* was abi_compat member */ +}; + +struct ibv_device_1_0 **__ibv_get_device_list_1_0(int *num) +{ + struct ibv_device **real_list; + struct ibv_device_1_0 **l; + int i, n; + + real_list = ibv_get_device_list(&n); + if (!real_list) + return NULL; + + l = calloc(n + 2, sizeof (struct ibv_device_1_0 *)); + if (!l) + return NULL; + + l[0] = (void *) real_list; + + for (i = 0; i < n; ++i) { + l[i + 1] = calloc(1, sizeof (struct ibv_device_1_0)); + if (!l[i + 1]) + goto fail; + l[i + 1]->real_device = real_list[i]; + } + + if (num) + *num = n; + + return l + 1; + +fail: + for (i = 1; i <= n; ++i) + if (l[i]) + free(l[i]); + ibv_free_device_list(real_list); + return NULL; +} +symver(__ibv_get_device_list_1_0, ibv_get_device_list, IBVERBS_1.0); + +void __ibv_free_device_list_1_0(struct ibv_device_1_0 **list) +{ + struct ibv_device_1_0 **l = list; + + while (*l++) + free(l); + + ibv_free_device_list((void *) list[-1]); + free(list - 1); +} +symver(__ibv_free_device_list_1_0, ibv_free_device_list, IBVERBS_1.0); + +const char *__ibv_get_device_name_1_0(struct ibv_device_1_0 *device) +{ + return ibv_get_device_name(device->real_device); +} 
+symver(__ibv_get_device_name_1_0, ibv_get_device_name, IBVERBS_1.0); + +uint64_t __ibv_get_device_guid_1_0(struct ibv_device_1_0 *device) +{ + return ibv_get_device_guid(device->real_device); +} +symver(__ibv_get_device_guid_1_0, ibv_get_device_guid, IBVERBS_1.0); + +static int poll_cq_wrapper_1_0(struct ibv_cq_1_0 *cq, int num_entries, + struct ibv_wc *wc) +{ + return cq->context->real_context->ops.poll_cq(cq->real_cq, num_entries, wc); +} + +static int req_notify_cq_wrapper_1_0(struct ibv_cq_1_0 *cq, int sol_only) +{ + return cq->context->real_context->ops.req_notify_cq(cq->real_cq, sol_only); +} + +static int post_srq_recv_wrapper_1_0(struct ibv_srq_1_0 *srq, struct ibv_recv_wr_1_0 *wr, + struct ibv_recv_wr_1_0 **bad_wr) +{ + struct ibv_recv_wr_1_0 *w; + struct ibv_recv_wr *real_wr = NULL, *head_wr, *tail_wr = NULL, *real_bad_wr; + int ret; + + for (w = wr; w; w = w->next) { + real_wr = alloca(sizeof *real_wr); + real_wr->wr_id = w->wr_id; + real_wr->sg_list = w->sg_list; + real_wr->num_sge = w->num_sge; + real_wr->next = NULL; + if (tail_wr) + tail_wr->next = real_wr; + else + head_wr = real_wr; + + tail_wr = real_wr; + } + + ret = srq->context->real_context->ops.post_srq_recv(srq->real_srq, real_wr, + &real_bad_wr); + + if (ret) { + /*XXX set bad_wr*/ + } + + return ret; +} + +static int post_send_wrapper_1_0(struct ibv_qp_1_0 *qp, struct ibv_send_wr_1_0 *wr, + struct ibv_send_wr_1_0 **bad_wr) +{ + struct ibv_send_wr_1_0 *w; + struct ibv_send_wr *real_wr = NULL, *head_wr, *tail_wr = NULL, *real_bad_wr; + int is_ud = qp->qp_type == IBV_QPT_UD; + int ret; + + for (w = wr; w; w = w->next) { + real_wr = alloca(sizeof *real_wr); + real_wr->wr_id = w->wr_id; + real_wr->next = NULL; + + memcpy(&real_wr->sg_list, &w->sg_list, + sizeof *w - offsetof(struct ibv_send_wr, sg_list)); + + if (is_ud) + real_wr->wr.ud.ah = w->wr.ud.ah->real_ah; + + if (tail_wr) + tail_wr->next = real_wr; + else + head_wr = real_wr; + + tail_wr = real_wr; + } + + ret = 
qp->context->real_context->ops.post_send(qp->real_qp, real_wr, + &real_bad_wr); + + if (ret) { + /*XXX set bad_wr*/ + } + + return ret; +} + +static int post_recv_wrapper_1_0(struct ibv_qp_1_0 *qp, struct ibv_recv_wr_1_0 *wr, + struct ibv_recv_wr_1_0 **bad_wr) +{ + struct ibv_recv_wr_1_0 *w; + struct ibv_recv_wr *real_wr = NULL, *head_wr, *tail_wr = NULL, *real_bad_wr; + int ret; + + for (w = wr; w; w = w->next) { + real_wr = alloca(sizeof *real_wr); + real_wr->wr_id = w->wr_id; + real_wr->sg_list = w->sg_list; + real_wr->num_sge = w->num_sge; + real_wr->next = NULL; + if (tail_wr) + tail_wr->next = real_wr; + else + head_wr = real_wr; + + tail_wr = real_wr; + } + + ret = qp->context->real_context->ops.post_recv(qp->real_qp, real_wr, + &real_bad_wr); + + if (ret) { + /*XXX set bad_wr*/ + } + + return ret; +} + +struct ibv_context_1_0 *__ibv_open_device_1_0(struct ibv_device_1_0 *device) +{ + struct ibv_context *real_ctx; + struct ibv_context_1_0 *ctx; + + ctx = malloc(sizeof *ctx); + if (!ctx) + return NULL; + + real_ctx = ibv_open_device(device->real_device); + if (!real_ctx) { + free(ctx); + return NULL; + } + + ctx->device = device; + ctx->real_context = real_ctx; + + ctx->ops.poll_cq = poll_cq_wrapper_1_0; + ctx->ops.req_notify_cq = req_notify_cq_wrapper_1_0; + ctx->ops.post_send = post_send_wrapper_1_0; + ctx->ops.post_recv = post_recv_wrapper_1_0; + ctx->ops.post_srq_recv = post_srq_recv_wrapper_1_0; + + return ctx; +} +symver(__ibv_open_device_1_0, ibv_open_device, IBVERBS_1.0); + +int __ibv_close_device_1_0(struct ibv_context_1_0 *context) +{ + int ret; + + ret = ibv_close_device(context->real_context); + if (ret) + return ret; + + free(context); + return 0; +} +symver(__ibv_close_device_1_0, ibv_close_device, IBVERBS_1.0); + +int __ibv_get_async_event_1_0(struct ibv_context_1_0 *context, + struct ibv_async_event *event) +{ + int ret; + + ret = ibv_get_async_event(context->real_context, event); + if (ret) + return ret; + + switch (event->event_type) { + 
case IBV_EVENT_CQ_ERR: + event->element.cq = event->element.cq->cq_context; + break; + + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_QP_LAST_WQE_REACHED: + event->element.qp = event->element.qp->qp_context; + break; + + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + event->element.srq = event->element.srq->srq_context; + break; + + default: + break; + } + + return ret; +} +symver(__ibv_get_async_event_1_0, ibv_get_async_event, IBVERBS_1.0); + +void __ibv_ack_async_event_1_0(struct ibv_async_event *event) +{ + struct ibv_async_event real_event = *event; + + switch (event->event_type) { + case IBV_EVENT_CQ_ERR: + real_event.element.cq = + ((struct ibv_cq_1_0 *) event->element.cq)->real_cq; + break; + + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_QP_LAST_WQE_REACHED: + real_event.element.qp = + ((struct ibv_qp_1_0 *) event->element.qp)->real_qp; + break; + + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + real_event.element.srq = + ((struct ibv_srq_1_0 *) event->element.srq)->real_srq; + break; + + default: + break; + } + + ibv_ack_async_event(&real_event); +} +symver(__ibv_ack_async_event_1_0, ibv_ack_async_event, IBVERBS_1.0); + +int __ibv_query_device_1_0(struct ibv_context_1_0 *context, + struct ibv_device_attr *device_attr) +{ + return ibv_query_device(context->real_context, device_attr); +} +symver(__ibv_query_device_1_0, ibv_query_device, IBVERBS_1.0); + +int __ibv_query_port_1_0(struct ibv_context_1_0 *context, uint8_t port_num, + struct ibv_port_attr *port_attr) +{ + return ibv_query_port(context->real_context, port_num, port_attr); +} +symver(__ibv_query_port_1_0, ibv_query_port, 
IBVERBS_1.0); + +int __ibv_query_gid_1_0(struct ibv_context_1_0 *context, uint8_t port_num, + int index, union ibv_gid *gid) +{ + return ibv_query_gid(context->real_context, port_num, index, gid); +} +symver(__ibv_query_gid_1_0, ibv_query_gid, IBVERBS_1.0); + +int __ibv_query_pkey_1_0(struct ibv_context_1_0 *context, uint8_t port_num, + int index, uint16_t *pkey) +{ + return ibv_query_pkey(context->real_context, port_num, index, pkey); +} +symver(__ibv_query_pkey_1_0, ibv_query_pkey, IBVERBS_1.0); + +struct ibv_pd_1_0 *__ibv_alloc_pd_1_0(struct ibv_context_1_0 *context) +{ + struct ibv_pd *real_pd; + struct ibv_pd_1_0 *pd; + + pd = malloc(sizeof *pd); + if (!pd) + return NULL; + + real_pd = ibv_alloc_pd(context->real_context); + if (!real_pd) { + free(pd); + return NULL; + } + + pd->context = context; + pd->real_pd = real_pd; + + return pd; +} +symver(__ibv_alloc_pd_1_0, ibv_alloc_pd, IBVERBS_1.0); + +int __ibv_dealloc_pd_1_0(struct ibv_pd_1_0 *pd) +{ + int ret; + + ret = ibv_dealloc_pd(pd->real_pd); + if (ret) + return ret; + + free(pd); + return 0; +} +symver(__ibv_dealloc_pd_1_0, ibv_dealloc_pd, IBVERBS_1.0); + +struct ibv_mr_1_0 *__ibv_reg_mr_1_0(struct ibv_pd_1_0 *pd, void *addr, + size_t length, enum ibv_access_flags access) +{ + struct ibv_mr *real_mr; + struct ibv_mr_1_0 *mr; + + mr = malloc(sizeof *mr); + if (!mr) + return NULL; + + real_mr = ibv_reg_mr(pd->real_pd, addr, length, access); + if (!real_mr) { + free(mr); + return NULL; + } + + mr->context = pd->context; + mr->pd = pd; + mr->lkey = real_mr->lkey; + mr->rkey = real_mr->rkey; + mr->real_mr = real_mr; + + return mr; +} +symver(__ibv_reg_mr_1_0, ibv_reg_mr, IBVERBS_1.0); + +int __ibv_dereg_mr_1_0(struct ibv_mr_1_0 *mr) +{ + int ret; + + ret = ibv_dereg_mr(mr->real_mr); + if (ret) + return ret; + + free(mr); + return 0; +} +symver(__ibv_dereg_mr_1_0, ibv_dereg_mr, IBVERBS_1.0); + +struct ibv_cq_1_0 *__ibv_create_cq_1_0(struct ibv_context_1_0 *context, int cqe, + void *cq_context, + struct 
ibv_comp_channel *channel, + int comp_vector) +{ + struct ibv_cq *real_cq; + struct ibv_cq_1_0 *cq; + + cq = malloc(sizeof *cq); + if (!cq) + return NULL; + + real_cq = ibv_create_cq(context->real_context, cqe, cq_context, + channel, comp_vector); + if (!real_cq) { + free(cq); + return NULL; + } + + cq->context = context; + cq->cq_context = cq_context; + cq->cqe = cqe; + cq->real_cq = real_cq; + + real_cq->cq_context = cq; + + return cq; +} +symver(__ibv_create_cq_1_0, ibv_create_cq, IBVERBS_1.0); + +int __ibv_resize_cq_1_0(struct ibv_cq_1_0 *cq, int cqe) +{ + return ibv_resize_cq(cq->real_cq, cqe); +} +symver(__ibv_resize_cq_1_0, ibv_resize_cq, IBVERBS_1.0); + +int __ibv_destroy_cq_1_0(struct ibv_cq_1_0 *cq) +{ + int ret; + + ret = ibv_destroy_cq(cq->real_cq); + if (ret) + return ret; + + free(cq); + return 0; +} +symver(__ibv_destroy_cq_1_0, ibv_destroy_cq, IBVERBS_1.0); + +int __ibv_get_cq_event_1_0(struct ibv_comp_channel *channel, + struct ibv_cq_1_0 **cq, void **cq_context) +{ + struct ibv_cq *real_cq; + void *cq_ptr; + int ret; + + ret = ibv_get_cq_event(channel, &real_cq, &cq_ptr); + if (ret) + return ret; + + *cq = cq_ptr; + *cq_context = (*cq)->cq_context; + + return 0; +} +symver(__ibv_get_cq_event_1_0, ibv_get_cq_event, IBVERBS_1.0); + +void __ibv_ack_cq_events_1_0(struct ibv_cq_1_0 *cq, unsigned int nevents) +{ + ibv_ack_cq_events(cq->real_cq, nevents); +} +symver(__ibv_ack_cq_events_1_0, ibv_ack_cq_events, IBVERBS_1.0); + +struct ibv_srq_1_0 *__ibv_create_srq_1_0(struct ibv_pd_1_0 *pd, + struct ibv_srq_init_attr *srq_init_attr) +{ + struct ibv_srq *real_srq; + struct ibv_srq_1_0 *srq; + + srq = malloc(sizeof *srq); + if (!srq) + return NULL; + + real_srq = ibv_create_srq(pd->real_pd, srq_init_attr); + if (!real_srq) { + free(srq); + return NULL; + } + + srq->context = pd->context; + srq->srq_context = srq_init_attr->srq_context; + srq->pd = pd; + srq->real_srq = real_srq; + + real_srq->srq_context = srq; + + return srq; +} 
+symver(__ibv_create_srq_1_0, ibv_create_srq, IBVERBS_1.0); + +int __ibv_modify_srq_1_0(struct ibv_srq_1_0 *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask) +{ + return ibv_modify_srq(srq->real_srq, srq_attr, srq_attr_mask); +} +symver(__ibv_modify_srq_1_0, ibv_modify_srq, IBVERBS_1.0); + +int __ibv_query_srq_1_0(struct ibv_srq_1_0 *srq, struct ibv_srq_attr *srq_attr) +{ + return ibv_query_srq(srq->real_srq, srq_attr); +} +symver(__ibv_query_srq_1_0, ibv_query_srq, IBVERBS_1.0); + +int __ibv_destroy_srq_1_0(struct ibv_srq_1_0 *srq) +{ + int ret; + + ret = ibv_destroy_srq(srq->real_srq); + if (ret) + return ret; + + free(srq); + return 0; +} +symver(__ibv_destroy_srq_1_0, ibv_destroy_srq, IBVERBS_1.0); + +struct ibv_qp_1_0 *__ibv_create_qp_1_0(struct ibv_pd_1_0 *pd, + struct ibv_qp_init_attr_1_0 *qp_init_attr) +{ + struct ibv_qp *real_qp; + struct ibv_qp_1_0 *qp; + struct ibv_qp_init_attr real_init_attr; + + qp = malloc(sizeof *qp); + if (!qp) + return NULL; + + real_init_attr.qp_context = qp_init_attr->qp_context; + real_init_attr.send_cq = qp_init_attr->send_cq->real_cq; + real_init_attr.recv_cq = qp_init_attr->recv_cq->real_cq; + real_init_attr.srq = qp_init_attr->srq ? 
+ qp_init_attr->srq->real_srq : NULL; + real_init_attr.cap = qp_init_attr->cap; + real_init_attr.qp_type = qp_init_attr->qp_type; + real_init_attr.sq_sig_all = qp_init_attr->sq_sig_all; + + real_qp = ibv_create_qp(pd->real_pd, &real_init_attr); + if (!real_qp) { + free(qp); + return NULL; + } + + qp->context = pd->context; + qp->qp_context = qp_init_attr->qp_context; + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->recv_cq = qp_init_attr->recv_cq; + qp->srq = qp_init_attr->srq; + qp->qp_type = qp_init_attr->qp_type; + qp->qp_num = real_qp->qp_num; + qp->real_qp = real_qp; + + real_qp->qp_context = qp; + + return qp; +} +symver(__ibv_create_qp_1_0, ibv_create_qp, IBVERBS_1.0); + +int __ibv_query_qp_1_0(struct ibv_qp_1_0 *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr_1_0 *init_attr) +{ + struct ibv_qp_init_attr real_init_attr; + int ret; + + ret = ibv_query_qp(qp->real_qp, attr, attr_mask, &real_init_attr); + if (ret) + return ret; + + init_attr->qp_context = qp->qp_context; + init_attr->send_cq = real_init_attr.send_cq->cq_context; + init_attr->recv_cq = real_init_attr.recv_cq->cq_context; + init_attr->srq = real_init_attr.srq->srq_context; + init_attr->qp_type = real_init_attr.qp_type; + init_attr->cap = real_init_attr.cap; + init_attr->sq_sig_all = real_init_attr.sq_sig_all; + + return 0; +} +symver(__ibv_query_qp_1_0, ibv_query_qp, IBVERBS_1.0); + +int __ibv_modify_qp_1_0(struct ibv_qp_1_0 *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask) +{ + return ibv_modify_qp(qp->real_qp, attr, attr_mask); +} +symver(__ibv_modify_qp_1_0, ibv_modify_qp, IBVERBS_1.0); + +int __ibv_destroy_qp_1_0(struct ibv_qp_1_0 *qp) +{ + int ret; + + ret = ibv_destroy_qp(qp->real_qp); + if (ret) + return ret; + + free(qp); + return 0; +} +symver(__ibv_destroy_qp_1_0, ibv_destroy_qp, IBVERBS_1.0); + +struct ibv_ah_1_0 *__ibv_create_ah_1_0(struct ibv_pd_1_0 *pd, + struct ibv_ah_attr *attr) +{ + struct ibv_ah *real_ah; + 
struct ibv_ah_1_0 *ah; + + ah = malloc(sizeof *ah); + if (!ah) + return NULL; + + real_ah = ibv_create_ah(pd->real_pd, attr); + if (!real_ah) { + free(ah); + return NULL; + } + + ah->context = pd->context; + ah->pd = pd; + ah->real_ah = real_ah; + + return ah; +} +symver(__ibv_create_ah_1_0, ibv_create_ah, IBVERBS_1.0); + +int __ibv_destroy_ah_1_0(struct ibv_ah_1_0 *ah) +{ + int ret; + + ret = ibv_destroy_ah(ah->real_ah); + if (ret) + return ret; + + free(ah); + return 0; +} +symver(__ibv_destroy_ah_1_0, ibv_destroy_ah, IBVERBS_1.0); + +int __ibv_attach_mcast_1_0(struct ibv_qp_1_0 *qp, union ibv_gid *gid, uint16_t lid) +{ + return ibv_attach_mcast(qp->real_qp, gid, lid); +} +symver(__ibv_attach_mcast_1_0, ibv_attach_mcast, IBVERBS_1.0); + +int __ibv_detach_mcast_1_0(struct ibv_qp_1_0 *qp, union ibv_gid *gid, uint16_t lid) +{ + return ibv_detach_mcast(qp->real_qp, gid, lid); +} +symver(__ibv_detach_mcast_1_0, ibv_detach_mcast, IBVERBS_1.0); diff --git a/src/device.c b/src/device.c index f4e4473..bca1ce9 100644 --- a/src/device.c +++ b/src/device.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -52,7 +52,7 @@ static pthread_mutex_t device_list_lock = PTHREAD_MUTEX_INITIALIZER; static int num_devices; static struct ibv_device **device_list; -struct ibv_device **ibv_get_device_list(int *num) +struct ibv_device **__ibv_get_device_list(int *num) { struct ibv_device **l; int i; @@ -77,18 +77,21 @@ out: return l; } +default_symver(__ibv_get_device_list, ibv_get_device_list); -void ibv_free_device_list(struct ibv_device **list) +void __ibv_free_device_list(struct ibv_device **list) { free(list); } +default_symver(__ibv_free_device_list, ibv_free_device_list); -const char *ibv_get_device_name(struct ibv_device *device) +const char *__ibv_get_device_name(struct ibv_device *device) { return device->name; } +default_symver(__ibv_get_device_name, ibv_get_device_name); -uint64_t ibv_get_device_guid(struct ibv_device *device) +uint64_t __ibv_get_device_guid(struct ibv_device *device) { char attr[24]; uint64_t guid = 0; @@ -108,8 +111,9 @@ uint64_t ibv_get_device_guid(struct ibv_device *device) return htonll(guid); } +default_symver(__ibv_get_device_guid, ibv_get_device_guid); -struct ibv_context *ibv_open_device(struct ibv_device *device) +struct ibv_context *__ibv_open_device(struct ibv_device *device) { char *devpath; int cmd_fd; @@ -142,8 +146,9 @@ err: return NULL; } +default_symver(__ibv_open_device, ibv_open_device); -int ibv_close_device(struct ibv_context *context) +int __ibv_close_device(struct ibv_context *context) { int async_fd = context->async_fd; int cmd_fd = context->cmd_fd; @@ -164,9 +169,10 @@ int ibv_close_device(struct ibv_context *context) return 0; } +default_symver(__ibv_close_device, ibv_close_device); -int ibv_get_async_event(struct ibv_context *context, - struct ibv_async_event *event) +int __ibv_get_async_event(struct ibv_context *context, + struct ibv_async_event *event) { struct ibv_kern_async_event ev; @@ -206,8 +212,9 @@ int ibv_get_async_event(struct ibv_context *context, return 
0; } +default_symver(__ibv_get_async_event, ibv_get_async_event); -void ibv_ack_async_event(struct ibv_async_event *event) +void __ibv_ack_async_event(struct ibv_async_event *event) { switch (event->event_type) { case IBV_EVENT_CQ_ERR: @@ -258,3 +265,4 @@ void ibv_ack_async_event(struct ibv_async_event *event) return; } } +default_symver(__ibv_ack_async_event, ibv_ack_async_event); diff --git a/src/ibverbs.h b/src/ibverbs.h index 14330f8..b1d2c2b 100644 --- a/src/ibverbs.h +++ b/src/ibverbs.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -56,6 +57,19 @@ #define INIT __attribute__((constructor)) #define FINI __attribute__((destructor)) +#define DEFAULT_ABI "IBVERBS_1.1" + +#ifdef HAVE_SYMVER_SUPPORT +# define symver(name, api, ver) \ + asm(".symver " #name "," #api "@" #ver) +# define default_symver(name, api) \ + asm(".symver " #name "," #api "@@" DEFAULT_ABI) +#else +# define symver(name, api, ver) +# define default_symver(name, api) \ + extern __typeof(name) api __attribute__((alias(#name))) +#endif /* HAVE_SYMVER_SUPPORT */ + #define PFX "libibverbs: " struct ibv_abi_compat_v2 { diff --git a/src/libibverbs.map b/src/libibverbs.map index 795dd55..3a346ed 100644 --- a/src/libibverbs.map +++ b/src/libibverbs.map @@ -32,8 +32,6 @@ IBVERBS_1.0 { ibv_modify_qp; ibv_destroy_qp; ibv_create_ah; - ibv_init_ah_from_wc; - ibv_create_ah_from_wc; ibv_destroy_ah; ibv_attach_mcast; ibv_detach_mcast; @@ -67,17 +65,30 @@ IBVERBS_1.0 { ibv_cmd_attach_mcast; ibv_cmd_detach_mcast; ibv_copy_qp_attr_from_kern; - ibv_copy_ah_attr_from_kern; ibv_copy_path_rec_from_kern; ibv_copy_path_rec_to_kern; ibv_rate_to_mult; mult_to_ibv_rate; ibv_get_sysfs_path; ibv_read_sysfs_file; + + local: *; +}; + +IBVERBS_1.1 { + global: + ibv_get_device_list; + 
ibv_free_device_list; + ibv_get_device_name; + ibv_get_device_guid; + ibv_open_device; + ibv_close_device; + + ibv_init_ah_from_wc; + ibv_create_ah_from_wc; + ibv_copy_ah_attr_from_kern; ibv_fork_init; ibv_dontfork_range; ibv_dofork_range; ibv_register_driver; - - local: *; -}; +} IBVERBS_1.0; diff --git a/src/verbs.c b/src/verbs.c index 6ac56d3..56513e4 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -76,20 +76,22 @@ enum ibv_rate mult_to_ibv_rate(int mult) } } -int ibv_query_device(struct ibv_context *context, - struct ibv_device_attr *device_attr) +int __ibv_query_device(struct ibv_context *context, + struct ibv_device_attr *device_attr) { return context->ops.query_device(context, device_attr); } +default_symver(__ibv_query_device, ibv_query_device); -int ibv_query_port(struct ibv_context *context, uint8_t port_num, - struct ibv_port_attr *port_attr) +int __ibv_query_port(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr) { return context->ops.query_port(context, port_num, port_attr); } +default_symver(__ibv_query_port, ibv_query_port); -int ibv_query_gid(struct ibv_context *context, uint8_t port_num, - int index, union ibv_gid *gid) +int __ibv_query_gid(struct ibv_context *context, uint8_t port_num, + int index, union ibv_gid *gid) { char name[24]; char attr[41]; @@ -111,9 +113,10 @@ int ibv_query_gid(struct ibv_context *context, uint8_t port_num, return 0; } +default_symver(__ibv_query_gid, ibv_query_gid); -int ibv_query_pkey(struct ibv_context *context, uint8_t port_num, - int index, uint16_t *pkey) +int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num, + int index, 
uint16_t *pkey) { char name[24]; char attr[8]; @@ -131,8 +134,9 @@ int ibv_query_pkey(struct ibv_context *context, uint8_t port_num, *pkey = htons(val); return 0; } +default_symver(__ibv_query_pkey, ibv_query_pkey); -struct ibv_pd *ibv_alloc_pd(struct ibv_context *context) +struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context) { struct ibv_pd *pd; @@ -142,14 +146,16 @@ struct ibv_pd *ibv_alloc_pd(struct ibv_context *context) return pd; } +default_symver(__ibv_alloc_pd, ibv_alloc_pd); -int ibv_dealloc_pd(struct ibv_pd *pd) +int __ibv_dealloc_pd(struct ibv_pd *pd) { return pd->context->ops.dealloc_pd(pd); } +default_symver(__ibv_dealloc_pd, ibv_dealloc_pd); -struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, - size_t length, enum ibv_access_flags access) +struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, + size_t length, enum ibv_access_flags access) { struct ibv_mr *mr; @@ -167,8 +173,9 @@ struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, return mr; } +default_symver(__ibv_reg_mr, ibv_reg_mr); -int ibv_dereg_mr(struct ibv_mr *mr) +int __ibv_dereg_mr(struct ibv_mr *mr) { int ret; void *addr = mr->addr; @@ -180,6 +187,7 @@ int ibv_dereg_mr(struct ibv_mr *mr) return ret; } +default_symver(__ibv_dereg_mr, ibv_dereg_mr); static struct ibv_comp_channel *ibv_create_comp_channel_v2(struct ibv_context *context) { @@ -241,8 +249,8 @@ int ibv_destroy_comp_channel(struct ibv_comp_channel *channel) return 0; } -struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, - struct ibv_comp_channel *channel, int comp_vector) +struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, + struct ibv_comp_channel *channel, int comp_vector) { struct ibv_cq *cq = context->ops.create_cq(context, cqe, channel, comp_vector); @@ -258,23 +266,25 @@ struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_cont return cq; } +default_symver(__ibv_create_cq, ibv_create_cq); -int ibv_resize_cq(struct 
ibv_cq *cq, int cqe) +int __ibv_resize_cq(struct ibv_cq *cq, int cqe) { if (!cq->context->ops.resize_cq) return ENOSYS; return cq->context->ops.resize_cq(cq, cqe); } +default_symver(__ibv_resize_cq, ibv_resize_cq); -int ibv_destroy_cq(struct ibv_cq *cq) +int __ibv_destroy_cq(struct ibv_cq *cq) { return cq->context->ops.destroy_cq(cq); } +default_symver(__ibv_destroy_cq, ibv_destroy_cq); - -int ibv_get_cq_event(struct ibv_comp_channel *channel, - struct ibv_cq **cq, void **cq_context) +int __ibv_get_cq_event(struct ibv_comp_channel *channel, + struct ibv_cq **cq, void **cq_context) { struct ibv_comp_event ev; @@ -289,17 +299,19 @@ int ibv_get_cq_event(struct ibv_comp_channel *channel, return 0; } +default_symver(__ibv_get_cq_event, ibv_get_cq_event); -void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) +void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) { pthread_mutex_lock(&cq->mutex); cq->comp_events_completed += nevents; pthread_cond_signal(&cq->cond); pthread_mutex_unlock(&cq->mutex); } +default_symver(__ibv_ack_cq_events, ibv_ack_cq_events); -struct ibv_srq *ibv_create_srq(struct ibv_pd *pd, - struct ibv_srq_init_attr *srq_init_attr) +struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr) { struct ibv_srq *srq; @@ -318,26 +330,30 @@ struct ibv_srq *ibv_create_srq(struct ibv_pd *pd, return srq; } +default_symver(__ibv_create_srq, ibv_create_srq); -int ibv_modify_srq(struct ibv_srq *srq, - struct ibv_srq_attr *srq_attr, - enum ibv_srq_attr_mask srq_attr_mask) +int __ibv_modify_srq(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask) { return srq->context->ops.modify_srq(srq, srq_attr, srq_attr_mask); } +default_symver(__ibv_modify_srq, ibv_modify_srq); -int ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) +int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) { return srq->context->ops.query_srq(srq, srq_attr); } 
+default_symver(__ibv_query_srq, ibv_query_srq); -int ibv_destroy_srq(struct ibv_srq *srq) +int __ibv_destroy_srq(struct ibv_srq *srq) { return srq->context->ops.destroy_srq(srq); } +default_symver(__ibv_destroy_srq, ibv_destroy_srq); -struct ibv_qp *ibv_create_qp(struct ibv_pd *pd, - struct ibv_qp_init_attr *qp_init_attr) +struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr) { struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr); @@ -356,10 +372,11 @@ struct ibv_qp *ibv_create_qp(struct ibv_pd *pd, return qp; } +default_symver(__ibv_create_qp, ibv_create_qp); -int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, - enum ibv_qp_attr_mask attr_mask, - struct ibv_qp_init_attr *init_attr) +int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr *init_attr) { int ret; @@ -372,9 +389,10 @@ int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, return 0; } +default_symver(__ibv_query_qp, ibv_query_qp); -int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, - enum ibv_qp_attr_mask attr_mask) +int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask) { int ret; @@ -387,13 +405,15 @@ int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, return 0; } +default_symver(__ibv_modify_qp, ibv_modify_qp); -int ibv_destroy_qp(struct ibv_qp *qp) +int __ibv_destroy_qp(struct ibv_qp *qp) { return qp->context->ops.destroy_qp(qp); } +default_symver(__ibv_destroy_qp, ibv_destroy_qp); -struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) +struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) { struct ibv_ah *ah = pd->context->ops.create_ah(pd, attr); @@ -404,6 +424,7 @@ struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) return ah; } +default_symver(__ibv_create_ah, ibv_create_ah); static int ibv_find_gid_index(struct ibv_context 
*context, uint8_t port_num, union ibv_gid *gid) @@ -461,17 +482,20 @@ struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc, return ibv_create_ah(pd, &ah_attr); } -int ibv_destroy_ah(struct ibv_ah *ah) +int __ibv_destroy_ah(struct ibv_ah *ah) { return ah->context->ops.destroy_ah(ah); } +default_symver(__ibv_destroy_ah, ibv_destroy_ah); -int ibv_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +int __ibv_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) { return qp->context->ops.attach_mcast(qp, gid, lid); } +default_symver(__ibv_attach_mcast, ibv_attach_mcast); -int ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +int __ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) { return qp->context->ops.detach_mcast(qp, gid, lid); } +default_symver(__ibv_detach_mcast, ibv_detach_mcast); -- 1.4.4.1 From mshefty at ichips.intel.com Tue Jan 23 16:58:42 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 23 Jan 2007 16:58:42 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> References: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> Message-ID: <45B6AF42.7000500@ichips.intel.com> > rdma_join_multicast(0.0.0.0, port 0) <- exchange group info out of band Trying to work through this more, having the first node join seems trivial. Getting additional nodes to join the same group through the rdma_cm is proving more difficult... The MGID of the group would need to be treated as an IPv6 address, with a join done using that address directly, versus mapping an IPv6 address to an MGID using the ipoib algorithm. I still believe this is doable, it's just going to require more thought/discussion to ensure that we get a clean implementation. 
- Sean From swise at opengridcomputing.com Tue Jan 23 17:03:17 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 23 Jan 2007 19:03:17 -0600 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. Message-ID: <1169600597.26256.53.camel@stevo-desktop> Handle Ethernet neighbour updates during route resolution. The IWCM uses the ib_addr services to do route resolution (neighbour discovery in the IP world). The ib_addr netevent callback routine, however, currently only acts on InfiniBand neighbour updates. It needs to act on Ethernet neighbour updates as well. This patch just removes filtering on device type altogether and will trigger on any neighbour updates where the nud_state is valid. This simplifies the code some. Signed-off-by: Steve Wise --- drivers/infiniband/core/addr.c | 3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index af93979..d2bb5a9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -360,8 +360,7 @@ static int netevent_callback(struct noti if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; - if (neigh->dev->type == ARPHRD_INFINIBAND && - (neigh->nud_state & NUD_VALID)) { + if (neigh->nud_state & NUD_VALID) { set_timeout(jiffies); } } From krkumar2 at in.ibm.com Tue Jan 23 19:21:13 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Wed, 24 Jan 2007 08:51:13 +0530 Subject: [openib-general] [PATCH] libibverbs: should not access NULL pointer. In-Reply-To: Message-ID: Yes, you have. I had a few days old bits when I saw this, sorry. Michael's suggestion is good though. thanks, - KK Roland Dreier wrote on 01/23/2007 11:19:17 AM: > I already fixed this in commit 789728f7, right? 
From eitan at sw053.yok.mtl.com Tue Jan 23 21:23:14 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Wed, 24 Jan 2007 07:23:14 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-24:normal completion Message-ID: <200701240523.l0O5NEnV031875@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Tue_Jan_23_10:07:29_2007 5becf9 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From ogerlitz at voltaire.com Tue Jan 23 22:25:59 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 24 Jan 2007 08:25:59 +0200 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> Message-ID: <45B6FBF7.9030400@voltaire.com> Sean Hefty wrote: > Add to the rdma_cm an IPOIB port space that allows interoperability with > IPoIB multicast traffic. Use of the RDMA_PS_IPOIB is limited to multicast > join/leave. OK, Sean the patch looks perfectly fine for allowing multicast interoperability with IPoIB. 
However, it will not support "mixed mode" communication patterns (which you raised last week), that is, one app having a UD QP for both multicast and unicast that talks with two "peers": IPoIB multicast and another app doing only unicast. Such a scenario would have been supported if you allowed unicast apps to use the IPOIB port space as well - similar to my version of the patch. Also, just a clarification - how exactly does the patch enforce that an app cannot do listen/connect/accept on an RDMA_PS_IPOIB ID? Or. From kliteyn at dev.mellanox.co.il Tue Jan 23 22:26:13 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 24 Jan 2007 08:26:13 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <1169570982.29183.11110.camel@hal.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> <45B32FB8.40700@dev.mellanox.co.il> <1169498411.23046.12190.camel@hal.voltaire.com> <45B635BC.2010304@dev.mellanox.co.il> <1169570982.29183.11110.camel@hal.voltaire.com> Message-ID: <45B6FC05.8030200@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi again Yevgeny, > > On Tue, 2007-01-23 at 11:20, Yevgeny Kliteynik wrote: >> Hi Hal, >> >> Hal Rosenstock wrote: >>> Hi Yevgeny, >>> >>> On Sun, 2007-01-21 at 04:17, Yevgeny Kliteynik wrote: >>>> Sasha Khapyorsky wrote: >>>>> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: >>>>>> As for the mailing list it's openib-windows at openib.org. 
You can access >>>>>> it here: http://openib.org/mailman/listinfo/openib-windows >>>>> I found only references to svn://windows.openib.org, where >>>>> 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | >>>>> head -n 40' shows: >>>>> >>>>> ------------------------------------------------------------------------ >>>>> r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line >>>>> >>>>> Set property svn:keywords "id" on all repository >>>>> ------------------------------------------------------------------------ >>>>> r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line >>>>> >>>>> [OPENSM] When running as a service, if all ports are down, use the first port. >>>>> ------------------------------------------------------------------------ >>>>> r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines >>>>> >>>>> [OPENSM] When trying to set to INIT the remote port of the given physical port >>>>> in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no >>>>> check whether the physical port in null (e.g., if it's disconnected). >>>>> ------------------------------------------------------------------------ >>>>> r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line >>>>> >>>>> [opensm] Base service status on results that were received from opensm log messages. >>>>> ------------------------------------------------------------------------ >>>>> r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line >>>>> >>>>> [OPENSM] missed fix for OPENSM logging to System Event Log >>>>> ------------------------------------------------------------------------ >>>>> r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines >>>>> >>>>> [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! >>>>> 2. bugfix: added message file for correct logging to System Event Log. >>>>> 3. 
bugfix: wrong passing parameters in server mode; >>>>> 4. bugfix: error in table of parameters >>>>> >>>>> ------------------------------------------------------------------------ >>>>> r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line >>>>> >>>>> [opensm] Fix a trivial build break >>>>> ------------------------------------------------------------------------ >>>>> r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines >>>>> >>>>> if the guid2lid is corrupted, don't exit when running with -y option >>>>> (don't exit on fatal) - just ignore the file >>>>> >>>>> >>>>> >>>>> Seems that development there was stopped in Aug 2006, and it doesn't >>>>> have recent Win port patches. Am I looking in the wrong place? >>>> You were looking in the right place. It appears that I didn't describe >>>> the development process correctly. I think this repository is updated >>>> with stable OSM versions, after the code is tested. >>> Any idea on when the next version is expected ? >> The SVN will be updated in a couple of days. > > Glad to hear it. To what OpenSM version will it correspond ? Will it be > based on OFED 1.1 or beyond ? What OpenIB svn or git commit does it > correspond to ? Thanks. The local SVN repository is syncronized with OpenSM GIT repository (head of master), and the changes from git are merged into the svn daily. This local SVN will be uploaded to the SVN repository on the web. -- Yevgeny > -- Hal > >> -- Yevgeny >> >>> -- Hal >>> >>>> If you need more details, I think it's better for you to ask windows folks >>>> directly, since as we see, my knowledge in this area is very limited. 
>>>> >>>> -- Yevgeny >>>> >>>>> Sasha >>>>> >>>> _______________________________________________ >>>> openib-general mailing list >>>> openib-general at openib.org >>>> http://openib.org/mailman/listinfo/openib-general >>>> >>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >>>> > From ogerlitz at voltaire.com Tue Jan 23 22:32:59 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 24 Jan 2007 08:32:59 +0200 Subject: [openib-general] [RFC/PATCH v3] rdma/cma: add RDMA_PS_IPOIB port space In-Reply-To: <45B64677.5090701@ichips.intel.com> References: <45B643D5.7010104@ichips.intel.com> <45B64677.5090701@ichips.intel.com> Message-ID: <45B6FD9B.50903@voltaire.com> Sean Hefty wrote: > I was thinking of SIDR, but what about connected mode ipoib? This could > make the ipoib port space interesting, or require breaking it into two > separate port spaces, or... I'm only going to worry about multicast for > now, unless there's a reason to consider other use. I don't think we need to worry on offloading IPoIB connected mode now, but thanks for bringing the idea. Or. From ogerlitz at voltaire.com Tue Jan 23 22:35:16 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 24 Jan 2007 08:35:16 +0200 Subject: [openib-general] [PATCH 2/2] librdmacm: add support to join IPOIB multicast groups In-Reply-To: <000301c73f34$6560f210$16d0180a@amr.corp.intel.com> References: <000301c73f34$6560f210$16d0180a@amr.corp.intel.com> Message-ID: <45B6FE24.80102@voltaire.com> Sean Hefty wrote: > Add to the librdmacm an IPOIB port space that allows interoperability with > IPoIB multicast traffic. Use of the RDMA_PS_IPOIB is limited to multicast > join/leave. the two patches seems fine, however i will not be able to test them today being out of the office all the day, will send my testing feedback on Thursday early IL time (late Wed night PST) Or. 
From ogerlitz at voltaire.com Tue Jan 23 22:41:44 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 24 Jan 2007 08:41:44 +0200 Subject: [openib-general] [RFC/PATCH] librdmacm: use the ipoib broadcast group qkey In-Reply-To: <000101c73f2d$a8655580$16d0180a@amr.corp.intel.com> References: <000101c73f2d$a8655580$16d0180a@amr.corp.intel.com> Message-ID: <45B6FFA8.3090802@voltaire.com> Sean Hefty wrote: >> Maybe just ask user to always call rdma_join_multicast after rdma_create_qp? >> Joins are now properly reference counted, so it shouldn't be a problem >> to repeat this any number of times. Right? > > This is the solution for now, and it should work fine. I don't think it would > be hard to support creating the QP after joining if someone ever came up with > the need, but it doesn't seem like a priority at the moment. Indeed, since to do multicast RX/TX you need an IB UD QP, an IB app (e.g. IPoIB) would naturally create its QP before doing any join/leave on the group. If there is ever demand for a "crazy" usage scheme of first joining and then creating the QP, you can enhance librdmacm to support it. Or. From mst at mellanox.co.il Wed Jan 24 00:02:24 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 24 Jan 2007 10:02:24 +0200 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <45B6778C.5010803@ichips.intel.com> References: <45B6778C.5010803@ichips.intel.com> Message-ID: <20070124080224.GF20101@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: librdmacm and udapl: Which git branch to use in ofed_1_2 build > > > Could you please rebase that to 2.6.20-rc5? > > Yes - but I probably won't get to this until tomorrow. Not a problem - I generated patches and put them in OFED already. -- MST From mst at mellanox.co.il Wed Jan 24 00:11:52 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 24 Jan 2007 10:11:52 +0200 Subject: [openib-general] RDMA CM multicast In-Reply-To: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> References: <45B4DA10.3080400@indiana.edu> <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> Message-ID: <20070124081152.GG20101@mellanox.co.il> > What would be needed is a way for the user to indicate that they need a unique > address. An obvious way to accomplish this is for the user to specify an IP > address of 0.0.0.0 when calling rdma_join_multicast(). The user would first > need to bind to a specific device by calling rdma_bind_addr() with a local IP > address. Is this how it works with sockets? -- MST From mst at mellanox.co.il Wed Jan 24 00:15:04 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 24 Jan 2007 10:15:04 +0200 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. In-Reply-To: <1169600597.26256.53.camel@stevo-desktop> References: <1169600597.26256.53.camel@stevo-desktop> Message-ID: <20070124081504.GI20101@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. > > > Handle Ethernet neighbour updates during route resolution. > > The IWCM uses the ib_addr services to do route resolution (neighbour > discovery in the IP world). The ib_addr netevent callback routine, > however, currently only acts on InfiniBand neighbour updates. It needs > to act on Ethernet neighbour updates as well. > > This patch just removes filtering on device type altogether and > will trigger on any neighbour updates where the nud_state is valid. > This simplifies the code some. > > Signed-off-by: Steve Wise BTW, Steve, if this is a patch you want in OFED, please specify this. -- MST From mst at mellanox.co.il Wed Jan 24 02:14:59 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 24 Jan 2007 12:14:59 +0200 Subject: [openib-general] [PATCH RFC ] ofed_1_2 simulate neighbour update events by snooping ARP packets In-Reply-To: <1169588145.26256.12.camel@stevo-desktop> References: <1169588145.26256.12.camel@stevo-desktop> Message-ID: <20070124101459.GB22505@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH RFC ] ofed_1_2 simulate neighbour update events by snooping ARP packets > > OFED/iWARP Developers, > > Here is a proposal for supporting the minimum required neighbour update > event notifications needed for iwarp devices on the older kernels > supported by ofed. > > This patch is a request for comments. Please review. If you think it > looks ok, then I'll provide patches to all the various backports. > > Steve I am generally very positive about this, let's try to do this for OFED 1.2. Some comments on code: > 2.6.17 backport: simulate neighbour update events by snooping ARP packets > > Needed to support iWARP devices on backported kernels. This also allows > using the current drivers/infiniband/core/addr.c which requires netevents > as well. > > This patch rearranges things a bit: > > - add the new file in the kernel_addons/backport dir for the ARP > snooping / netevent callout code. This file is called > rdma_netevents.c. > > - modify the kernel_patches/backports/2.6.17/linux_stuff* patch to > include rdma_netevents.c _and_ the netevent.c file into its own > module called rdma_ne Maybe roll these two into a common netevent.c? Is there a reason not to? Are there kernels where you will want one of these but not the other? And the name is a bit confusing - nothing here is actually related to rdma in any way ... > - remove the backport patch to revert addr.c to snoop ARP packets. 
> > Signed-off-by: Steve Wise > > .../backport/2.6.17/include/src/rdma_netevents.c | 91 +++++++++++++++++++++++ > .../2.6.17/addr_1_netevents_revert_to_2_6_17.patch | 76 ------------------- > .../backport/2.6.17/linux_stuff_to_2_6_17.patch | 13 ++- > 3 files changed, 99 insertions(+), 81 deletions(-) > > diff --git a/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c b/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c > new file mode 100644 > index 0000000..1e9422f > --- /dev/null > +++ b/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c > @@ -0,0 +1,91 @@ > +/* > + * Copyright (c) 2007 Open Grid Computing, Inc. All rights reserved. > + * Copyright (c) 2007 Chelsio Communications, Inc. All rights reserved. > + * > + * This Software is licensed under one of the following licenses: > + * > + * 1) under the terms of the "Common Public License 1.0" a copy of which is > + * available from the Open Source Initiative, see > + * http://www.opensource.org/licenses/cpl.php. > + * > + * 2) under the terms of the "The BSD License" a copy of which is > + * available from the Open Source Initiative, see > + * http://www.opensource.org/licenses/bsd-license.php. > + * > + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a > + * copy of which is available from the Open Source Initiative, see > + * http://www.opensource.org/licenses/gpl-license.php. > + * > + * Licensee has the right to choose one of the above licenses. > + * > + * Redistributions of source code must retain the above copyright > + * notice and one of the license notices. > + * > + * Redistributions in binary form must reproduce both the above copyright > + * notice, one of the license notices in the documentation > + * and/or other materials provided with the distribution. > + * > + */ > + > +/* > + * Simulate neighbour update netevents by snooping ARP packets. 
> + */ > + > +#include > +#include > +#include > + > +#include > +#include > +#include > +#include > + > +MODULE_AUTHOR("Steve Wise"); > +MODULE_DESCRIPTION("Netevent Notification Module"); > +MODULE_LICENSE("Dual BSD/GPL"); > + > +static int arp_recv(struct sk_buff *skb, struct net_device *dev, > + struct packet_type *pkt, struct net_device *dev2) > +{ > + struct arphdr *arp_hdr; > + struct neighbour *n; > + u8 *arp_ptr; > + __be32 gw; > + u16 op; > + > + arp_hdr = (struct arphdr *) skb->nh.raw; > + op = ntohs(arp_hdr->ar_op); > + > + if (op == ARPOP_REQUEST || op == ARPOP_REPLY) { > + arp_ptr = (u8 *)(arp_hdr + 1); /* skip fixed-size arp header */ I think this is correct, but this looks weird because arp_hdr + 1 is a pointer to an *invalid* arp header. I know arp_hdr + 1 does math in units of sizeof *arp_hdr, but just arp_ptr = skb->nh.raw + sizeof (struct arphdr) would much clearer - leave the pointer math for when there is an array. And then you will not need a cast. > + arp_ptr += skb->dev->addr_len; /* skip src ha */ > + memcpy(&gw, arp_ptr, 4); /* pull the SPA */ > + n = neigh_lookup(&arp_tbl, &gw, skb->dev); > + if (n) { > + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); > + } > + } > + > + kfree_skb(skb); > + return 0; > +} > + > +static struct packet_type arp = { > + .type = __constant_htons(ETH_P_ARP), > + .func = arp_recv, > + .af_packet_priv = (void *)1, > +}; > + > +static int init(void) > +{ > + dev_add_pack(&arp); > + return 0; > +} > + > +static void cleanup(void) > +{ > + dev_remove_pack(&arp); > +} > + > +module_init(init); > +module_exit(cleanup); > diff --git a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch > deleted file mode 100644 > index 316d8d2..0000000 > --- a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch > +++ /dev/null > @@ -1,76 +0,0 @@ > -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 > -Author: Tom Tucker > 
-Date: Sun Jul 30 20:44:19 2006 -0700 > - > - [NET] infiniband: Cleanup ib_addr module to use the netevents > - > - Signed-off-by: Tom Tucker > - Signed-off-by: Steve Wise > - Signed-off-by: David S. Miller > - > -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c > -index 1205e80..d294bbc 100644 > ---- a/drivers/infiniband/core/addr.c > -+++ b/drivers/infiniband/core/addr.c > -@@ -35,7 +35,6 @@ #include > - #include > - #include > - #include > --#include > - #include > - > - MODULE_AUTHOR("Sean Hefty"); > -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad > - } > - EXPORT_SYMBOL(rdma_addr_cancel); > - > --static int netevent_callback(struct notifier_block *self, unsigned long event, > -- void *ctx) > -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, > -+ struct packet_type *pkt, struct net_device *orig_dev) > - { > -- if (event == NETEVENT_NEIGH_UPDATE) { > -- struct neighbour *neigh = ctx; > -+ struct arphdr *arp_hdr; > - > -- if (neigh->dev->type == ARPHRD_INFINIBAND && > -- (neigh->nud_state & NUD_VALID)) { > -- set_timeout(jiffies); > -- } > -- } > -+ arp_hdr = (struct arphdr *) skb->nh.raw; > -+ > -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || > -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) > -+ set_timeout(jiffies); > -+ > -+ kfree_skb(skb); > - return 0; > - } > - > --static struct notifier_block nb = { > -- .notifier_call = netevent_callback > -+static struct packet_type addr_arp = { > -+ .type = __constant_htons(ETH_P_ARP), > -+ .func = addr_arp_recv, > -+ .af_packet_priv = (void*) 1, > - }; > - > - static int addr_init(void) > -@@ -351,13 +353,13 @@ static int addr_init(void) > - if (!addr_wq) > - return -ENOMEM; > - > -- register_netevent_notifier(&nb); > -+ dev_add_pack(&addr_arp); > - return 0; > - } > - > - static void addr_cleanup(void) > - { > -- unregister_netevent_notifier(&nb); > -+ dev_remove_pack(&addr_arp); > - destroy_workqueue(addr_wq); > - } > - > - > diff --git 
a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > index eb2285f..af7e814 100644 > --- a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > +++ b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > @@ -5,20 +5,23 @@ index 0000000..58cf933 > +++ b/drivers/infiniband/core/genalloc.c > @@ -0,0 +1 @@ > +#include "src/genalloc.c" > -diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > +diff --git a/drivers/infiniband/core/rdma_netevents.c b/drivers/infiniband/core/rdma_netevents.c > new file mode 100644 > index 0000000..58cf933 > --- /dev/null > -+++ b/drivers/infiniband/core/netevent.c > -@@ -0,0 +1 @@ > ++++ b/drivers/infiniband/core/rdma_netevents.c > +@@ -0,0 +1,2 @@ > +#include "src/netevent.c" > ++#include "src/rdma_netevents.c" This is slightly ugly. Let's have an object file per .c file. Or just merge the two .c files together? > diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > index 50fb1cd..456bfd0 100644 > --- a/drivers/infiniband/core/Makefile > +++ b/drivers/infiniband/core/Makefile > -@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > +@@ -30,3 +30,7 @@ ib_ucm-y := ucm.o > > ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > uverbs_marshall.o > + > -+ib_core-y += genalloc.o netevent.o > ++infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) += rdma_ne.o > ++rdma_ne-y := rdma_netevents.o > ++ib_core-y += genalloc.o I'd prefer not to have a new module rdma_ne. Scripts need to be written to install it, and making these kernel dependent is a big pain. Can we continue keeping it in ib_core? Or move to ib_addr if you see a problem with this. 
-- MST From monis at voltaire.com Wed Jan 24 03:11:58 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 24 Jan 2007 13:11:58 +0200 Subject: [openib-general] [PATCH] IB/ipoib: Add field dev to struct ipoib_neigh In-Reply-To: <20070123182352.GF10394@mellanox.co.il> References: <45B6412A.6020207@voltaire.com> <45B642B2.5060007@voltaire.com> <20070123182352.GF10394@mellanox.co.il> Message-ID: <45B73EFE.60500@voltaire.com> > > > Just to clarify - you previously mentioned you saw problems with 2.6.16 > backport. Is this an issue you see with 2.6.20 as well? Yes, the same thing happens with kernel 2.6.20. However, the patch for 2.6.20 looks a little bit different. I will post it today or tomorrow. > > Also - in your approach, what prevents the device from going away while there > are still ipoib_neigh objects around? Nothing prevents it. You can modprobe -r bonding whenever you want (even when IPoIB is up) and still be safe from leaks. I think my answer for that is below. > Also - if neigh does not point to ipoib device, our neigh destructor won't be called > for it, will it? What will clean the ipoib neigh then? > With kernels up to 2.6.16, patch ipoib_8111_to_2_6_16 adds this to ipoib_neigh_alloc ==> neigh->neighbour->ops->destructor = ipoib_neigh_destructor; So I guess there is no such problem here. For later kernels, bond device "borrows" the slave's neigh_setup function in the bond's setup function. ==> bond_dev->neigh_setup = slave_dev->neigh_setup; So even if the neighbour points to bond device the ipoib_neigh_destructor will be called. From mst at mellanox.co.il Wed Jan 24 04:07:11 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 24 Jan 2007 14:07:11 +0200 Subject: [openib-general] [PATCH] IB/ipoib: Add field dev to struct ipoib_neigh In-Reply-To: <45B73EFE.60500@voltaire.com> References: <45B73EFE.60500@voltaire.com> Message-ID: <20070124120711.GA26693@mellanox.co.il> > > > > > > Just to clarify - you previously mentionned you saw problems with 2.6.16 > > backport. Is this an issue you see with 2.6.20 as well? > Yes, the same thing happens with kernel 2.6.20. However, the patch for 2.6.20 > looks a little bit different. I will post it today or tommorow. Let's see that first. I prefer to first look at upstream code, then think about backporting. > > > > Also - in your approach, what prevents the device from going away while there > > are still ipoib_neigh objects around? > Nothing prevents it. You can modprobe -r bonding whenever you want (even when IPoIB is up) > and still be safe from leaks. I think my answer for that is below. > > > Also - if neigh does not point to ipoib device, our neigh destructor won't be called > > for it, will it? What will clean the ipoib neigh then? > > > With kernels up to 2.6.16, patch ipoib_8111_to_2_6_16 adds this to > ipoib_neigh_alloc > ==> neigh->neighbour->ops->destructor = ipoib_neigh_destructor; > So I guess there is no such problem here. But this would hardly help if ipoib module is unloaded while neighbour for bonding device is still around and has a pointer to ipoib_neigh_destructor. > For later kernels, bond device "borrows" the slave's neigh_setup > function in the bond's setup function. > > ==> bond_dev->neigh_setup = slave_dev->neigh_setup; > > So even if the beighbour points to bond device the > ipoib_neigh_destructor will be called. Same applies here. Further, in both cases, it seems that accessing data at to_ipoib_neigh on a neighbour for non-ipoib device can cause a crash if hardware address is !=0 at offset 20. 
-- MST From kliteyn at dev.mellanox.co.il Wed Jan 24 06:09:09 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 24 Jan 2007 16:09:09 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B63627.3050606@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <20070122190249.GI23783@sashak.voltaire.com> <45B63627.3050606@dev.mellanox.co.il> Message-ID: <45B76885.4090508@dev.mellanox.co.il> Hi Hal, Sasha. Here's a description of the QoS policy file, and an example of such file (with more comments inside). QoS Policy file --------------- The QoS policy file is divided into 4 sub sections: * Node Group: a set of HCAs, Routers or Switches that share the same settings. A node groups might be a partition defined by the partition manager policy in terms of GUIDs. Future implementations might provide support for NodeDescription based definition of node groups. * Fabric Setup: Defines how the SL2VL and VLArb tables should be setup. This policy definition assumes the computation of target behavior should be performed outside of OpenSM. * QoS-Levels Definition: This section defines the possible sets of parameters for QoS that a client might be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits (in case LMC > 0 is used for QoS) and TClass. * Matching Rules: A list of rules that match an incoming PathRecord request to a QoS-Level. The rules are processed in order such as the first match is applied. Each rule is built out of set of match expressions which should all match for the rule to apply. 
The matching expressions are defined for the following fields - SRC and DST to lists of node groups - Service-ID to a list of Service-ID or Service-ID ranges - TClass to a list of TClass values or ranges QoS policy file example ----------------------- Storage our SRP storage targets 0x1000000000000001 0x1000000000000002 Virtual Servers node desc and IB port # vs1/HCA-1/P1 vs3/HCA-1/P1 vs3/HCA-2/P1 Partition 1 default settings Part1 Routers all routers ROUTER Part1 * * 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 Storage Storage2 Storage3 * 1 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 Storage 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 8:255,9:127,10:63,11:31,12:15,13:7,14:3 10 1 for the lowest priority comm 16 2 low latency best bandwidth 0 7 3 just an example 0 32 1 1 1 low latency by class 7-9 or 11 7-9,11 1 2 Storage targets connection> Storage 22,4719 3 -- Yevgeny Yevgeny Kliteynik wrote: > Hi Sasha, > > Sasha Khapyorsky wrote: >> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: >>> Hi Sasha. >>> >>> Sasha Khapyorsky wrote: >>>> Hi Yevgeny, >>>> >>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: >>>>> Hi Hal >>>>> >>>>> The following series of six patches implements QoS policy file parser: >>>>> >>>>> 1. QoS parser Lex file >>>>> 2. QoS parser Lex-generated c file >>>>> 3. QoS parser grammar (Yacc) file >>>>> 4. QoS parser Yacc-generated grammar c and h file >>>>> 5. QoS parser header file that defines parse tree data structures >>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files >>>> Is there any description of proposed format and functionality? >>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few >>> minor modifications. You can find the RFC here: >>> http://openib.org/pipermail/openib-general/2006-May/022336.html >> This was RFC and couple of issues were discussed then. Now you are about >> implementation phase and exact format description would be desired. For >> example what "few minor modifications" are? 
> > I'll prepare an example file with explanations. > > -- Yevgeny > >>>> Also what about using human readable formats? >>> To me the xml-like format in the RFC looks pretty readable. >>> It has very limited number of keywords (tags), so it's easy >>> to follow and/or to modify. >> It is your opinion, not everybody will agree with it (AFAIR this was >> discussed too during RFC). >> >> I would not be care, but I don't know any example of really successful >> XML using for configuration purposes (especially where advanced graphical >> config editors/viewers were not used). Do you know? >> >> Sasha >> > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From kliteyn at dev.mellanox.co.il Wed Jan 24 06:10:42 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 24 Jan 2007 16:10:42 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser Message-ID: <45B768E2.9070604@dev.mellanox.co.il> Hi Hal, Sasha. Here's a description of the QoS policy file, and an example of such file (with more comments inside). QoS Policy file --------------- The QoS policy file is divided into 4 sub sections: * Node Group: a set of HCAs, Routers or Switches that share the same settings. A node groups might be a partition defined by the partition manager policy in terms of GUIDs. Future implementations might provide support for NodeDescription based definition of node groups. * Fabric Setup: Defines how the SL2VL and VLArb tables should be setup. This policy definition assumes the computation of target behavior should be performed outside of OpenSM. * QoS-Levels Definition: This section defines the possible sets of parameters for QoS that a client might be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits (in case LMC > 0 is used for QoS) and TClass. 
* Matching Rules: A list of rules that match an incoming PathRecord request to a QoS-Level. The rules are processed in order such as the first match is applied. Each rule is built out of set of match expressions which should all match for the rule to apply. The matching expressions are defined for the following fields - SRC and DST to lists of node groups - Service-ID to a list of Service-ID or Service-ID ranges - TClass to a list of TClass values or ranges QoS policy file example ----------------------- Storage our SRP storage targets 0x1000000000000001 0x1000000000000002 Virtual Servers node desc and IB port # vs1/HCA-1/P1 vs3/HCA-1/P1 vs3/HCA-2/P1 Partition 1 default settings Part1 Routers all routers ROUTER Part1 * * 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 Storage Storage2 Storage3 * 1 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 Storage 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 8:255,9:127,10:63,11:31,12:15,13:7,14:3 10 1 for the lowest priority comm 16 2 low latency best bandwidth 0 7 3 just an example 0 32 1 1 1 low latency by class 7-9 or 11 7-9,11 1 2 Storage targets connection> Storage 22,4719 3 -- Yevgeny Yevgeny Kliteynik wrote: > Hi Sasha, > > Sasha Khapyorsky wrote: >> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: >>> Hi Sasha. >>> >>> Sasha Khapyorsky wrote: >>>> Hi Yevgeny, >>>> >>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: >>>>> Hi Hal >>>>> >>>>> The following series of six patches implements QoS policy file parser: >>>>> >>>>> 1. QoS parser Lex file >>>>> 2. QoS parser Lex-generated c file >>>>> 3. QoS parser grammar (Yacc) file >>>>> 4. QoS parser Yacc-generated grammar c and h file >>>>> 5. QoS parser header file that defines parse tree data structures >>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files >>>> Is there any description of proposed format and functionality? >>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few >>> minor modifications. 
You can find the RFC here: >>> http://openib.org/pipermail/openib-general/2006-May/022336.html >> This was RFC and couple of issues were discussed then. Now you are about >> implementation phase and exact format description would be desired. For >> example what "few minor modifications" are? > > I'll prepare an example file with explanations. > > -- Yevgeny > >>>> Also what about using human readable formats? >>> To me the xml-like format in the RFC looks pretty readable. >>> It has very limited number of keywords (tags), so it's easy >>> to follow and/or to modify. >> It is your opinion, not everybody will agree with it (AFAIR this was >> discussed too during RFC). >> >> I would not be care, but I don't know any example of really successful >> XML using for configuration purposes (especially where advanced graphical >> config editors/viewers were not used). Do you know? >> >> Sasha >> > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From vlad at lists.openfabrics.org Wed Jan 24 06:12:52 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Wed, 24 Jan 2007 06:12:52 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070124-0553 daily build status Message-ID: <20070124141252.305A6E603C3@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with 
linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on powerpc with linux-2.6.19 Passed on powerpc with linux-2.6.18 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.17 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.13 Failed: From kliteyn at dev.mellanox.co.il Wed Jan 24 06:15:02 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 24 Jan 2007 16:15:02 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <1169496259.23046.10310.camel@hal.voltaire.com> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> Message-ID: <45B769E6.3080905@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi Yevgeny, > > On Sun, 2007-01-21 at 03:46, Yevgeny Kliteynik wrote: >> Hi Sasha. >> >> Sasha Khapyorsky wrote: >>> Hi Yevgeny, >>> >>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: >>>> Hi Hal >>>> >>>> The following series of six patches implements QoS policy file parser: >>>> >>>> 1. QoS parser Lex file >>>> 2. 
QoS parser Lex-generated c file >>>> 3. QoS parser grammar (Yacc) file >>>> 4. QoS parser Yacc-generated grammar c and h file >>>> 5. QoS parser header file that defines parse tree data structures >>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files >>> Is there any description of proposed format and functionality? >> The parser is based on QoS RFC sent by Eitan in May 2006, with a few >> minor modifications. You can find the RFC here: >> http://openib.org/pipermail/openib-general/2006-May/022336.html >> >>> Also what about using human readable formats? >> To me the xml-like format in the RFC looks pretty readable. >> It has very limited number of keywords (tags), so it's easy >> to follow and/or to modify. > > Putting aside the issue of plain text versus XML file formats for a > moment, can an example of the XML format be supplied ? What are the tags > used and their relationships ? I don't think there's been a discussion > on this yet. I've just sent a QoS policy file example with some explanations. You might get this mail more than once - I think I messed up with the mail address... > Also, why were lex and yacc chosen to be used rather than some open > source XML parser (already written in C) ? Yacc and Lex produce simple stand alone code which is not dependant on any other package. On top of it they are the most accurate syntax parsers and easy to handle. Also an XML parser would have provided only TAG parsing without any type checking. > I also have some questions about the patches Shoot > but I'll wait to see more of the bigger picture here. Hope the mail with the file example sheds some light. 
-- Yevgeny > -- Hal > >> -- Yevgeny >> >>> Sasha >>> > From halr at voltaire.com Wed Jan 24 06:16:47 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 24 Jan 2007 09:16:47 -0500 Subject: [openib-general] [PATCH] osm: QoS: added qos class and service id to the path record In-Reply-To: <45B5B436.7040506@dev.mellanox.co.il> References: <45B4E0C2.8060102@dev.mellanox.co.il> <20070122175952.GG23783@sashak.voltaire.com> <45B5B436.7040506@dev.mellanox.co.il> Message-ID: <1169648105.29183.86864.camel@hal.voltaire.com> On Tue, 2007-01-23 at 02:07, Yevgeny Kliteynik wrote: [snip...] > Hal, should I resubmit the patch? Yes, please do. -- Hal > Thanks. > > -- Yevgeny From swise at opengridcomputing.com Wed Jan 24 06:27:33 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 24 Jan 2007 08:27:33 -0600 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. In-Reply-To: <20070124081504.GI20101@mellanox.co.il> References: <1169600597.26256.53.camel@stevo-desktop> <20070124081504.GI20101@mellanox.co.il> Message-ID: <1169648853.1796.9.camel@stevo-desktop> On Wed, 2007-01-24 at 10:15 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. > > > > > > Handle Ethernet neighbour updates during route resolution. > > > > The IWCM uses the ib_addr services to do route resolution (neighbour > > discovery in the IP world). The ib_addr netevent callback routine, > > however, currently only acts on InfiniBand neighbour updates. It needs > > to act on ethernet neighbour updates as well. > > > > This patch just removes filtering on device type altogether and > > will trigger on any neighbour updates where the nud_type is valid. > > This simplifies the code some. > > > > Signed-off-by: Steve Wise > > BTW, Steve, if this is a patch you want in OFED, pls specify this. > Right...sorry: I believe it should go in OFED 1.2 and queued for 2.6.21. 
From halr at voltaire.com Wed Jan 24 06:25:09 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 24 Jan 2007 09:25:09 -0500 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45B6FC05.8030200@dev.mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> <45B32FB8.40700@dev.mellanox.co.il> <1169498411.23046.12190.camel@hal.voltaire.com> <45B635BC.2010304@dev.mellanox.co.il> <1169570982.29183.11110.camel@hal.voltaire.com> <45B6FC05.8030200@dev.mellanox.co.il> Message-ID: <1169648609.29183.87203.camel@hal.voltaire.com> Hi Yevgeny, On Wed, 2007-01-24 at 01:26, Yevgeny Kliteynik wrote: > Hi Hal, > > Hal Rosenstock wrote: > > Hi again Yevgeny, > > > > On Tue, 2007-01-23 at 11:20, Yevgeny Kliteynik wrote: > >> Hi Hal, > >> > >> Hal Rosenstock wrote: > >>> Hi Yevgeny, > >>> > >>> On Sun, 2007-01-21 at 04:17, Yevgeny Kliteynik wrote: > >>>> Sasha Khapyorsky wrote: > >>>>> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: > >>>>>> As for the mailing list it's openib-windows at openib.org. 
You can access > >>>>>> it here: http://openib.org/mailman/listinfo/openib-windows > >>>>> I found only references to svn://windows.openib.org, where > >>>>> 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | > >>>>> head -n 40' shows: > >>>>> > >>>>> ------------------------------------------------------------------------ > >>>>> r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line > >>>>> > >>>>> Set property svn:keywords "id" on all repository > >>>>> ------------------------------------------------------------------------ > >>>>> r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line > >>>>> > >>>>> [OPENSM] When running as a service, if all ports are down, use the first port. > >>>>> ------------------------------------------------------------------------ > >>>>> r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines > >>>>> > >>>>> [OPENSM] When trying to set to INIT the remote port of the given physical port > >>>>> in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no > >>>>> check whether the physical port in null (e.g., if it's disconnected). > >>>>> ------------------------------------------------------------------------ > >>>>> r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line > >>>>> > >>>>> [opensm] Base service status on results that were received from opensm log messages. > >>>>> ------------------------------------------------------------------------ > >>>>> r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line > >>>>> > >>>>> [OPENSM] missed fix for OPENSM logging to System Event Log > >>>>> ------------------------------------------------------------------------ > >>>>> r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines > >>>>> > >>>>> [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! > >>>>> 2. 
bugfix: added message file for correct logging to System Event Log. > >>>>> 3. bugfix: wrong passing parameters in server mode; > >>>>> 4. bugfix: error in table of parameters > >>>>> > >>>>> ------------------------------------------------------------------------ > >>>>> r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line > >>>>> > >>>>> [opensm] Fix a trivial build break > >>>>> ------------------------------------------------------------------------ > >>>>> r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines > >>>>> > >>>>> if the guid2lid is corrupted, don't exit when running with -y option > >>>>> (don't exit on fatal) - just ignore the file > >>>>> > >>>>> > >>>>> > >>>>> Seems that development there was stopped in Aug 2006, and it doesn't > >>>>> have recent Win port patches. Am I looking in the wrong place? > >>>> You were looking in the right place. It appears that I didn't describe > >>>> the development process correctly. I think this repository is updated > >>>> with stable OSM versions, after the code is tested. > >>> Any idea on when the next version is expected ? > >> The SVN will be updated in a couple of days. > > > > Glad to hear it. To what OpenSM version will it correspond ? Will it be > > based on OFED 1.1 or beyond ? What OpenIB svn or git commit does it > > correspond to ? Thanks. > > The local SVN repository is syncronized with OpenSM GIT repository > (head of master), and the changes from git are merged into the svn daily. > This local SVN will be uploaded to the SVN repository on the web. How frequently ? Is there only the released OpenSM version on the web ? Why not a work in progress one as well ? Wouldn't that help get more early testing in the Windows environment ? -- Hal > > -- Yevgeny > > > -- Hal > > > >> -- Yevgeny > >> > >>> -- Hal > >>> > >>>> If you need more details, I think it's better for you to ask windows folks > >>>> directly, since as we see, my knowledge in this area is very limited. 
> >>>> > >>>> -- Yevgeny > >>>> > >>>>> Sasha > >>>>> > >>>> _______________________________________________ > >>>> openib-general mailing list > >>>> openib-general at openib.org > >>>> http://openib.org/mailman/listinfo/openib-general > >>>> > >>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >>>> > > From swise at opengridcomputing.com Wed Jan 24 06:37:23 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 24 Jan 2007 08:37:23 -0600 Subject: [openib-general] [PATCH RFC ] ofed_1_2 simulate neighbour update events by snooping ARP packets In-Reply-To: <20070124101459.GB22505@mellanox.co.il> References: <1169588145.26256.12.camel@stevo-desktop> <20070124101459.GB22505@mellanox.co.il> Message-ID: <1169649443.1796.20.camel@stevo-desktop> On Wed, 2007-01-24 at 12:14 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: [PATCH RFC ] ofed_1_2 simulate neighbour update events by snooping ARP packets > > > > OFED/iWARP Developers, > > > > Here is a proposal for supporting the minimum required neighbour update > > event notifications needed for iwarp devices on the older kernels > > supported by ofed. > > > > This patch is a request for comments. Please review. If you think it > > looks ok, then I'll provide patches to all the various backports. > > > > Steve > > I am generally very positive about this, let's try to do this for OFED 1.2. > Some comments on code: > > > 2.6.17 backport: simulate neighbour update events by snooping ARP packets > > > > Needed to support iWARP devices on backported kernels. This also allows > > using the current drivers/infiniband/core/addr.c which requires netevents > > as well. > > > > This patch rearranges things a bit: > > > > - add the new file in the kernel_addons/backport dir for the ARP > > snooping / netevent callout code. This file is called > > rdma_netevents.c. 
> > > > - modify the kernel_patches/backports/2.6.17/linux_stuff* patch to > > include rdma_netevents.c _and_ the netevent.c file into its own > > module called rdma_ne > > Maybe roll these two into a common netevent.c? Is there a reason not to? > Are there kernels where you will want one of these but not the other? > And the name is a bit confusing - nothing here is actually related to rdma in any way ... > I kept them seperate because the netevent.c is pulled as-is from 2.6.20. It does make sense to just add the stuff I put in rdma_netevent.c into netevent.c and just have that one file. > > - remove the backport patch to revert addr.c to snoop ARP packets. > > > > Signed-off-by: Steve Wise > > > > .../backport/2.6.17/include/src/rdma_netevents.c | 91 +++++++++++++++++++++++ > > .../2.6.17/addr_1_netevents_revert_to_2_6_17.patch | 76 ------------------- > > .../backport/2.6.17/linux_stuff_to_2_6_17.patch | 13 ++- > > 3 files changed, 99 insertions(+), 81 deletions(-) > > > > diff --git a/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c b/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c > > new file mode 100644 > > index 0000000..1e9422f > > --- /dev/null > > +++ b/kernel_addons/backport/2.6.17/include/src/rdma_netevents.c > > @@ -0,0 +1,91 @@ > > +/* > > + * Copyright (c) 2007 Open Grid Computing, Inc. All rights reserved. > > + * Copyright (c) 2007 Chelsio Communications, Inc. All rights reserved. > > + * > > + * This Software is licensed under one of the following licenses: > > + * > > + * 1) under the terms of the "Common Public License 1.0" a copy of which is > > + * available from the Open Source Initiative, see > > + * http://www.opensource.org/licenses/cpl.php. > > + * > > + * 2) under the terms of the "The BSD License" a copy of which is > > + * available from the Open Source Initiative, see > > + * http://www.opensource.org/licenses/bsd-license.php. 
> > + * > > + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a > > + * copy of which is available from the Open Source Initiative, see > > + * http://www.opensource.org/licenses/gpl-license.php. > > + * > > + * Licensee has the right to choose one of the above licenses. > > + * > > + * Redistributions of source code must retain the above copyright > > + * notice and one of the license notices. > > + * > > + * Redistributions in binary form must reproduce both the above copyright > > + * notice, one of the license notices in the documentation > > + * and/or other materials provided with the distribution. > > + * > > + */ > > + > > +/* > > + * Simulate neighbour update netevents by snooping ARP packets. > > + */ > > + > > +#include > > +#include > > +#include > > + > > +#include > > +#include > > +#include > > +#include > > + > > +MODULE_AUTHOR("Steve Wise"); > > +MODULE_DESCRIPTION("Netevent Notification Module"); > > +MODULE_LICENSE("Dual BSD/GPL"); > > + > > +static int arp_recv(struct sk_buff *skb, struct net_device *dev, > > + struct packet_type *pkt, struct net_device *dev2) > > +{ > > + struct arphdr *arp_hdr; > > + struct neighbour *n; > > + u8 *arp_ptr; > > + __be32 gw; > > + u16 op; > > + > > + arp_hdr = (struct arphdr *) skb->nh.raw; > > + op = ntohs(arp_hdr->ar_op); > > + > > + if (op == ARPOP_REQUEST || op == ARPOP_REPLY) { > > + arp_ptr = (u8 *)(arp_hdr + 1); /* skip fixed-size arp header */ > > I think this is correct, but this looks weird because arp_hdr + 1 > is a pointer to an *invalid* arp header. This is common practice for bumping past a fixed size header. > > I know arp_hdr + 1 does math in units of sizeof *arp_hdr, but just > arp_ptr = skb->nh.raw + sizeof (struct arphdr) would much clearer - > leave the pointer math for when there is an array. > > And then you will not need a cast. > Ok, if you think that is clearer, then I'll do it. 
> > > + arp_ptr += skb->dev->addr_len; /* skip src ha */ > > + memcpy(&gw, arp_ptr, 4); /* pull the SPA */ > > + n = neigh_lookup(&arp_tbl, &gw, skb->dev); > > + if (n) { > > + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); > > + } > > + } > > + > > + kfree_skb(skb); > > + return 0; > > +} > > + > > +static struct packet_type arp = { > > + .type = __constant_htons(ETH_P_ARP), > > + .func = arp_recv, > > + .af_packet_priv = (void *)1, > > +}; > > + > > +static int init(void) > > +{ > > + dev_add_pack(&arp); > > + return 0; > > +} > > + > > +static void cleanup(void) > > +{ > > + dev_remove_pack(&arp); > > +} > > + > > +module_init(init); > > +module_exit(cleanup); > > diff --git a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch > > deleted file mode 100644 > > index 316d8d2..0000000 > > --- a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch > > +++ /dev/null > > @@ -1,76 +0,0 @@ > > -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 > > -Author: Tom Tucker > > -Date: Sun Jul 30 20:44:19 2006 -0700 > > - > > - [NET] infiniband: Cleanup ib_addr module to use the netevents > > - > > - Signed-off-by: Tom Tucker > > - Signed-off-by: Steve Wise > > - Signed-off-by: David S. 
Miller > > - > > -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c > > -index 1205e80..d294bbc 100644 > > ---- a/drivers/infiniband/core/addr.c > > -+++ b/drivers/infiniband/core/addr.c > > -@@ -35,7 +35,6 @@ #include > > - #include > > - #include > > - #include > > --#include > > - #include > > - > > - MODULE_AUTHOR("Sean Hefty"); > > -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad > > - } > > - EXPORT_SYMBOL(rdma_addr_cancel); > > - > > --static int netevent_callback(struct notifier_block *self, unsigned long event, > > -- void *ctx) > > -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, > > -+ struct packet_type *pkt, struct net_device *orig_dev) > > - { > > -- if (event == NETEVENT_NEIGH_UPDATE) { > > -- struct neighbour *neigh = ctx; > > -+ struct arphdr *arp_hdr; > > - > > -- if (neigh->dev->type == ARPHRD_INFINIBAND && > > -- (neigh->nud_state & NUD_VALID)) { > > -- set_timeout(jiffies); > > -- } > > -- } > > -+ arp_hdr = (struct arphdr *) skb->nh.raw; > > -+ > > -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || > > -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) > > -+ set_timeout(jiffies); > > -+ > > -+ kfree_skb(skb); > > - return 0; > > - } > > - > > --static struct notifier_block nb = { > > -- .notifier_call = netevent_callback > > -+static struct packet_type addr_arp = { > > -+ .type = __constant_htons(ETH_P_ARP), > > -+ .func = addr_arp_recv, > > -+ .af_packet_priv = (void*) 1, > > - }; > > - > > - static int addr_init(void) > > -@@ -351,13 +353,13 @@ static int addr_init(void) > > - if (!addr_wq) > > - return -ENOMEM; > > - > > -- register_netevent_notifier(&nb); > > -+ dev_add_pack(&addr_arp); > > - return 0; > > - } > > - > > - static void addr_cleanup(void) > > - { > > -- unregister_netevent_notifier(&nb); > > -+ dev_remove_pack(&addr_arp); > > - destroy_workqueue(addr_wq); > > - } > > - > > - > > diff --git a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch 
b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > > index eb2285f..af7e814 100644 > > --- a/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > > +++ b/kernel_patches/backport/2.6.17/linux_stuff_to_2_6_17.patch > > @@ -5,20 +5,23 @@ index 0000000..58cf933 > > +++ b/drivers/infiniband/core/genalloc.c > > @@ -0,0 +1 @@ > > +#include "src/genalloc.c" > > -diff --git a/drivers/infiniband/core/netevent.c b/drivers/infiniband/core/netevent.c > > +diff --git a/drivers/infiniband/core/rdma_netevents.c b/drivers/infiniband/core/rdma_netevents.c > > new file mode 100644 > > index 0000000..58cf933 > > --- /dev/null > > -+++ b/drivers/infiniband/core/netevent.c > > -@@ -0,0 +1 @@ > > ++++ b/drivers/infiniband/core/rdma_netevents.c > > +@@ -0,0 +1,2 @@ > > +#include "src/netevent.c" > > ++#include "src/rdma_netevents.c" > > This is slightly ugly. Let's have an object file per .c file. > Or just merge the two .c files together? > ok. > > diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile > > index 50fb1cd..456bfd0 100644 > > --- a/drivers/infiniband/core/Makefile > > +++ b/drivers/infiniband/core/Makefile > > -@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o > > +@@ -30,3 +30,7 @@ ib_ucm-y := ucm.o > > > > ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ > > uverbs_marshall.o > > + > > -+ib_core-y += genalloc.o netevent.o > > ++infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) += rdma_ne.o > > ++rdma_ne-y := rdma_netevents.o > > ++ib_core-y += genalloc.o > > I'd prefer not to have a new module rdma_ne. Scripts need to be written to install it, > and making these kernel dependent is a big pain. > Can we continue keeping it in ib_core? > Or move to ib_addr you see a problem with this. > The nice thing about making it a stand-alone module is that its init function gets called when loaded. If we add it to ib_core or ib_addr, then we'll have to modify one of those init functions. 
I'm all for keeping this simple, so do you have a suggestion for how to call the init function? One last (major) issue: After testing, I've discovered that this method (arp pkt snooping) has a problem in that our recv function is getting called with the incoming ARP packet __before__ the IPv4 ARP code. Thus the neigh entries are not yet updated with the results of the incoming ARP packet. This effectively makes notifications useless since the neigh entry hasn't yet been updated! :-( I have coded up a solution for this: The arp_recv function will simply install a destructor method on the skb (skb->destructor). When the IPv4 code frees the skb, the destructor function will get called and I can then look up the neigh entry and call the notifier chain. This solution works nicely, but it is a bit of HACK. Thoughts? From swise at opengridcomputing.com Wed Jan 24 06:41:15 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 24 Jan 2007 08:41:15 -0600 Subject: [openib-general] OFED 1.2 bug reporting Message-ID: <1169649675.1796.24.camel@stevo-desktop> Should I be using the open fabrics bugzilla to open bugs against OFED 1.2? If so, should a new 'version' be added for ofed-1.2? Right now the only version that makes sense is 'gen2', but that doesn't really cover bugs against backport code... Steve. From halr at voltaire.com Wed Jan 24 06:37:03 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 24 Jan 2007 09:37:03 -0500 Subject: [openib-general] RDMA CM multicast In-Reply-To: <45B6AF42.7000500@ichips.intel.com> References: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> <45B6AF42.7000500@ichips.intel.com> Message-ID: <1169649315.29183.87785.camel@hal.voltaire.com> On Tue, 2007-01-23 at 19:58, Sean Hefty wrote: > > rdma_join_multicast(0.0.0.0, port 0) <- exchange group info out of band > > Trying to work through this more, having the first node join seems trivial. 
> Getting additional nodes to join the same group through the rdma_cm is proving > more difficult... The MGID of the group would need to be treated as an IPv6 > address, with a join done using that address directly, versus mapping an IPv6 > address to an MGID using the ipoib algorithm. Doesn't this carve a hole in the IPv6 (multicast) address space (group ID) ? If so, is that a hole we can carve out ? -- Hal > I still believe this is doable, it's just going to require more > thought/discussion to ensure that we get a clean implementation. > > - Sean > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From afriedle at open-mpi.org Wed Jan 24 09:39:38 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Wed, 24 Jan 2007 09:39:38 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> References: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> Message-ID: <45B799DA.7010606@open-mpi.org> Sean Hefty wrote: > Posting to openib-general list... > >> RDMA CM has multicast of course, though it seems no means of preventing >> address collisions (to me, that means two separate MPI jobs using the >> same multicast address). I know that part of the new multicast support >> you had developed a few months ago was the ability to specify a '0' >> MGID/MLID to indicate that an unused multicast address should be used >> and returned. >> >> How hard would it be to add this functionality to RDMA CM? > > I looked into this, and it seems doable. I hacked the kernel rdma_cm to join a > multicast group with an mgid of 0, and it seemed to work as far as I could test > it without more extensive changes. 
(My test didn't actually transfer data, but > the join succeeded, the MGID/MLID was exported to userspace, and different > applications joined different groups.) > > What would be needed is a way for the user to indicate that they need a unique > address. An obvious way to accomplish this is for the user to specify an IP > address of 0.0.0.0 when calling rdma_join_multicast(). The user would first > need to bind to a specific device by calling rdma_bind_addr() with a local IP > address. Fine with me -- I would be using rdma_bind_addr() all the time anyway. Though finding a way to remove this requirement might be useful to someone else.. > If more than one group is joined this way, then rdma_leave_multicast() would > need someway to distinguish between the different groups joined by a single > user. (rdma_leave_multicast takes the IP address of the group to leave.) > Providing a "port number" with the sockaddr would work. The port number would > need to match when joining/leaving, but is not part of the multicast address, > essentially making it a join index specified by the user. > > Your code would look something like this: > > rdma_bind_addr(local IP address) > rdma_join_multicast(0.0.0.0, port 0) <- exchange group info out of band > rdma_join_multicast(0.0.0.0, port 1) <- exchange group info out of band > send data to a lot of nodes at once > rdma_leave_multicast(0.0.0.0, port 0) > rdma_leave_multicast(0.0.0.0, port 1) Not sure I understand this -- RDMA CM will be exchanging whatever multicast address is selected as part of the rdma_join_multicast call? How exactly do you plan to do that? i.e. How do you know who to communicate that information to? Unless I'm missing something, it sounds like this approach just moves the address collision problem over to port numbers, which wouldn't really accomplish anything. 
What I would have expected is a means to get at the address that gets chosen instead of 0.0.0.0 -- either directly returned at rdma_join_multicast or successful join notification, or made available in a data structure somewhere. From there it's trivial (for me) to pass the address to the other peers I care about, and have them join by explicitly specifying the multicast address. Andrew > If this sounds like it would work for you, let me know, and I can create a patch > to test this idea more. > > - Sean > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Wed Jan 24 06:51:54 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 24 Jan 2007 08:51:54 -0600 Subject: [openib-general] [PATCH] ofed_1_2 iw_cxgb3: allow doorbell mappings with VM_READ set. Message-ID: <1169650314.12416.1.camel@stevo-desktop> iw_cxgb3: allow doorbell mappings with VM_READ set. This is needed on RHEL4U4. The vma passed into the iw_cxgb3 mmap function has VM_READ set even though the library only requests write.
Signed-off-by: Steve Wise --- .../2.6.9_U4/iwch_provider_to_2.6.9_U4.patch | 16 ++++++++++++++++ 1 files changed, 16 insertions(+), 0 deletions(-) diff --git a/kernel_patches/backport/2.6.9_U4/iwch_provider_to_2.6.9_U4.patch b/kernel_patches/backport/2.6.9_U4/iwch_provider_to_2.6.9_U4.patch new file mode 100644 index 0000000..1fbc717 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U4/iwch_provider_to_2.6.9_U4.patch @@ -0,0 +1,16 @@ +--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c 2007-01-17 09:22:39.000000000 -0600 ++++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c 2007-01-22 17:46:16.000000000 -0600 +@@ -337,13 +337,6 @@ static int iwch_mmap(struct ib_ucontext + (pgaddr < (rdev_p->rnic_info.udbell_physbase + + rdev_p->rnic_info.udbell_len))) { + +- /* +- * Map T3 DB register. +- */ +- if (vma->vm_flags & VM_READ) { +- return -EPERM; +- } +- + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + vma->vm_flags &= ~VM_MAYREAD; From afriedle at open-mpi.org Wed Jan 24 09:48:10 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Wed, 24 Jan 2007 09:48:10 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <20070124081152.GG20101@mellanox.co.il> References: <45B4DA10.3080400@indiana.edu> <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> <20070124081152.GG20101@mellanox.co.il> Message-ID: <45B79BDA.4020605@open-mpi.org> Michael S. Tsirkin wrote: >> What would be needed is a way for the user to indicate that they need a unique >> address. An obvious way to accomplish this is for the user to specify an IP >> address of 0.0.0.0 when calling rdma_join_multicast(). The user would first >> need to bind to a specific device by calling rdma_bind_addr() with a local IP >> address. > > Is this how it works with sockets? I'm not aware of IP multicast having this functionality. This is a major problem with IP multicast, as some sort of address selection arbitration has to be done. 
From what I've seen, this ranges from IANA assigning multicast addresses for specific uses to best-effort protocols for selecting unused addresses from an available range. It's a fairly hard problem, and having OFED choose an unused address (my understanding is that the SM is capable of this) sidesteps the issue and makes life much better. Andrew From swise at opengridcomputing.com Wed Jan 24 07:17:26 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 24 Jan 2007 09:17:26 -0600 Subject: [openib-general] [PATCH RFC ] ofed_1_2 simulate neighbour update events by snooping ARP packets In-Reply-To: <1169649443.1796.20.camel@stevo-desktop> References: <1169588145.26256.12.camel@stevo-desktop> <20070124101459.GB22505@mellanox.co.il> <1169649443.1796.20.camel@stevo-desktop> Message-ID: <1169651846.12416.5.camel@stevo-desktop> > > > > I'd prefer not to have a new module rdma_ne. Scripts need to be written to install it, > > and making these kernel dependent is a big pain. > > Can we continue keeping it in ib_core? > > Or move to ib_addr you see a problem with this. > > > > The nice thing about making it a stand-alone module is that its init > function gets called when loaded. If we add it to ib_core or ib_addr, > then we'll have to modify one of those init functions. > > I'm all for keeping this simple, so do you have a suggestion for how to > call the init function? One idea: This code can register as an ib_client. Then it will get a callback when providers are registered. Upon the first provider registration, it can do its init functionality... So then the code could be bound into ib_core or ib_addr without modifying those sources. Steve.
From monis at voltaire.com Wed Jan 24 07:20:58 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 24 Jan 2007 17:20:58 +0200 Subject: [openib-general] [PATCH] IB/ipoib: Add field dev to struct ipoib_neigh In-Reply-To: <20070124120711.GA26693@mellanox.co.il> References: <45B73EFE.60500@voltaire.com> <20070124120711.GA26693@mellanox.co.il> Message-ID: <45B7795A.9020907@voltaire.com> Michael S. Tsirkin wrote: >>> >>>Just to clarify - you previously mentionned you saw problems with 2.6.16 >>>backport. Is this an issue you see with 2.6.20 as well? >> >>Yes, the same thing happens with kernel 2.6.20. However, the patch for 2.6.20 >>looks a little bit different. I will post it today or tommorow. > > > Let's see that first. I prefer to first look at upstream code, then think > about backporting. > OK, I will post this patch today. > But this would hardly help if ipoib module is unloaded while neighbour > for bonding device is still around and has a pointer to ipoib_neigh_destructor. > > >>For later kernels, bond device "borrows" the slave's neigh_setup >>function in the bond's setup function. >> >> ==> bond_dev->neigh_setup = slave_dev->neigh_setup; >> >>So even if the beighbour points to bond device the >>ipoib_neigh_destructor will be called. > > > Same applies here. > This is a good point. The right solution in my opinion is to enforce a correct order of unloading the modules. First bonding and then IPoIB. We still have to think about how we want to implement this. > Further, in both cases, it seems that accessing data at to_ipoib_neigh on a neighbour for > non-ipoib device can cause a crash if hardware address is !=0 at offset 20. > I don't see such risk. the ipoib_neigh_destructor is called only for neighbours that were passed as an argument to ipoib_neigh_alloc (for kernels <= 2.6.16) or for devices that set their neigh_setup function to ipoib_neigh_setup_dev (for bigger kernels).
The only one (besides IPoIB of course) that does that is bonding and bonding cannot enslave devices of different types. So, once bonding sets its neigh_setup to ipoib_neigh_setup_dev, it means it enslaves an IPoIB device and won't enslave devices of other types. However, it might be good idea to change the condition in bonding to "borrow" the neigh_setup function. Currently it is (slave_type != Ethernet) but should be (slave_type == IPoIB). From kliteyn at dev.mellanox.co.il Wed Jan 24 07:33:32 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 24 Jan 2007 17:33:32 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <1169648609.29183.87203.camel@hal.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C99E4352@mtlexch01.mtl.com> <45A368D0.7090906@dev.mellanox.co.il> <20070109180712.GA17240@sashak.voltaire.com> <45A49A18.8050702@dev.mellanox.co.il> <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> <45B32FB8.40700@dev.mellanox.co.il> <1169498411.23046.12190.camel@hal.voltaire.com> <45B635BC.2010304@dev.mellanox.co.il> <1169570982.29183.11110.camel@hal.voltaire.com> <45B6FC05.8030200@dev.mellanox.co.il> <1169648609.29183.87203.camel@hal.voltaire.com> Message-ID: <45B77C4C.5040305@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi Yevgeny, > > On Wed, 2007-01-24 at 01:26, Yevgeny Kliteynik wrote: >> Hi Hal, >> >> Hal Rosenstock wrote: >>> Hi again Yevgeny, >>> >>> On Tue, 2007-01-23 at 11:20, Yevgeny Kliteynik wrote: >>>> Hi Hal, >>>> >>>> Hal Rosenstock wrote: >>>>> Hi Yevgeny, >>>>> >>>>> On Sun, 2007-01-21 at 04:17, Yevgeny Kliteynik wrote: >>>>>> Sasha Khapyorsky wrote: >>>>>>> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: >>>>>>>> As for the mailing list it's openib-windows at openib.org. 
You can access >>>>>>>> it here: http://openib.org/mailman/listinfo/openib-windows >>>>>>> I found only references to svn://windows.openib.org, where >>>>>>> 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | >>>>>>> head -n 40' shows: >>>>>>> >>>>>>> ------------------------------------------------------------------------ >>>>>>> r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line >>>>>>> >>>>>>> Set property svn:keywords "id" on all repository >>>>>>> ------------------------------------------------------------------------ >>>>>>> r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line >>>>>>> >>>>>>> [OPENSM] When running as a service, if all ports are down, use the first port. >>>>>>> ------------------------------------------------------------------------ >>>>>>> r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines >>>>>>> >>>>>>> [OPENSM] When trying to set to INIT the remote port of the given physical port >>>>>>> in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no >>>>>>> check whether the physical port in null (e.g., if it's disconnected). >>>>>>> ------------------------------------------------------------------------ >>>>>>> r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line >>>>>>> >>>>>>> [opensm] Base service status on results that were received from opensm log messages. >>>>>>> ------------------------------------------------------------------------ >>>>>>> r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line >>>>>>> >>>>>>> [OPENSM] missed fix for OPENSM logging to System Event Log >>>>>>> ------------------------------------------------------------------------ >>>>>>> r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines >>>>>>> >>>>>>> [OPENSM] 1. feature: added SHUT_DOWN support. Without that one can't perform reboot with opensm running as service ! >>>>>>> 2. 
bugfix: added message file for correct logging to System Event Log. >>>>>>> 3. bugfix: wrong passing parameters in server mode; >>>>>>> 4. bugfix: error in table of parameters >>>>>>> >>>>>>> ------------------------------------------------------------------------ >>>>>>> r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line >>>>>>> >>>>>>> [opensm] Fix a trivial build break >>>>>>> ------------------------------------------------------------------------ >>>>>>> r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines >>>>>>> >>>>>>> if the guid2lid is corrupted, don't exit when running with -y option >>>>>>> (don't exit on fatal) - just ignore the file >>>>>>> >>>>>>> >>>>>>> >>>>>>> Seems that development there was stopped in Aug 2006, and it doesn't >>>>>>> have recent Win port patches. Am I looking in the wrong place? >>>>>> You were looking in the right place. It appears that I didn't describe >>>>>> the development process correctly. I think this repository is updated >>>>>> with stable OSM versions, after the code is tested. >>>>> Any idea on when the next version is expected ? >>>> The SVN will be updated in a couple of days. >>> Glad to hear it. To what OpenSM version will it correspond ? Will it be >>> based on OFED 1.1 or beyond ? What OpenIB svn or git commit does it >>> correspond to ? Thanks. >> >> The local SVN repository is syncronized with OpenSM GIT repository >> (head of master), and the changes from git are merged into the svn daily. >> This local SVN will be uploaded to the SVN repository on the web. > > How frequently ? Is there only the released OpenSM version on the web ? > Why not a work in progress one as well ? Wouldn't that help get more > early testing in the Windows environment ? There's no defined procedure on when should the svn on the web should be updated. This daily synchronization with OpenSM is pretty new, so I guess some sort of procedure will be defined soon. 
As for the testing - the local version of SM (the one that is synchronized with OpenSM) is tested nightly on windows machines. -- Yevgeny > -- Hal > >> -- Yevgeny >> >>> -- Hal >>> >>>> -- Yevgeny >>>> >>>>> -- Hal >>>>> >>>>>> If you need more details, I think it's better for you to ask windows folks >>>>>> directly, since as we see, my knowledge in this area is very limited. >>>>>> >>>>>> -- Yevgeny >>>>>> >>>>>>> Sasha >>>>>>> >>>>>> _______________________________________________ >>>>>> openib-general mailing list >>>>>> openib-general at openib.org >>>>>> http://openib.org/mailman/listinfo/openib-general >>>>>> >>>>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >>>>>> > From swise at opengridcomputing.com Wed Jan 24 07:40:51 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 24 Jan 2007 09:40:51 -0600 Subject: [openib-general] OFED 1.2 RPMs Message-ID: <1169653251.12416.8.camel@stevo-desktop> What do I need to do to get the Chelsio code into the RPM packaging? The wiki doesn't say much yet about rpms. I see file ofed_1_2/ofed_scripts/openib.spec. Should I add the chelsio stuff to that spec file? Thanks, Steve. From tziporet at mellanox.co.il Wed Jan 24 07:47:24 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 24 Jan 2007 17:47:24 +0200 Subject: [openib-general] modules compilation status for OFED 1.2 Message-ID: <45B77F8C.9060209@mellanox.co.il> Hi All, We are approaching code freeze and I want to make sure that all kernel modules indeed will compile on the supported OSes of OFED 1.2: * Redhat EL4 up5 (currently tested on up4) * Redhat EL5 - if will be available * SLES9 SP3 * SLES10 SP1 * kernel.org: 2.6.19.x and 2.6.20.x The status is that all modules (except ehca) pass compilation on kernel 2.6.19. 
The following modules have issues with support for some distros: * vnic (Ram) - SLES9 * ipath driver (Bryan) : SLES9, Redhat EL4 up4, SLES10 SP1 * ehca driver (Nam) - SLES9, Redhat EL4 up4, SLES10 SP1, 2.6.19 Owners of these modules: Please take an action to fix as soon as possible or reply if you don't want your module to be supported on some of the distros Thanks, Tziporet -------------- next part -------------- An HTML attachment was scrubbed... URL: From monis at voltaire.com Wed Jan 24 07:47:13 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 24 Jan 2007 17:47:13 +0200 Subject: [openib-general] Add bonding suuport to OFED In-Reply-To: <45B6412A.6020207@voltaire.com> References: <45B6412A.6020207@voltaire.com> Message-ID: <45B77F81.7090808@voltaire.com> Hi, Vlad, Can you please pull this to OFED-1.2? I guess this requires some changes in the build scripts and configuration files. I'd be happy to help and any way I can to help with that. Please let me know. thanks - MoniS Moni Shoua wrote: > Originally, bonding is a High Availability solution for Ethernet network interfaces. > It is a module that implements a virtual network device (not bounded to > hardware) and enslaves "real" devices. Bonding device controls its slaves according > to the bonding policy and the slave's health. > > I am adding a bonding device which is good for IPoIB interfaces. Feel free to install it > send comments. > > You just have to build source RPM, rebuild it and install the binary. > > For now, I have tested the module under RH4-UP3 and SLES10 with OFED-1.1. > > HOW TO BUILD THE SOURCE RPM > =========================== > git clone git://staging.openfabrics.org/~monis/ofed-bond-pkg.git mydir > cd mydir/ > ./build_rpm.sh > ./build_rpm.sh OR ./build_rpm.sh --git-url > > > After installing the binary RPM read the instructions in > /usr/local/ofed/docs/ib-bonding.txt > > Note: Using ib-bonding requires applying a patch for IPoIB and replacing > ib_ipoib.ko. 
Please find the patch in the following message. > Please also note that the patch should be applied after > ipoib_8111_to_2_6_16.patch. > > - MoniS > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From monis at voltaire.com Wed Jan 24 07:53:54 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 24 Jan 2007 17:53:54 +0200 Subject: [openib-general] The neigh_setup patch for upstream In-Reply-To: <45B7795A.9020907@voltaire.com> References: <45B73EFE.60500@voltaire.com> <20070124120711.GA26693@mellanox.co.il> <45B7795A.9020907@voltaire.com> Message-ID: <45B78112.8060302@voltaire.com> Hi, This is the upstream version of the patch that I sent in for OFED. Please comment. thanks - MoniS -------------------------------------------------------------------------------- IPoIB uses a two layer neighboring scheme, such that for each struct neighbour whose device is an ipoib one, there is a struct ipoib_neigh buddy which is created on demand at the tx flow by an ipoib_neigh_alloc(skb->dst->neighbour) call. When using the bonding driver, neighbours are created by the net stack on behalf of the bonding (master) device. On the tx flow the bonding code gets an skb such that skb->dev points to the master device, it changes this skb to point to the slave device and calls the slave hard_start_xmit function. Combining these two flows, there is a hole if some code at ipoib (ipoib_neigh_destructor) assumes that for each struct neighbour it gets, n->dev is an ipoib device so for example netdev_priv(n->dev) would be of type struct ipoib_dev_priv. To fix it, this patch adds a dev field to struct ipoib_neigh which is used instead of the struct neighbour dev one.
Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz --- ipoib.h | 4 +++- ipoib_main.c | 23 +++++++++++++---------- ipoib_multicast.c | 2 +- 3 files changed, 17 insertions(+), 12 deletions(-) Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-01-22 12:11:25.000000000 +0200 +++ infiniband/drivers/infiniband/ulp/ipoib/ipoib.h 2007-01-22 12:18:06.101698456 +0200 @@ -216,6 +216,7 @@ struct ipoib_neigh { struct sk_buff_head queue; struct neighbour *neighbour; + struct net_device *dev; struct list_head list; }; @@ -232,7 +233,8 @@ static inline struct ipoib_neigh **to_ip INFINIBAND_ALEN, sizeof(void *)); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh); +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh, + struct net_device *dev); void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); extern struct workqueue_struct *ipoib_workqueue; Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-01-22 12:11:33.000000000 +0200 +++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-01-22 12:34:57.599156580 +0200 @@ -490,7 +490,7 @@ static void neigh_add_path(struct sk_buf struct ipoib_path *path; struct ipoib_neigh *neigh; - neigh = ipoib_neigh_alloc(skb->dst->neighbour); + neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (!neigh) { ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -769,32 +769,34 @@ static void ipoib_set_mcast_list(struct static void ipoib_neigh_destructor(struct neighbour *n) { struct ipoib_neigh *neigh; - struct ipoib_dev_priv *priv = netdev_priv(n->dev); + struct ipoib_dev_priv *priv; unsigned long flags; struct ipoib_ah *ah = NULL; - ipoib_dbg(priv, - "neigh_destructor for %06x " IPOIB_GID_FMT "\n", - IPOIB_QPN(n->ha), - 
IPOIB_GID_RAW_ARG(n->ha + 4)); - - spin_lock_irqsave(&priv->lock, flags); neigh = *to_ipoib_neigh(n); if (neigh) { + priv = netdev_priv(neigh->dev); + ipoib_dbg(priv, + "neigh_destructor for %06x " IPOIB_GID_FMT "\n", + IPOIB_QPN(n->ha), + IPOIB_GID_RAW_ARG(n->ha + 4)); + + spin_lock_irqsave(&priv->lock, flags); if (neigh->ah) ah = neigh->ah; list_del(&neigh->list); ipoib_neigh_free(n->dev, neigh); + spin_unlock_irqrestore(&priv->lock, flags); } - spin_unlock_irqrestore(&priv->lock, flags); if (ah) ipoib_put_ah(ah); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, + struct net_device *dev) { struct ipoib_neigh *neigh; @@ -803,6 +805,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(st return NULL; neigh->neighbour = neighbour; + neigh->dev = dev; *to_ipoib_neigh(neighbour) = neigh; skb_queue_head_init(&neigh->queue); Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_multicast.c =================================================================== --- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-01-22 12:11:25.000000000 +0200 +++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-01-22 12:18:06.151689482 +0200 @@ -774,7 +774,7 @@ out: if (skb->dst && skb->dst->neighbour && !*to_ipoib_neigh(skb->dst->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour); + struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (neigh) { kref_get(&mcast->ah->ref); From vlad at dev.mellanox.co.il Wed Jan 24 08:02:45 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 24 Jan 2007 18:02:45 +0200 Subject: [openib-general] OFED 1.2 RPMs In-Reply-To: <1169653251.12416.8.camel@stevo-desktop> References: <1169653251.12416.8.camel@stevo-desktop> Message-ID: <1169654565.3731.121.camel@swlab50.yok.mtl.com> On Wed, 2007-01-24 at 09:40 -0600, Steve Wise wrote: > What do I need to do to get the Chelsio code 
into the RPM packaging? > The wiki doesn't say much yet about rpms. I see file > ofed_1_2/ofed_scripts/openib.spec. Should I add the chelsio stuff to > that spec file? > > > Thanks, > > Steve. > Hi Steve, I am going to split openib.spec file into ofa_user.spec and ofa_kernel.spec. Then I will add Chelsio and all other packages to these files. This will happen next week, I hope. -- Vladimir Sokolovsky Mellanox Technologies Ltd. From halr at voltaire.com Wed Jan 24 08:24:40 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 24 Jan 2007 11:24:40 -0500 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B769E6.3080905@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> Message-ID: <1169655749.29183.93022.camel@hal.voltaire.com> Hi Yevgeny, On Wed, 2007-01-24 at 09:15, Yevgeny Kliteynik wrote: [snip...] > > I also have some questions about the patches > > Shoot First, as I understand it, this higher level QoS is not yet an approved standard (annex) so is this code experimental ? In any case, some things might change, etc. so IMO this QoS should be implemented in a way that minimizes the risk to the non QoS code. I suspect the main interactions are in osm_sa_path/multipath_record.c but will also extend to the QoS manager. So should this all be conditionalized with something like QOS_ANNEX and by default be off with some build switch to enable this code in OpenSM until be becomes standard ? When will the remainder of the changes to the QoS manager be ready ? It would be good to see the whole picture. Are there any other missing pieces ? It would be good to have some documentation for this including an opensm man page update. As far as using lex/yacc, are they invoked as part of the build procedure or are the files they generate just checked in and used ? 
How could/would multiple file versions be supported ? One previous example was a mention that port groups can be shared by more than one manager (e.g. QoS and partitions) so this might be made hierarchical. I'd like to understand this before we get locked in. There are some other lower level questions which I'll get to later. I'll also review the XML file format in detail later. -- Hal From sashak at voltaire.com Wed Jan 24 08:44:21 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 24 Jan 2007 18:44:21 +0200 Subject: [openib-general] win related [was: Re: [PATCH 1/2] opensm: sigusr1: syslog() fixes] In-Reply-To: <45B77C4C.5040305@dev.mellanox.co.il> References: <20070110231917.GD17240@sashak.voltaire.com> <45A664CC.8050200@dev.mellanox.co.il> <20070118195340.GB23783@sashak.voltaire.com> <45B32FB8.40700@dev.mellanox.co.il> <1169498411.23046.12190.camel@hal.voltaire.com> <45B635BC.2010304@dev.mellanox.co.il> <1169570982.29183.11110.camel@hal.voltaire.com> <45B6FC05.8030200@dev.mellanox.co.il> <1169648609.29183.87203.camel@hal.voltaire.com> <45B77C4C.5040305@dev.mellanox.co.il> Message-ID: <20070124164421.GS28335@sashak.voltaire.com> On 17:33 Wed 24 Jan , Yevgeny Kliteynik wrote: > Hi Hal, > > Hal Rosenstock wrote: > > Hi Yevgeny, > > > > On Wed, 2007-01-24 at 01:26, Yevgeny Kliteynik wrote: > >> Hi Hal, > >> > >> Hal Rosenstock wrote: > >>> Hi again Yevgeny, > >>> > >>> On Tue, 2007-01-23 at 11:20, Yevgeny Kliteynik wrote: > >>>> Hi Hal, > >>>> > >>>> Hal Rosenstock wrote: > >>>>> Hi Yevgeny, > >>>>> > >>>>> On Sun, 2007-01-21 at 04:17, Yevgeny Kliteynik wrote: > >>>>>> Sasha Khapyorsky wrote: > >>>>>>> On 18:24 Thu 11 Jan , Yevgeny Kliteynik wrote: > >>>>>>>> As for the mailing list it's openib-windows at openib.org. 
You can access > >>>>>>>> it here: http://openib.org/mailman/listinfo/openib-windows > >>>>>>> I found only references to svn://windows.openib.org, where > >>>>>>> 'svn log svn://windows.openib.org/gen1/trunk/ulp/opensm/user/opensm | > >>>>>>> head -n 40' shows: > >>>>>>> > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r474 | sleybo | 2006-08-31 11:57:19 +0300 (Thu, 31 Aug 2006) | 1 line > >>>>>>> > >>>>>>> Set property svn:keywords "id" on all repository > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r472 | sleybo | 2006-08-31 11:08:18 +0300 (Thu, 31 Aug 2006) | 1 line > >>>>>>> > >>>>>>> [OPENSM] When running as a service, if all ports are down, use the first port. > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r460 | sleybo | 2006-08-20 16:55:49 +0300 (Sun, 20 Aug 2006) | 3 lines > >>>>>>> > >>>>>>> [OPENSM] When trying to set to INIT the remote port of the given physical port > >>>>>>> in function __osm_lid_mgr_set_remote_pi_state_to_init, there was no > >>>>>>> check whether the physical port in null (e.g., if it's disconnected). > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r458 | tzachid | 2006-08-17 11:12:37 +0300 (Thu, 17 Aug 2006) | 1 line > >>>>>>> > >>>>>>> [opensm] Base service status on results that were received from opensm log messages. > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r410 | leonidk | 2006-07-09 20:56:01 +0300 (Sun, 09 Jul 2006) | 1 line > >>>>>>> > >>>>>>> [OPENSM] missed fix for OPENSM logging to System Event Log > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r402 | leonidk | 2006-07-05 16:19:23 +0300 (Wed, 05 Jul 2006) | 5 lines > >>>>>>> > >>>>>>> [OPENSM] 1. feature: added SHUT_DOWN support. 
Without that one can't perform reboot with opensm running as service ! > >>>>>>> 2. bugfix: added message file for correct logging to System Event Log. > >>>>>>> 3. bugfix: wrong passing parameters in server mode; > >>>>>>> 4. bugfix: error in table of parameters > >>>>>>> > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r366 | tzachid | 2006-05-28 14:49:08 +0300 (Sun, 28 May 2006) | 1 line > >>>>>>> > >>>>>>> [opensm] Fix a trivial build break > >>>>>>> ------------------------------------------------------------------------ > >>>>>>> r361 | eitan | 2006-05-23 13:07:09 +0300 (Tue, 23 May 2006) | 3 lines > >>>>>>> > >>>>>>> if the guid2lid is corrupted, don't exit when running with -y option > >>>>>>> (don't exit on fatal) - just ignore the file > >>>>>>> > >>>>>>> > >>>>>>> > >>>>>>> Seems that development there was stopped in Aug 2006, and it doesn't > >>>>>>> have recent Win port patches. Am I looking in the wrong place? > >>>>>> You were looking in the right place. It appears that I didn't describe > >>>>>> the development process correctly. I think this repository is updated > >>>>>> with stable OSM versions, after the code is tested. > >>>>> Any idea on when the next version is expected ? > >>>> The SVN will be updated in a couple of days. > >>> Glad to hear it. To what OpenSM version will it correspond ? Will it be > >>> based on OFED 1.1 or beyond ? What OpenIB svn or git commit does it > >>> correspond to ? Thanks. > >> > >> The local SVN repository is syncronized with OpenSM GIT repository > >> (head of master), and the changes from git are merged into the svn daily. > >> This local SVN will be uploaded to the SVN repository on the web. > > > > How frequently ? Is there only the released OpenSM version on the web ? > > Why not a work in progress one as well ? Wouldn't that help get more > > early testing in the Windows environment ? 
> > There's no defined procedure on when should the svn on the web should > be updated. This daily synchronization with OpenSM is pretty new, so > I guess some sort of procedure will be defined soon. This should be clear for the projects which called "open source". Isn't it? > As for the testing - the local version of SM (the one that is synchronized > with OpenSM) is tested nightly on windows machines. By "the local version of SM" you actually mean not published mainstream development sources of OpenSM for windows. I don't think that doing development in such proprietary way is very helpful for collaboration (and now I am not surprised about why so many our questions to windows developers still be ignored). Sasha > > -- Yevgeny > > > -- Hal > > > >> -- Yevgeny > >> > >>> -- Hal > >>> > >>>> -- Yevgeny > >>>> > >>>>> -- Hal > >>>>> > >>>>>> If you need more details, I think it's better for you to ask windows folks > >>>>>> directly, since as we see, my knowledge in this area is very limited. > >>>>>> > >>>>>> -- Yevgeny > >>>>>> > >>>>>>> Sasha > >>>>>>> > >>>>>> _______________________________________________ > >>>>>> openib-general mailing list > >>>>>> openib-general at openib.org > >>>>>> http://openib.org/mailman/listinfo/openib-general > >>>>>> > >>>>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >>>>>> > > From sweitzen at cisco.com Wed Jan 24 08:39:06 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Wed, 24 Jan 2007 08:39:06 -0800 Subject: [openib-general] OFED 1.2 bug reporting In-Reply-To: <1169649675.1796.24.camel@stevo-desktop> References: <1169649675.1796.24.camel@stevo-desktop> Message-ID: I have added a version "1.2". Tziporet is the first build going to be called rc1 or something else? 
Scott > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Steve Wise > Sent: Wednesday, January 24, 2007 6:41 AM > To: openib-general > Subject: [openib-general] OFED 1.2 bug reporting > > Should I be using the open fabrics bugzilla to open bugs against OFED > 1.2? If so, should a new 'version' be added for ofed-1.2? Right now > the only version that makes sense is 'gen2', but that doesn't really > cover bugs against backport code... > > > Steve. > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Wed Jan 24 09:22:21 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 24 Jan 2007 19:22:21 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B768E2.9070604@dev.mellanox.co.il> References: <45B768E2.9070604@dev.mellanox.co.il> Message-ID: <20070124172221.GU28335@sashak.voltaire.com> Hi Yevgeny, On 16:10 Wed 24 Jan , Yevgeny Kliteynik wrote: > Hi Hal, Sasha. > > Here's a description of the QoS policy file, and an > example of such file (with more comments inside). > > QoS Policy file > --------------- > > The QoS policy file is divided into 4 sub sections: > > * Node Group: a set of HCAs, Routers or Switches that share the same settings. > A node groups might be a partition defined by the partition manager policy in > terms of GUIDs. Future implementations might provide support for NodeDescription > based definition of node groups. > In the discussion following RFC (available in ML archive), we talked about to make port groups definition separate from QoS, so it could be sharable between different OpenSM components (like QoS and Partition manager). Any reason why it was not done? 
Sasha > * Fabric Setup: > Defines how the SL2VL and VLArb tables should be setup. This policy definition > assumes the computation of target behavior should be performed outside of > OpenSM. > > * QoS-Levels Definition: > This section defines the possible sets of parameters for QoS that a client might > be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits > (in case LMC > 0 is used for QoS) and TClass. > > * Matching Rules: > A list of rules that match an incoming PathRecord request to a QoS-Level. The > rules are processed in order such as the first match is applied. Each rule is > built out of set of match expressions which should all match for the rule to > apply. The matching expressions are defined for the following fields > - SRC and DST to lists of node groups > - Service-ID to a list of Service-ID or Service-ID ranges > - TClass to a list of TClass values or ranges > > QoS policy file example > ----------------------- > > > > > > > > Storage > our SRP storage targets > 0x1000000000000001 > 0x1000000000000002 > > > > Virtual Servers > node desc and IB port # > vs1/HCA-1/P1 > vs3/HCA-1/P1 > vs3/HCA-2/P1 > > > > Partition 1 > default settings > Part1 > > > > Routers > all routers > ROUTER > > > > > > > > > Part1 > * > * > 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > > > > Storage > > Storage2 > > Storage3 I guess "across-from" and "across-to" include all ports on the path. What shoud hapen in case of configuration "overlap"? 
> * > 1 > 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 > > > > > > > > Storage > > 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 > 8:255,9:127,10:63,11:31,12:15,13:7,14:3 > 10 > > > > > > > > 1 > for the lowest priority comm > 16 > > > > 2 > low latency best bandwidth > 0 > 7 > > > > 3 > just an example > 0 > 32 > 1 > 1 > > > > > > > > 1 > low latency by class 7-9 or 11 > 7-9,11 > 1 > > > > 2 > Storage targets connection> > Storage > 22,4719 > 3 > > > > > > > > -- Yevgeny > > Yevgeny Kliteynik wrote: > > Hi Sasha, > > > > Sasha Khapyorsky wrote: > >> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: > >>> Hi Sasha. > >>> > >>> Sasha Khapyorsky wrote: > >>>> Hi Yevgeny, > >>>> > >>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >>>>> Hi Hal > >>>>> > >>>>> The following series of six patches implements QoS policy file parser: > >>>>> > >>>>> 1. QoS parser Lex file > >>>>> 2. QoS parser Lex-generated c file > >>>>> 3. QoS parser grammar (Yacc) file > >>>>> 4. QoS parser Yacc-generated grammar c and h file > >>>>> 5. QoS parser header file that defines parse tree data structures > >>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files > >>>> Is there any description of proposed format and functionality? > >>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few > >>> minor modifications. You can find the RFC here: > >>> http://openib.org/pipermail/openib-general/2006-May/022336.html > >> This was RFC and couple of issues were discussed then. Now you are about > >> implementation phase and exact format description would be desired. For > >> example what "few minor modifications" are? > > > > I'll prepare an example file with explanations. > > > > -- Yevgeny > > > >>>> Also what about using human readable formats? > >>> To me the xml-like format in the RFC looks pretty readable. > >>> It has very limited number of keywords (tags), so it's easy > >>> to follow and/or to modify. 
> >> It is your opinion, not everybody will agree with it (AFAIR this was > >> discussed too during RFC). > >> > >> I would not be care, but I don't know any example of really successful > >> XML using for configuration purposes (especially where advanced graphical > >> config editors/viewers were not used). Do you know? > >> > >> Sasha > >> > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From pradeep at us.ibm.com Wed Jan 24 09:17:23 2007 From: pradeep at us.ibm.com (Pradeep Satyanarayana) Date: Wed, 24 Jan 2007 09:17:23 -0800 Subject: [openib-general] IPOIB CM with Non SRQ support Message-ID: Michael, I am working on a prototype based on your IPOIB CM patch to incorporate support for Non SRQ as well. IPOIB CM was planned to be in OFED 1.2 if I remember correctly. If I were to submit a patch for non SRQ support, what would be the cut off date to make it into OFED 1.2? Pradeep pradeep at us.ibm.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From bugzilla-daemon at lists.openfabrics.org Wed Jan 24 09:31:36 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Wed, 24 Jan 2007 09:31:36 -0800 (PST) Subject: [openib-general] [Bug 322] New: 2.6.17 backport: reading the rdma-cm abi file causes fault. Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=322 Summary: 2.6.17 backport: reading the rdma-cm abi file causes fault. 
Product: OpenFabrics Linux Version: 1.2 Platform: X86-64 OS/Version: SLES 10 Status: NEW Severity: normal Priority: P3 Component: RDMA CM AssignedTo: bugzilla at openib.org ReportedBy: swise at opengridcomputing.com I think the misc device patch is broken for 2.6.17. Everything builds and loads ok, but when librdmacm (or a user) reads /sys/class/misc/rdma_cm/abi_file, the system logs this fault: [62266.670174] invalid opcode: 0000 [1] SMP [62266.682226] CPU 0 [62266.688276] Modules linked in: rdma_ucm nfs lockd nfs_acl sunrpc rdma_cm iw_cm ib_addr ib_local_sa ib_ucm ib_uverbs ib_umad iw_cxgb3 ib_ipoib ib_cm ib_sa edd button battery ac cxgb3 ib_mthca ib_mad shpchp ib_core pci_hotplug i2c_i801 e1000 rdma_ne i2c_core fan thermal processor aic79xx [62266.764826] Pid: 16190, comm: cat Tainted: GF 2.6.17-ofed-1.2 #3 [62266.783832] RIP: 0010:[] [] [62266.801073] RSP: 0018:ffff81011f273eb0 EFLAGS: 00010203 [62266.817510] RAX: 0000000000000002 RBX: ffff8101421bd848 RCX: 0000000000000001 [62266.838855] RDX: 00000000ffffffff RSI: ffff810103c558f6 RDI: ffff810083c558f9 [62266.860199] RBP: ffffffff8041508e R08: 00000000ffffffff R09: 0000000000000020 [62266.881542] R10: 0000000000000000 R11: 0000000000000000 R12: ffff81013dd5f138 [62266.902887] R13: ffff81013dd5f158 R14: ffff81011f273f48 R15: ffffffff805b18f0 [62266.924232] FS: 00002abb806756d0(0000) GS:ffffffff806e5000(0000) knlGS:0000000000000000 [62266.948431] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [62266.965621] CR2: 0000000000402f30 CR3: 0000000123782000 CR4: 00000000000006e0 [62266.986966] Process cat (pid: 16190, threadinfo ffff81011f272000, task ffff810144814850) [62267.011165] Stack: ffffffff802bddb6 0000000000000000 0000000000001000 000000000050a000 [62267.034690] ffff810083c55908 ffffffff881d7fa0 ffff8101424ea400 000000000050a000 [62267.058787] ffff81011f273f48 0000000000001000 [62267.074003] Call Trace: {sysfs_read_file+193} [62267.091845] {vfs_read+204} {sys_read+71} [62267.114927] {system_call+126} 
[62267.130946] [62267.130947] Code: 27 1f 01 81 ff ff 13 f5 27 80 ff ff ff ff 00 a4 4e 42 01 81 [62267.157535] RIP [] RSP -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From mshefty at ichips.intel.com Wed Jan 24 09:31:19 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 24 Jan 2007 09:31:19 -0800 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <45B6FBF7.9030400@voltaire.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> <45B6FBF7.9030400@voltaire.com> Message-ID: <45B797E7.40309@ichips.intel.com> > However, it will not support "mixed mode" communication patterns (which > you were raising last week) that is one app having a UD QP for both > multicast and unicast that talks with two "peers" IPoIB multicast and > another app doing only unicast. Separating ipoib to its own port space alleviated my concerns on existing usage. The RDMA_PS_UDP continues operating as before, with mixed mode traffic supported. Mixed mode for RDMA_PS_IPOIB is not supported, since it's not clear to me how that would be used. The IPOIB protocol doesn't use SIDR, so I'm hesitant to extend the capabilities until there's a clear need/use. > Also, just a clarification - how exactly the patch enforces that an app > would not be able to do listen/connect/accept on RDMA_PS_IPOIB ID??? This is not enforced directly yet. (It just requires an if statement in resolve route.) I would expect that if it were tried, there would be a failure at some point. 
- Sean From mshefty at ichips.intel.com Wed Jan 24 09:53:48 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 24 Jan 2007 09:53:48 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <45B79BDA.4020605@open-mpi.org> References: <45B4DA10.3080400@indiana.edu> <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> <20070124081152.GG20101@mellanox.co.il> <45B79BDA.4020605@open-mpi.org> Message-ID: <45B79D2C.9050305@ichips.intel.com> >>Is this how it works with sockets? > > > I'm not aware of IP multicast having this functionality. This is a > major problem with IP multicast, as some sort of address selection > arbitration has to be done. From what I've seen, this ranges from IANA > assigning multicast addresses for specific uses to best-effort protocols > for selecting unused addresses from an available range. It's a fairly > hard problem, and having OFED choose an unused address (my understanding > is that the SM is capable of this) sidesteps the issue and makes life > much better. This is an IB specific feature that could be exposed through the rdma_cm. I can envision other RDMA transports providing a similar capability, even if it requires some sort of proprietary method. - Sean From or.gerlitz at gmail.com Wed Jan 24 10:12:21 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 24 Jan 2007 20:12:21 +0200 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <45B797E7.40309@ichips.intel.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> <45B6FBF7.9030400@voltaire.com> <45B797E7.40309@ichips.intel.com> Message-ID: <15ddcffd0701241012w4f6326e5v64d25f86b24fdc2d@mail.gmail.com> On 1/24/07, Sean Hefty wrote: > > However, it will not support "mixed mode" communication patterns (which > > you were raising last week) that is one app having a UD QP for both > > multicast and unicast that talks with two "peers" IPoIB multicast and > > another app doing only unicast. 
> Separating ipoib to its own port space alleviated my concerns on existing usage. > The RDMA_PS_UDP continues operating as before, with mixed mode traffic > supported. Mixed mode for RDMA_PS_IPOIB is not supported, since it's not clear > to me how that would be used. The IPOIB protocol doesn't use SIDR, so I'm > hesitant to extend the capabilities until there's a clear need/use. Indeed, it is not possible to have UDP --unicast-- interop between "IPoIB UD" (ie not IPoIB CM) and an RDMA_PS_IPOIB RDMA CM consumer. However, it is possible that an RDMA_PS_IPOIB consumer would want to talk over ---one-- UD QP with two peers: 1) IPoIB - multicast traffic 2) --another-- RDMA CM consumer - unicast traffic since both talks are over the same QP everyone must use the same --QKEY--, now since RDMA_PS_IPOIB does not support the SIDR exchange this config is broken. The patch i have sent allows this, and it can be really nice to remove this restriction with some documentation explaining the restrictions. > > Also, just a clarification - how exactly the patch enforces that an app > > would not be able to do listen/connect/accept on RDMA_PS_IPOIB ID??? > This is not enforce directly yet. (It just requires an if statement in resolve > route.) I would expect that if it were tried, there would be a failure at some > point. OK, that (failure at some point) was my thought as well. Or. From mshefty at ichips.intel.com Wed Jan 24 10:13:57 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 24 Jan 2007 10:13:57 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <1169649315.29183.87785.camel@hal.voltaire.com> References: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> <45B6AF42.7000500@ichips.intel.com> <1169649315.29183.87785.camel@hal.voltaire.com> Message-ID: <45B7A1E5.6030009@ichips.intel.com> > Doesn't this carve a hole in the IPv6 (multicast) address space (group > ID) ? If so, is that a hole we can carve out ? 
I don't think that this causes any issues that weren't previously there. IPOIB maps an IPv6 address into 80-bits, so not every IPv6 address can be mapped into an MGID using the ipoib algorithm. What the rdma_cm needs to determine is whether the sockaddr passed into rdma_join_multicast is an IPv6 address that needs to be translated into an MGID, or whether the address is itself an MGID. I guess that it might be able to look at the address to see if it matches the SA created MGID format (0xff1?a01b...). - Sean From mshefty at ichips.intel.com Wed Jan 24 10:15:43 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 24 Jan 2007 10:15:43 -0800 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. In-Reply-To: <1169600597.26256.53.camel@stevo-desktop> References: <1169600597.26256.53.camel@stevo-desktop> Message-ID: <45B7A24F.1040607@ichips.intel.com> Looks good to me. Acked-by: Sean Hefty Steve Wise wrote: > Handle Ethernet neighbour updates during route resolution. > > The IWCM uses the ib_addr services to do route resolution (neighbour > discovery in the IP world). The ib_addr netevent callback routine, > however, currently only acts on Inifininband neighbour updates. It needs > to act on ethernet neighbour updates as well. > > This patch just removes filtering on device type altogether and > will trigger on any neighour updates where the nud_type is valid. > This simplifies the code some. 
> > Signed-off-by: Steve Wise > --- > > drivers/infiniband/core/addr.c | 3 +-- > 1 files changed, 1 insertions(+), 2 deletions(-) > > diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c > index af93979..d2bb5a9 100644 > --- a/drivers/infiniband/core/addr.c > +++ b/drivers/infiniband/core/addr.c > @@ -360,8 +360,7 @@ static int netevent_callback(struct noti > if (event == NETEVENT_NEIGH_UPDATE) { > struct neighbour *neigh = ctx; > > - if (neigh->dev->type == ARPHRD_INFINIBAND && > - (neigh->nud_state & NUD_VALID)) { > + if (neigh->nud_state & NUD_VALID) { > set_timeout(jiffies); > } > } From or.gerlitz at gmail.com Wed Jan 24 10:28:00 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 24 Jan 2007 20:28:00 +0200 Subject: [openib-general] RDMA CM multicast In-Reply-To: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> References: <45B4DA10.3080400@indiana.edu> <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> Message-ID: <15ddcffd0701241028i3f087571i648c2c3c282bf0cd@mail.gmail.com> On 1/24/07, Sean Hefty wrote: > What would be needed is a way for the user to indicate that they need a unique > address. An obvious way to accomplish this is for the user to specify an IP > address of 0.0.0.0 when calling rdma_join_multicast(). The user would first > need to bind to a specific device by calling rdma_bind_addr() with a local IP > address. > Your code would look something like this: > rdma_bind_addr(local IP address) > rdma_join_multicast(0.0.0.0, port 0) <- exchange group info out of band > rdma_join_multicast(0.0.0.0, port 1) <- exchange group info out of band > send data to a lot of nodes at once > rdma_leave_multicast(0.0.0.0, port 0) > rdma_leave_multicast(0.0.0.0, port 1) Sean, This seems to me as a little bit of over engineering... 
since we do require that to use the RDMA CM the consumers must have a functional IPoIB NIC (so they can call rdma_bind_addrress to resolve the device/port/pkey) we can add another requirement to have the sys admin configure their routing such that some multicast IP subnet (eg net 224.0.0.0 mask 255.0.0.0) is routed to the IPoIB NIC. Once this routing is in place, the only thing they need is to enhance the MPI job starter/etc to allocate to each job (say) two unique multicast --IP-- addresses on the relevant subnet and provide these IP addresses to each rank. Now the rank can use the RDMA CM without any hack. Or. From or.gerlitz at gmail.com Wed Jan 24 10:34:48 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 24 Jan 2007 20:34:48 +0200 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. In-Reply-To: <1169600597.26256.53.camel@stevo-desktop> References: <1169600597.26256.53.camel@stevo-desktop> Message-ID: <15ddcffd0701241034j2c1dc649ga3b31c042491b1a5@mail.gmail.com> On 1/24/07, Steve Wise wrote: > Handle Ethernet neighbour updates during route resolution. > The IWCM uses the ib_addr services to do route resolution (neighbour > discovery in the IP world). The ib_addr netevent callback routine, > however, currently only acts on Inifininband neighbour updates. It needs > to act on ethernet neighbour updates as well. > This patch just removes filtering on device type altogether and > will trigger on any neighour updates where the nud_type is valid. > This simplifies the code some. OK, as I have mentioned in the past there is a check in the fast path xmit code of IPoIB to verify that the neighbour we are using now to xmit (skb->neigh) has not changed its HA address since the last time IPoIB xmit-ed with it - that is that the GID in the struct neighbous->ha is the same as the GID in struct ipoib_neigh. 
Such a diff happens when the kernel is reacting to a gratuitous ARP - that is a remote peer has changed its HW address (eg as of fail-over of an IP address from one IPoIB NIC to another IPoIB NIC - eg with bonding). >From this patch I understand that we can register to the neighbour change event in IPoIB and eliminate the run time check !?!?!? Or. From halr at voltaire.com Wed Jan 24 10:29:15 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 24 Jan 2007 13:29:15 -0500 Subject: [openib-general] RDMA CM multicast In-Reply-To: <45B7A1E5.6030009@ichips.intel.com> References: <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> <45B6AF42.7000500@ichips.intel.com> <1169649315.29183.87785.camel@hal.voltaire.com> <45B7A1E5.6030009@ichips.intel.com> Message-ID: <1169663298.13608.4796.camel@hal.voltaire.com> On Wed, 2007-01-24 at 13:13, Sean Hefty wrote: > > Doesn't this carve a hole in the IPv6 (multicast) address space (group > > ID) ? If so, is that a hole we can carve out ? > > I don't think that this causes any issues that weren't previously there. IPOIB > maps an IPv6 address into 80-bits, so not every IPv6 address can be mapped into > an MGID using the ipoib algorithm. I forgot about this. > What the rdma_cm needs to determine is whether the sockaddr passed into > rdma_join_multicast is an IPv6 address that needs to be translated into an MGID, > or whether the address is itself an MGID. I guess that it might be able to look > at the address to see if it matches the SA created MGID format (0xff1?a01b...). Makes sense. 
-- Hal > - Sean From mshefty at ichips.intel.com Wed Jan 24 10:36:50 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 24 Jan 2007 10:36:50 -0800 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <15ddcffd0701241012w4f6326e5v64d25f86b24fdc2d@mail.gmail.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> <45B6FBF7.9030400@voltaire.com> <45B797E7.40309@ichips.intel.com> <15ddcffd0701241012w4f6326e5v64d25f86b24fdc2d@mail.gmail.com> Message-ID: <45B7A742.4040702@ichips.intel.com> > However, it is possible that an RDMA_PS_IPOIB consumer would want to > talk over ---one-- UD QP with two peers: > > 1) IPoIB - multicast traffic > 2) --another-- RDMA CM consumer - unicast traffic My thinking on this was that path record lookup and SIDR resolution isn't part of the ipoib protocol, and I wanted to limit the scope of the patch. After the user joins the multicast group, unicast traffic is still supported. The issue I see is whether the rdma_cm uses address resolution (which ends up being IP ARP), an SA query, and SIDR to resolve the remote QPN, or if it can obtain it through some other method. 
- Sean From or.gerlitz at gmail.com Wed Jan 24 10:51:03 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 24 Jan 2007 20:51:03 +0200 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <45B7A742.4040702@ichips.intel.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> <45B6FBF7.9030400@voltaire.com> <45B797E7.40309@ichips.intel.com> <15ddcffd0701241012w4f6326e5v64d25f86b24fdc2d@mail.gmail.com> <45B7A742.4040702@ichips.intel.com> Message-ID: <15ddcffd0701241051w2ab8a90dn4bda97c129d5e098@mail.gmail.com> On 1/24/07, Sean Hefty wrote: > > However, it is possible that an RDMA_PS_IPOIB consumer would want to > > talk over ---one-- UD QP with two peers: > > > > 1) IPoIB - multicast traffic > > 2) --another-- RDMA CM consumer - unicast traffic > My thinking on this was that path record lookup and SIDR resolution isn't part > of the ipoib protocol, and I wanted to limit the scope of the patch. indeed they are not part of the ipoib protocol, but the reason there interop is not possible between PS_IPOIB ID/QP to peer node IPoIB UD - is much more simple - as of IPoIB address resolution... The peer IPoIB would send an ARP and then would assume it can send its packets to the QP number provided in the arp reply, so it would be talking not with the rdma cm consumer but rather with the underlying IPoIB in this node. On the other direction you are correct, IPoIB does not listen for SIDR requests. > After the user joins the multicast group, unicast traffic is still supported. no! it is broken since the PS_IPOIB ID/QP that joined/attached the multicast group is now using the ipoib broadcast qkey where the PS_UDP ID/QP is using the RDMA_UDP_QKEY > The issue I see is whether the rdma_cm uses address resolution (which ends up > being IP ARP), an SA query, and SIDR to resolve the remote QPN, or if it can > obtain it through some other method. 
A possible fallback to RDMA CM consumer is: issue ARP, then send SIDR - if there is no response use the IPoIB QP from the ARP reply and the ipv4 broadcast qkey to talk directly with IPoIB. However, as i mention above this hack is not possible in the other direction, that is you can't make IPoIB do unicast talking with PS_IPOIB consumer. Or. From or.gerlitz at gmail.com Wed Jan 24 11:32:42 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 24 Jan 2007 21:32:42 +0200 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <15ddcffd0701241051w2ab8a90dn4bda97c129d5e098@mail.gmail.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> <45B6FBF7.9030400@voltaire.com> <45B797E7.40309@ichips.intel.com> <15ddcffd0701241012w4f6326e5v64d25f86b24fdc2d@mail.gmail.com> <45B7A742.4040702@ichips.intel.com> <15ddcffd0701241051w2ab8a90dn4bda97c129d5e098@mail.gmail.com> Message-ID: <15ddcffd0701241132x26e61296k53dbbb41cab096f7@mail.gmail.com> On 1/24/07, Or Gerlitz wrote: > On 1/24/07, Sean Hefty wrote: > > > However, it is possible that an RDMA_PS_IPOIB consumer would want to > > > talk over ---one-- UD QP with two peers: > > > 1) IPoIB - multicast traffic > > > 2) --another-- RDMA CM consumer - unicast traffic > > After the user joins the multicast group, unicast traffic is still supported. > no! it is broken since the PS_IPOIB ID/QP that joined/attached the > multicast group is now using the ipoib broadcast qkey where the PS_UDP > ID/QP is using the RDMA_UDP_QKEY OK, i have managed to confuse myself... with the patch you have sent PS_IPOIB ID does not support unicast traffic so this whole use scenario is not possible in the first place. But, my preference is not to block RDMA CM use patterns of UD unicast to UD unicast and UD unicast to UD unicast/multicast etc. Or.
From tziporet at mellanox.co.il Wed Jan 24 11:52:29 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 24 Jan 2007 21:52:29 +0200 Subject: [openib-general] OFED 1.2 bug reporting Message-ID: <6C2C79E72C305246B504CBA17B5500C9A0DC29@mtlexch01.mtl.com> First version will be 1.2-alpha1 -----Original Message----- From: openib-general-bounces at openib.org [mailto:openib-general-bounces at openib.org] On Behalf Of Scott Weitzenkamp (sweitzen) Sent: Wednesday, January 24, 2007 6:39 PM To: Steve Wise; openib-general Subject: Re: [openib-general] OFED 1.2 bug reporting I have added a version "1.2". Tziporet is the first build going to be called rc1 or something else? Scott > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Steve Wise > Sent: Wednesday, January 24, 2007 6:41 AM > To: openib-general > Subject: [openib-general] OFED 1.2 bug reporting > > Should I be using the open fabrics bugzilla to open bugs against OFED > 1.2? If so, should a new 'version' be added for ofed-1.2? Right now > the only version that makes sense is 'gen2', but that doesn't really > cover bugs against backport code... > > > Steve. > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > _______________________________________________ openib-general mailing list openib-general at openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From HNGUYEN at de.ibm.com Wed Jan 24 12:02:40 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Wed, 24 Jan 2007 15:02:40 -0500 Subject: [openib-general] [openfabrics-ewg] modules compilation status for OFED 1.2 In-Reply-To: <45B77F8C.9060209@mellanox.co.il> Message-ID: Hi Tziporet! 
> ehca driver (Nam) - SLES9, Redhat EL4 up4, SLES10 SP1, 2.6.19 Backport for SLES9 and RHEL4.4/5 is doable only with a kernel patch in order to get ibmebus running, which is a prereq for ehca. Since ofed-1.1 build process compiles the components out of kernel tree, such one kernel patch is not possible. Will that change with ofed-1.2 build process? Regards Nam From tziporet at mellanox.co.il Wed Jan 24 12:18:15 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 24 Jan 2007 22:18:15 +0200 Subject: [openib-general] [openfabrics-ewg] modules compilation status for OFED 1.2 Message-ID: <6C2C79E72C305246B504CBA17B5500C9A0DC2C@mtlexch01.mtl.com> We stay with same build process but the backport patches give a solution for such cases. Michael Tsirkin can help you how we solved such problems with other kernel code we needed. Tziporet -----Original Message----- From: Hoang-Nam Nguyen [mailto:HNGUYEN at de.ibm.com] Sent: Wednesday, January 24, 2007 10:03 PM To: Tziporet Koren Cc: Bryan O'Sullivan; EWG; openfabrics-ewg-bounces at openib.org; OPENIB; ramachandra.kuchimanchi at qlogic.com Subject: Re: [openfabrics-ewg] modules compilation status for OFED 1.2 Hi Tziporet! > ehca driver (Nam) - SLES9, Redhat EL4 up4, SLES10 SP1, 2.6.19 Backport for SLES9 and RHEL4.4/5 is doable only with a kernel patch in order to get ibmebus running, which is a prereq for ehca. Since ofed-1.1 build process compiles the components out of kernel tree, such one kernel patch is not possible. Will that change with ofed-1.2 build process? Regards Nam From ramachandra.kuchimanchi at qlogic.com Wed Jan 24 12:27:01 2007 From: ramachandra.kuchimanchi at qlogic.com (Ramachandra Kuchimanchi) Date: Wed, 24 Jan 2007 14:27:01 -0600 Subject: [openib-general] [PATCH 2/2 vex branch] IB/VNIC Fix failover delay issue In-Reply-To: References: <4578425A.27226.250CE6A4@ramachandra.kuchimanchi.qlogic.com> Message-ID: > thanks, I (finally) rolled these into my vex branch. Thanks Roland. 
But for some reason, I do not see the commit logs for these two patches in the vex branch, even though I can see from the code that the patches have been applied. (I can see the commit logs for the initial set of patches though). Any idea why ? From mshefty at ichips.intel.com Wed Jan 24 12:31:37 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 24 Jan 2007 12:31:37 -0800 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <15ddcffd0701241051w2ab8a90dn4bda97c129d5e098@mail.gmail.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> <45B6FBF7.9030400@voltaire.com> <45B797E7.40309@ichips.intel.com> <15ddcffd0701241012w4f6326e5v64d25f86b24fdc2d@mail.gmail.com> <45B7A742.4040702@ichips.intel.com> <15ddcffd0701241051w2ab8a90dn4bda97c129d5e098@mail.gmail.com> Message-ID: <45B7C229.4040605@ichips.intel.com> > The peer IPoIB would send an ARP and then would assume it can send its > packets to the QP number provided in the arp reply, so it would be > talking not with the rdma cm consumer but rather with the underlying > IPoIB in this node. Okay - so you want to change the QPN from that given in the ARP? I missed that you wanted this, and I think I understand better what you're trying to do. > no! it is broken since the PS_IPOIB ID/QP that joined/attached the > multicast group is now using the ipoib broadcast qkey where the PS_UDP > ID/QP is using the RDMA_UDP_QKEY I'm only trying to support communication within the same port space, not between them. Unicast is supported between different RDMA_PS_IPOIB QPs. The question is how to obtain the IB unicast address (i.e. QPN, etc.) for RDMA_PS_IPOIB. My assumption was that this capability wasn't needed, but you're saying that it is. I will update the patches. 
- Sean From or.gerlitz at gmail.com Wed Jan 24 12:40:53 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 24 Jan 2007 22:40:53 +0200 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <45B7C229.4040605@ichips.intel.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> <45B6FBF7.9030400@voltaire.com> <45B797E7.40309@ichips.intel.com> <15ddcffd0701241012w4f6326e5v64d25f86b24fdc2d@mail.gmail.com> <45B7A742.4040702@ichips.intel.com> <15ddcffd0701241051w2ab8a90dn4bda97c129d5e098@mail.gmail.com> <45B7C229.4040605@ichips.intel.com> Message-ID: <15ddcffd0701241240p164a0daex887a179fb74ebc4d@mail.gmail.com> On 1/24/07, Sean Hefty wrote: > > The peer IPoIB would send an ARP and then would assume it can send its > > packets to the QP number provided in the arp reply, so it would be > > talking not with the rdma cm consumer but rather with the underlying > > IPoIB in this node. > > Okay - so you want to change the QPN from that given in the ARP? I missed that > you wanted this, and I think I understand better what you're trying to do. we don't want to use the QPN from the arp reply but rather the sidr exchange etc as it is implemented in the rdma cm. > > no! it is broken since the PS_IPOIB ID/QP that joined/attached the > > multicast group is now using the ipoib broadcast qkey where the PS_UDP > > ID/QP is using the RDMA_UDP_QKEY > > I'm only trying to support communication within the same port space, not between > them. Unicast is supported between different RDMA_PS_IPOIB QPs. working only within a port space makes sense. However, your patch does not allow for PS_IPOIB IDs to do unicast since some places in the cma kernel code only care for PS_UDP where they should care for PS_UDP OR PS_IPOIB as i did in my patch... > The question > is how to obtain the IB unicast address (i.e. QPN, etc.) for RDMA_PS_IPOIB. My > assumption was that this capability wasn't needed, but you're saying that it is. 
> I will update the patches. thanks, and again its fine to obtain the IB unicast address for PS_IPOIB IDs using the sidr exchange, you don't need to worry on the ARP result. Only make sure that PS_IPOIB uses the ipoib broadcast group qkey and also to what i mention above (code branching on PS_UDP where it should do so on PS_UDP or PS_IPOIB). thanks! Or. From sashak at voltaire.com Wed Jan 24 13:11:25 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 24 Jan 2007 23:11:25 +0200 Subject: [openib-general] [PATCH MINOR] opensm: minor usage strings simplification Message-ID: <20070124211125.GD28335@sashak.voltaire.com> Minor usage string simplification - this helps to avoid warning with some version of vim c code analyzer. Signed-off-by: Sasha Khapyorsky --- osm/opensm/main.c | 7 +++---- 1 files changed, 3 insertions(+), 4 deletions(-) diff --git a/osm/opensm/main.c b/osm/opensm/main.c index a63fbeb..0993441 100644 --- a/osm/opensm/main.c +++ b/osm/opensm/main.c @@ -217,12 +217,11 @@ show_usage(void) " SMPs.\n" " Without -maxsmps, OpenSM defaults to a maximum of\n" " 4 outstanding SMPs.\n\n" ); + printf( "-console [off|local" #ifdef ENABLE_OSM_CONSOLE_SOCKET - printf( "-console [off|local|socket]\n" -#else - printf( "-console [off|local]\n" + "|socket" #endif - " This option activates the OpenSM console. (default off)\n\n"); + "]\n This option activates the OpenSM console. 
(default off)\n\n"); #ifdef ENABLE_OSM_CONSOLE_SOCKET printf( "-console-port \n" " Specify an alternate telnet port for the console (default %d).\n\n", -- 1.5.0.rc2.g11a3 From robert.j.woodruff at intel.com Wed Jan 24 13:46:02 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Wed, 24 Jan 2007 13:46:02 -0800 Subject: [openib-general] InfiniBand Maintainers Summit/BOF at Ottawa Linux Symposium Message-ID: Hay guys, I was wondering how many people are planning to attend the Ottawa Linux Symposium this year and if there was any interest in getting the maintainers together for an InfiniBand BOF. I think it would be good to get the maintainers together once in a while face to face just to discuss how the general process of InfiniBand development is working for people, are there anything we should be doing different, what new features are coming and what kernel versions might they be targeted at, etc. If a lot of people are already planning on attending, then this might be an opportunity to get together. Please respond with, 1.) If you are planning on attending OLS this year ? 2.) If you are interested in having an InfiniBand BOF. If there is enough interest, I will send in a request to OLS for a BOF timeslot Please respond by Monday 1/29/07 since if we want to get something added to the agenda, we need to submit something before the end of the month. woody From rdreier at cisco.com Wed Jan 24 13:48:27 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 24 Jan 2007 13:48:27 -0800 Subject: [openib-general] [PATCH 2/2 vex branch] IB/VNIC Fix failover delay issue In-Reply-To: (Ramachandra Kuchimanchi's message of "Wed, 24 Jan 2007 14:27:01 -0600") References: <4578425A.27226.250CE6A4@ramachandra.kuchimanchi.qlogic.com> Message-ID: > Thanks Roland. But for some reason, I do not see the commit > logs for these two patches in the vex branch, even though I can see from > the code that the patches have been applied. 
(I can see the commit > logs for the initial set of patches though). Any idea why ? Yes, because I rolled the patches into the existing patches already there rather than adding them on top of what I had. From sashak at voltaire.com Wed Jan 24 14:19:37 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 25 Jan 2007 00:19:37 +0200 Subject: [openib-general] [PATCH] opensm: cleanup unused osm_req_ctrl Message-ID: <20070124221937.GF28335@sashak.voltaire.com> This cleanups unused osm_req_ctrl stuff and corresponded objects. Signed-off-by: Sasha Khapyorsky --- osm/include/Makefile.am | 1 - osm/include/opensm/osm_msgdef.h | 16 +--- osm/include/opensm/osm_req_ctrl.h | 228 ------------------------------------- osm/include/opensm/osm_sm.h | 5 - osm/opensm/Makefile.am | 2 +- osm/opensm/osm_helper.c | 2 +- osm/opensm/osm_req_ctrl.c | 136 ---------------------- osm/opensm/osm_sm.c | 6 - 8 files changed, 3 insertions(+), 393 deletions(-) delete mode 100644 osm/include/opensm/osm_req_ctrl.h delete mode 100644 osm/opensm/osm_req_ctrl.c diff --git a/osm/include/Makefile.am b/osm/include/Makefile.am index b49cf21..5a186ff 100644 --- a/osm/include/Makefile.am +++ b/osm/include/Makefile.am @@ -71,7 +71,6 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_pkey.h \ $(srcdir)/opensm/osm_pkey_mgr.h \ $(srcdir)/opensm/osm_sa_mad_ctrl.h \ - $(srcdir)/opensm/osm_req_ctrl.h \ $(srcdir)/opensm/osm_sa_link_record.h \ $(srcdir)/opensm/osm_mcm_port.h \ $(srcdir)/opensm/osm_log.h \ diff --git a/osm/include/opensm/osm_msgdef.h b/osm/include/opensm/osm_msgdef.h index 87c943f..a90e3b9 100644 --- a/osm/include/opensm/osm_msgdef.h +++ b/osm/include/opensm/osm_msgdef.h @@ -77,20 +77,6 @@ BEGIN_C_DECLS * *********/ -/****s* OpenSM: Dispatcher Messages/OSM_MSG_REQ -* NAME -* OSM_MSG_REQ -* -* DESCRIPTION -* Initiates a QP0 attribute request. 
-* -* NOTES -* Sent by: osm_sm_t -* Received by: osm_req_ctrl_t -* Delivery notice: yes -* -***********/ - /****s* OpenSM: Dispatcher Messages/OSM_MSG_MAD_NODE_INFO * NAME * OSM_MSG_MAD_NODE_INFO @@ -166,7 +152,7 @@ BEGIN_C_DECLS ***********/ enum { - OSM_MSG_REQ = 0, + OSM_MSG_NONE = 0, OSM_MSG_MAD_NODE_INFO, OSM_MSG_MAD_PORT_INFO, OSM_MSG_MAD_SWITCH_INFO, diff --git a/osm/include/opensm/osm_req_ctrl.h b/osm/include/opensm/osm_req_ctrl.h deleted file mode 100644 index 7823823..0000000 --- a/osm/include/opensm/osm_req_ctrl.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_req_ctrl_t. - * This object represents a controller that calls the - * generic requester object to retrieve attributes from a node. - * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_REQ_CTRL_H_ -#define _OSM_REQ_CTRL_H_ - -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****h* OpenSM/Generic Request Controller -* NAME -* Generic Request Controller -* -* DESCRIPTION -* The Generic Request Controller object encapsulates the information -* needed to request an attribute from a node. -* -* The Generic Request Controller object is thread safe. -* -* This object should be treated as opaque and should be -* manipulated only through the provided functions. -* -* AUTHOR -* Steve King, Intel -* -*********/ - -/****s* OpenSM: Generic Request Controller/osm_req_ctrl_t -* NAME -* osm_req_ctrl_t -* -* DESCRIPTION -* Generic Request Controller structure. -* -* This object should be treated as opaque and should -* be manipulated only through the provided functions. -* -* SYNOPSIS -*/ -typedef struct _osm_req_ctrl -{ - osm_req_t *p_req; - osm_log_t *p_log; - cl_dispatcher_t *p_disp; - cl_disp_reg_handle_t h_disp; - -} osm_req_ctrl_t; -/* -* FIELDS -* p_log -* Pointer to the log object. -* -* p_disp -* Pointer to the Dispatcher. -* -* h_disp -* Handle returned from dispatcher registration. 
-* -* SEE ALSO -* Generic Request Controller object -*********/ - -/****f* OpenSM: Generic Request Controller/osm_req_ctrl_construct -* NAME -* osm_req_ctrl_construct -* -* DESCRIPTION -* This function constructs a Generic Request Controller object. -* -* SYNOPSIS -*/ -void -osm_req_ctrl_construct( - IN osm_req_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to a Generic Request Controller object to construct. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Allows calling osm_req_ctrl_init, and osm_req_ctrl_destroy. -* -* Calling osm_req_ctrl_construct is a prerequisite to calling any other -* method except osm_req_ctrl_init. -* -* SEE ALSO -* Generic Request Controller object, osm_req_ctrl_init, -* osm_req_ctrl_destroy -*********/ - -/****f* OpenSM: Generic Request Controller/osm_req_ctrl_destroy -* NAME -* osm_req_ctrl_destroy -* -* DESCRIPTION -* The osm_req_ctrl_destroy function destroys the object, releasing -* all resources. -* -* SYNOPSIS -*/ -void -osm_req_ctrl_destroy( - IN osm_req_ctrl_t* const p_ctrl ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to the object to destroy. -* -* RETURN VALUE -* This function does not return a value. -* -* NOTES -* Performs any necessary cleanup of the specified -* Generic Request Controller object. -* Further operations should not be attempted on the destroyed object. -* This function should only be called after a call to -* osm_req_ctrl_construct or osm_req_ctrl_init. -* -* SEE ALSO -* Generic Request Controller object, osm_req_ctrl_construct, -* osm_req_ctrl_init -*********/ - -/****f* OpenSM: Generic Request Controller/osm_req_ctrl_init -* NAME -* osm_req_ctrl_init -* -* DESCRIPTION -* The osm_req_ctrl_init function initializes a -* Generic Request Controller object for use. 
-* -* SYNOPSIS -*/ -ib_api_status_t -osm_req_ctrl_init( - IN osm_req_ctrl_t* const p_ctrl, - IN osm_req_t* const p_req, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ); -/* -* PARAMETERS -* p_ctrl -* [in] Pointer to an osm_req_ctrl_t object to initialize. -* -* p_req -* [in] Pointer to a Generic Requester object. -* -* p_log -* [in] Pointer to the log object. -* -* p_disp -* [in] Pointer to the OpenSM central Dispatcher. -* -* RETURN VALUES -* CL_SUCCESS if the Generic Request Controller object was initialized -* successfully. -* -* NOTES -* Allows calling other Generic Request Controller methods. -* -* SEE ALSO -* Generic Request Controller object, osm_req_ctrl_construct, -* Generic Requester object, osm_req_ctrl_destroy -*********/ - -END_C_DECLS - -#endif /* _OSM_REQ_CTRL_H_ */ diff --git a/osm/include/opensm/osm_sm.h b/osm/include/opensm/osm_sm.h index a3894d8..7f22d58 100644 --- a/osm/include/opensm/osm_sm.h +++ b/osm/include/opensm/osm_sm.h @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include @@ -142,7 +141,6 @@ typedef struct _osm_sm cl_plock_t *p_lock; atomic32_t sm_trans_id; osm_req_t req; - osm_req_ctrl_t req_ctrl; osm_resp_t resp; osm_ni_rcv_t ni_rcv; osm_pi_rcv_t pi_rcv; @@ -200,9 +198,6 @@ typedef struct _osm_sm * req * Generic MAD attribute requester. * -* req_ctrl -* Controller for the generic requester. -* * resp * MAD attribute responder. 
* diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am index b03a206..0e9ac67 100644 --- a/osm/opensm/Makefile.am +++ b/osm/opensm/Makefile.am @@ -37,7 +37,7 @@ opensm_SOURCES = main.c osm_console.c osm_db_files.c \ osm_node_desc_rcv.c osm_node_info_rcv.c \ osm_opensm.c osm_pkey.c osm_pkey_mgr.c osm_pkey_rcv.c \ osm_port.c osm_port_info_rcv.c \ - osm_remote_sm.c osm_req.c osm_req_ctrl.c \ + osm_remote_sm.c osm_req.c \ osm_resp.c osm_sa.c osm_sa_class_port_info.c \ osm_sa_informinfo.c osm_sa_lft_record.c osm_sa_mft_record.c \ osm_sa_link_record.c osm_sa_mad_ctrl.c \ diff --git a/osm/opensm/osm_helper.c b/osm/opensm/osm_helper.c index 50a3413..e2a3e8a 100644 --- a/osm/opensm/osm_helper.c +++ b/osm/opensm/osm_helper.c @@ -2258,7 +2258,7 @@ osm_get_sm_signal_str( static const char* const __osm_disp_msg_str[] = { - "OSM_MSG_REQ", + "OSM_MSG_NONE", "OSM_MSG_MAD_NODE_INFO", "OSM_MSG_MAD_PORT_INFO,", "OSM_MSG_MAD_SWITCH_INFO", diff --git a/osm/opensm/osm_req_ctrl.c b/osm/opensm/osm_req_ctrl.c deleted file mode 100644 index 6eb7cf0..0000000 --- a/osm/opensm/osm_req_ctrl.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -/* - * Abstract: - * Implementation of osm_req_ctrl_t. - * This object represents the request controller object. - * This object is part of the opensm family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.5 $ - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/********************************************************************** - **********************************************************************/ -void -__osm_req_ctrl_disp_callback( - IN void *context, - IN void *p_data ) -{ - /* ignore return status when invoked via the dispatcher */ - osm_req_get( ((osm_req_ctrl_t*)context)->p_req, - (&((osm_attrib_req_t*)p_data)->path), - ((osm_attrib_req_t*)p_data)->attrib_id, - ((osm_attrib_req_t*)p_data)->attrib_mod, - ((osm_attrib_req_t*)p_data)->err_msg, - (&((osm_attrib_req_t*)p_data)->context) ); -} - -/********************************************************************** - **********************************************************************/ -void -osm_req_ctrl_construct( - IN osm_req_ctrl_t* const p_ctrl ) -{ - memset( p_ctrl, 0, sizeof(*p_ctrl) ); - p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; -} - 
-/********************************************************************** - **********************************************************************/ -void -osm_req_ctrl_destroy( - IN osm_req_ctrl_t* const p_ctrl ) -{ - CL_ASSERT( p_ctrl ); - cl_disp_unregister( p_ctrl->h_disp ); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_req_ctrl_init( - IN osm_req_ctrl_t* const p_ctrl, - IN osm_req_t* const p_req, - IN osm_log_t* const p_log, - IN cl_dispatcher_t* const p_disp ) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER( p_log, osm_req_ctrl_init ); - - osm_req_ctrl_construct( p_ctrl ); - p_ctrl->p_log = p_log; - - p_ctrl->p_disp = p_disp; - p_ctrl->p_req = p_req; - - p_ctrl->h_disp = cl_disp_register( - p_disp, - OSM_MSG_REQ, - __osm_req_ctrl_disp_callback, - p_ctrl ); - - if( p_ctrl->h_disp == CL_DISP_INVALID_HANDLE ) - { - osm_log( p_log, OSM_LOG_ERROR, - "osm_req_ctrl_init: ERR 1202: " - "Dispatcher registration failed\n" ); - status = IB_INSUFFICIENT_RESOURCES; - goto Exit; - } - - Exit: - OSM_LOG_EXIT( p_log ); - return( status ); -} - diff --git a/osm/opensm/osm_sm.c b/osm/opensm/osm_sm.c index a2b531f..dfe01a4 100644 --- a/osm/opensm/osm_sm.c +++ b/osm/opensm/osm_sm.c @@ -137,7 +137,6 @@ osm_sm_construct( cl_event_construct( &p_sm->subnet_up_event ); cl_thread_construct( &p_sm->sweeper ); osm_req_construct( &p_sm->req ); - osm_req_ctrl_construct( &p_sm->req_ctrl ); osm_resp_construct( &p_sm->resp ); osm_ni_rcv_construct( &p_sm->ni_rcv ); osm_pi_rcv_construct( &p_sm->pi_rcv ); @@ -195,7 +194,6 @@ osm_sm_shutdown( * dispatcher are complete. 
*/ osm_sm_mad_ctrl_destroy( &p_sm->mad_ctrl ); - osm_req_ctrl_destroy( &p_sm->req_ctrl ); cl_disp_unregister(p_sm->ni_disp_h); cl_disp_unregister(p_sm->pi_disp_h); cl_disp_unregister(p_sm->si_disp_h); @@ -298,10 +296,6 @@ osm_sm_init( if( status != IB_SUCCESS ) goto Exit; - status = osm_req_ctrl_init( &p_sm->req_ctrl, &p_sm->req, p_log, p_disp ); - if( status != IB_SUCCESS ) - goto Exit; - status = osm_resp_init( &p_sm->resp, p_mad_pool, p_vl15, p_subn, p_log ); if( status != IB_SUCCESS ) goto Exit; -- 1.5.0.rc2.g11a3 From robert.j.woodruff at intel.com Wed Jan 24 14:14:03 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Wed, 24 Jan 2007 14:14:03 -0800 Subject: [openib-general] Current List of OFA Linux components and maintainers Message-ID: Attached is the latest list of component maintainers for the OFA Linux stack that was compiled at the OFA developers workshop in Tampa. Let me know if there are any changes or additions needed. I would be good if we could get this posted somewhere on the OFA website so that the larger OF community knows who to send patches to for each component. woody -------------- next part -------------- A non-text attachment was scrubbed... Name: OpenFabrics Maintainers.pdf Type: application/octet-stream Size: 159215 bytes Desc: OpenFabrics Maintainers.pdf URL: From swise at opengridcomputing.com Wed Jan 24 14:17:01 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 24 Jan 2007 16:17:01 -0600 Subject: [openib-general] [PATCH v2] [RFC] ofed_1_2 2.6.17 backport: simulate neighbour update events by snooping ARP packets] Message-ID: <1169677021.21297.24.camel@stevo-desktop> Here is an updated patch for review. If you like this, then I'll post a series to back-port this to all the kernels... Steve. --- 2.6.17 backport: simulate neighbour update events by snooping ARP packets Needed to support iWARP devices on backported kernels. 
This also allows using the current drivers/infiniband/core/addr.c which requires netevents as well. For each incoming ARP request or response, we add a destructor function to the skb. When the skb is freed (ie when the ARP subsystem has updated the neighbour entry if needed) our destructor function will get called and we can generate a NEIGH_UPDATE netevent. When the first consumer registers for netevents, we add an ARP packet filter to start snooping. When the last consumer unregisters, we remove the filter. Changes: - add the snoop code to the backport netevent.c file. - remove the backport patch to revert addr.c to snoop ARP packets. Signed-off-by: Steve Wise --- .../backport/2.6.17/include/src/netevent.c | 67 ++++++++++++++++++++ .../2.6.17/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- 2 files changed, 65 insertions(+), 78 deletions(-) diff --git a/kernel_addons/backport/2.6.17/include/src/netevent.c b/kernel_addons/backport/2.6.17/include/src/netevent.c index 35d02c3..26a0920 100644 --- a/kernel_addons/backport/2.6.17/include/src/netevent.c +++ b/kernel_addons/backport/2.6.17/include/src/netevent.c @@ -15,6 +15,55 @@ #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *dev2) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + 
kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); @@ -30,8 +79,13 @@ static ATOMIC_NOTIFIER_HEAD(netevent_not int register_netevent_notifier(struct notifier_block *nb) { int err; - err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -47,7 +101,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = atomic_notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - From wangd at uci.edu Wed Jan 24 14:29:20 2007 From: wangd at uci.edu (Daniel Wang) Date: Wed, 24 Jan 2007 14:29:20 -0800 Subject: [openib-general] autogen.sh in userspace/libmthca needs automake version check Message-ID: <45B7DDC0.8070406@uci.edu> Hi, I don't know if this is the right place to post this, but the autogen.sh in userspace/libmthca should have a 
version check for automake. With automake 1.4, I get an error: Makefile.am:5: invalid unused variable name: `MTHCA_SOURCES' ...whereas after upgrading to automake 1.9, it seems to run cleanly. There's a version check in the autogen.sh in userspace/libibutils that seems appropriate. FYI, I pulled the userspace code off git yesterday (1/23) via the instructions on the wiki, and am in the process of trying to get things built for a 2.6.19.2 kernel. OFED 1.1 doesn't seem to build due to conflicts with the kernel, so I'm giving the development tree a shot. Cheers, -Daniel From sean.hefty at intel.com Wed Jan 24 15:19:37 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 24 Jan 2007 15:19:37 -0800 Subject: [openib-general] librdmacm and udapl: Which git branch to use in ofed_1_2 build In-Reply-To: <20070124080224.GF20101@mellanox.co.il> Message-ID: <000101c7400e$1f1b6f80$ff0da8c0@amr.corp.intel.com> >> > Could you please rebase that to 2.6.20-rc5? >> >> Yes - but I probably won't get to this until tomorrow. > >Not a problem - I generated patches and put them in OFED already. I've updated my rdma-dev.git tree to the latest flavor of the day. 
- Sean From kliteyn at dev.mellanox.co.il Wed Jan 24 15:58:15 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 25 Jan 2007 01:58:15 +0200 Subject: [openib-general] [PATCH v2] osm: QoS: added qos class and service id to the path record Message-ID: <45B7F297.4080506@dev.mellanox.co.il> Hi Hal [V2] QoS patch: added qos class and service id to the path record Signed-off-by: Yevgeny Kliteynik --- osm/include/iba/ib_types.h | 148 ++++++++++++++++++++++++++++++--- osm/opensm/osm_helper.c | 8 +- osm/opensm/osm_sa_multipath_record.c | 2 +- osm/opensm/osm_sa_path_record.c | 5 +- osm/osmtest/osmtest.c | 2 +- 5 files changed, 144 insertions(+), 21 deletions(-) diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 22f7f62..2bbb8b4 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -1700,6 +1700,28 @@ ib_class_is_rmpp( #define IB_SMINFO_STATE_MASTER 3 /**********/ +/****d* IBA Base: Constants/IB_PATH_REC_SL_MASK +* NAME +* IB_PATH_REC_SL_MASK +* +* DESCRIPTION +* Mask for the sl field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_SL_MASK 0xF + +/****d* IBA Base: Constants/IB_PATH_REC_QOS_CLASS_MASK +* NAME +* IB_PATH_REC_QOS_CLASS_MASK +* +* DESCRIPTION +* Mask for the QoS class field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_QOS_CLASS_MASK 0xFFF0 + /****d* IBA Base: Constants/IB_PATH_REC_SELECTOR_MASK * NAME * IB_PATH_REC_SELECTOR_MASK @@ -2314,7 +2336,7 @@ ib_gid_get_guid( #include typedef struct _ib_path_rec { - uint8_t resv0[8]; + ib_net64_t service_id; ib_gid_t dgid; ib_gid_t sgid; ib_net16_t dlid; @@ -2323,7 +2345,7 @@ typedef struct _ib_path_rec uint8_t tclass; uint8_t num_path; ib_net16_t pkey; - ib_net16_t sl; + ib_net16_t qos_class_sl; uint8_t mtu; uint8_t rate; uint8_t pkt_life; @@ -2334,8 +2356,8 @@ typedef struct _ib_path_rec #include /* * FIELDS -* resv0 -* Reserved bytes. +* service_id +* Service ID. * * dgid * GID of destination port. 
@@ -2363,11 +2385,8 @@ typedef struct _ib_path_rec * pkey * Partition key (P_Key) to use on this path. * -* resv1 -* Reserved byte. -* -* sl -* Service level to use on this path. +* qos_class_sl +* QoS class and service level to use on this path. * * mtu * MTU and MTU selector fields to use on this path @@ -2388,6 +2407,7 @@ typedef struct _ib_path_rec *********/ /* Path Record Component Masks */ +#define IB_PR_COMPMASK_SERVICEID (CL_HTON64(((uint64_t)1)<<1)) #define IB_PR_COMPMASK_DGID (CL_HTON64(((uint64_t)1)<<2)) #define IB_PR_COMPMASK_SGID (CL_HTON64(((uint64_t)1)<<3)) #define IB_PR_COMPMASK_DLID (CL_HTON64(((uint64_t)1)<<4)) @@ -2400,7 +2420,7 @@ typedef struct _ib_path_rec #define IB_PR_COMPMASK_REVERSIBLE (CL_HTON64(((uint64_t)1)<<11)) #define IB_PR_COMPMASK_NUMBPATH (CL_HTON64(((uint64_t)1)<<12)) #define IB_PR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<13)) -#define IB_PR_COMPMASK_RESV1 (CL_HTON64(((uint64_t)1)<<14)) +#define IB_PR_COMPMASK_QOS_CLASS (CL_HTON64(((uint64_t)1)<<14)) #define IB_PR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<15)) #define IB_PR_COMPMASK_MTUSELEC (CL_HTON64(((uint64_t)1)<<16)) #define IB_PR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<17)) @@ -2658,6 +2678,7 @@ ib_path_rec_init_local( IN ib_net16_t slid, IN uint8_t num_path, IN ib_net16_t pkey, + IN uint16_t qos_class, IN uint8_t sl, IN uint8_t mtu_selector, IN uint8_t mtu, @@ -2673,8 +2694,8 @@ ib_path_rec_init_local( p_rec->slid = slid; p_rec->num_path = num_path; p_rec->pkey = pkey; - /* Lower 4 bits of path rec's SL are reserved. 
*/ - p_rec->sl = cl_ntoh16( sl ); + p_rec->qos_class_sl = cl_ntoh16( (sl & IB_PATH_REC_SL_MASK) | + (qos_class << 4) ); p_rec->mtu = (uint8_t)((mtu & IB_PATH_REC_BASE_MASK) | (uint8_t)(mtu_selector << 6)); p_rec->rate = (uint8_t)((rate & IB_PATH_REC_BASE_MASK) | @@ -2686,8 +2707,8 @@ ib_path_rec_init_local( /* Clear global routing fields for local path records */ p_rec->hop_flow_raw = 0; p_rec->tclass = 0; + p_rec->service_id = 0; - *((uint64_t*)p_rec->resv0) = 0; *((uint32_t*)p_rec->resv2) = 0; *((uint16_t*)p_rec->resv2 + 2) = 0; } @@ -2779,6 +2800,41 @@ ib_path_rec_num_path( * ib_path_rec_t *********/ +/****f* IBA Base: Types/ib_path_rec_set_sl +* NAME +* ib_path_rec_set_sl +* +* DESCRIPTION +* Set path service level. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_sl( + IN ib_path_rec_t* const p_rec, + IN const uint8_t sl ) +{ + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & + IB_PATH_REC_QOS_CLASS_MASK ) | + ( sl & IB_PATH_REC_SL_MASK) ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* sl +* [in] Service level to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + /****f* IBA Base: Types/ib_path_rec_sl * NAME * ib_path_rec_sl @@ -2792,7 +2848,7 @@ static inline uint8_t OSM_API ib_path_rec_sl( IN const ib_path_rec_t* const p_rec ) { - return( (uint8_t)((cl_ntoh16( p_rec->sl )) & 0xF) ); + return( (uint8_t)((cl_ntoh16( p_rec->qos_class_sl )) & IB_PATH_REC_SL_MASK) ); } /* * PARAMETERS @@ -2808,6 +2864,70 @@ ib_path_rec_sl( * ib_path_rec_t *********/ +/****f* IBA Base: Types/ib_path_rec_set_qos_class +* NAME +* ib_path_rec_set_qos_class +* +* DESCRIPTION +* Set path QoS class. 
+* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_qos_class( + IN ib_path_rec_t* const p_rec, + IN const uint16_t qos_class ) +{ + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & + IB_PATH_REC_SL_MASK ) | + ( qos_class << 4) ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* qos_class +* [in] QoS class to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_qos_class +* NAME +* ib_path_rec_qos_class +* +* DESCRIPTION +* Get QoS class. +* +* SYNOPSIS +*/ +static inline uint16_t OSM_API +ib_path_rec_qos_class( + IN const ib_path_rec_t* const p_rec ) +{ + return( cl_ntoh16(p_rec->qos_class_sl) >> 4 ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* QoS class of the path record. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + /****f* IBA Base: Types/ib_path_rec_mtu * NAME * ib_path_rec_mtu diff --git a/osm/opensm/osm_helper.c b/osm/opensm/osm_helper.c index 50a3413..5193409 100644 --- a/osm/opensm/osm_helper.c +++ b/osm/opensm/osm_helper.c @@ -1095,7 +1095,7 @@ osm_dump_path_record( { osm_log( p_log, log_level, "PathRecord dump:\n" - "\t\t\t\tresv0...................0x%016" PRIx64 "\n" + "\t\t\t\tservice_id..............0x%016" PRIx64 "\n" "\t\t\t\tdgid....................0x%016" PRIx64 " : " "0x%016" PRIx64 "\n" "\t\t\t\tsgid....................0x%016" PRIx64 " : " @@ -1106,6 +1106,7 @@ osm_dump_path_record( "\t\t\t\ttclass..................0x%X\n" "\t\t\t\tnum_path_revers.........0x%X\n" "\t\t\t\tpkey....................0x%X\n" + "\t\t\t\tqos_class...............0x%X\n" "\t\t\t\tsl......................0x%X\n" "\t\t\t\tmtu.....................0x%X\n" "\t\t\t\trate....................0x%X\n" @@ -1114,7 +1115,7 @@ osm_dump_path_record( "\t\t\t\tresv2...................0x%X\n" "\t\t\t\tresv3...................0x%X\n" "", - *(uint64_t*)p_pr->resv0, + 
cl_ntoh64(p_pr->service_id), cl_ntoh64( p_pr->dgid.unicast.prefix ), cl_ntoh64( p_pr->dgid.unicast.interface_id ), cl_ntoh64( p_pr->sgid.unicast.prefix ), @@ -1125,7 +1126,8 @@ osm_dump_path_record( p_pr->tclass, p_pr->num_path, cl_ntoh16( p_pr->pkey ), - cl_ntoh16( p_pr->sl ), + ib_path_rec_qos_class(p_pr), + ib_path_rec_sl(p_pr), p_pr->mtu, p_pr->rate, p_pr->pkt_life, diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 3256a45..44fe453 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ b/osm/opensm/osm_sa_multipath_record.c @@ -759,7 +759,7 @@ __osm_mpr_rcv_build_pr( p_pr->hop_flow_raw &= cl_hton32(1<<31); p_pr->pkey = p_parms->pkey; - p_pr->sl = cl_hton16( p_parms->sl ); + ib_path_rec_set_sl(p_pr, p_parms->sl); p_pr->mtu = (uint8_t)( p_parms->mtu | 0x80 ); p_pr->rate = (uint8_t)( p_parms->rate | 0x80 ); diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index a0dbb07..9a3eaeb 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -774,7 +774,8 @@ __osm_pr_rcv_build_pr( #endif p_pr->pkey = p_parms->pkey; - p_pr->sl = cl_hton16(p_parms->sl); + ib_path_rec_set_qos_class(p_pr,0); + ib_path_rec_set_sl(p_pr,p_parms->sl); p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80); p_pr->rate = (uint8_t)(p_parms->rate | 0x80); @@ -2053,7 +2054,7 @@ osm_pr_rcv_process( /* SL, Hop Limit, and Flow Label */ ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, &sl, &flow_label, &hop_limit ); - p_pr_item->path_rec.sl = cl_hton16( sl ); + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); #ifndef ROUTER_EXP p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | (flow_label << 8); diff --git a/osm/osmtest/osmtest.c b/osm/osmtest/osmtest.c index b9e3bf7..c42b037 100644 --- a/osm/osmtest/osmtest.c +++ b/osm/osmtest/osmtest.c @@ -1982,7 +1982,7 @@ osmtest_write_path_info( IN osmtest_t * cl_ntoh64( p_rec->sgid.unicast.interface_id ), cl_ntoh16( p_rec->dlid ), cl_ntoh16( p_rec->slid ), 
cl_ntoh32( p_rec->hop_flow_raw ), p_rec->tclass, - p_rec->num_path, cl_ntoh16( p_rec->pkey ), p_rec->sl, + p_rec->num_path, cl_ntoh16( p_rec->pkey ), ib_path_rec_sl(p_rec), p_rec->mtu, p_rec->rate, p_rec->pkt_life, p_rec->preference ); -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Wed Jan 24 16:34:45 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 25 Jan 2007 02:34:45 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <1169655749.29183.93022.camel@hal.voltaire.com> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> Message-ID: <45B7FB25.7020704@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi Yevgeny, > > On Wed, 2007-01-24 at 09:15, Yevgeny Kliteynik wrote: > > [snip...] > >>> I also have some questions about the patches >> Shoot > > First, as I understand it, this higher level QoS is not yet an approved > standard (annex) so is this code experimental? I guess so > In any case, some things > might change, etc. so IMO this QoS should be implemented in a way that > minimizes the risk to the non QoS code. Agree > I suspect the main interactions > are in osm_sa_path/multipath_record.c but will also extend to the QoS > manager. So should this all be conditionalized with something like > QOS_ANNEX and by default be off with some build switch to enable this > code in OpenSM until be becomes standard ? I suggest that instead of enclosing the code in ifdef, this new code will be invoked only when QoS in OpenSM has been turned on. > When will the remainder of the changes to the QoS manager be ready ? It > would be good to see the whole picture. Are there any other missing > pieces ? I'm working right now on checking path record for QoS constraints. I'm hoping to finish it in a day or two. 
After that, I'll do the same with multipath record. > It would be good to have some documentation for this including an opensm > man page update. > > As far as using lex/yacc, are they invoked as part of the build > procedure or are the files they generate just checked in and used ? When lex/yacc are invoked, they generate three files: - osm_qos_parser_l.c - osm_qos_parser_y.c - osm_qos_parser_y.h These generated files should be included in the git repository, and they are the ones that are compiled by the 'make' command. To cause lex/yacc to generate these files on every compilation, a configuration flag '--enable-maintainer-mode' should be used when running 'configure'. So normally, lex/yacc won't be invoked during the build (unless the --enable-maintainer-mode option was selected). > How could/would multiple file versions be supported ? One previous > example was a mention that port groups can be shared by more than one > manager (e.g. QoS and partitions) so this might be made hierarchical. > I'd like to understand this before we get locked in. The parser can be enhanced to support different versions of grammar. It will just check the first line of the policy file: and then it will decide which grammar rules to apply according to the 'version' value. --Yevgeny > There are some other lower level questions which I'll get to later. I'll > also review the XML file format in detail later. > > -- Hal > > From HNGUYEN at de.ibm.com Wed Jan 24 21:17:14 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Thu, 25 Jan 2007 00:17:14 -0500 Subject: [openib-general] [openfabrics-ewg] modules compilation status for OFED 1.2 In-Reply-To: <6C2C79E72C305246B504CBA17B5500C9A0DC2C@mtlexch01.mtl.com> Message-ID: Hi, > We stay with same build process but the backport patches give a solution > for such cases. > Michael Tsirkin can help you how we solved such problems with other > kernel code we needed.
I need to be more specific here: ibmebus requires two symbols in arch/ppc64/kernel/dma.c to be exported, which means one really needs to rebuild and install the patched kernel. As far as I understood from Michael, when we looked at ofed-1.1, that approach is not supported by ofed build process. Regards Nam From eitan at sw053.yok.mtl.com Wed Jan 24 21:24:18 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Thu, 25 Jan 2007 07:24:18 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-25:normal completion Message-ID: <200701250524.l0P5OI6E016870@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Tue_Jan_23_18:08:59_2007 b49605 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From mst at mellanox.co.il Wed Jan 24 22:06:22 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 25 Jan 2007 08:06:22 +0200 Subject: [openib-general] [PATCH v2] [RFC] ofed_1_2 2.6.17 backport: simulate neighbourupdate events by snooping ARP packets] In-Reply-To: <1169677021.21297.24.camel@stevo-desktop> References: <1169677021.21297.24.camel@stevo-desktop> Message-ID: <20070125060622.GB11092@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH v2] [RFC] ofed_1_2 2.6.17 backport: simulate neighbourupdate events by snooping ARP packets] > > Here is an updated patch for review. If you like this, then I'll post a > series to back-port this to all the kernels... Looks good. Let's do it. -- MST From mst at mellanox.co.il Wed Jan 24 23:35:17 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 25 Jan 2007 09:35:17 +0200 Subject: [openib-general] Current List of OFA Linux components and maintainers In-Reply-To: References: Message-ID: <20070125073517.GF11092@mellanox.co.il> > Attached is the latest list of component maintainers for the OFA Linux > stack that was compiled at the OFA developers workshop in Tampa. > Let me know if there are any changes or additions needed. > I would be good if we could get this posted somewhere on > the OFA website so that the larger OF community knows who to send > patches to for each component. Especially in the OFED distro I have now stuck my nose deep enough in IPoIB that I guess I can't avoid this responsibility now :) And Roland seems uninterested in backporting code to older kernels, so that's all my code, too. For SRP, all OFED backporting was done by Ishai Rabinovit , and he's taken over the srp daemon from Roland, so I think you want to add him to maintainer list there. 
-- MST From tziporet at mellanox.co.il Thu Jan 25 01:50:50 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Thu, 25 Jan 2007 11:50:50 +0200 Subject: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: <20070125073517.GF11092@mellanox.co.il> References: <20070125073517.GF11092@mellanox.co.il> Message-ID: <45B87D7A.5000708@mellanox.co.il> Michael S. Tsirkin wrote: > Especially in the OFED distro I have now stuck my nose deep enough > in IPoIB that I guess I can't avoid this responsibility now :) > And Roland seems uninterested in backporting code to older kernels, so > that's all my code, too. > > For SRP, all OFED backporting was done by Ishai Rabinovit > , and he's taken over the srp daemon from Roland, so I > think you want to add him to maintainer list there. > > I support Michael in his notes. In addition I think we should add MPI maintainers too: Open MPI - Jeff Squyres - jsquyres at cisco.com MVAPICH OSU - Pasha (Pavel) Shamis - pasha at mellanox.co.il MVAPICH2 OSU - Shaun Rowland rowland at cse.ohio-state.edu Maybe for OFED we want to add integration owner - Vlad (Vladimir Sokolovsky) vlad at mellanox.co.il Tziporet From vlad at lists.openfabrics.org Thu Jan 25 02:22:00 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Thu, 25 Jan 2007 02:22:00 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070125-0200 daily build status Message-ID: <20070125102200.EBC6BE603C8@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.14 
Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.17 Passed on x86_64 with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.16 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Failed: From ramachandra.kuchimanchi at qlogic.com Thu Jan 25 05:15:49 2007 From: ramachandra.kuchimanchi at qlogic.com (Ramachandra Kuchimanchi) Date: Thu, 25 Jan 2007 07:15:49 -0600 Subject: [openib-general] Current List of OFA Linux components and maintainers In-Reply-To: References: Message-ID: Woody, I am the maintainer for the QLogic VNIC ULP kernel driver and userspace tools. 
Kernel component: VNIC - infiniband/ulp/vnic Userspace component: VNIC tools - qlvnictools Maintainer: Ramachandra K (ramachandra.kuchimanchi at qlogic.com) Regards, Ram > -----Original Message----- > From: openib-general-bounces at openib.org [mailto:openib-general- > bounces at openib.org] On Behalf Of Woodruff, Robert J > Sent: Thursday, January 25, 2007 3:44 AM > To: openib-general; EWG; openib-promoters at openib.org > Subject: [openib-general] Current List of OFA Linux components and maintainers > > > Attached is the latest list of component maintainers for the OFA Linux stack > that was compiled at the OFA developers workshop in Tampa. > Let me know if there are any changes or additions needed. > I would be good if we could get this posted somewhere on the OFA website so > that the larger OF community knows who to send patches to for each component. > > woody > From jlentini at netapp.com Thu Jan 25 05:35:41 2007 From: jlentini at netapp.com (James Lentini) Date: Thu, 25 Jan 2007 08:35:41 -0500 (EST) Subject: [openib-general] Current List of OFA Linux components and maintainers In-Reply-To: References: Message-ID: On Wed, 24 Jan 2007, Woodruff, Robert J wrote: > > Attached is the latest list of component maintainers for the OFA Linux > stack > that was compiled at the OFA developers workshop in Tampa. > Let me know if there are any changes or additions needed. > I would be good if we could get this posted somewhere on > the OFA website so that the larger OF community knows who to send > patches to > for each component. The list is missing the iWARP CM and the Chelsio driver. I believe Tom Tucker and Steve Wise are co-maintainers of both.
From swise at opengridcomputing.com Thu Jan 25 05:48:32 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 07:48:32 -0600 Subject: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers References: <20070125073517.GF11092@mellanox.co.il> <45B87D7A.5000708@mellanox.co.il> Message-ID: <004501c74087$8241a010$020010ac@haggard> I'm the maintainer for the Chelsio RDMA drivers/libs. ----- Original Message ----- From: "Tziporet Koren" To: "Michael S. Tsirkin" Cc: "Pavel Shamis (Pasha)" ; "EWG" ; "openib-general" ; Sent: Thursday, January 25, 2007 3:50 AM Subject: Re: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers > Michael S. Tsirkin wrote: >> Especially in the OFED distro I have now stuck my nose deep enough >> in IPoIB that I guess I can't avoid this responsibility now :) >> And Roland seems uninterested in backporting code to older kernels, >> so >> that's all my code, too. >> >> For SRP, all OFED backporting was done by Ishai Rabinovit >> , and he's taken over the srp daemon from >> Roland, so I >> think you want to add him to maintainer list there. >> >> > I support Michael in his notes. > In addition I think we should add MPI maintainers too: > Open MPI - Jeff Squyres - jsquyres at cisco.com > MVAPICH OSU - Pasha (Pavel) Shamis - pasha at mellanox.co.il > MVAPICH2 OSU - Shaun Rowland rowland at cse.ohio-state.edu > > Maybe for OFED we want to add integration owner - Vlad (Vladimir > Sokolovsky) vlad at mellanox.co.il > > Tziporet > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From mst at mellanox.co.il Thu Jan 25 06:13:57 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 25 Jan 2007 16:13:57 +0200 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. In-Reply-To: <1169648853.1796.9.camel@stevo-desktop> References: <1169600597.26256.53.camel@stevo-desktop> <20070124081504.GI20101@mellanox.co.il> <1169648853.1796.9.camel@stevo-desktop> Message-ID: <20070125141357.GB25265@mellanox.co.il> > Quoting Steve Wise : > Subject: Re: [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. > > On Wed, 2007-01-24 at 10:15 +0200, Michael S. Tsirkin wrote: > > > Quoting Steve Wise : > > > Subject: [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. > > > > > > > > > Handle Ethernet neighbour updates during route resolution. > > > > > > The IWCM uses the ib_addr services to do route resolution (neighbour > > > discovery in the IP world). The ib_addr netevent callback routine, > > > however, currently only acts on Inifininband neighbour updates. It needs > > > to act on ethernet neighbour updates as well. > > > > > > This patch just removes filtering on device type altogether and > > > will trigger on any neighour updates where the nud_type is valid. > > > This simplifies the code some. > > > > > > Signed-off-by: Steve Wise > > > > BTW, Steve, if this is a patch you want in OFED, pls specify this. > > > > Right...sorry: I believe it should go in OFED 1.2 and queued for 2.6.21. OK. What happens is it conflicts (naturally) with backport addr_1_netevents_revert_to_2_6_17.patch. So rather than duplicate effort, let's wait till we can pull from you the changes that remove addr_1_netevents_revert_to_2_6_17.patch, and then put this one in fixes? OK? 
-- MST From halr at voltaire.com Thu Jan 25 06:11:49 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 25 Jan 2007 09:11:49 -0500 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B768E2.9070604@dev.mellanox.co.il> References: <45B768E2.9070604@dev.mellanox.co.il> Message-ID: <1169734241.13608.75454.camel@hal.voltaire.com> Hi Yevgeny, On Wed, 2007-01-24 at 09:10, Yevgeny Kliteynik wrote: > Hi Hal, Sasha. > > Here's a description of the QoS policy file, and an > example of such file (with more comments inside). This makes the start of a good document on this. If you add this to osm/doc, I will incorporate it into the opensm man page. > QoS Policy file > --------------- > > The QoS policy file is divided into 4 sub sections: > > * Node Group: a set of HCAs, Routers or Switches that share the same settings. > A node groups might be a partition defined by the partition manager policy in > terms of GUIDs. Are these Node or Port Groups ? It looks like port groups from the below. > Future implementations might provide support for NodeDescription > based definition of node groups. > > * Fabric Setup: > Defines how the SL2VL and VLArb tables should be setup. This policy definition > assumes the computation of target behavior should be performed outside of > OpenSM. > > * QoS-Levels Definition: > This section defines the possible sets of parameters for QoS that a client might > be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits > (in case LMC > 0 is used for QoS) and TClass. How does this relate to/interact with partition configuration ? Also, what about preexisting QoS ? > * Matching Rules: > A list of rules that match an incoming PathRecord request to a QoS-Level. The > rules are processed in order such as the first match is applied. Each rule is > built out of set of match expressions which should all match for the rule to > apply. 
The matching expressions are defined for the following fields > - SRC and DST to lists of node groups > - Service-ID to a list of Service-ID or Service-ID ranges > - TClass to a list of TClass values or ranges > > QoS policy file example > ----------------------- > > > > > > > > Storage > our SRP storage targets Is the use clause more than commentary ? How is it "used" ? > 0x1000000000000001 > 0x1000000000000002 > > > > Virtual Servers > node desc and IB port # > vs1/HCA-1/P1 > vs3/HCA-1/P1 > vs3/HCA-2/P1 How are port-names used ? > > > > Partition 1 > default settings > Part1 > > Is this CA rather than HCA ? (What about TCAs ?) > > Routers > all routers > ROUTER > > > > > ^^ Actually, it is SL assuming the device supports SL2VL mapping as indicate by IsSLMappingSupported in the PortInfo:CapabilityMask. Will the syntax handle single data VL devices which only implement SL filtering ? Will the QoS manager support this (SL2VL without VLArb settings) or are these required together ? > > > > Part1 > * > * > 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > > > > Storage > > Storage2 > > Storage3 I don't quite follow across-from/to. > * > 1 > 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 > > > > > > > > Storage > > 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 > 8:255,9:127,10:63,11:31,12:15,13:7,14:3 > 10 What happens if the shape of VLArb indicated here does not match the device ? > > > > > > > > 1 What does sn mean ? What is it used for ? > for the lowest priority comm > 16 > > > > 2 > low latency best bandwidth > 0 > 7 What is class ? I saw TClass mentioned earlier. Is this TClass or something else ? > > If specified, do MTU limit and rate limit add extra limits to be imposed on what is selected (and realizable) ? Strictly speaking, couldn't packet lifetime limit also be added to this syntax here ? I presume it was left out as being not "interesting" as yet. Is that correct ? Also, how are path bits used ? 
> > 3 > just an example > 0 > 32 > 1 > 1 > > > > > > > > 1 > low latency by class 7-9 or 11 > 7-9,11 > 1 > > > > 2 > Storage targets connection> > Storage > 22,4719 What is service ? What does 22.4719 mean ? > 3 What are match-levels used for ? -- Hal > > > > > > > > -- Yevgeny > > Yevgeny Kliteynik wrote: > > Hi Sasha, > > > > Sasha Khapyorsky wrote: > >> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: > >>> Hi Sasha. > >>> > >>> Sasha Khapyorsky wrote: > >>>> Hi Yevgeny, > >>>> > >>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >>>>> Hi Hal > >>>>> > >>>>> The following series of six patches implements QoS policy file parser: > >>>>> > >>>>> 1. QoS parser Lex file > >>>>> 2. QoS parser Lex-generated c file > >>>>> 3. QoS parser grammar (Yacc) file > >>>>> 4. QoS parser Yacc-generated grammar c and h file > >>>>> 5. QoS parser header file that defines parse tree data structures > >>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files > >>>> Is there any description of proposed format and functionality? > >>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few > >>> minor modifications. You can find the RFC here: > >>> http://openib.org/pipermail/openib-general/2006-May/022336.html > >> This was RFC and couple of issues were discussed then. Now you are about > >> implementation phase and exact format description would be desired. For > >> example what "few minor modifications" are? > > > > I'll prepare an example file with explanations. > > > > -- Yevgeny > > > >>>> Also what about using human readable formats? > >>> To me the xml-like format in the RFC looks pretty readable. > >>> It has very limited number of keywords (tags), so it's easy > >>> to follow and/or to modify. > >> It is your opinion, not everybody will agree with it (AFAIR this was > >> discussed too during RFC). 
> >> > >> I would not be care, but I don't know any example of really successful > >> XML using for configuration purposes (especially where advanced graphical > >> config editors/viewers were not used). Do you know? > >> > >> Sasha > >> > > > > _______________________________________________ > > openib-general mailing list > > openib-general at openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > From halr at voltaire.com Thu Jan 25 06:15:17 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 25 Jan 2007 09:15:17 -0500 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B7FB25.7020704@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> <45B7FB25.7020704@dev.mellanox.co.il> Message-ID: <1169734403.13608.75532.camel@hal.voltaire.com> Hi Yevgeny, On Wed, 2007-01-24 at 19:34, Yevgeny Kliteynik wrote: > Hi Hal, > > Hal Rosenstock wrote: > > Hi Yevgeny, > > > > On Wed, 2007-01-24 at 09:15, Yevgeny Kliteynik wrote: > > > > [snip...] > > > >>> I also have some questions about the patches > >> Shoot > > > > First, as I understand it, this higher level QoS is not yet an approved > > standard (annex) so is this code experimental? > > I guess so > > > In any case, some things > > might change, etc. so IMO this QoS should be implemented in a way that > > minimizes the risk to the non QoS code. > > Agree > > > I suspect the main interactions > > are in osm_sa_path/multipath_record.c but will also extend to the QoS > > manager. So should this all be conditionalized with something like > > QOS_ANNEX and by default be off with some build switch to enable this > > code in OpenSM until be becomes standard ? 
> > I suggest that instead of enclosing the code in ifdef, this new code > will be invoked only when QoS in OpenSM has been turned on. Perhaps. I don't see this in the SA PR/MPR patch you supplied though. What happens if a QoS request is made and it is not enabled on the SM side ? Also, what happens when a QoS request is made but only the previous (more primitive) QoS is enabled (not this QoS support) ? > > When will the remainder of the changes to the QoS manager be ready ? It > > would be good to see the whole picture. Are there any other missing > > pieces ? > > I'm working right now on checking path record for QoS constraints. > I'm hoping to finish it in a day or two. After that, I'll do the same > with multipath record. Will this take care of the questions asked above ? If so, I guess I'll need to wait to see this. > > It would be good to have some documentation for this including an opensm > > man page update. When do you plan on doing this ? Clearly, this is not as important as the work immediately in front of you on this. > > As far as using lex/yacc, are they invoked as part of the build > > procedure or are the files they generate just checked in and used ? > > When lex/yacc are invoked, they generate three files: > - osm_qos_parser_l.c > - osm_qos_parser_y.c > - osm_qos_parser_y.h > These generated files should be included in the git repository, > and they are the ones that are compiled by 'make' command. > To cause lex/yacc generate these files on every compilation, a > configuration flag '--enable-maintainer-mode' should be used when > running 'configure'. > So normally, lex/yacc won't be invoked during the build (unless the > --enable-maintainer-mode option was selected). > > How could/would multiple file versions be supported ? One previous > > example was a mention that port groups can be shared by more than one > > manager (e.g. QoS and partitions) so this might be made hierarchical. > > I'd like to understand this before we get locked in. 
> > The parser can be enhanced to support different versions of grammar. > It will just check the first line of the policy file: > > and then it will decide which grammar rules to apply according to the > 'version' value. Thanks. -- Hal > --Yevgeny > > > There are some other lower level questions which I'll get to later. I'll > > also review the XML file format in detail later. > > > > -- Hal > > > > From mst at mellanox.co.il Thu Jan 25 06:28:37 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 25 Jan 2007 16:28:37 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <1169734403.13608.75532.camel@hal.voltaire.com> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> <45B7FB25.7020704@dev.mellanox.co.il> <1169734403.13608.75532.camel@hal.voltaire.com> Message-ID: <20070125142837.GC25265@mellanox.co.il> > What happens if a QoS request is made and it is not enabled on the SM side ? What exactly does "QoS request" mean? Does not QoS Annext just add data in path record query? -- MST From vlad at dev.mellanox.co.il Thu Jan 25 06:33:04 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 25 Jan 2007 16:33:04 +0200 Subject: [openib-general] Add bonding suuport to OFED In-Reply-To: <45B77F81.7090808@voltaire.com> References: <45B6412A.6020207@voltaire.com> <45B77F81.7090808@voltaire.com> Message-ID: <1169735584.3731.180.camel@swlab50.yok.mtl.com> Hi Moni, Please review the following patch to ib-bonding.spec: Use %{_prefix} in RPM spec file instead of hard-coded /usr/local/ofed. 
Signed-off-by: Vladimir Sokolovsky --- diff --git a/ib-bonding.spec b/ib-bonding.spec index db02fe8..77e51e0 100644 --- a/ib-bonding.spec +++ b/ib-bonding.spec @@ -5,6 +5,8 @@ %define _build_name_fmt %%{ARCH}/%%{NAME}-%%{VERSION}-%%{RELEASE}-%%{DISTRIBUTION}-%%{ARCH}.rpm +%{!?_prefix: %define _prefix /usr/local/ofed} + Summary : ib_bonding patch and modules. Name : %{name} Version : %{version} @@ -39,11 +41,11 @@ fi %install [ "${RPM_BUILD_ROOT}" != "/" -a -d ${RPM_BUILD_ROOT} ] && rm -rf ${RPM_BUILD_ROOT} mkdir -p ${RPM_BUILD_ROOT}/lib/modules/%{kversion}/kernel/drivers/net/bonding/ -mkdir -p ${RPM_BUILD_ROOT}/usr/local/ofed/bin -mkdir -p ${RPM_BUILD_ROOT}/usr/local/ofed/docs +mkdir -p ${RPM_BUILD_ROOT}%{_prefix}/bin +mkdir -p ${RPM_BUILD_ROOT}%{_prefix}/docs install -m 755 linux/drivers/net/bonding/bonding.ko ${RPM_BUILD_ROOT}/lib/modules/%{kversion}/kernel/drivers/net/bonding/ -install -m 755 bin/bond-init.sh ${RPM_BUILD_ROOT}/usr/local/ofed/bin -install -m 755 docs/ib-bonding.txt ${RPM_BUILD_ROOT}/usr/local/ofed/docs +install -m 755 bin/bond-init.sh ${RPM_BUILD_ROOT}%{_prefix}/bin +install -m 755 docs/ib-bonding.txt ${RPM_BUILD_ROOT}%{_prefix}/docs @@ -51,7 +53,7 @@ install -m 755 docs/ib-bonding.txt ${RP if [ ! -z $STACK_PREFIX ] ; then backup_dir=$STACK_PREFIX/backup else - backup_dir=/usr/local/ofed/backup + backup_dir=%{_prefix}/backup fi @@ -69,7 +71,7 @@ STACK_PREFIX=$(test -x /etc/infiniband/i if [ ! -z $STACK_PREFIX ] ; then backup_dir=$STACK_PREFIX/backup else - backup_dir=/usr/local/ofed/backup + backup_dir=%{_prefix}/backup fi cd $backup_dir found_file=$(find -name bonding.ko) @@ -81,6 +83,6 @@ fi %files /lib/modules/%{kversion}/kernel/drivers/net/bonding/bonding.ko -/usr/local/ofed/bin/bond-init.sh -/usr/local/ofed/docs/ib-bonding.txt +%{_prefix}/bin/bond-init.sh +%{_prefix}/docs/ib-bonding.txt From mst at mellanox.co.il Thu Jan 25 06:43:00 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 25 Jan 2007 16:43:00 +0200 Subject: [openib-general] IB_WC_RESP_TIMEOUT_ERR Message-ID: <20070125144300.GD25265@mellanox.co.il> Guys, what is IB_WC_RESP_TIMEOUT_ERR? I went over tables 95 and 96 in Vol1 IB spec 1.2 and I do not see anything that could map to this code. -- MST From swise at opengridcomputing.com Thu Jan 25 07:11:20 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 09:11:20 -0600 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. In-Reply-To: <20070125141357.GB25265@mellanox.co.il> References: <1169600597.26256.53.camel@stevo-desktop> <20070124081504.GI20101@mellanox.co.il> <1169648853.1796.9.camel@stevo-desktop> <20070125141357.GB25265@mellanox.co.il> Message-ID: <1169737880.20537.5.camel@stevo-desktop> On Thu, 2007-01-25 at 16:13 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: Re: [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. > > > > On Wed, 2007-01-24 at 10:15 +0200, Michael S. Tsirkin wrote: > > > > Quoting Steve Wise : > > > > Subject: [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. > > > > > > > > > > > > Handle Ethernet neighbour updates during route resolution. > > > > > > > > The IWCM uses the ib_addr services to do route resolution (neighbour > > > > discovery in the IP world). The ib_addr netevent callback routine, > > > > however, currently only acts on Inifininband neighbour updates. It needs > > > > to act on ethernet neighbour updates as well. > > > > > > > > This patch just removes filtering on device type altogether and > > > > will trigger on any neighour updates where the nud_type is valid. > > > > This simplifies the code some. > > > > > > > > Signed-off-by: Steve Wise > > > > > > BTW, Steve, if this is a patch you want in OFED, pls specify this. > > > > > > > Right...sorry: I believe it should go in OFED 1.2 and queued for 2.6.21. > > OK. 
> What happens is it conflicts (naturally) with backport > addr_1_netevents_revert_to_2_6_17.patch. > > So rather than duplicate effort, let's wait till we can pull > from you the changes that remove addr_1_netevents_revert_to_2_6_17.patch, > and then put this one in fixes? OK? > Sounds good. From swise at opengridcomputing.com Thu Jan 25 07:13:30 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 09:13:30 -0600 Subject: [openib-general] Current List of OFA Linux components and maintainers In-Reply-To: References: Message-ID: <1169738010.20537.8.camel@stevo-desktop> On Thu, 2007-01-25 at 08:35 -0500, James Lentini wrote: > > On Wed, 24 Jan 2007, Woodruff, Robert J wrote: > > > > > Attached is the latest list of component maintainers for the OFA Linux > > stack > > that was compiled at the OFA developers workshop in Tampa. > > Let me know if there are any changes or additions needed. > > It would be good if we could get this posted somewhere on > > the OFA website so that the larger OF community knows who to send > > patches to > > for each component. > > The list is missing the iWARP CM and the Chelsio driver. I believe > Tom Tucker and Steve Wise are co-maintainers of both. I should be the maintainer for Chelsio. Steve. From rdreier at cisco.com Thu Jan 25 07:13:26 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 07:13:26 -0800 Subject: [openib-general] IB_WC_RESP_TIMEOUT_ERR In-Reply-To: <20070125144300.GD25265@mellanox.co.il> (Michael S. Tsirkin's message of "Thu, 25 Jan 2007 16:43:00 +0200") References: <20070125144300.GD25265@mellanox.co.il> Message-ID: Michael> Guys, what is IB_WC_RESP_TIMEOUT_ERR? I went over tables Michael> 95 and 96 in Vol1 IB spec 1.2 and I do not see anything Michael> that could map to this code. I think it is something extra that was added so the mad layer could share the ib_wc stuff. Maybe Sean or Hal remembers better...
From mst at mellanox.co.il Thu Jan 25 07:17:02 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 25 Jan 2007 17:17:02 +0200 Subject: [openib-general] IB_WC_RESP_TIMEOUT_ERR In-Reply-To: References: Message-ID: <20070125151702.GF25265@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: IB_WC_RESP_TIMEOUT_ERR > > Michael> Guys, what is IB_WC_RESP_TIMEOUT_ERR? I went over tables > Michael> 95 and 96 in Vol1 IB spec 1.2 and I do not see anything > Michael> that could map to this code. > > I think it is something extra that was added so the mad layer could > share the ib_wc stuff. Maybe Sean or Hal remembers better... Oh, right. So maybe we should add a comment - the rest of the stuff comes from the IB spec ... -- MST From halr at voltaire.com Thu Jan 25 07:19:08 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 25 Jan 2007 10:19:08 -0500 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <20070125142837.GC25265@mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> <45B7FB25.7020704@dev.mellanox.co.il> <1169734403.13608.75532.camel@hal.voltaire.com> <20070125142837.GC25265@mellanox.co.il> Message-ID: <1169738241.13608.78271.camel@hal.voltaire.com> On Thu, 2007-01-25 at 09:28, Michael S. Tsirkin wrote: > > What happens if a QoS request is made and it is not enabled on the SM side ? > > What exactly does "QoS request" mean? > Does not QoS Annex just add data in path record query? If by data you mean the additional components and the relevant component mask bits, that's exactly what I meant by a "QoS request". My question is how does OpenSM SA PR/MPR code handle these SA client requests when high level QoS option is not enabled ? Maybe this is in a patch yet to come.
BTW, I'm not sure we can discuss the specifics of the QoS Annex here. -- Hal From bugzilla-daemon at lists.openfabrics.org Thu Jan 25 07:34:16 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Thu, 25 Jan 2007 07:34:16 -0800 (PST) Subject: [openib-general] [Bug 323] New: After fatal event on the hca, the ipoib doesn't return to connected mode Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=323 Summary: After fatal event on the hca, the ipoib doesn't return to connected mode Product: OpenFabrics Linux Version: gen2 Platform: All OS/Version: Other Status: NEW Severity: normal Priority: P2 Component: IPoIB AssignedTo: bugzilla at openib.org ReportedBy: yohadd at mellanox.co.il CC: mst at mellanox.co.il, dotanb at mellanox.co.il, amitk at mellanox.co.il, tziporet at mellanox.co.il After a fatal event on the HCA, the driver restarts the device. After this reset, the ipoib is not brought up in connected mode (it is brought up in datagram mode). -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From parks at lanl.gov Thu Jan 25 07:37:25 2007 From: parks at lanl.gov (Parks Fields) Date: Thu, 25 Jan 2007 08:37:25 -0700 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: References: <20070108165714.GM20047@mellanox.co.il> Message-ID: <7.0.1.0.2.20070125083206.02759b90@lanl.gov> Hi, Is there a doc that can explain more about how this works. Also if I download ( git ) the current trunk can I build it and test ??
thanks ***** Correspondence ***** This email contains no programmatic content that requires independent ADC review From kliteyn at dev.mellanox.co.il Thu Jan 25 07:37:58 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 25 Jan 2007 17:37:58 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <1169734403.13608.75532.camel@hal.voltaire.com> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> <45B7FB25.7020704@dev.mellanox.co.il> <1169734403.13608.75532.camel@hal.voltaire.com> Message-ID: <45B8CED6.3040201@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi Yevgeny, > > On Wed, 2007-01-24 at 19:34, Yevgeny Kliteynik wrote: >> Hi Hal, >> >> Hal Rosenstock wrote: >>> Hi Yevgeny, >>> >>> On Wed, 2007-01-24 at 09:15, Yevgeny Kliteynik wrote: >>> >>> [snip...] >>> >>>>> I also have some questions about the patches >>>> Shoot >>> First, as I understand it, this higher level QoS is not yet an approved >>> standard (annex) so is this code experimental? >> I guess so >> >>> In any case, some things >>> might change, etc. so IMO this QoS should be implemented in a way that >>> minimizes the risk to the non QoS code. >> Agree >> >>> I suspect the main interactions >>> are in osm_sa_path/multipath_record.c but will also extend to the QoS >>> manager. So should this all be conditionalized with something like >>> QOS_ANNEX and by default be off with some build switch to enable this >>> code in OpenSM until be becomes standard ? >> I suggest that instead of enclosing the code in ifdef, this new code >> will be invoked only when QoS in OpenSM has been turned on. > > Perhaps. I don't see this in the SA PR/MPR patch you supplied though. > What happens if a QoS request is made and it is not enabled on the SM side ? 
> Also, what happens when a QoS request is made but only the previous > (more primitive) QoS is enabled (not this QoS support) ? I didn't try it (it's worth trying though), but I believe that SM should do whatever it does right now (before QoS) if such request is made - ignore all the QoS-related part of the query. SM treats the QoS fields as reserved fields and doesn't do anything with them. Am I wrong on this? >>> When will the remainder of the changes to the QoS manager be ready ? It >>> would be good to see the whole picture. Are there any other missing >>> pieces ? >> >> I'm working right now on checking path record for QoS constraints. >> I'm hoping to finish it in a day or two. After that, I'll do the same >> with multipath record. > > Will this take care of the questions asked above ? If so, I guess I'll > need to wait to see this. > >>> It would be good to have some documentation for this including an opensm >>> man page update. > > When do you plan on doing this ? Clearly, this is not as important as > the work immediately in front of you on this. I'll work on the documentation as soon as the code is ready. --Yevgeny >>> As far as using lex/yacc, are they invoked as part of the build >>> procedure or are the files they generate just checked in and used ? >> When lex/yacc are invoked, they generate three files: >> - osm_qos_parser_l.c >> - osm_qos_parser_y.c >> - osm_qos_parser_y.h >> These generated files should be included in the git repository, >> and they are the ones that are compiled by 'make' command. >> To cause lex/yacc generate these files on every compilation, a >> configuration flag '--enable-maintainer-mode' should be used when >> running 'configure'. >> So normally, lex/yacc won't be invoked during the build (unless the >> --enable-maintainer-mode option was selected). > >>> How could/would multiple file versions be supported ? One previous >>> example was a mention that port groups can be shared by more than one >>> manager (e.g.
QoS and partitions) so this might be made hierarchical. >>> I'd like to understand this before we get locked in. >> The parser can be enhanced to support different versions of grammar. >> It will just check the first line of the policy file: >> >> and then it will decide which grammar rules to apply according to the >> 'version' value. > > Thanks. > > -- Hal > >> --Yevgeny >> >>> There are some other lower level questions which I'll get to later. I'll >>> also review the XML file format in detail later. >>> >>> -- Hal >>> >>> > From ogerlitz at voltaire.com Thu Jan 25 07:40:25 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 25 Jan 2007 17:40:25 +0200 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> References: <000201c73f33$cabb6830$16d0180a@amr.corp.intel.com> Message-ID: <45B8CF69.8080708@voltaire.com> Sean Hefty wrote: > Add to the rdma_cm an IPOIB port space that allows interoperability with > IPoIB multicast traffic. Use of the RDMA_PS_IPOIB is limited to multicast > join/leave. > > Rename the RDMA_UD_QKEY to RDMA_UDP_QKEY to signify that the qkey is only > used with the RDMA_PS_UDP port space. > > Signed-off-by: Sean Hefty > --- > This patch differs from those posted by Or by limiting the ipoib port space > to multicast traffic only. OK, Sean i have tested the two patches and things are working fine, that is I have changed my multicast app to use RDMA_PS_IPOIB instead of RDMA_PS_UDP and I am now able to run it against itself and against ipoib in all the possibilities : tx-app / rx-ipoib tx-ipoib / rx-app tx-app / rx-app this means that basically (*) you have my OK for pushing the multicast support to OFED 1.2 (again my thinking is that this is fine for upstream as well). The only missing piece here, as we agreed yesterday is to allow using PS_IPOIB IDs for unicast traffic over librdmacm, i guess this should be fairly simple to add.
However, as the code freeze deadline becomes closer, would you be able to implement and push this by the end of this week? Basically, my thinking is that if have the code that allows PS_IPOIB to do unicast and you have both udaddy and mckey working in --both-- PS_IPOIB and PS_UDP modes - push that. how does this sounds to you? Or. From wombat2 at us.ibm.com Thu Jan 25 07:48:22 2007 From: wombat2 at us.ibm.com (Bernard King-Smith) Date: Thu, 25 Jan 2007 10:48:22 -0500 Subject: [openib-general] Suggestion to remove NAPI with IPoIB from OFED 1.2 release Message-ID: To: "EWG" cc: "Roland Dreier" , "OPENIB" Subject: [openib-general] Suggestion to remove NAPI with IPoIB from OFED 1.2 release > I suggest that in OFED 1.2 we will not include the NAPI support > The reasons are: > > * IBM interrupt handler change to support NAPI will not be ready on time > * IPoIB UD with NAPI was not tested thoroughly > * IPoIB CM mode was tested without NAPI and got to a good stability. If we remove NAPI, does that remove the rotting packet fix from OFED 1.2 also? If it does I think we still need to keep the rotting packet code in. > If no one objects I will change the OFED 1.2 plan > Thanks, > Tziporet Bernie King-Smith IBM Corporation Server Group Cluster System Performance wombat2 at us.ibm.com (845)433-8483 Tie. 293-8483 or wombat2 on NOTES "We are not responsible for the world we are born into, only for the world we leave when we die. So we have to accept what has gone before us and work to change the only thing we can, -- The Future." William Shatner -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Thu Jan 25 07:50:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 25 Jan 2007 17:50:30 +0200 Subject: [openib-general] [PATCHv4] IPoIB CM Experimental support In-Reply-To: <7.0.1.0.2.20070125083206.02759b90@lanl.gov> References: <7.0.1.0.2.20070125083206.02759b90@lanl.gov> Message-ID: <20070125155030.GG25265@mellanox.co.il> > Quoting Parks Fields : > Subject: Re: [openib-general] [PATCHv4] IPoIB CM Experimental support > > > Hi, > > Is there a doc that can explain more about how this work. Also if I > download ( git ) the current trunk can I build it and test ?? > > thanks Can you take a look at the original mail you reply to please? It included detailed answers to you questions. -- MST From halr at voltaire.com Thu Jan 25 07:45:07 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 25 Jan 2007 10:45:07 -0500 Subject: [openib-general] IB_WC_RESP_TIMEOUT_ERR In-Reply-To: References: <20070125144300.GD25265@mellanox.co.il> Message-ID: <1169739857.13608.79403.camel@hal.voltaire.com> On Thu, 2007-01-25 at 10:13, Roland Dreier wrote: > Michael> Guys, what is IB_WC_RESP_TIMEOUT_ERR? I went over tables > Michael> 95 and 96 in Vol1 IB spec 1.2 and I do not see anything > Michael> that could map to this code. > > I think it is something extra that was added so the mad layer could > share the ib_wc stuff. Maybe Sean or Hal remembers better... Good recollection :-) I had to look as it's been so long. It's for timing out sent MADs for SA client requests as well as user MAD send requests. -- Hal From mst at mellanox.co.il Thu Jan 25 07:52:03 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Thu, 25 Jan 2007 17:52:03 +0200 Subject: [openib-general] Suggestion to remove NAPI with IPoIB from OFED 1.2 release In-Reply-To: References: Message-ID: <20070125155203.GH25265@mellanox.co.il> > Quoting Bernard King-Smith : > Subject: [openib-general] Suggestion to remove NAPI with IPoIB from OFED 1.2 release > > To: > "EWG" > cc: > "Roland Dreier" , "OPENIB" > Subject: > [openib-general] Suggestion to remove NAPI with IPoIB from OFED 1.2 release > > > I suggest that in OFED 1.2 we will not include the NAPI support > > The reasons are: > > > > * IBM interrupt handler change to support NAPI will not be ready on time > > * IPoIB UD with NAPI was not tested thoroughly > > * IPoIB CM mode was tested without NAPI and got to a good stability. > > If we remove NAPI, does that remove the rotting packet fix from OFED 1.2 also? If it does I think we still need to keep the rotting packet code in. NAPI was not yet included in OFED, so Tziporet mainly proposed to remove it from OFED *plans*. I do not know what the rotting packet fix is. If it is already in OFED, I see no reason to remove it. 
-- MST From halr at voltaire.com Thu Jan 25 08:03:40 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 25 Jan 2007 11:03:40 -0500 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B8CED6.3040201@dev.mellanox.co.il> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> <45B7FB25.7020704@dev.mellanox.co.il> <1169734403.13608.75532.camel@hal.voltaire.com> <45B8CED6.3040201@dev.mellanox.co.il> Message-ID: <1169740919.13608.79933.camel@hal.voltaire.com> Hi again Yevgeny, On Thu, 2007-01-25 at 10:37, Yevgeny Kliteynik wrote: > Hi Hal, > > Hal Rosenstock wrote: > > Hi Yevgeny, > > > > On Wed, 2007-01-24 at 19:34, Yevgeny Kliteynik wrote: > >> Hi Hal, > >> > >> Hal Rosenstock wrote: > >>> Hi Yevgeny, > >>> > >>> On Wed, 2007-01-24 at 09:15, Yevgeny Kliteynik wrote: > >>> > >>> [snip...] > >>> > >>>>> I also have some questions about the patches > >>>> Shoot > >>> First, as I understand it, this higher level QoS is not yet an approved > >>> standard (annex) so is this code experimental? > >> I guess so > >> > >>> In any case, some things > >>> might change, etc. so IMO this QoS should be implemented in a way that > >>> minimizes the risk to the non QoS code. > >> Agree > >> > >>> I suspect the main interactions > >>> are in osm_sa_path/multipath_record.c but will also extend to the QoS > >>> manager. So should this all be conditionalized with something like > >>> QOS_ANNEX and by default be off with some build switch to enable this > >>> code in OpenSM until be becomes standard ? > >> I suggest that instead of enclosing the code in ifdef, this new code > >> will be invoked only when QoS in OpenSM has been turned on. > > > > Perhaps. I don't see this in the SA PR/MPR patch you supplied though. 
> > What happens if a QoS request is made and it is not enabled on the SM side ? > > Also, what happens when a QoS request is made but only the previous > > (more primitive) QoS is enabled (not this QoS support) ? > > I didn't try it (it's worth trying thought), but I believe that > SM should do whatever it does right now (bofere QoS) if such > request is made - ignore all the QoS-related part of the query. > SM refers the QoS fields as reserved fields an doesn't do anything > with them. > Am I wrong on this? I suggest first consulting the QoS Annex to see what it says and use that for hints about these implementation corner cases. > >>> When will the remainder of the changes to the QoS manager be ready ? It > >>> would be good to see the whole picture. Are there any other missing > >>> pieces ? > >> > >> I'm working right now on checking path record for QoS constraints. > >> I'm hoping to finish it in a day or two. After that, I'll do the same > >> with multipath record. > > > > Will this take care of the questions asked above ? If so, I guess I'll > > need to wait to see this. > > > >>> It would be good to have some documentation for this including an opensm > >>> man page update. > > > > When do you plan on doing this ? Clearly, this is not as important as > > the work immediately in front of you on this. > > I'll work on the documentation as soon as the code is ready. Thanks. -- Hal > --Yevgeny > > >>> As far as using lex/yacc, are they invoked as part of the build > >>> procedure or are the files they generate just checked in and used ? > >> When lex/yacc are invoked, they generate three files: > >> - osm_qos_parser_l.c > >> - osm_qos_parser_y.c > >> - osm_qos_parser_y.h > >> These generated files should be included in the git repository, > >> and they are the ones that are compiled by 'make' command. 
> >> To cause lex/yacc generate these files on every compilation, a > >> configuration flag '--enable-maintainer-mode' should be used when > >> running 'configure'. > >> So normally, lex/yacc won't be invoked during the build (unless the > >> --enable-maintainer-mode option was selected). > > > >>> How could/would multiple file versions be supported ? One previous > >>> example was a mention that port groups can be shared by more than one > >>> manager (e.g. QoS and partitions) so this might be made hierarchical. > >>> I'd like to understand this before we get locked in. > >> The parser can be enhanced to support different versions of grammar. > >> It will just check the first line of the policy file: > >> > >> and then it will decide which grammar rules to apply according to the > >> 'version' value. > > > > Thanks. > > > > -- Hal > > > >> --Yevgeny > >> > >>> There are some other lower level questions which I'll get to later. I'll > >>> also review the XML file format in detail later. > >>> > >>> -- Hal > >>> > >>> > > From kliteyn at dev.mellanox.co.il Thu Jan 25 08:53:29 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 25 Jan 2007 18:53:29 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <1169734241.13608.75454.camel@hal.voltaire.com> References: <45B768E2.9070604@dev.mellanox.co.il> <1169734241.13608.75454.camel@hal.voltaire.com> Message-ID: <45B8E089.5000804@dev.mellanox.co.il> Hi Hal. Hal Rosenstock wrote: > Hi Yevgeny, > > On Wed, 2007-01-24 at 09:10, Yevgeny Kliteynik wrote: >> Hi Hal, Sasha. >> >> Here's a description of the QoS policy file, and an >> example of such file (with more comments inside). > > This makes the start of a good document on this. If you add this to > osm/doc, I will incorporate it into the opensm man page. OK, I'll do that. 
>> QoS Policy file >> --------------- >> >> The QoS policy file is divided into 4 sub sections: >> >> * Node Group: a set of HCAs, Routers or Switches that share the same settings. >> A node groups might be a partition defined by the partition manager policy in >> terms of GUIDs. > > Are these Node or Port Groups ? It looks like port groups from the > below. Good point - it should be "Port Groups". >> Future implementations might provide support for NodeDescription >> based definition of node groups. >> >> * Fabric Setup: >> Defines how the SL2VL and VLArb tables should be setup. This policy definition >> assumes the computation of target behavior should be performed outside of >> OpenSM. >> >> * QoS-Levels Definition: >> This section defines the possible sets of parameters for QoS that a client might >> be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits >> (in case LMC > 0 is used for QoS) and TClass. > > How does this relate to/interact with partition configuration ? Also, > what about preexisting QoS ? As I understand from the osm man or from the partition-config.txt, partitions definition is intended to be used for IPoIB only. [quote] sl= - specifies SL for this IPoIB MC group (default is 0) [/quote] I think that QoS policy may only "tighten" the constraints and enforce lower-than-requested values, both in case of partition and in case of preexisting QoS settings. >> * Matching Rules: >> A list of rules that match an incoming PathRecord request to a QoS-Level. The >> rules are processed in order such as the first match is applied. Each rule is >> built out of set of match expressions which should all match for the rule to >> apply. 
The matching expressions are defined for the following fields >> - SRC and DST to lists of node groups >> - Service-ID to a list of Service-ID or Service-ID ranges >> - TClass to a list of TClass values or ranges >> >> QoS policy file example >> ----------------------- >> >> >> >> >> >> >> >> Storage >> our SRP storage targets > > Is the use clause more than commentary ? How is it "used" ? The 'use' clause is just a description of the port group that can be used for logging. Other than for logging, it is just a commentary. >> 0x1000000000000001 >> 0x1000000000000002 >> >> >> >> Virtual Servers >> node desc and IB port # >> vs1/HCA-1/P1 >> vs3/HCA-1/P1 >> vs3/HCA-2/P1 > > How are port-names used ? The syntax of the port name is as follows: "hostname/CA-num/Pnum" >> >> >> >> Partition 1 >> default settings >> Part1 >> >> > > Is this CA rather than HCA ? (What about TCAs ?) Sure, it should be 'CA'. >> >> Routers >> all routers >> ROUTER >> >> >> >> >> > ^^ > Actually, it is SL > assuming the device supports SL2VL mapping as indicate by > IsSLMappingSupported in the PortInfo:CapabilityMask. > Will the syntax handle single data VL devices which only implement SL > filtering ? Yes, it should. > Will the QoS manager support this (SL2VL without VLArb > settings) or are these required together ? Yes, it should support sl2vl w/o vlarb settings as well. >> >> >> >> Part1 >> * >> * >> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 >> >> >> >> Storage >> >> Storage2 >> >> Storage3 > > I don't quite follow across-from/to. Right, the comments there are garbage. Here the explanation: SL2VL table describes VL as function of from-port, to-port, and SL. 
group_name: It defines sl2vl table where 'to-port's belong to group_name group_name: Same as above, only that this time 'from-port's belong to group_name group_name: sl2vl tables both for 'to-port's 'from-port's that belong to group_name >> * >> 1 >> 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 >> >> >> >> >> >> >> >> Storage >> >> 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 >> 8:255,9:127,10:63,11:31,12:15,13:7,14:3 >> 10 > > What happens if the shape of VLArb indicated here does not match the > device ? The part that sets up the QoS in SM (I'm not writing this part right now), should issue error message in case VLArb definition doesn't match the device properties. >> >> >> >> >> >> >> >> 1 > > What does sn mean ? What is it used for ? 'sn' is an id of this qos level definition. It is referenced later in by QoS match rules as 'qos-level-sn' >> for the lowest priority comm >> 16 >> >> >> >> 2 >> low latency best bandwidth >> 0 >> 7 > > What is class ? I saw TClass mentioned earlier. Is this TClass or > something else ? Instead of "TClass" there should be "QoS Class". The value is the PathRecord.qos_class value that should be returned in the path record query response when a certain is applied to the returned path. >> >> > > If specified, do MTU limit and rate limit add extra limits to be imposed > on what is selected (and realizable) ? Yes > Strictly speaking, couldn't packet lifetime limit also be added to this > syntax here ? I presume it was left out as being not "interesting" as > yet. Is that correct ? I can add packet lifetime limit - it's not a big deal > Also, how are path bits used ? For now I don't do anything with them - we'll discuss this issue in the future. >> >> 3 >> just an example >> 0 >> 32 >> 1 >> 1 >> >> >> >> >> >> >> >> 1 >> low latency by class 7-9 or 11 >> 7-9,11 >> 1 >> >> >> >> 2 >> Storage targets connection> >> Storage >> 22,4719 > > What is service ? What does 22.4719 mean ? 
The syntax is service_id1,service_id2,..., so in the example above these are actually two service ids. As for the exact meaning of this, I'm not sure - I need to think about it... >> 3 > > What are match-levels used for ? Actually, they are not used - they shouldn't appear here. Somehow it was copy-pasted here from one of the older versions of the policy file. -- Yevgeny > -- Hal > >> >> >> >> >> >> >> >> -- Yevgeny >> >> Yevgeny Kliteynik wrote: >>> Hi Sasha, >>> >>> Sasha Khapyorsky wrote: >>>> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: >>>>> Hi Sasha. >>>>> >>>>> Sasha Khapyorsky wrote: >>>>>> Hi Yevgeny, >>>>>> >>>>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: >>>>>>> Hi Hal >>>>>>> >>>>>>> The following series of six patches implements QoS policy file parser: >>>>>>> >>>>>>> 1. QoS parser Lex file >>>>>>> 2. QoS parser Lex-generated c file >>>>>>> 3. QoS parser grammar (Yacc) file >>>>>>> 4. QoS parser Yacc-generated grammar c and h file >>>>>>> 5. QoS parser header file that defines parse tree data structures >>>>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files >>>>>> Is there any description of proposed format and functionality? >>>>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few >>>>> minor modifications. You can find the RFC here: >>>>> http://openib.org/pipermail/openib-general/2006-May/022336.html >>>> This was RFC and couple of issues were discussed then. Now you are about >>>> implementation phase and exact format description would be desired. For >>>> example what "few minor modifications" are? >>> I'll prepare an example file with explanations. >>> >>> -- Yevgeny >>> >>>>>> Also what about using human readable formats? >>>>> To me the xml-like format in the RFC looks pretty readable. >>>>> It has very limited number of keywords (tags), so it's easy >>>>> to follow and/or to modify. >>>> It is your opinion, not everybody will agree with it (AFAIR this was >>>> discussed too during RFC). 
>>>> >>>> I would not be care, but I don't know any example of really successful >>>> XML using for configuration purposes (especially where advanced graphical >>>> config editors/viewers were not used). Do you know? >>>> >>>> Sasha >>>> >>> _______________________________________________ >>> openib-general mailing list >>> openib-general at openib.org >>> http://openib.org/mailman/listinfo/openib-general >>> >>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >>> > From sean.hefty at intel.com Thu Jan 25 09:17:29 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 25 Jan 2007 09:17:29 -0800 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <45B8CF69.8080708@voltaire.com> Message-ID: <000001c740a4$b1501c20$ff0da8c0@amr.corp.intel.com> >this means that basically (*) you have my OK for pushing the mutlicast >support to OFED 1.2 (again my thinking is that this is fine for upstream >as well). I've pushed these changes out to my rdma-dev.git tree. >The only missing piece here, as we agreed yesterday is to allow using >PS_IPOIB IDs for unicast traffic over librdmacm, i guess this should be >fairly simple to add. I'm adding this now. I would like to include all of these changes as part of the multicast code push for OFED/upstream. I hope to test this today. 
- Sean From bugzilla-daemon at lists.openfabrics.org Thu Jan 25 09:27:00 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Thu, 25 Jan 2007 09:27:00 -0800 (PST) Subject: [openib-general] [Bug 324] New: After fatal event on the hca, the ipoib doesn't return to connected mode Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=324 Summary: After fatal event on the hca, the ipoib doesn't return to connected mode Product: OpenFabrics Linux Version: gen2 Platform: All OS/Version: Other Status: NEW Severity: normal Priority: P2 Component: IPoIB AssignedTo: bugzilla at openib.org ReportedBy: yohadd at mellanox.co.il CC: mst at mellanox.co.il, dotanb at mellanox.co.il, amitk at mellanox.co.il, tziporet at mellanox.co.il After fatal event on the hca, the driver restarts the device. After this reset, the ipoib is not brought up in connected mode (it is brought up in datagram mode). -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From rdreier at cisco.com Thu Jan 25 10:04:46 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 10:04:46 -0800 Subject: [openib-general] [RFT] [PATCH] Add ABI compatibility for apps linked against libibverbs 1.0 In-Reply-To: (Roland Dreier's message of "Tue, 23 Jan 2007 16:56:04 -0800") References: Message-ID: OK, if no one else is going to test this, I guess I have to... Anyway, I just built Open MPI 1.1.2 against libibverbs 1.0 and tried NetPIPE 3.6.2's NPmpi with libibverbs.git master branch + the ABI compat below. I discovered a few problems (including a stupid problem with a test change to configure.in that got left behind by mistake, which would lead to the ABI compat stuff not being enabled at all). Updated patch is below. 
I would still appreciate test reports with other apps, but now I think I'm confident enough that I will push this out on the libibverbs.git master branch soon. Thanks, Roland --- Add a compatibility layer that allows applications (but not low-level drivers) linked against libibverbs 1.0 to work with libibverbs 1.1. This is done by using Linux's versioned symbol linking support: the native libibverbs entry points are given IBVERBS_1.1 versions, and compatibility wrappers for entry points from libibverbs 1.0 are created with an IBVERBS_1.0 version (to match what libibverbs 1.0 exported). In essence these wrappers create compatible proxies for every structure returned to the application (struct ibv_device, ibv_context, ibv_pd, etc), and map between the proxy and the real object when the application calls into libibverbs. This code is mostly straightforward, with a few complications in handling async events, because the pointers in event structures must be translated back to proxy structures when they are returned to the application. There are a few further wrinkles because the calls to data path functions (poll CQ, post send, etc) are actually inline functions that call directly into the context ops, so the context ops proxy structure must actually contain pointers to compatibility wrappers for these functions as well. This may have some performance impact but it seems the overhead is unavoidable. 
Signed-off-by: Roland Dreier --- Makefile.am | 6 +- configure.in | 8 + src/compat-1_0.c | 898 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/device.c | 28 +- src/ibverbs.h | 14 + src/libibverbs.map | 23 +- src/verbs.c | 106 ++++--- 7 files changed, 1023 insertions(+), 60 deletions(-) diff --git a/Makefile.am b/Makefile.am index 35f4468..4c7ce9b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -8,9 +8,9 @@ src_libibverbs_la_CFLAGS = $(AM_CFLAGS) -DIBV_CONFIG_DIR=\"$(sysconfdir)/libibve libibverbs_version_script = @LIBIBVERBS_VERSION_SCRIPT@ -src_libibverbs_la_SOURCES = src/cmd.c src/device.c src/init.c src/marshall.c \ - src/memory.c src/sysfs.c src/verbs.c -src_libibverbs_la_LDFLAGS = -version-info 2 -export-dynamic \ +src_libibverbs_la_SOURCES = src/cmd.c src/compat-1_0.c src/device.c src/init.c \ + src/marshall.c src/memory.c src/sysfs.c src/verbs.c +src_libibverbs_la_LDFLAGS = -version-info 1 -export-dynamic \ $(libibverbs_version_script) src_libibverbs_la_DEPENDENCIES = $(srcdir)/src/libibverbs.map diff --git a/configure.in b/configure.in index d98867f..7fb7fc1 100644 --- a/configure.in +++ b/configure.in @@ -50,5 +50,13 @@ AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, fi]) AC_SUBST(LIBIBVERBS_VERSION_SCRIPT) +AC_CACHE_CHECK(for .symver assembler support, ac_cv_asm_symver_support, + [AC_TRY_COMPILE(, [asm("symbol:\n.symver symbol, api at ABI\n");], + ac_cv_asm_symver_support=yes, + ac_cv_asm_symver_support=no)]) +if test $ac_cv_asm_symver_support = yes; then + AC_DEFINE([HAVE_SYMVER_SUPPORT], 1, [assembler has .symver support]) +fi + AC_CONFIG_FILES([Makefile libibverbs.spec]) AC_OUTPUT diff --git a/src/compat-1_0.c b/src/compat-1_0.c new file mode 100644 index 0000000..459ade9 --- /dev/null +++ b/src/compat-1_0.c @@ -0,0 +1,898 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include + +#include "ibverbs.h" + +struct ibv_pd_1_0 { + struct ibv_context_1_0 *context; + uint32_t handle; + + struct ibv_pd *real_pd; +}; + +struct ibv_mr_1_0 { + struct ibv_context_1_0 *context; + struct ibv_pd_1_0 *pd; + uint32_t handle; + uint32_t lkey; + uint32_t rkey; + + struct ibv_mr *real_mr; +}; + +struct ibv_srq_1_0 { + struct ibv_context_1_0 *context; + void *srq_context; + struct ibv_pd_1_0 *pd; + uint32_t handle; + + pthread_mutex_t mutex; + pthread_cond_t cond; + uint32_t events_completed; + + struct ibv_srq *real_srq; +}; + +struct ibv_qp_init_attr_1_0 { + void *qp_context; + struct ibv_cq_1_0 *send_cq; + struct ibv_cq_1_0 *recv_cq; + struct ibv_srq_1_0 *srq; + struct ibv_qp_cap cap; + enum ibv_qp_type qp_type; + int sq_sig_all; +}; + +struct ibv_send_wr_1_0 { + struct ibv_send_wr_1_0 *next; + uint64_t wr_id; + struct ibv_sge *sg_list; + int num_sge; + enum ibv_wr_opcode opcode; + enum ibv_send_flags send_flags; + uint32_t imm_data; /* in network byte order */ + union { + struct { + uint64_t remote_addr; + uint32_t rkey; + } rdma; + struct { + uint64_t remote_addr; + uint64_t compare_add; + uint64_t swap; + uint32_t rkey; + } atomic; + struct { + struct ibv_ah_1_0 *ah; + uint32_t remote_qpn; + uint32_t remote_qkey; + } ud; + } wr; +}; + +struct ibv_recv_wr_1_0 { + struct ibv_recv_wr_1_0 *next; + uint64_t wr_id; + struct ibv_sge *sg_list; + int num_sge; +}; + +struct ibv_qp_1_0 { + struct ibv_context_1_0 *context; + void *qp_context; + struct ibv_pd_1_0 *pd; + struct ibv_cq_1_0 *send_cq; + struct ibv_cq_1_0 *recv_cq; + struct ibv_srq_1_0 *srq; + uint32_t handle; + uint32_t qp_num; + enum ibv_qp_state state; + enum ibv_qp_type qp_type; + + pthread_mutex_t mutex; + pthread_cond_t cond; + uint32_t events_completed; + + struct ibv_qp *real_qp; +}; + +struct ibv_cq_1_0 { + struct ibv_context_1_0 *context; + void *cq_context; + uint32_t handle; + 
int cqe; + + pthread_mutex_t mutex; + pthread_cond_t cond; + uint32_t comp_events_completed; + uint32_t async_events_completed; + + struct ibv_cq *real_cq; +}; + +struct ibv_ah_1_0 { + struct ibv_context_1_0 *context; + struct ibv_pd_1_0 *pd; + uint32_t handle; + + struct ibv_ah *real_ah; +}; + +struct ibv_device_1_0 { + void *obsolete_sysfs_dev; + void *obsolete_sysfs_ibdev; + struct ibv_device *real_device; /* was obsolete driver member */ + struct ibv_device_ops ops; +}; + +struct ibv_context_ops_1_0 { + int (*query_device)(struct ibv_context *context, + struct ibv_device_attr *device_attr); + int (*query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); + struct ibv_pd * (*alloc_pd)(struct ibv_context *context); + int (*dealloc_pd)(struct ibv_pd *pd); + struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, + enum ibv_access_flags access); + int (*dereg_mr)(struct ibv_mr *mr); + struct ibv_cq * (*create_cq)(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector); + int (*poll_cq)(struct ibv_cq_1_0 *cq, int num_entries, + struct ibv_wc *wc); + int (*req_notify_cq)(struct ibv_cq_1_0 *cq, + int solicited_only); + void (*cq_event)(struct ibv_cq *cq); + int (*resize_cq)(struct ibv_cq *cq, int cqe); + int (*destroy_cq)(struct ibv_cq *cq); + struct ibv_srq * (*create_srq)(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr); + int (*modify_srq)(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask); + int (*query_srq)(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr); + int (*destroy_srq)(struct ibv_srq *srq); + int (*post_srq_recv)(struct ibv_srq_1_0 *srq, + struct ibv_recv_wr_1_0 *recv_wr, + struct ibv_recv_wr_1_0 **bad_recv_wr); + struct ibv_qp * (*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); + int (*query_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct 
ibv_qp_init_attr *init_attr); + int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask); + int (*destroy_qp)(struct ibv_qp *qp); + int (*post_send)(struct ibv_qp_1_0 *qp, + struct ibv_send_wr_1_0 *wr, + struct ibv_send_wr_1_0 **bad_wr); + int (*post_recv)(struct ibv_qp_1_0 *qp, + struct ibv_recv_wr_1_0 *wr, + struct ibv_recv_wr_1_0 **bad_wr); + struct ibv_ah * (*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr); + int (*destroy_ah)(struct ibv_ah *ah); + int (*attach_mcast)(struct ibv_qp *qp, union ibv_gid *gid, + uint16_t lid); + int (*detach_mcast)(struct ibv_qp *qp, union ibv_gid *gid, + uint16_t lid); +}; + +struct ibv_context_1_0 { + struct ibv_device_1_0 *device; + struct ibv_context_ops_1_0 ops; + int cmd_fd; + int async_fd; + int num_comp_vectors; + + struct ibv_context *real_context; /* was abi_compat member */ +}; + +struct ibv_device_1_0 **__ibv_get_device_list_1_0(int *num) +{ + struct ibv_device **real_list; + struct ibv_device_1_0 **l; + int i, n; + + real_list = ibv_get_device_list(&n); + if (!real_list) + return NULL; + + l = calloc(n + 2, sizeof (struct ibv_device_1_0 *)); + if (!l) + return NULL; + + l[0] = (void *) real_list; + + for (i = 0; i < n; ++i) { + l[i + 1] = calloc(1, sizeof (struct ibv_device_1_0)); + if (!l[i + 1]) + goto fail; + l[i + 1]->real_device = real_list[i]; + } + + if (num) + *num = n; + + return l + 1; + +fail: + for (i = 1; i <= n; ++i) + if (l[i]) + free(l[i]); + ibv_free_device_list(real_list); + return NULL; +} +symver(__ibv_get_device_list_1_0, ibv_get_device_list, IBVERBS_1.0); + +void __ibv_free_device_list_1_0(struct ibv_device_1_0 **list) +{ + struct ibv_device_1_0 **l = list; + + while (*l) { + free(*l); + ++l; + } + + ibv_free_device_list((void *) list[-1]); + free(list - 1); +} +symver(__ibv_free_device_list_1_0, ibv_free_device_list, IBVERBS_1.0); + +const char *__ibv_get_device_name_1_0(struct ibv_device_1_0 *device) +{ + return 
ibv_get_device_name(device->real_device); +} +symver(__ibv_get_device_name_1_0, ibv_get_device_name, IBVERBS_1.0); + +uint64_t __ibv_get_device_guid_1_0(struct ibv_device_1_0 *device) +{ + return ibv_get_device_guid(device->real_device); +} +symver(__ibv_get_device_guid_1_0, ibv_get_device_guid, IBVERBS_1.0); + +static int poll_cq_wrapper_1_0(struct ibv_cq_1_0 *cq, int num_entries, + struct ibv_wc *wc) +{ + return cq->context->real_context->ops.poll_cq(cq->real_cq, num_entries, wc); +} + +static int req_notify_cq_wrapper_1_0(struct ibv_cq_1_0 *cq, int sol_only) +{ + return cq->context->real_context->ops.req_notify_cq(cq->real_cq, sol_only); +} + +static int post_srq_recv_wrapper_1_0(struct ibv_srq_1_0 *srq, struct ibv_recv_wr_1_0 *wr, + struct ibv_recv_wr_1_0 **bad_wr) +{ + struct ibv_recv_wr_1_0 *w; + struct ibv_recv_wr *real_wr, *head_wr = NULL, *tail_wr = NULL, *real_bad_wr; + int ret; + + for (w = wr; w; w = w->next) { + real_wr = alloca(sizeof *real_wr); + real_wr->wr_id = w->wr_id; + real_wr->sg_list = w->sg_list; + real_wr->num_sge = w->num_sge; + real_wr->next = NULL; + if (tail_wr) + tail_wr->next = real_wr; + else + head_wr = real_wr; + + tail_wr = real_wr; + } + + ret = srq->context->real_context->ops.post_srq_recv(srq->real_srq, head_wr, + &real_bad_wr); + + if (ret) { + for (real_wr = head_wr, w = wr; + real_wr; + real_wr = real_wr->next, w = w->next) + if (real_wr == real_bad_wr) { + *bad_wr = w; + break; + } + } + + return ret; +} + +static int post_send_wrapper_1_0(struct ibv_qp_1_0 *qp, struct ibv_send_wr_1_0 *wr, + struct ibv_send_wr_1_0 **bad_wr) +{ + struct ibv_send_wr_1_0 *w; + struct ibv_send_wr *real_wr, *head_wr = NULL, *tail_wr = NULL, *real_bad_wr; + int is_ud = qp->qp_type == IBV_QPT_UD; + int ret; + + for (w = wr; w; w = w->next) { + real_wr = alloca(sizeof *real_wr); + real_wr->wr_id = w->wr_id; + real_wr->next = NULL; + + memcpy(&real_wr->sg_list, &w->sg_list, + sizeof *w - offsetof(struct ibv_send_wr, sg_list)); + + if (is_ud) + 
real_wr->wr.ud.ah = w->wr.ud.ah->real_ah; + + if (tail_wr) + tail_wr->next = real_wr; + else + head_wr = real_wr; + + tail_wr = real_wr; + } + + ret = qp->context->real_context->ops.post_send(qp->real_qp, head_wr, + &real_bad_wr); + + if (ret) { + for (real_wr = head_wr, w = wr; + real_wr; + real_wr = real_wr->next, w = w->next) + if (real_wr == real_bad_wr) { + *bad_wr = w; + break; + } + } + + return ret; +} + +static int post_recv_wrapper_1_0(struct ibv_qp_1_0 *qp, struct ibv_recv_wr_1_0 *wr, + struct ibv_recv_wr_1_0 **bad_wr) +{ + struct ibv_recv_wr_1_0 *w; + struct ibv_recv_wr *real_wr, *head_wr = NULL, *tail_wr = NULL, *real_bad_wr; + int ret; + + for (w = wr; w; w = w->next) { + real_wr = alloca(sizeof *real_wr); + real_wr->wr_id = w->wr_id; + real_wr->sg_list = w->sg_list; + real_wr->num_sge = w->num_sge; + real_wr->next = NULL; + if (tail_wr) + tail_wr->next = real_wr; + else + head_wr = real_wr; + + tail_wr = real_wr; + } + + ret = qp->context->real_context->ops.post_recv(qp->real_qp, head_wr, + &real_bad_wr); + + if (ret) { + for (real_wr = head_wr, w = wr; + real_wr; + real_wr = real_wr->next, w = w->next) + if (real_wr == real_bad_wr) { + *bad_wr = w; + break; + } + } + + return ret; +} + +struct ibv_context_1_0 *__ibv_open_device_1_0(struct ibv_device_1_0 *device) +{ + struct ibv_context *real_ctx; + struct ibv_context_1_0 *ctx; + + ctx = malloc(sizeof *ctx); + if (!ctx) + return NULL; + + real_ctx = ibv_open_device(device->real_device); + if (!real_ctx) { + free(ctx); + return NULL; + } + + ctx->device = device; + ctx->real_context = real_ctx; + + ctx->ops.poll_cq = poll_cq_wrapper_1_0; + ctx->ops.req_notify_cq = req_notify_cq_wrapper_1_0; + ctx->ops.post_send = post_send_wrapper_1_0; + ctx->ops.post_recv = post_recv_wrapper_1_0; + ctx->ops.post_srq_recv = post_srq_recv_wrapper_1_0; + + return ctx; +} +symver(__ibv_open_device_1_0, ibv_open_device, IBVERBS_1.0); + +int __ibv_close_device_1_0(struct ibv_context_1_0 *context) +{ + int ret; + + ret = 
ibv_close_device(context->real_context); + if (ret) + return ret; + + free(context); + return 0; +} +symver(__ibv_close_device_1_0, ibv_close_device, IBVERBS_1.0); + +int __ibv_get_async_event_1_0(struct ibv_context_1_0 *context, + struct ibv_async_event *event) +{ + int ret; + + ret = ibv_get_async_event(context->real_context, event); + if (ret) + return ret; + + switch (event->event_type) { + case IBV_EVENT_CQ_ERR: + event->element.cq = event->element.cq->cq_context; + break; + + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_QP_LAST_WQE_REACHED: + event->element.qp = event->element.qp->qp_context; + break; + + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + event->element.srq = event->element.srq->srq_context; + break; + + default: + break; + } + + return ret; +} +symver(__ibv_get_async_event_1_0, ibv_get_async_event, IBVERBS_1.0); + +void __ibv_ack_async_event_1_0(struct ibv_async_event *event) +{ + struct ibv_async_event real_event = *event; + + switch (event->event_type) { + case IBV_EVENT_CQ_ERR: + real_event.element.cq = + ((struct ibv_cq_1_0 *) event->element.cq)->real_cq; + break; + + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_QP_LAST_WQE_REACHED: + real_event.element.qp = + ((struct ibv_qp_1_0 *) event->element.qp)->real_qp; + break; + + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + real_event.element.srq = + ((struct ibv_srq_1_0 *) event->element.srq)->real_srq; + break; + + default: + break; + } + + ibv_ack_async_event(&real_event); +} +symver(__ibv_ack_async_event_1_0, ibv_ack_async_event, IBVERBS_1.0); + +int __ibv_query_device_1_0(struct ibv_context_1_0 *context, + struct 
ibv_device_attr *device_attr) +{ + return ibv_query_device(context->real_context, device_attr); +} +symver(__ibv_query_device_1_0, ibv_query_device, IBVERBS_1.0); + +int __ibv_query_port_1_0(struct ibv_context_1_0 *context, uint8_t port_num, + struct ibv_port_attr *port_attr) +{ + return ibv_query_port(context->real_context, port_num, port_attr); +} +symver(__ibv_query_port_1_0, ibv_query_port, IBVERBS_1.0); + +int __ibv_query_gid_1_0(struct ibv_context_1_0 *context, uint8_t port_num, + int index, union ibv_gid *gid) +{ + return ibv_query_gid(context->real_context, port_num, index, gid); +} +symver(__ibv_query_gid_1_0, ibv_query_gid, IBVERBS_1.0); + +int __ibv_query_pkey_1_0(struct ibv_context_1_0 *context, uint8_t port_num, + int index, uint16_t *pkey) +{ + return ibv_query_pkey(context->real_context, port_num, index, pkey); +} +symver(__ibv_query_pkey_1_0, ibv_query_pkey, IBVERBS_1.0); + +struct ibv_pd_1_0 *__ibv_alloc_pd_1_0(struct ibv_context_1_0 *context) +{ + struct ibv_pd *real_pd; + struct ibv_pd_1_0 *pd; + + pd = malloc(sizeof *pd); + if (!pd) + return NULL; + + real_pd = ibv_alloc_pd(context->real_context); + if (!real_pd) { + free(pd); + return NULL; + } + + pd->context = context; + pd->real_pd = real_pd; + + return pd; +} +symver(__ibv_alloc_pd_1_0, ibv_alloc_pd, IBVERBS_1.0); + +int __ibv_dealloc_pd_1_0(struct ibv_pd_1_0 *pd) +{ + int ret; + + ret = ibv_dealloc_pd(pd->real_pd); + if (ret) + return ret; + + free(pd); + return 0; +} +symver(__ibv_dealloc_pd_1_0, ibv_dealloc_pd, IBVERBS_1.0); + +struct ibv_mr_1_0 *__ibv_reg_mr_1_0(struct ibv_pd_1_0 *pd, void *addr, + size_t length, enum ibv_access_flags access) +{ + struct ibv_mr *real_mr; + struct ibv_mr_1_0 *mr; + + mr = malloc(sizeof *mr); + if (!mr) + return NULL; + + real_mr = ibv_reg_mr(pd->real_pd, addr, length, access); + if (!real_mr) { + free(mr); + return NULL; + } + + mr->context = pd->context; + mr->pd = pd; + mr->lkey = real_mr->lkey; + mr->rkey = real_mr->rkey; + mr->real_mr = real_mr; + + 
return mr; +} +symver(__ibv_reg_mr_1_0, ibv_reg_mr, IBVERBS_1.0); + +int __ibv_dereg_mr_1_0(struct ibv_mr_1_0 *mr) +{ + int ret; + + ret = ibv_dereg_mr(mr->real_mr); + if (ret) + return ret; + + free(mr); + return 0; +} +symver(__ibv_dereg_mr_1_0, ibv_dereg_mr, IBVERBS_1.0); + +struct ibv_cq_1_0 *__ibv_create_cq_1_0(struct ibv_context_1_0 *context, int cqe, + void *cq_context, + struct ibv_comp_channel *channel, + int comp_vector) +{ + struct ibv_cq *real_cq; + struct ibv_cq_1_0 *cq; + + cq = malloc(sizeof *cq); + if (!cq) + return NULL; + + real_cq = ibv_create_cq(context->real_context, cqe, cq_context, + channel, comp_vector); + if (!real_cq) { + free(cq); + return NULL; + } + + cq->context = context; + cq->cq_context = cq_context; + cq->cqe = cqe; + cq->real_cq = real_cq; + + real_cq->cq_context = cq; + + return cq; +} +symver(__ibv_create_cq_1_0, ibv_create_cq, IBVERBS_1.0); + +int __ibv_resize_cq_1_0(struct ibv_cq_1_0 *cq, int cqe) +{ + return ibv_resize_cq(cq->real_cq, cqe); +} +symver(__ibv_resize_cq_1_0, ibv_resize_cq, IBVERBS_1.0); + +int __ibv_destroy_cq_1_0(struct ibv_cq_1_0 *cq) +{ + int ret; + + ret = ibv_destroy_cq(cq->real_cq); + if (ret) + return ret; + + free(cq); + return 0; +} +symver(__ibv_destroy_cq_1_0, ibv_destroy_cq, IBVERBS_1.0); + +int __ibv_get_cq_event_1_0(struct ibv_comp_channel *channel, + struct ibv_cq_1_0 **cq, void **cq_context) +{ + struct ibv_cq *real_cq; + void *cq_ptr; + int ret; + + ret = ibv_get_cq_event(channel, &real_cq, &cq_ptr); + if (ret) + return ret; + + *cq = cq_ptr; + *cq_context = (*cq)->cq_context; + + return 0; +} +symver(__ibv_get_cq_event_1_0, ibv_get_cq_event, IBVERBS_1.0); + +void __ibv_ack_cq_events_1_0(struct ibv_cq_1_0 *cq, unsigned int nevents) +{ + ibv_ack_cq_events(cq->real_cq, nevents); +} +symver(__ibv_ack_cq_events_1_0, ibv_ack_cq_events, IBVERBS_1.0); + +struct ibv_srq_1_0 *__ibv_create_srq_1_0(struct ibv_pd_1_0 *pd, + struct ibv_srq_init_attr *srq_init_attr) +{ + struct ibv_srq *real_srq; + struct 
ibv_srq_1_0 *srq; + + srq = malloc(sizeof *srq); + if (!srq) + return NULL; + + real_srq = ibv_create_srq(pd->real_pd, srq_init_attr); + if (!real_srq) { + free(srq); + return NULL; + } + + srq->context = pd->context; + srq->srq_context = srq_init_attr->srq_context; + srq->pd = pd; + srq->real_srq = real_srq; + + real_srq->srq_context = srq; + + return srq; +} +symver(__ibv_create_srq_1_0, ibv_create_srq, IBVERBS_1.0); + +int __ibv_modify_srq_1_0(struct ibv_srq_1_0 *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask) +{ + return ibv_modify_srq(srq->real_srq, srq_attr, srq_attr_mask); +} +symver(__ibv_modify_srq_1_0, ibv_modify_srq, IBVERBS_1.0); + +int __ibv_query_srq_1_0(struct ibv_srq_1_0 *srq, struct ibv_srq_attr *srq_attr) +{ + return ibv_query_srq(srq->real_srq, srq_attr); +} +symver(__ibv_query_srq_1_0, ibv_query_srq, IBVERBS_1.0); + +int __ibv_destroy_srq_1_0(struct ibv_srq_1_0 *srq) +{ + int ret; + + ret = ibv_destroy_srq(srq->real_srq); + if (ret) + return ret; + + free(srq); + return 0; +} +symver(__ibv_destroy_srq_1_0, ibv_destroy_srq, IBVERBS_1.0); + +struct ibv_qp_1_0 *__ibv_create_qp_1_0(struct ibv_pd_1_0 *pd, + struct ibv_qp_init_attr_1_0 *qp_init_attr) +{ + struct ibv_qp *real_qp; + struct ibv_qp_1_0 *qp; + struct ibv_qp_init_attr real_init_attr; + + qp = malloc(sizeof *qp); + if (!qp) + return NULL; + + real_init_attr.qp_context = qp_init_attr->qp_context; + real_init_attr.send_cq = qp_init_attr->send_cq->real_cq; + real_init_attr.recv_cq = qp_init_attr->recv_cq->real_cq; + real_init_attr.srq = qp_init_attr->srq ? 
+ qp_init_attr->srq->real_srq : NULL; + real_init_attr.cap = qp_init_attr->cap; + real_init_attr.qp_type = qp_init_attr->qp_type; + real_init_attr.sq_sig_all = qp_init_attr->sq_sig_all; + + real_qp = ibv_create_qp(pd->real_pd, &real_init_attr); + if (!real_qp) { + free(qp); + return NULL; + } + + qp->context = pd->context; + qp->qp_context = qp_init_attr->qp_context; + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->recv_cq = qp_init_attr->recv_cq; + qp->srq = qp_init_attr->srq; + qp->qp_type = qp_init_attr->qp_type; + qp->qp_num = real_qp->qp_num; + qp->real_qp = real_qp; + + qp_init_attr->cap = real_init_attr.cap; + + real_qp->qp_context = qp; + + return qp; +} +symver(__ibv_create_qp_1_0, ibv_create_qp, IBVERBS_1.0); + +int __ibv_query_qp_1_0(struct ibv_qp_1_0 *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr_1_0 *init_attr) +{ + struct ibv_qp_init_attr real_init_attr; + int ret; + + ret = ibv_query_qp(qp->real_qp, attr, attr_mask, &real_init_attr); + if (ret) + return ret; + + init_attr->qp_context = qp->qp_context; + init_attr->send_cq = real_init_attr.send_cq->cq_context; + init_attr->recv_cq = real_init_attr.recv_cq->cq_context; + init_attr->srq = real_init_attr.srq->srq_context; + init_attr->qp_type = real_init_attr.qp_type; + init_attr->cap = real_init_attr.cap; + init_attr->sq_sig_all = real_init_attr.sq_sig_all; + + return 0; +} +symver(__ibv_query_qp_1_0, ibv_query_qp, IBVERBS_1.0); + +int __ibv_modify_qp_1_0(struct ibv_qp_1_0 *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask) +{ + return ibv_modify_qp(qp->real_qp, attr, attr_mask); +} +symver(__ibv_modify_qp_1_0, ibv_modify_qp, IBVERBS_1.0); + +int __ibv_destroy_qp_1_0(struct ibv_qp_1_0 *qp) +{ + int ret; + + ret = ibv_destroy_qp(qp->real_qp); + if (ret) + return ret; + + free(qp); + return 0; +} +symver(__ibv_destroy_qp_1_0, ibv_destroy_qp, IBVERBS_1.0); + +struct ibv_ah_1_0 *__ibv_create_ah_1_0(struct ibv_pd_1_0 *pd, + struct 
ibv_ah_attr *attr) +{ + struct ibv_ah *real_ah; + struct ibv_ah_1_0 *ah; + + ah = malloc(sizeof *ah); + if (!ah) + return NULL; + + real_ah = ibv_create_ah(pd->real_pd, attr); + if (!real_ah) { + free(ah); + return NULL; + } + + ah->context = pd->context; + ah->pd = pd; + ah->real_ah = real_ah; + + return ah; +} +symver(__ibv_create_ah_1_0, ibv_create_ah, IBVERBS_1.0); + +int __ibv_destroy_ah_1_0(struct ibv_ah_1_0 *ah) +{ + int ret; + + ret = ibv_destroy_ah(ah->real_ah); + if (ret) + return ret; + + free(ah); + return 0; +} +symver(__ibv_destroy_ah_1_0, ibv_destroy_ah, IBVERBS_1.0); + +int __ibv_attach_mcast_1_0(struct ibv_qp_1_0 *qp, union ibv_gid *gid, uint16_t lid) +{ + return ibv_attach_mcast(qp->real_qp, gid, lid); +} +symver(__ibv_attach_mcast_1_0, ibv_attach_mcast, IBVERBS_1.0); + +int __ibv_detach_mcast_1_0(struct ibv_qp_1_0 *qp, union ibv_gid *gid, uint16_t lid) +{ + return ibv_detach_mcast(qp->real_qp, gid, lid); +} +symver(__ibv_detach_mcast_1_0, ibv_detach_mcast, IBVERBS_1.0); diff --git a/src/device.c b/src/device.c index f4e4473..bca1ce9 100644 --- a/src/device.c +++ b/src/device.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -52,7 +52,7 @@ static pthread_mutex_t device_list_lock = PTHREAD_MUTEX_INITIALIZER; static int num_devices; static struct ibv_device **device_list; -struct ibv_device **ibv_get_device_list(int *num) +struct ibv_device **__ibv_get_device_list(int *num) { struct ibv_device **l; int i; @@ -77,18 +77,21 @@ out: return l; } +default_symver(__ibv_get_device_list, ibv_get_device_list); -void ibv_free_device_list(struct ibv_device **list) +void __ibv_free_device_list(struct ibv_device **list) { free(list); } +default_symver(__ibv_free_device_list, ibv_free_device_list); -const char *ibv_get_device_name(struct ibv_device *device) +const char *__ibv_get_device_name(struct ibv_device *device) { return device->name; } +default_symver(__ibv_get_device_name, ibv_get_device_name); -uint64_t ibv_get_device_guid(struct ibv_device *device) +uint64_t __ibv_get_device_guid(struct ibv_device *device) { char attr[24]; uint64_t guid = 0; @@ -108,8 +111,9 @@ uint64_t ibv_get_device_guid(struct ibv_device *device) return htonll(guid); } +default_symver(__ibv_get_device_guid, ibv_get_device_guid); -struct ibv_context *ibv_open_device(struct ibv_device *device) +struct ibv_context *__ibv_open_device(struct ibv_device *device) { char *devpath; int cmd_fd; @@ -142,8 +146,9 @@ err: return NULL; } +default_symver(__ibv_open_device, ibv_open_device); -int ibv_close_device(struct ibv_context *context) +int __ibv_close_device(struct ibv_context *context) { int async_fd = context->async_fd; int cmd_fd = context->cmd_fd; @@ -164,9 +169,10 @@ int ibv_close_device(struct ibv_context *context) return 0; } +default_symver(__ibv_close_device, ibv_close_device); -int ibv_get_async_event(struct ibv_context *context, - struct ibv_async_event *event) +int __ibv_get_async_event(struct ibv_context *context, + struct ibv_async_event *event) { struct ibv_kern_async_event ev; @@ -206,8 +212,9 @@ int ibv_get_async_event(struct ibv_context *context, return 
0; } +default_symver(__ibv_get_async_event, ibv_get_async_event); -void ibv_ack_async_event(struct ibv_async_event *event) +void __ibv_ack_async_event(struct ibv_async_event *event) { switch (event->event_type) { case IBV_EVENT_CQ_ERR: @@ -258,3 +265,4 @@ void ibv_ack_async_event(struct ibv_async_event *event) return; } } +default_symver(__ibv_ack_async_event, ibv_ack_async_event); diff --git a/src/ibverbs.h b/src/ibverbs.h index 14330f8..b1d2c2b 100644 --- a/src/ibverbs.h +++ b/src/ibverbs.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -56,6 +57,19 @@ #define INIT __attribute__((constructor)) #define FINI __attribute__((destructor)) +#define DEFAULT_ABI "IBVERBS_1.1" + +#ifdef HAVE_SYMVER_SUPPORT +# define symver(name, api, ver) \ + asm(".symver " #name "," #api "@" #ver) +# define default_symver(name, api) \ + asm(".symver " #name "," #api "@@" DEFAULT_ABI) +#else +# define symver(name, api, ver) +# define default_symver(name, api) \ + extern __typeof(name) api __attribute__((alias(#name))) +#endif /* HAVE_SYMVER_SUPPORT */ + #define PFX "libibverbs: " struct ibv_abi_compat_v2 { diff --git a/src/libibverbs.map b/src/libibverbs.map index 795dd55..3a346ed 100644 --- a/src/libibverbs.map +++ b/src/libibverbs.map @@ -32,8 +32,6 @@ IBVERBS_1.0 { ibv_modify_qp; ibv_destroy_qp; ibv_create_ah; - ibv_init_ah_from_wc; - ibv_create_ah_from_wc; ibv_destroy_ah; ibv_attach_mcast; ibv_detach_mcast; @@ -67,17 +65,30 @@ IBVERBS_1.0 { ibv_cmd_attach_mcast; ibv_cmd_detach_mcast; ibv_copy_qp_attr_from_kern; - ibv_copy_ah_attr_from_kern; ibv_copy_path_rec_from_kern; ibv_copy_path_rec_to_kern; ibv_rate_to_mult; mult_to_ibv_rate; ibv_get_sysfs_path; ibv_read_sysfs_file; + + local: *; +}; + +IBVERBS_1.1 { + global: + ibv_get_device_list; + 
ibv_free_device_list; + ibv_get_device_name; + ibv_get_device_guid; + ibv_open_device; + ibv_close_device; + + ibv_init_ah_from_wc; + ibv_create_ah_from_wc; + ibv_copy_ah_attr_from_kern; ibv_fork_init; ibv_dontfork_range; ibv_dofork_range; ibv_register_driver; - - local: *; -}; +} IBVERBS_1.0; diff --git a/src/verbs.c b/src/verbs.c index 6ac56d3..56513e4 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -76,20 +76,22 @@ enum ibv_rate mult_to_ibv_rate(int mult) } } -int ibv_query_device(struct ibv_context *context, - struct ibv_device_attr *device_attr) +int __ibv_query_device(struct ibv_context *context, + struct ibv_device_attr *device_attr) { return context->ops.query_device(context, device_attr); } +default_symver(__ibv_query_device, ibv_query_device); -int ibv_query_port(struct ibv_context *context, uint8_t port_num, - struct ibv_port_attr *port_attr) +int __ibv_query_port(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr) { return context->ops.query_port(context, port_num, port_attr); } +default_symver(__ibv_query_port, ibv_query_port); -int ibv_query_gid(struct ibv_context *context, uint8_t port_num, - int index, union ibv_gid *gid) +int __ibv_query_gid(struct ibv_context *context, uint8_t port_num, + int index, union ibv_gid *gid) { char name[24]; char attr[41]; @@ -111,9 +113,10 @@ int ibv_query_gid(struct ibv_context *context, uint8_t port_num, return 0; } +default_symver(__ibv_query_gid, ibv_query_gid); -int ibv_query_pkey(struct ibv_context *context, uint8_t port_num, - int index, uint16_t *pkey) +int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num, + int index, 
uint16_t *pkey) { char name[24]; char attr[8]; @@ -131,8 +134,9 @@ int ibv_query_pkey(struct ibv_context *context, uint8_t port_num, *pkey = htons(val); return 0; } +default_symver(__ibv_query_pkey, ibv_query_pkey); -struct ibv_pd *ibv_alloc_pd(struct ibv_context *context) +struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context) { struct ibv_pd *pd; @@ -142,14 +146,16 @@ struct ibv_pd *ibv_alloc_pd(struct ibv_context *context) return pd; } +default_symver(__ibv_alloc_pd, ibv_alloc_pd); -int ibv_dealloc_pd(struct ibv_pd *pd) +int __ibv_dealloc_pd(struct ibv_pd *pd) { return pd->context->ops.dealloc_pd(pd); } +default_symver(__ibv_dealloc_pd, ibv_dealloc_pd); -struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, - size_t length, enum ibv_access_flags access) +struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, + size_t length, enum ibv_access_flags access) { struct ibv_mr *mr; @@ -167,8 +173,9 @@ struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, return mr; } +default_symver(__ibv_reg_mr, ibv_reg_mr); -int ibv_dereg_mr(struct ibv_mr *mr) +int __ibv_dereg_mr(struct ibv_mr *mr) { int ret; void *addr = mr->addr; @@ -180,6 +187,7 @@ int ibv_dereg_mr(struct ibv_mr *mr) return ret; } +default_symver(__ibv_dereg_mr, ibv_dereg_mr); static struct ibv_comp_channel *ibv_create_comp_channel_v2(struct ibv_context *context) { @@ -241,8 +249,8 @@ int ibv_destroy_comp_channel(struct ibv_comp_channel *channel) return 0; } -struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, - struct ibv_comp_channel *channel, int comp_vector) +struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, + struct ibv_comp_channel *channel, int comp_vector) { struct ibv_cq *cq = context->ops.create_cq(context, cqe, channel, comp_vector); @@ -258,23 +266,25 @@ struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_cont return cq; } +default_symver(__ibv_create_cq, ibv_create_cq); -int ibv_resize_cq(struct 
ibv_cq *cq, int cqe) +int __ibv_resize_cq(struct ibv_cq *cq, int cqe) { if (!cq->context->ops.resize_cq) return ENOSYS; return cq->context->ops.resize_cq(cq, cqe); } +default_symver(__ibv_resize_cq, ibv_resize_cq); -int ibv_destroy_cq(struct ibv_cq *cq) +int __ibv_destroy_cq(struct ibv_cq *cq) { return cq->context->ops.destroy_cq(cq); } +default_symver(__ibv_destroy_cq, ibv_destroy_cq); - -int ibv_get_cq_event(struct ibv_comp_channel *channel, - struct ibv_cq **cq, void **cq_context) +int __ibv_get_cq_event(struct ibv_comp_channel *channel, + struct ibv_cq **cq, void **cq_context) { struct ibv_comp_event ev; @@ -289,17 +299,19 @@ int ibv_get_cq_event(struct ibv_comp_channel *channel, return 0; } +default_symver(__ibv_get_cq_event, ibv_get_cq_event); -void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) +void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) { pthread_mutex_lock(&cq->mutex); cq->comp_events_completed += nevents; pthread_cond_signal(&cq->cond); pthread_mutex_unlock(&cq->mutex); } +default_symver(__ibv_ack_cq_events, ibv_ack_cq_events); -struct ibv_srq *ibv_create_srq(struct ibv_pd *pd, - struct ibv_srq_init_attr *srq_init_attr) +struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr) { struct ibv_srq *srq; @@ -318,26 +330,30 @@ struct ibv_srq *ibv_create_srq(struct ibv_pd *pd, return srq; } +default_symver(__ibv_create_srq, ibv_create_srq); -int ibv_modify_srq(struct ibv_srq *srq, - struct ibv_srq_attr *srq_attr, - enum ibv_srq_attr_mask srq_attr_mask) +int __ibv_modify_srq(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask) { return srq->context->ops.modify_srq(srq, srq_attr, srq_attr_mask); } +default_symver(__ibv_modify_srq, ibv_modify_srq); -int ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) +int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) { return srq->context->ops.query_srq(srq, srq_attr); } 
+default_symver(__ibv_query_srq, ibv_query_srq); -int ibv_destroy_srq(struct ibv_srq *srq) +int __ibv_destroy_srq(struct ibv_srq *srq) { return srq->context->ops.destroy_srq(srq); } +default_symver(__ibv_destroy_srq, ibv_destroy_srq); -struct ibv_qp *ibv_create_qp(struct ibv_pd *pd, - struct ibv_qp_init_attr *qp_init_attr) +struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr) { struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr); @@ -356,10 +372,11 @@ struct ibv_qp *ibv_create_qp(struct ibv_pd *pd, return qp; } +default_symver(__ibv_create_qp, ibv_create_qp); -int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, - enum ibv_qp_attr_mask attr_mask, - struct ibv_qp_init_attr *init_attr) +int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr *init_attr) { int ret; @@ -372,9 +389,10 @@ int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, return 0; } +default_symver(__ibv_query_qp, ibv_query_qp); -int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, - enum ibv_qp_attr_mask attr_mask) +int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask) { int ret; @@ -387,13 +405,15 @@ int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, return 0; } +default_symver(__ibv_modify_qp, ibv_modify_qp); -int ibv_destroy_qp(struct ibv_qp *qp) +int __ibv_destroy_qp(struct ibv_qp *qp) { return qp->context->ops.destroy_qp(qp); } +default_symver(__ibv_destroy_qp, ibv_destroy_qp); -struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) +struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) { struct ibv_ah *ah = pd->context->ops.create_ah(pd, attr); @@ -404,6 +424,7 @@ struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) return ah; } +default_symver(__ibv_create_ah, ibv_create_ah); static int ibv_find_gid_index(struct ibv_context 
*context, uint8_t port_num, union ibv_gid *gid) @@ -461,17 +482,20 @@ struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc, return ibv_create_ah(pd, &ah_attr); } -int ibv_destroy_ah(struct ibv_ah *ah) +int __ibv_destroy_ah(struct ibv_ah *ah) { return ah->context->ops.destroy_ah(ah); } +default_symver(__ibv_destroy_ah, ibv_destroy_ah); -int ibv_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +int __ibv_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) { return qp->context->ops.attach_mcast(qp, gid, lid); } +default_symver(__ibv_attach_mcast, ibv_attach_mcast); -int ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +int __ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) { return qp->context->ops.detach_mcast(qp, gid, lid); } +default_symver(__ibv_detach_mcast, ibv_detach_mcast); -- 1.4.4.1 From swise at opengridcomputing.com Thu Jan 25 10:05:46 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 12:05:46 -0600 Subject: [openib-general] stuck process question Message-ID: <1169748346.1799.1.camel@stevo-desktop> I have a process probably stuck in the IWCM or CMA waiting on a refcnt to go to zero. Does anyone know of a way to figure out what the process is blocked on short of using a kernel debugger? Like once its stuck, is there any useful information I can gather about what the process is waiting on? Thanks in advance, Steve. From rdreier at cisco.com Thu Jan 25 10:09:59 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 10:09:59 -0800 Subject: [openib-general] stuck process question In-Reply-To: <1169748346.1799.1.camel@stevo-desktop> (Steve Wise's message of "Thu, 25 Jan 2007 12:05:46 -0600") References: <1169748346.1799.1.camel@stevo-desktop> Message-ID: Steve> I have a process probably stuck in the IWCM or CMA waiting Steve> on a refcnt to go to zero. 
Does anyone know of a way to Steve> figure out what the process is blocked on short of using a Steve> kernel debugger? Like once its stuck, is there any useful Steve> information I can gather about what the process is waiting Steve> on? /proc/<pid>/wchan will show you where it's waiting. And "echo t > /proc/sysrq-trigger" will put a stack trace of everything into the kernel log. - R. From swise at opengridcomputing.com Thu Jan 25 10:14:12 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 12:14:12 -0600 Subject: [openib-general] stuck process question In-Reply-To: References: <1169748346.1799.1.camel@stevo-desktop> Message-ID: <1169748852.1799.3.camel@stevo-desktop> On Thu, 2007-01-25 at 10:09 -0800, Roland Dreier wrote: > Steve> I have a process probably stuck in the IWCM or CMA waiting > Steve> on a refcnt to go to zero. Does anyone know of a way to > Steve> figure out what the process is blocked on short of using a > Steve> kernel debugger? Like once its stuck, is there any useful > Steve> information I can gather about what the process is waiting > Steve> on? > > /proc/<pid>/wchan will show you where it's waiting. And "echo t > > /proc/sysrq-trigger" will put a stack trace of everything into the > kernel log. > Thanks! I have no /proc/sysrq-trigger. Is there a config option for the kernel? From rdreier at cisco.com Thu Jan 25 10:16:02 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 10:16:02 -0800 Subject: [openib-general] stuck process question In-Reply-To: <1169748852.1799.3.camel@stevo-desktop> (Steve Wise's message of "Thu, 25 Jan 2007 12:14:12 -0600") References: <1169748346.1799.1.camel@stevo-desktop> <1169748852.1799.3.camel@stevo-desktop> Message-ID: > I have no /proc/sysrq-trigger. Is there a config option for the kernel? Yes, looking in fs/proc/proc_misc.c, creating the file is inside a test for CONFIG_MAGIC_SYSRQ. - R. 
From rdreier at cisco.com Thu Jan 25 10:24:07 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 10:24:07 -0800 Subject: [openib-general] InfiniBand Maintainers Summit/BOF at Ottawa Linux Symposium In-Reply-To: (Robert J. Woodruff's message of "Wed, 24 Jan 2007 13:46:02 -0800") References: Message-ID: Since the kernel summit has been moved to Cambridge, England in September this year, I'm unlikely to go to OLS too. But I don't think an OLS BOF is really the best venue for an "RDMA summit" anyway. A BOF is too little time to really cover everything, and having non-core people there changes the mood -- of course it's good to include the hoi polloi most of the time, but I think you're really proposing an event for the core developers only. I think a summit really needs to be a full day and be invitation only. One idea that might work would be to have a summit the day before the Sonoma workshop in April. This could be pretty informal, with no need to involve the OFA board or anything cumbersome like that -- just tell everyone to show up a day early, and get a small conference room and a catered lunch at the hotel (money could probably be scared up to pay for that without much trouble). Or we could pick another day/location... I'm sure I could get a room & food at Cisco's campus any time given a few weeks' warning. - R. From ebiederm at xmission.com Thu Jan 25 10:55:04 2007 From: ebiederm at xmission.com (ebiederm at xmission.com) Date: Thu, 25 Jan 2007 11:55:04 -0700 Subject: [openib-general] [RFC PATCH 0/31] An introduction and A path for merging network namespace work Message-ID: The idea of a network namespace is fundamentally quite simple. We create a mechanism that from the user's perspective allows creation of separate instances of the network stack. When combined with mechanisms like chroot this results in a much more complete isolation. 
When seen in the context of application migration this allows for taking your IP address and other global identifiers with you. What does this mean in the context of the networking stack? The basic idea is to tag processes with a network namespace that is used when they create new sockets or otherwise initiate a new fresh communication with the networking stack. The idea is to tag all sockets with a network namespace they will always be in and all operations on them will be relative to. The idea is to tag all network devices with a network namespace they are a member of, but may be changed during the lifetime of a device. Mostly a network namespace at its most basic level is about names. It is about creating a view of the networking stack where you can name the network devices that are members anything you want. Likewise for iptables rules and all of the rest of the state. It is a lot like creating a new directory in a filesystem. The underlying data structures don't really change just the user's view of those data structures, and we continue to have a single network stack. My goal today is that even if we can't agree on a specific set of patches that we come to an agreement on roughly what those patches should accomplish, and what process we should go through to get them merged. For implementing a network namespace the core problem is that there is a lot of networking code, and it is continually evolving. This means that the task of implementing a network namespace is not a small one, a lot of code must be read, touched and updated, while hoping someone doesn't change something important before you get your changes in. To do this sanely means we need an incremental path to our goal, that allows small pieces to be reviewed and merged as they are ready. The path I am recommending today is to first lay down some basic infrastructure. 
Then one layer at a time modify the existing code to handle multiple simultaneous network namespaces but to modify each component of that layer to refuse to operate in the context of anything but the initial network namespace, thus preventing code that has not yet been updated from encountering situations it does not know how to deal with. Eventually this will get down to the real meat of the problem and practical things like ipv4 sockets will work. This should allow for a network stack that compiles, builds and works at each step of the way. Not too far into the process support for multiple network namespaces that works should be available with the limitation that except for the initial network namespace all of the rest will look like a kernel with most parts of the networking stack compiled out, but within those parts that are present it should be fully useable. To make my thinking clear I have provided an initial patchset, that makes quite a bit of progress especially in laying the ground work. My goal is to answer the question: does this basic path make sense? To that end I have omitted posting some of the prerequisite cleanup and infrastructure patches (like my sysctl work), that are just noise in this context, and I have failed to rebase my patchset against Dave Miller's latest networking tree. Those are important details but they are not important to this conversation. If my basic path and the basic patches look like they are heading in the right direction we can start moving towards what needs to happen to ensure a review of the patches, and what we need to do to start merging them. If the basic path does not appear reasonable, well, that would be good to know as well. There are essentially two different approaches to modify networking code to handle multiple network namespaces. Either all of the global variables can be replicated once for each network namespace and we build up parallel namespace specific data structures. 
Or the data elements in the data structure are tagged with what namespace they belong to, and we filter them. It depends on the context which is most appropriate and easier. As a general rule large hash tables call for filtering and a small global variable set calls for simply having multiple instances of the data structure. The biggest intrusion I expect to see in the logic of the networking stack is initialization and tear down. As we need to initialize and clean up all of those per network namespace variables when we create and destroy a network namespace. A git tree with all of my patches against 2.6.20-rc5 is available at: git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/linux-2.6-netns.git In addition to what I have posted here and all of its prerequisites the tree includes further patches that get the basics of ipv4 and iptables working. So people who are interested actually have something more or less useful to play with. At a big practical level what I don't yet see is how exactly the infiniband/rdma network subsystem fits into network namespaces yet. Not at the ipoib layer but at the native layer. I think I want the ability to say each pkey of each IB device can potentially be in a different namespace or possibly each different queue pair. Suggestions are welcome. I don't quite have my head wrapped around the user space API there yet. I suppose on the infiniband/rdma side I should dig up all interactions with user space and simply fail if that user is not in the initial network namespace as a start. At the very least this is necessary given how many calls the connection manager makes into the IP stack. Eric From ebiederm at xmission.com Thu Jan 25 11:00:10 2007 From: ebiederm at xmission.com (Eric W. 
Biederman) Date: Thu, 25 Jan 2007 12:00:10 -0700 Subject: [openib-general] [PATCH RFC 8/31] net: Make /sys/class/net handle multiple network namespaces In-Reply-To: References: Message-ID: <11697516343584-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted In combination with the sysfs support I am in the process of merging with gregkh, creates a separate instance of the /sys/class/net directory for each network namespace so two devices with the same name do not conflict. Then a network namespace sensitive follow link method on the /sys/class/net directory ensures that you see the directory instance for your current network namespace. Ensuring all existing applications continue to see what we is currently present in sysfs. Signed-off-by: Eric W. Biederman --- net/core/net-sysfs.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 52 insertions(+), 1 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5d08cc9..b08c1be 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -11,12 +11,14 @@ #include #include +#include #include #include #include #include #include #include +#include #define to_class_dev(obj) container_of(obj,struct class_device,kobj) #define to_net_dev(class) container_of(class, struct net_device, class_dev) @@ -431,6 +433,24 @@ static void netdev_release(struct class_device *cd) kfree((char *)dev - dev->padded); } +static DEFINE_PER_NET(struct dentry *, net_shadow) = NULL; + +static struct dentry *net_class_device_dparent(struct class_device *cd) +{ + struct net_device *dev + = container_of(cd, struct net_device, class_dev); + net_t net = dev->nd_net; + + return per_net(net_shadow, net); +} + +static void *class_net_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + dput(nd->dentry); + nd->dentry = dget(per_net(net_shadow, current->nsproxy->net_ns)); + return NULL; +} + static struct class net_class = { .name = "net", .release = netdev_release, @@ -438,6 +458,8 @@ 
static struct class net_class = { #ifdef CONFIG_HOTPLUG .uevent = netdev_uevent, #endif + .class_device_dparent = net_class_device_dparent, + .class_follow_link = class_net_follow_link, }; void netdev_unregister_sysfs(struct net_device * dev) @@ -470,7 +492,36 @@ int netdev_register_sysfs(struct net_device *dev) return class_device_add(class_dev); } +static int netdev_sysfs_net_init(net_t net) +{ + struct dentry *shadow; + int error = 0; + shadow = sysfs_create_shadow_dir(&net_class.subsys.kset.kobj); + if (IS_ERR(shadow)) + error = PTR_ERR(shadow); + else + per_net(net_shadow, net) = shadow; + return error; +} + +static void netdev_sysfs_net_exit(net_t net) +{ + sysfs_remove_shadow_dir(per_net(net_shadow, net)); + per_net(net_shadow, net) = NULL; +} + +static struct pernet_operations netdev_sysfs_ops = { + .init = netdev_sysfs_net_init, + .exit = netdev_sysfs_net_exit, +}; + int netdev_sysfs_init(void) { - return class_register(&net_class); + int rc; + if ((rc = class_register(&net_class))) + goto out; + if ((rc = register_pernet_subsys(&netdev_sysfs_ops))) + goto out; +out: + return rc; } -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:05 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:05 -0700 Subject: [openib-general] [PATCH RFC 3/31] net: Add a network namespace parameter to tasks In-Reply-To: References: Message-ID: <11697516333073-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This is the network namespace from which all sockets and anything else under user control ultimately get their network namespace parameters. Signed-off-by: Eric W. 
Biederman --- include/linux/nsproxy.h | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 0b9f0dc..cc76610 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -3,6 +3,7 @@ #include #include +#include struct mnt_namespace; struct uts_namespace; @@ -28,6 +29,7 @@ struct nsproxy { struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; + net_t net_ns; }; extern struct nsproxy init_nsproxy; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:21 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:21 -0700 Subject: [openib-general] [PATCH RFC 19/31] net: sysfs interface support for moving devices between network namespaces. In-Reply-To: References: Message-ID: <11697516371558-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted I haven't a clue if this interface will meet with widespread approval but at this point it is simple, and very useful. Signed-off-by: Eric W. 
Biederman --- net/core/net-sysfs.c | 35 +++++++++++++++++++++++++++++++++++ 1 files changed, 35 insertions(+), 0 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1be6f94..f8a5c6b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -188,6 +188,40 @@ static ssize_t store_mtu(struct class_device *cd, const char *buf, size_t len) return netdev_store(cd, buf, len, change_mtu); } +static ssize_t show_new_ns_pid(struct class_device *cd, char *buf) +{ + return -EPERM; +} +static int change_new_ns_pid(struct net_device *dev, unsigned long new_ns_pid) +{ + struct task_struct *tsk; + int err; + net_t net; + /* Look up the network namespace */ + err = -ESRCH; + rcu_read_lock(); + tsk = find_task_by_pid(new_ns_pid); + if (tsk) { + task_lock(tsk); + if (tsk->nsproxy) { + err = 0; + net = get_net(tsk->nsproxy->net_ns); + } + task_unlock(tsk); + } + rcu_read_unlock(); + /* If I found a network namespace move the device */ + if (!err) { + err = dev_change_net_namespace(dev, net, NULL); + put_net(net); + } + return err; +} +static ssize_t store_new_ns_pid(struct class_device *cd, const char *buf, size_t len) +{ + return netdev_store(cd, buf, len, change_new_ns_pid); +} + NETDEVICE_SHOW(flags, fmt_hex); static int change_flags(struct net_device *dev, unsigned long new_flags) @@ -243,6 +277,7 @@ static struct class_device_attribute net_class_attributes[] = { __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), + __ATTR(new_ns_pid, S_IWUSR, show_new_ns_pid, store_new_ns_pid), {} }; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:32 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:32 -0700 Subject: [openib-general] [PATCH RFC 30/31] net: Make AF_UNIX per network namespace safe. In-Reply-To: References: Message-ID: <11697516392921-git-send-email-ebiederm@xmission.com> From: Eric W. 
Biederman - unquoted Because of the global nature of garbage collection, and because of the cost of per namespace hash tables unix_socket_table has been kept global. With a filter added on lookups so we don't see sockets from the wrong namespace. Currently I don't fold the namespace into the hash so multiple namespaces using the same socket name will be guaranteed a hash collision. Signed-off-by: Eric W. Biederman --- include/net/af_unix.h | 10 ++-- net/unix/af_unix.c | 116 ++++++++++++++++++++++++++++++++------------ net/unix/sysctl_net_unix.c | 24 +++++---- 3 files changed, 103 insertions(+), 47 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index c0398f5..1f40dd2 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -89,12 +89,12 @@ struct unix_sock { #define unix_sk(__sk) ((struct unix_sock *)__sk) #ifdef CONFIG_SYSCTL -extern int sysctl_unix_max_dgram_qlen; -extern void unix_sysctl_register(void); -extern void unix_sysctl_unregister(void); +DECLARE_PER_NET(int, sysctl_unix_max_dgram_qlen); +extern void unix_sysctl_register(net_t net); +extern void unix_sysctl_unregister(net_t net); #else -static inline void unix_sysctl_register(void) {} -static inline void unix_sysctl_unregister(void) {} +static inline void unix_sysctl_register(net_t net) {} +static inline void unix_sysctl_unregister(net_t net) {} #endif #endif #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8015a03..3f57cb2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,7 +118,7 @@ #include #include -int sysctl_unix_max_dgram_qlen __read_mostly = 10; +DEFINE_PER_NET(int, sysctl_unix_max_dgram_qlen) = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; DEFINE_SPINLOCK(unix_table_lock); @@ -245,7 +245,8 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) spin_unlock(&unix_table_lock); } -static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, +static struct sock 
*__unix_find_socket_byname(net_t net, + struct sockaddr_un *sunname, int len, int type, unsigned hash) { struct sock *s; @@ -254,6 +255,9 @@ static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); + if (!net_eq(s->sk_net, net)) + continue; + if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) goto found; @@ -263,21 +267,22 @@ found: return s; } -static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, +static inline struct sock *unix_find_socket_byname(net_t net, + struct sockaddr_un *sunname, int len, int type, unsigned hash) { struct sock *s; spin_lock(&unix_table_lock); - s = __unix_find_socket_byname(sunname, len, type, hash); + s = __unix_find_socket_byname(net, sunname, len, type, hash); if (s) sock_hold(s); spin_unlock(&unix_table_lock); return s; } -static struct sock *unix_find_socket_byinode(struct inode *i) +static struct sock *unix_find_socket_byinode(net_t net, struct inode *i) { struct sock *s; struct hlist_node *node; @@ -287,6 +292,9 @@ static struct sock *unix_find_socket_byinode(struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; + if (!net_eq(s->sk_net, net)) + continue; + if(dentry && dentry->d_inode == i) { sock_hold(s); @@ -588,7 +596,7 @@ static struct sock * unix_create1(net_t net, struct socket *sock) &af_unix_sk_receive_queue_lock_key); sk->sk_write_space = unix_write_space; - sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; + sk->sk_max_ack_backlog = per_net(sysctl_unix_max_dgram_qlen, net); sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); u->dentry = NULL; @@ -604,9 +612,6 @@ out: static int unix_create(net_t net, struct socket *sock, int protocol) { - if (!net_eq(net, init_net())) - return -EAFNOSUPPORT; - if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -650,6 +655,7 @@ static int unix_release(struct 
socket *sock) static int unix_autobind(struct socket *sock) { struct sock *sk = sock->sk; + net_t net = sk->sk_net; struct unix_sock *u = unix_sk(sk); static u32 ordernum = 1; struct unix_address * addr; @@ -676,7 +682,7 @@ retry: spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; - if (__unix_find_socket_byname(addr->name, addr->len, sock->type, + if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... */ @@ -696,7 +702,8 @@ out: mutex_unlock(&u->readlock); return err; } -static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, +static struct sock *unix_find_other(net_t net, + struct sockaddr_un *sunname, int len, int type, unsigned hash, int *error) { struct sock *u; @@ -714,7 +721,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, err = -ECONNREFUSED; if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) goto put_fail; - u=unix_find_socket_byinode(nd.dentry->d_inode); + u=unix_find_socket_byinode(net, nd.dentry->d_inode); if (!u) goto put_fail; @@ -730,7 +737,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, } } else { err = -ECONNREFUSED; - u=unix_find_socket_byname(sunname, len, type, hash); + u=unix_find_socket_byname(net, sunname, len, type, hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->dentry; @@ -752,6 +759,7 @@ fail: static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; + net_t net = sk->sk_net; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct dentry * dentry = NULL; @@ -826,7 +834,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!sunaddr->sun_path[0]) { err = -EADDRINUSE; - if (__unix_find_socket_byname(sunaddr, addr_len, + if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { 
unix_release_addr(addr); goto out_unlock; @@ -867,6 +875,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { struct sock *sk = sock->sk; + net_t net = sk->sk_net; struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; struct sock *other; unsigned hash; @@ -882,7 +891,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) goto out; - other=unix_find_other(sunaddr, alen, sock->type, hash, &err); + other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err); if (!other) goto out; @@ -955,6 +964,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; + net_t net = sk->sk_net; struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct sock *newsk = NULL; struct sock *other = NULL; @@ -994,7 +1004,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, restart: /* Find listening sock. 
*/ - other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err); + other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); if (!other) goto out; @@ -1273,6 +1283,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, { struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; + net_t net = sk->sk_net; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=msg->msg_name; struct sock *other = NULL; @@ -1336,7 +1347,7 @@ restart: if (sunaddr == NULL) goto out_free; - other = unix_find_other(sunaddr, namelen, sk->sk_type, + other = unix_find_other(net, sunaddr, namelen, sk->sk_type, hash, &err); if (other==NULL) goto out_free; @@ -1935,12 +1946,18 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl #ifdef CONFIG_PROC_FS -static struct sock *unix_seq_idx(int *iter, loff_t pos) +struct unix_iter_state { + net_t net; + int i; +}; +static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos) { loff_t off = 0; struct sock *s; - for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { + for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { + if (!net_eq(s->sk_net, iter->net)) + continue; if (off == pos) return s; ++off; @@ -1951,17 +1968,24 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos) static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { + struct unix_iter_state *iter = seq->private; spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); + return *pos ? 
unix_seq_idx(iter, *pos - 1) : ((void *) 1); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct unix_iter_state *iter = seq->private; + struct sock *sk = v; ++*pos; if (v == (void *)1) - return first_unix_socket(seq->private); - return next_unix_socket(seq->private, v); + sk = first_unix_socket(&iter->i); + else + sk = next_unix_socket(&iter->i, sk); + while (sk && !net_eq(sk->sk_net, iter->net)) + sk = next_unix_socket(&iter->i, sk); + return sk; } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -2025,7 +2049,7 @@ static int unix_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - int *iter = kmalloc(sizeof(int), GFP_KERNEL); + struct unix_iter_state *iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) goto out; @@ -2036,7 +2060,8 @@ static int unix_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = iter; - *iter = 0; + iter->net = get_net(PROC_NET(inode)); + iter->i = 0; out: return rc; out_kfree: @@ -2044,12 +2069,20 @@ out_kfree: goto out; } +static int unix_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct unix_iter_state *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static struct file_operations unix_seq_fops = { .owner = THIS_MODULE, .open = unix_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = unix_seq_release, }; #endif @@ -2060,6 +2093,31 @@ static struct net_proto_family unix_family_ops = { .owner = THIS_MODULE, }; + +static int unix_net_init(net_t net) +{ + int error = -ENOMEM; +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) + goto out; +#endif + unix_sysctl_register(net); + error = 0; +out: + return 0; +} + +static void unix_net_exit(net_t net) +{ + unix_sysctl_unregister(net); + proc_net_remove(net, "unix"); +} + +static struct 
pernet_operations unix_net_ops = { + .init = unix_net_init, + .exit = unix_net_exit, +}; + static int __init af_unix_init(void) { int rc = -1; @@ -2075,10 +2133,7 @@ static int __init af_unix_init(void) } sock_register(&unix_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create(init_net(), "unix", 0, &unix_seq_fops); -#endif - unix_sysctl_register(); + register_pernet_subsys(&unix_net_ops); out: return rc; } @@ -2086,9 +2141,8 @@ out: static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); - unix_sysctl_unregister(); - proc_net_remove(init_net(), "unix"); proto_unregister(&unix_proto); + unregister_pernet_subsys(&unix_net_ops); } module_init(af_unix_init); diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index eb0bd57..4b59da8 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -14,11 +14,11 @@ #include -static ctl_table unix_table[] = { +static DEFINE_PER_NET(ctl_table, unix_table[]) = { { .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, .procname = "max_dgram_qlen", - .data = &sysctl_unix_max_dgram_qlen, + .data = &__per_net_base(sysctl_unix_max_dgram_qlen), .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -26,35 +26,37 @@ static ctl_table unix_table[] = { { .ctl_name = 0 } }; -static ctl_table unix_net_table[] = { +static DEFINE_PER_NET(ctl_table, unix_net_table[]) = { { .ctl_name = NET_UNIX, .procname = "unix", .mode = 0555, - .child = unix_table + .child = __per_net_base(unix_table) }, { .ctl_name = 0 } }; -static ctl_table unix_root_table[] = { +static DEFINE_PER_NET(ctl_table, unix_root_table[]) = { { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, - .child = unix_net_table + .child = __per_net_base(unix_net_table) }, { .ctl_name = 0 } }; -static struct ctl_table_header * unix_sysctl_header; +static DEFINE_PER_NET(struct ctl_table_header *, unix_sysctl_header); -void unix_sysctl_register(void) +void unix_sysctl_register(net_t net) { - unix_sysctl_header = 
register_sysctl_table(unix_root_table); + ctl_table *table = per_net(unix_root_table, net); + per_net(unix_sysctl_header, net) = + register_net_sysctl_table(net, table); } -void unix_sysctl_unregister(void) +void unix_sysctl_unregister(net_t net) { - unregister_sysctl_table(unix_sysctl_header); + unregister_net_sysctl_table(per_net(unix_sysctl_header, net)); } -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:25 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:25 -0700 Subject: [openib-general] [PATCH RFC 23/31] net: Modify all rtnetlink methods to only work in the initial namespace In-Reply-To: References: Message-ID: <11697516382511-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Before I can enable rtnetlink to work in all network namespaces I need to be certain that something won't break. So this patch deliberately disables all of the methods and when they are audited this extra check can be disabled. Signed-off-by: Eric W. 
Biederman --- net/bridge/br_netlink.c | 9 +++++++++ net/core/fib_rules.c | 7 +++++++ net/core/neighbour.c | 18 ++++++++++++++++++ net/core/rtnetlink.c | 13 +++++++++++++ net/decnet/dn_dev.c | 12 ++++++++++++ net/decnet/dn_fib.c | 8 ++++++++ net/decnet/dn_route.c | 8 ++++++++ net/decnet/dn_rules.c | 5 +++++ net/decnet/dn_table.c | 4 ++++ net/ipv4/devinet.c | 12 ++++++++++++ net/ipv4/fib_frontend.c | 12 ++++++++++++ net/ipv4/fib_rules.c | 5 +++++ net/ipv6/addrconf.c | 31 +++++++++++++++++++++++++++++++ net/ipv6/fib6_rules.c | 5 +++++ net/ipv6/ip6_fib.c | 4 ++++ net/ipv6/route.c | 12 ++++++++++++ net/sched/act_api.c | 8 ++++++++ net/sched/cls_api.c | 8 ++++++++ net/sched/sch_api.c | 20 ++++++++++++++++++++ 19 files changed, 201 insertions(+), 0 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 119b97d..85165a1 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "br_private.h" static inline size_t br_nlmsg_size(void) @@ -104,9 +105,13 @@ errout: */ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; struct net_device *dev; int idx; + if (!net_eq(net, init_net())) + return 0; + read_lock(&per_net(dev_base_lock, init_net())); for (dev = per_net(dev_base, init_net()), idx = 0; dev; dev = dev->next) { /* not a bridge port */ @@ -133,12 +138,16 @@ skip: */ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *protinfo; struct net_device *dev; struct net_bridge_port *p; u8 new_state; + if (!net_eq(net, init_net())) + return -EINVAL; + if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 2fa2708..00b4148 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -163,6 +163,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 
struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL; + if (!net_eq(net, init_net())) + return -EINVAL; + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; @@ -244,12 +247,16 @@ errout: int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + net_t net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule; struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL; + if (!net_eq(net, init_net())) + return -EINVAL; + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f5d4f92..d89c6fe 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1445,6 +1445,9 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *dev = NULL; int err = -EINVAL; + if (!net_eq(net, init_net())) + return -EINVAL; + if (nlmsg_len(nlh) < sizeof(*ndm)) goto out; @@ -1511,6 +1514,9 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *dev = NULL; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) goto out; @@ -1783,11 +1789,15 @@ static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, nl_neightbl_policy); if (err < 0) @@ -1907,11 +1917,15 @@ errout: int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; struct neigh_table *tbl; + if (!net_eq(net, init_net())) + return 0; + family = ((struct rtgenmsg *) 
nlmsg_data(cb->nlh))->rtgen_family; read_lock(&neigh_tbl_lock); @@ -2030,9 +2044,13 @@ out: int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; struct neigh_table *tbl; int t, family, s_t; + if (!net_eq(net, init_net())) + return 0; + read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; s_t = cb->args[0]; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ac07a0..9be586c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -395,6 +395,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int s_idx = cb->args[0]; struct net_device *dev; + if (!net_eq(net, init_net())) + return 0; + read_lock(&per_net(dev_base_lock, net)); for (dev=per_net(dev_base, net), idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) @@ -429,6 +432,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) goto errout; @@ -602,6 +608,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int iw_buf_len = 0; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) return err; @@ -650,9 +659,13 @@ errout: static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int idx; int s_idx = cb->family; + if (!net_eq(net, init_net())) + return 0; + if (s_idx == 0) s_idx = 1; for (idx=1; idxsk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; struct ifaddrmsg *ifm; struct dn_ifaddr *ifa, **ifap; int err = -EADDRNOTAVAIL; + if (!net_eq(net, init_net())) + goto errout; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); if (err < 0) goto errout; @@ -680,6 +684,7 @@ errout: 
static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct net_device *dev; struct dn_dev *dn_db; @@ -687,6 +692,9 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct dn_ifaddr *ifa; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); if (err < 0) return err; @@ -788,11 +796,15 @@ errout: static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int idx, dn_idx = 0, skip_ndevs, skip_naddr; struct net_device *dev; struct dn_dev *dn_db; struct dn_ifaddr *ifa; + if (!net_eq(net, init_net())) + return 0; + skip_ndevs = cb->args[0]; skip_naddr = cb->args[1]; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index cc2ab1f..832e1b4 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -503,10 +503,14 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); + if (!net_eq(net, init_net())) + return -EINVAL; + if (dn_fib_check_attr(r, rta)) return -EINVAL; @@ -519,10 +523,14 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); + if (!net_eq(net, init_net())) + return -EINVAL; + if (dn_fib_check_attr(r, rta)) return -EINVAL; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9669e50..d942ea0 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1528,6 +1528,7 @@ rtattr_failure: */ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void 
*arg) { + net_t net = in_skb->sk->sk_net; struct rtattr **rta = arg; struct rtmsg *rtm = NLMSG_DATA(nlh); struct dn_route *rt = NULL; @@ -1536,6 +1537,9 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) struct sk_buff *skb; struct flowi fl; + if (!net_eq(net, init_net())) + return -EINVAL; + memset(&fl, 0, sizeof(fl)); fl.proto = DNPROTO_NSP; @@ -1613,10 +1617,14 @@ out_free: */ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; struct dn_route *rt; int h, s_h; int idx, s_idx; + if (!net_eq(net, init_net())) + return 0; + if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg)) return -EINVAL; if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index e32d0c3..84eec40 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -243,6 +243,11 @@ static u32 dn_fib_rule_default_pref(void) int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; + + if (!net_eq(net, init_net())) + return 0; + return fib_rules_dump(skb, cb, AF_DECnet); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 13b2421..3ff151c 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -459,12 +459,16 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; struct hlist_node *node; int dumped = 0; + if (!net_eq(net, init_net())) + return 0; + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) return dn_cache_dump(skb, cb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b0d12ec..7769b1c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -443,6 +443,7 @@ struct in_ifaddr 
*inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; @@ -451,6 +452,9 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg ASSERT_RTNL(); + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); if (err < 0) goto errout; @@ -562,10 +566,14 @@ errout: static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct in_ifaddr *ifa; ASSERT_RTNL(); + if (!net_eq(net, init_net())) + return -EINVAL; + ifa = rtm_to_ifaddr(nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -1173,12 +1181,16 @@ nla_put_failure: static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int idx, ip_idx; struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; int s_ip_idx, s_idx = cb->args[0]; + if (!net_eq(net, init_net())) + return 0; + s_ip_idx = ip_idx = cb->args[1]; read_lock(&per_net(dev_base_lock, init_net())); for (dev = per_net(dev_base, init_net()), idx = 0; dev; dev = dev->next, idx++) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 449f42d..0e48fb8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -538,10 +538,14 @@ errout: int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + net_t net = skb->sk->sk_net; struct fib_config cfg; struct fib_table *tb; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); if (err < 0) goto errout; @@ -559,10 +563,14 @@ errout: int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + net_t net = skb->sk->sk_net; struct fib_config cfg; struct fib_table *tb; int err; + if (!net_eq(net, init_net())) + return 
-EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); if (err < 0) goto errout; @@ -580,12 +588,16 @@ errout: int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_node *node; int dumped = 0; + if (!net_eq(net, init_net())) + return 0; + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) return ip_rt_dump(skb, cb); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b837c33..f2c50e0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -279,6 +279,11 @@ nla_put_failure: int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; + + if (!net_eq(net, init_net())) + return 0; + return fib_rules_dump(skb, cb, AF_INET); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7afe698..83b7312 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2951,11 +2951,15 @@ static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3003,6 +3007,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3012,6 +3017,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 ifa_flags; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return 
err; @@ -3278,26 +3286,42 @@ done: static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; enum addr_type_t type = UNICAST_ADDR; + + if (!net_eq(net, init_net())) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; enum addr_type_t type = MULTICAST_ADDR; + + if (!net_eq(net, init_net())) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; enum addr_type_t type = ANYCAST_ADDR; + + if (!net_eq(net, init_net())) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + net_t net = in_skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; @@ -3306,6 +3330,9 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct sk_buff *skb; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) goto errout; @@ -3472,11 +3499,15 @@ nla_put_failure: static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int idx, err; int s_idx = cb->args[0]; struct net_device *dev; struct inet6_dev *idev; + if (!net_eq(net, init_net())) + return 0; + read_lock(&per_net(dev_base_lock, init_net())); for (dev=per_net(dev_base, init_net()), idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 0862809..80d6de6 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -223,6 +223,11 @@ nla_put_failure: int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; + + if (!net_eq(net, init_net())) + return 0; + 
return fib_rules_dump(skb, cb, AF_INET6); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 96d8310..97814ed 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -362,6 +362,7 @@ end: int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; @@ -370,6 +371,9 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *node; int res = 0; + if (!net_eq(net, init_net())) + return 0; + s_h = cb->args[0]; s_e = cb->args[1]; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4519006..02fd8ae 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1985,9 +1985,13 @@ errout: int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + net_t net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -1997,9 +2001,13 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + net_t net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2132,6 +2140,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + net_t net = in_skb->sk->sk_net; struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2139,6 +2148,9 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) struct flowi fl; int err, iif = 0; + if (!net_eq(net, init_net())) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 835070e..18d8f68 
100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -942,10 +942,14 @@ done: static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + net_t net = skb->sk->sk_net; struct rtattr **tca = arg; u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; + if (!net_eq(net, init_net())) + return -EINVAL; + if (tca[TCA_ACT_TAB-1] == NULL) { printk("tc_ctl_action: received NO action attribs\n"); return -EINVAL; @@ -1015,6 +1019,7 @@ find_dump_kind(struct nlmsghdr *n) static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct rtattr *x; @@ -1024,6 +1029,9 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); struct rtattr *kind = find_dump_kind(cb->nlh); + if (!net_eq(net, init_net())) + return 0; + if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 19935f9..09a3ec8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -129,6 +129,7 @@ static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp) static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + net_t net = skb->sk->sk_net; struct rtattr **tca; struct tcmsg *t; u32 protocol; @@ -145,6 +146,9 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) unsigned long fh; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + replay: tca = arg; t = NLMSG_DATA(n); @@ -385,6 +389,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walke static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int t; int s_t; struct net_device *dev; @@ -395,6 +400,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) struct Qdisc_class_ops *cops; struct tcf_dump_args arg; + if 
(!net_eq(net, init_net())) + return 0; + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; if ((dev = dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 912e8e1..7e33f73 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -578,6 +578,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + net_t net = skb->sk->sk_net; struct tcmsg *tcm = NLMSG_DATA(n); struct rtattr **tca = arg; struct net_device *dev; @@ -586,6 +587,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + if ((dev = __dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) return -ENODEV; @@ -639,6 +643,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + net_t net = skb->sk->sk_net; struct tcmsg *tcm; struct rtattr **tca; struct net_device *dev; @@ -646,6 +651,9 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *q, *p; int err; + if (!net_eq(net, init_net())) + return -EINVAL; + replay: /* Reinit, just in case something touches this. 
*/ tcm = NLMSG_DATA(n); @@ -851,11 +859,15 @@ err_out: static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; struct Qdisc *q; + if (!net_eq(net, init_net())) + return 0; + s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; read_lock(&per_net(dev_base_lock, init_net())); @@ -900,6 +912,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + net_t net = skb->sk->sk_net; struct tcmsg *tcm = NLMSG_DATA(n); struct rtattr **tca = arg; struct net_device *dev; @@ -912,6 +925,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; + if (!net_eq(net, init_net())) + return -EINVAL; + if ((dev = __dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) return -ENODEV; @@ -1086,6 +1102,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int t; int s_t; struct net_device *dev; @@ -1093,6 +1110,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct qdisc_dump_args arg; + if (!net_eq(net, init_net())) + return 0; + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; if ((dev = dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:17 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:17 -0700 Subject: [openib-general] [PATCH RFC 15/31] net: Make the loopback device per network namespace In-Reply-To: References: Message-ID: <11697516361068-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This patch makes the loopback_dev per network namespace. 
The loopback device registers itself as a pernet_device so we can register the new loopback_dev instance when we add a new network namespace and so we can unregister the loopback device when we destroy the network namespace. Currently the loopback device statistics are kept across all loopback devices, a minor glitch that will not affect correct operation but something we may want to fix. This patch modifies all users of the loopback_dev so they access it as per_net(loopback_dev, init_net()), keeping all of the code compiling and working. A later pass will be needed to update the users to use something other than the initial network namespace. The only non-trivial modification was the ipv6 code in route.c as the loopback_dev can no longer be used in static initializers, and even that change was very simple. Signed-off-by: Eric W. Biederman --- drivers/net/loopback.c | 24 ++++++++++++++++++++---- include/linux/netdevice.h | 2 +- net/core/dst.c | 8 ++++---- net/decnet/dn_dev.c | 4 ++-- net/decnet/dn_route.c | 14 +++++++------- net/ipv4/devinet.c | 4 ++-- net/ipv4/ipconfig.c | 8 +++++--- net/ipv4/ipvs/ip_vs_core.c | 2 +- net/ipv4/route.c | 18 +++++++++--------- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/addrconf.c | 8 ++++---- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/route.c | 24 +++++++++++++++--------- net/ipv6/xfrm6_policy.c | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 15 files changed, 75 insertions(+), 51 deletions(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 22b672d..e9abf3f 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -57,6 +57,7 @@ #include #include #include +#include struct pcpu_lstats { unsigned long packets; @@ -204,7 +205,7 @@ static const struct ethtool_ops loopback_ethtool_ops = { * The loopback device is special. There is only one instance and * it is statically allocated. Don't do this for other devices. 
*/ -struct net_device loopback_dev = { +DEFINE_PER_NET(struct net_device, loopback_dev) = { .name = "lo", .get_stats = &get_stats, .priv = &loopback_stats, @@ -228,13 +229,28 @@ struct net_device loopback_dev = { .ethtool_ops = &loopback_ethtool_ops, }; +static int loopback_net_init(net_t net) +{ + per_net(loopback_dev, net).nd_net = net; + return register_netdev(&per_net(loopback_dev, net)); +} + +static void loopback_net_exit(net_t net) +{ + unregister_netdev(&per_net(loopback_dev, net)); +} + +static struct pernet_operations loopback_net_ops = { + .init = loopback_net_init, + .exit = loopback_net_exit, +}; + /* Setup and register the loopback device. */ static int __init loopback_init(void) { - loopback_dev.nd_net = init_net(); - return register_netdev(&loopback_dev); + return register_pernet_device(&loopback_net_ops); }; module_init(loopback_init); -EXPORT_SYMBOL(loopback_dev); +EXPORT_PER_NET_SYMBOL(loopback_dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e28671..73931a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -570,7 +570,7 @@ struct packet_type { #include #include -extern struct net_device loopback_dev; /* The loopback */ +DECLARE_PER_NET(struct net_device, loopback_dev); /* The loopback */ extern struct net_device *dev_base; /* All devices */ extern rwlock_t dev_base_lock; /* Device list lock */ diff --git a/net/core/dst.c b/net/core/dst.c index 8c4a272..3435771 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -241,13 +241,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, dst->input = dst_discard_in; dst->output = dst_discard_out; } else { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = &per_net(loopback_dev, init_net()); + dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = &loopback_dev; + dst->neighbour->dev = &per_net(loopback_dev, init_net()); dev_put(dev); - 
dev_hold(&loopback_dev); + dev_hold(dst->neighbour->dev); } } } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 19b1469..dbaf001 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -866,10 +866,10 @@ last_chance: rv = dn_dev_get_first(dev, addr); read_unlock(&dev_base_lock); dev_put(dev); - if (rv == 0 || dev == &loopback_dev) + if (rv == 0 || dev == &per_net(loopback_dev, init_net())) return rv; } - dev = &loopback_dev; + dev = &per_net(loopback_dev, init_net()); dev_hold(dev); goto last_chance; } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 4263cd9..b553cd4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -887,7 +887,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old .scope = RT_SCOPE_UNIVERSE, } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = per_net(loopback_dev, init_net()).ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL; @@ -904,7 +904,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), dn_ntohs(oldflp->fld_src), - oldflp->mark, loopback_dev.ifindex, oldflp->oif); + oldflp->mark, per_net(loopback_dev, init_net()).ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ if (oldflp->oif) { @@ -955,7 +955,7 @@ source_ok: err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &per_net(loopback_dev, init_net()); dev_hold(dev_out); if (!fl.fld_dst) { fl.fld_dst = @@ -964,7 +964,7 @@ source_ok: if (!fl.fld_dst) goto out; } - fl.oif = loopback_dev.ifindex; + fl.oif = per_net(loopback_dev, init_net()).ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -1010,7 +1010,7 @@ source_ok: if (dev_out) dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { - dev_out = &loopback_dev; + dev_out = &per_net(loopback_dev, 
init_net()); res.type = RTN_LOCAL; } else { dev_out = neigh->dev; @@ -1031,7 +1031,7 @@ source_ok: /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &per_net(loopback_dev, init_net()); dev_hold(dev_out); res.type = RTN_LOCAL; goto select_source; @@ -1067,7 +1067,7 @@ select_source: fl.fld_src = fl.fld_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &per_net(loopback_dev, init_net()); dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a7d991d..201442c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1056,7 +1056,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ASSERT_RTNL(); if (!in_dev) { - if (event == NETDEV_REGISTER && dev == &loopback_dev) { + if (event == NETDEV_REGISTER && dev == &per_net(loopback_dev, init_net())) { in_dev = inetdev_init(dev); if (!in_dev) panic("devinet: Failed to create loopback\n"); @@ -1074,7 +1074,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, case NETDEV_UP: if (dev->mtu < 68) break; - if (dev == &loopback_dev) { + if (dev == &per_net(loopback_dev, init_net())) { struct in_ifaddr *ifa; if ((ifa = inet_alloc_ifa()) != NULL) { ifa->ifa_local = diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 91b5729..ee77938 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -185,16 +185,18 @@ static int __init ic_open_devs(void) struct ic_device *d, **last; struct net_device *dev; unsigned short oflags; + struct net_device *lo; last = &ic_first_dev; rtnl_lock(); /* bring loopback device up first */ - if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) - printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); + lo = &per_net(loopback_dev, init_net()); + if (dev_change_flags(lo, lo->flags | IFF_UP) < 0) + printk(KERN_ERR 
"IP-Config: Failed to open %s\n", lo->name); for (dev = dev_base; dev; dev = dev->next) { - if (dev == &loopback_dev) + if (dev == lo) continue; if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : (!(dev->flags & IFF_LOOPBACK) && diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3425752..2e1e41f 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -963,7 +963,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, * ... don't know why 1st test DOES NOT include 2nd (?) */ if (unlikely(skb->pkt_type != PACKET_HOST - || skb->dev == &loopback_dev || skb->sk)) { + || skb->dev == &per_net(loopback_dev, init_net()) || skb->sk)) { IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", skb->pkt_type, skb->nh.iph->protocol, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8be7506..d23a0d7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1498,8 +1498,8 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (dev != &loopback_dev && idev && idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + if (dev != &per_net(loopback_dev, init_net()) && idev && idev->dev == dev) { + struct in_device *loopback_idev = in_dev_get(&per_net(loopback_dev, init_net())); if (loopback_idev) { rt->idev = loopback_idev; in_dev_put(idev); @@ -1651,7 +1651,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &per_net(loopback_dev, init_net()); dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; @@ -1969,7 +1969,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { int result; result = fib_validate_source(saddr, daddr, tos, - loopback_dev.ifindex, + 
per_net(loopback_dev, init_net()).ifindex, dev, &spec_dst, &itag); if (result < 0) goto martian_source; @@ -2036,7 +2036,7 @@ local_input: #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &per_net(loopback_dev, init_net()); dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->rt_gateway = daddr; @@ -2375,7 +2375,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) RT_SCOPE_UNIVERSE), } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = per_net(loopback_dev, init_net()).ifindex, .oif = oldflp->oif }; struct fib_result res; unsigned flags = 0; @@ -2469,9 +2469,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &per_net(loopback_dev, init_net()); dev_hold(dev_out); - fl.oif = loopback_dev.ifindex; + fl.oif = per_net(loopback_dev, init_net()).ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2516,7 +2516,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_src = fl.fl4_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &per_net(loopback_dev, init_net()); dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fb9f69c..39a0ba2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -289,7 +289,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct in_device *loopback_idev = in_dev_get(&per_net(loopback_dev, init_net())); BUG_ON(!loopback_idev); do { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7be542f..c9fa27a 100644 --- a/net/ipv6/addrconf.c +++ 
b/net/ipv6/addrconf.c @@ -2365,7 +2365,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - if (dev == &loopback_dev && how == 1) + if (dev == &per_net(loopback_dev, init_net()) && how == 1) how = 0; rt6_ifdown(dev); @@ -4074,13 +4074,13 @@ int __init addrconf_init(void) * device and it being up should be removed. */ rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) + if (!ipv6_add_dev(&per_net(loopback_dev, init_net()))) err = -ENOMEM; rtnl_unlock(); if (err) return err; - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_null_entry.rt6i_idev = in6_dev_get(&per_net(loopback_dev, init_net())); register_netdevice_notifier(&ipv6_dev_notf); @@ -4121,7 +4121,7 @@ void __exit addrconf_cleanup(void) continue; addrconf_ifdown(dev, 1); } - addrconf_ifdown(&loopback_dev, 2); + addrconf_ifdown(&per_net(loopback_dev, init_net()), 2); /* * Check hash table. diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 311eae8..a80bbee 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -170,7 +170,7 @@ static inline void send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) { if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = &loopback_dev; + skb_in->dev = &per_net(loopback_dev, init_net()); icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8c9fef9..6805c39 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -125,7 +125,7 @@ struct rt6_info ip6_null_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -ENETUNREACH, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -151,7 +151,7 @@ struct rt6_info ip6_prohibit_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -EACCES, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -171,7 +171,7 @@ 
struct rt6_info ip6_blk_hole_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -211,8 +211,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + if (dev != &per_net(loopback_dev, init_net()) && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(&per_net(loopback_dev, init_net())); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -1103,12 +1103,12 @@ int ip6_route_add(struct fib6_config *cfg) if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ - if (dev != &loopback_dev) { + if (dev != &per_net(loopback_dev, init_net())) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = &loopback_dev; + dev = &per_net(loopback_dev, init_net()); dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1803,13 +1803,13 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(&loopback_dev); + dev_hold(&per_net(loopback_dev, init_net())); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = &loopback_dev; + rt->rt6i_dev = &per_net(loopback_dev, init_net()); rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); @@ -2457,6 +2457,12 @@ void __init ip6_route_init(void) ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + /* Perform the 
initialization we can't perform at compile time */ + ip6_null_entry.u.dst.dev = &per_net(loopback_dev, init_net()); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + ip6_prohibit_entry.u.dst.dev = &per_net(loopback_dev, init_net()); + ip6_blk_hole_entry.u.dst.dev = &per_net(loopback_dev, init_net()); +#endif fib6_init(); #ifdef CONFIG_PROC_FS p = proc_net_create(init_net(), "ipv6_route", 0, rt6_proc_info); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8dffd4d..2608c75 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -354,7 +354,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + struct inet6_dev *loopback_idev = in6_dev_get(&per_net(loopback_dev, init_net())); BUG_ON(!loopback_idev); do { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0248343..51ab8ac 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1799,8 +1799,8 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = &per_net(loopback_dev, init_net()); + dev_hold(dst->dev); dev_put(dev); } } -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:15 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:15 -0700 Subject: [openib-general] [PATCH RFC 13/31] net: Make device event notification network namespace safe In-Reply-To: References: Message-ID: <11697516352813-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Every user of the network device notifiers is either a protocol stack or a pseudo device. 
If a protocol stack that does not have support for multiple network namespaces receives an event for a device that is not in the initial network namespace it quite possibly can get confused and do the wrong thing. To avoid problems until all of the protocol stacks are converted this patch modifies all netdev event handlers to ignore events on devices that are not in the initial network namespace. As the rest of the code is made network namespace aware these checks can be removed. Signed-off-by: Eric W. Biederman --- arch/ia64/hp/sim/simeth.c | 3 +++ drivers/net/bonding/bond_main.c | 3 +++ drivers/net/hamradio/bpqether.c | 3 +++ drivers/net/pppoe.c | 3 +++ drivers/net/wan/dlci.c | 3 +++ drivers/net/wan/hdlc.c | 3 +++ drivers/net/wan/lapbether.c | 3 +++ net/8021q/vlan.c | 4 ++++ net/appletalk/aarp.c | 3 +++ net/appletalk/ddp.c | 3 +++ net/atm/clip.c | 3 +++ net/atm/mpc.c | 4 ++++ net/ax25/af_ax25.c | 3 +++ net/bridge/br_notify.c | 4 ++++ net/core/dst.c | 4 ++++ net/core/fib_rules.c | 4 ++++ net/core/pktgen.c | 3 +++ net/core/rtnetlink.c | 4 ++++ net/decnet/af_decnet.c | 3 +++ net/econet/af_econet.c | 3 +++ net/ipv4/arp.c | 3 +++ net/ipv4/devinet.c | 3 +++ net/ipv4/fib_frontend.c | 3 +++ net/ipv4/ipmr.c | 7 ++++++- net/ipv4/multipath_drr.c | 3 +++ net/ipv4/netfilter/ip_queue.c | 3 +++ net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +++ net/ipv6/addrconf.c | 3 +++ net/ipv6/ndisc.c | 3 +++ net/ipv6/netfilter/ip6_queue.c | 3 +++ net/ipx/af_ipx.c | 3 +++ net/netfilter/nfnetlink_queue.c | 3 +++ net/netrom/af_netrom.c | 3 +++ net/packet/af_packet.c | 3 +++ net/rose/af_rose.c | 3 +++ net/tipc/eth_media.c | 3 +++ net/wanrouter/af_wanpipe.c | 3 +++ net/x25/af_x25.c | 3 +++ net/xfrm/xfrm_policy.c | 5 +++++ security/selinux/netif.c | 3 +++ 40 files changed, 131 insertions(+), 1 deletions(-) diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index 424e925..1cbaa9e 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -300,6 +300,9 @@ 
simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) return NOTIFY_DONE; } + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; /* diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9c70568..3e04f58 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3325,6 +3325,9 @@ static int bond_netdev_event(struct notifier_block *this, unsigned long event, v { struct net_device *event_dev = (struct net_device *)ptr; + if (!net_eq(event_dev->nd_net, init_net())) + return NOTIFY_DONE; + dprintk("event_dev: %s, event: %lx\n", (event_dev ? event_dev->name : "None"), event); diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index c513e90..8826a96 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -564,6 +564,9 @@ static int bpq_device_event(struct notifier_block *this,unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (!dev_is_ethdev(dev)) return NOTIFY_DONE; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index caf8ca3..3618862 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -299,6 +299,9 @@ static int pppoe_device_event(struct notifier_block *this, { struct net_device *dev = (struct net_device *) ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + /* Only look at sockets that are using this specific device. 
*/ switch (event) { case NETDEV_CHANGEMTU: diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 7369875..f826494 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -513,6 +513,9 @@ static int dlci_dev_event(struct notifier_block *unused, { struct net_device *dev = (struct net_device *) ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { struct dlci_local *dlp; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index f3bf160..e56e0a1 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -110,6 +110,9 @@ static int hdlc_device_event(struct notifier_block *this, unsigned long event, unsigned long flags; int on; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (dev->get_stats != hdlc_get_stats) return NOTIFY_DONE; /* not an HDLC device */ diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index c1de21e..a3560a9 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -395,6 +395,9 @@ static int lapbeth_device_event(struct notifier_block *this, struct lapbethdev *lapbeth; struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (!dev_is_ethdev(dev)) return NOTIFY_DONE; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 18fcb9f..f80cfdd 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "vlan.h" @@ -595,6 +596,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (!grp) goto out; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 85c4dbc..6fd58a6 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -327,6 +327,9 @@ static int aarp_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; int ct; + 
if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { write_lock_bh(&aarp_lock); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index f4ff8aa..61f36b1 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -649,6 +649,9 @@ static int ddp_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) /* Discard any use of this */ atalk_dev_down(dev); diff --git a/net/atm/clip.c b/net/atm/clip.c index 5f8a1d2..7d150c2 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -629,6 +629,9 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = arg; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { neigh_ifdown(&clip_tbl, dev); return NOTIFY_DONE; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index c18f737..4fdb1af 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -953,6 +953,10 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo struct lec_priv *priv; dev = (struct net_device *)dev_ptr; + + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (dev->name == NULL || strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index cdbf3f6..8c187a6 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -105,6 +105,9 @@ static int ax25_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + /* Reject non AX.25 devices */ if (dev->type != ARPHRD_AX25) return NOTIFY_DONE; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 2027849..0d56bc2 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -15,6 +15,7 @@ #include #include +#include 
#include "br_private.h" @@ -36,6 +37,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff --git a/net/core/dst.c b/net/core/dst.c index 836ec66..8c4a272 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -256,6 +257,9 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void struct net_device *dev = ptr; struct dst_entry *dst; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 1df6cd4..ffc31c1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -11,6 +11,7 @@ #include #include #include +#include #include static LIST_HEAD(rules_ops); @@ -441,6 +442,9 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct fib_rules_ops *ops; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + ASSERT_RTNL(); rcu_read_lock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index ab48533..7796b39 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1892,6 +1892,9 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = (struct net_device *)(ptr); + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. 
*/ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e76539a..7841e89 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -829,6 +829,10 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5e8042f..b27b2ac 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2086,6 +2086,9 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch(event) { case NETDEV_UP: dn_dev_up(dev); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 0baffda..cbf87f4 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1121,6 +1121,9 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = (struct net_device *)data; struct ec_device *edev; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch (msg) { case NETDEV_UNREGISTER: /* A device has gone down - kill any data we hold for it. 
*/ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 95a34c7..0d23fb2 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1206,6 +1206,9 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 216cf2b..a7d991d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1050,6 +1050,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + ASSERT_RTNL(); if (!in_dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d47b72a..049c370 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -860,6 +860,9 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); return NOTIFY_DONE; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index af50394..9afaa13 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1075,13 +1075,18 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; struct vif_device *v; int ct; + + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; for(ct=0;ct<maxvif;ct++,v++) { - if (v->dev==ptr) + if (v->dev==dev) vif_delete(ct); } return NOTIFY_DONE; diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index 252e837..b14d6ae 100644 --- a/net/ipv4/multipath_drr.c +++
b/net/ipv4/multipath_drr.c @@ -87,6 +87,9 @@ static int drr_dev_event(struct notifier_block *this, struct net_device *dev = ptr; int devidx; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index aae660c..8650a57 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -567,6 +567,9 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d669685..41fe6b5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -152,6 +152,9 @@ static int masq_device_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { /* Device was downed. 
Search entire table for conntracks which were associated with that device, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 52bd4dd..7be542f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2235,6 +2235,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch(event) { case NETDEV_UP: case NETDEV_CHANGE: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6a9f616..9b3495f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1586,6 +1586,9 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 45b64a5..f6e108c 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -557,6 +557,9 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 5c5f2cd..f2674fe 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -347,6 +347,9 @@ static int ipxitf_device_event(struct notifier_block *notifier, struct net_device *dev = ptr; struct ipx_interface *i, *tmp; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event != NETDEV_DOWN && event != NETDEV_UP) goto out; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index a88a017..59bf595 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -734,6 +734,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct 
net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) nfqnl_dev_drop(dev->ifindex); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3fa3f1a..6965a1a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -106,6 +106,9 @@ static int nr_device_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index aa298c3..6e3b947 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1439,6 +1439,9 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct hlist_node *node; struct net_device *dev = (struct net_device*)data; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + read_lock(&packet_sklist_lock); sk_for_each(sk, node, &packet_sklist) { struct packet_sock *po = pkt_sk(sk); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 7d5e593..dad50d3 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -197,6 +197,9 @@ static int rose_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index b181cf9..c6f64de 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -194,6 +194,9 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + while ((eb_ptr->dev != dev)) { if (++eb_ptr == stop) return NOTIFY_DONE; /* couldn't find
device */ diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index 542c737..f9b896c 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -1714,6 +1714,9 @@ static int wanpipe_notifier(struct notifier_block *this, unsigned long msg, void hlist_node *node; struct net_device *dev = (struct net_device *)data; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + sk_for_each(sk, node, &wanpipe_sklist) { struct wanpipe_opt *po = wp_sk(sk); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 6602a34..f4fad10 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -190,6 +190,9 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct x25_neigh *nb; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (dev->type == ARPHRD_X25 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) || dev->type == ARPHRD_ETHER diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index bebd40e..0248343 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2188,6 +2188,11 @@ static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo) static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + switch (event) { case NETDEV_DOWN: xfrm_flush_bundles(); diff --git a/security/selinux/netif.c b/security/selinux/netif.c index b10c34e..45c422f 100644 --- a/security/selinux/netif.c +++ b/security/selinux/netif.c @@ -234,6 +234,9 @@ static int sel_netif_netdev_notifier_handler(struct notifier_block *this, { struct net_device *dev = ptr; + if (!net_eq(dev->nd_net, init_net())) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) sel_netif_kill(dev); -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:06 2007 From: ebiederm at xmission.com (Eric W. 
Biederman) Date: Thu, 25 Jan 2007 12:00:06 -0700 Subject: [openib-general] [PATCH RFC 4/31] net: Add a network namespace tag to struct net_device In-Reply-To: References: Message-ID: <11697516343777-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Please note that network devices do not increase the reference count on the network namespace. They are inside the network namespace and so the network namespace tag is in the nature of a back pointer and so getting and putting the network namespace is unnecessary. Signed-off-by: Eric W. Biederman --- include/linux/netdevice.h | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4cb8b39..6a1579d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -38,6 +38,7 @@ #include #include #include +#include struct vlan_group; struct ethtool_ops; @@ -525,6 +526,9 @@ struct net_device void (*poll_controller)(struct net_device *dev); #endif + /* Network namespace this network device is inside */ + net_t nd_net; + /* bridge stuff */ struct net_bridge_port *br_port; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:07 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:07 -0700 Subject: [openib-general] [PATCH RFC 5/31] net: Add a network namespace parameter to struct sock In-Reply-To: References: Message-ID: <11697516341105-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Sockets need to get a reference to their network namespace, or possibly a simple hold if someone registers on the network namespace notifier and will free the sockets when the namespace is going to be destroyed. Signed-off-by: Eric W.
Biederman --- include/net/inet_timewait_sock.h | 1 + include/net/sock.h | 3 +++ 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index f7be1ac..162c2b9 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -115,6 +115,7 @@ struct inet_timewait_sock { #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot +#define tw_net __tw_common.skc_net volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ unsigned char tw_rcv_wscale; diff --git a/include/net/sock.h b/include/net/sock.h index 03684e7..5bf6bb5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -105,6 +105,7 @@ struct proto; * @skc_refcnt: reference count * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family + * @skc_net: reference to the network namespace of this socket * * This is the minimal network layer representation of sockets, the header * for struct sock and struct inet_timewait_sock. @@ -119,6 +120,7 @@ struct sock_common { atomic_t skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; + net_t skc_net; }; /** @@ -195,6 +197,7 @@ struct sock { #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot +#define sk_net __sk_common.skc_net unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:08 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:08 -0700 Subject: [openib-general] [PATCH RFC 6/31] net: Add a helper to get a reference to the initial network namespace. In-Reply-To: References: Message-ID: <11697516344043-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted The initial network namespace is special and we need to use it for various things. 
Probably the biggest initial use will be to ensure code that can't cope with multiple namespaces only sees the initial network namespace. For that reason and because getting at the initial network namespace is just a little clumsy add a helper function. Signed-off-by: Eric W. Biederman --- include/net/net_namespace.h | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 06a9ba1..9208e2e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -27,6 +27,12 @@ struct net_namespace_head { struct work_struct work; }; +/* Get the initial network namespace */ +static inline net_t init_net(void) +{ + return init_nsproxy.net_ns; +} + static inline net_t get_net(net_t net) { return net; } static inline void put_net(net_t net) {} static inline net_t hold_net(net_t net) { return net; } -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:28 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:28 -0700 Subject: [openib-general] [PATCH RFC 26/31] net: Make the netlink methods in rtnetlink handle multiple network namespaces In-Reply-To: References: Message-ID: <11697516382367-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted It turns out after a quick audit that except for removing the checks there is really nothing to do here. Signed-off-by: Eric W. 
Biederman --- net/core/rtnetlink.c | 21 +++------------------ 1 files changed, 3 insertions(+), 18 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 29a81bf..0a42258 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -409,9 +409,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int s_idx = cb->args[0]; struct net_device *dev; - if (!net_eq(net, init_net())) - return 0; - read_lock(&per_net(dev_base_lock, net)); for (dev=per_net(dev_base, net), idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) @@ -446,9 +443,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; - if (!net_eq(net, init_net())) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) goto errout; @@ -622,9 +616,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int iw_buf_len = 0; int err; - if (!net_eq(net, init_net())) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) return err; @@ -673,13 +664,9 @@ errout: static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { - net_t net = skb->sk->sk_net; int idx; int s_idx = cb->family; - if (!net_eq(net, init_net())) - return 0; - if (s_idx == 0) s_idx = 1; for (idx=1; idxnd_net; struct sk_buff *skb; int err = -ENOBUFS; @@ -712,10 +700,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) /* failure implies BUG in if_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, init_net(), 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(init_net(), RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } /* Protected by RTNL sempahore. 
*/ @@ -862,9 +850,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = ptr; - if (!net_eq(dev->nd_net, init_net())) - return NOTIFY_DONE; - switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:30 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:30 -0700 Subject: [openib-general] [PATCH RFC 28/31] net: Make the SOMAXCONN sysctl per network namespace In-Reply-To: References: Message-ID: <1169751639767-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Signed-off-by: Eric W. Biederman --- include/linux/socket.h | 3 ++- net/core/sysctl_net_core.c | 16 ++++++++-------- net/socket.c | 7 ++++--- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 92cd38e..aa159ea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -23,8 +23,9 @@ struct __kernel_sockaddr_storage { #include /* iovec support */ #include /* pid_t */ #include /* __user */ +#include -extern int sysctl_somaxconn; +DECLARE_PER_NET(int, sysctl_somaxconn); #ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 90f2a39..14eca68 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -90,14 +90,6 @@ ctl_table core_table[] = { }, #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_SOMAXCONN, - .procname = "somaxconn", - .data = &sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, @@ -127,5 +119,13 @@ DEFINE_PER_NET(struct ctl_table, multi_core_table[]) = { .proc_handler = &proc_dointvec }, #endif /* CONFIG_XFRM */ + { + .ctl_name = NET_CORE_SOMAXCONN, + .procname = "somaxconn", + 
.data = &__per_net_base(sysctl_somaxconn), + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, {} }; diff --git a/net/socket.c b/net/socket.c index 7371654..ab2aeea 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1305,7 +1305,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) * ready for listening. */ -int sysctl_somaxconn __read_mostly = SOMAXCONN; +DEFINE_PER_NET(int, sysctl_somaxconn)= SOMAXCONN; asmlinkage long sys_listen(int fd, int backlog) { @@ -1314,8 +1314,9 @@ asmlinkage long sys_listen(int fd, int backlog) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - if ((unsigned)backlog > sysctl_somaxconn) - backlog = sysctl_somaxconn; + net_t net = sock->sk->sk_net; + if ((unsigned)backlog > per_net(sysctl_somaxconn, net)) + backlog = per_net(sysctl_somaxconn, net); err = security_socket_listen(sock, backlog); if (!err) -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:27 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:27 -0700 Subject: [openib-general] [PATCH RFC 25/31] net: Make wireless netlink event generation handle multiple network namespaces In-Reply-To: References: Message-ID: <11697516381204-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Signed-off-by: Eric W. 
Biederman --- net/core/wireless.c | 15 ++++++++++----- 1 files changed, 10 insertions(+), 5 deletions(-) diff --git a/net/core/wireless.c b/net/core/wireless.c index 9036359..d534617 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -1934,8 +1934,13 @@ static void wireless_nlevent_process(unsigned long data) { struct sk_buff *skb; - while ((skb = skb_dequeue(&wireless_nlevent_queue))) - rtnl_notify(skb, init_net(), 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + while ((skb = skb_dequeue(&wireless_nlevent_queue))) { + struct net_device *dev = skb->dev; + net_t net = dev->nd_net; + skb->dev = NULL; + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + dev_put(dev); + } } static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); @@ -1992,9 +1997,6 @@ static inline void rtmsg_iwinfo(struct net_device * dev, struct sk_buff *skb; int size = NLMSG_GOODSIZE; - if (!net_eq(dev->nd_net, init_net())) - return; - skb = alloc_skb(size, GFP_ATOMIC); if (!skb) return; @@ -2004,6 +2006,9 @@ static inline void rtmsg_iwinfo(struct net_device * dev, kfree_skb(skb); return; } + /* Remember the device until we are in process context */ + dev_hold(dev); + skb->dev = dev; NETLINK_CB(skb).dst_group = RTNLGRP_LINK; skb_queue_tail(&wireless_nlevent_queue, skb); tasklet_schedule(&wireless_nlevent_tasklet); -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:24 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:24 -0700 Subject: [openib-general] [PATCH RFC 22/31] net: Add network namespace clone support. In-Reply-To: References: Message-ID: <11697516373288-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This patch allows you to create a new network namespace using sys_clone(...). Signed-off-by: Eric W. 
Biederman --- include/linux/sched.h | 1 + kernel/nsproxy.c | 11 +++++++++++ net/core/net_namespace.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 0 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4463735..9e0f91a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -26,6 +26,7 @@ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ +#define CLONE_NEWNET 0x20000000 /* New network namespace */ /* * Scheduling policies diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 4f3c95a..7861c4c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -20,6 +20,7 @@ #include #include #include +#include struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); EXPORT_SYMBOL_GPL(init_nsproxy); @@ -70,6 +71,7 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig) get_ipc_ns(ns->ipc_ns); if (ns->pid_ns) get_pid_ns(ns->pid_ns); + get_net(ns->net_ns); } return ns; @@ -117,10 +119,18 @@ int copy_namespaces(int flags, struct task_struct *tsk) if (err) goto out_pid; + err = copy_net(flags, tsk); + if (err) + goto out_net; + out: put_nsproxy(old_ns); return err; +out_net: + if (new_ns->pid_ns) + put_pid_ns(new_ns->pid_ns); + out_pid: if (new_ns->ipc_ns) put_ipc_ns(new_ns->ipc_ns); @@ -146,5 +156,6 @@ void free_nsproxy(struct nsproxy *ns) put_ipc_ns(ns->ipc_ns); if (ns->pid_ns) put_pid_ns(ns->pid_ns); + put_net(ns->net_ns); kfree(ns); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 93e3879..cc56105 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -175,6 +175,44 @@ out_undo: goto out; } +int copy_net(int flags, struct task_struct *tsk) +{ + net_t old_net = tsk->nsproxy->net_ns; + net_t new_net; + int err; + + get_net(old_net); + + if (!(flags & CLONE_NEWNET)) + return 0; + + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + err = -ENOMEM; + 
new_net = net_alloc(); + if (null_net(new_net)) + goto out; + + mutex_lock(&net_mutex); + err = setup_net(new_net); + if (err) + goto out_unlock; + + net_lock(); + net_list_append(new_net); + net_unlock(); + + tsk->nsproxy->net_ns = new_net; + +out_unlock: + mutex_unlock(&net_mutex); +out: + put_net(old_net); + return err; +} + void pernet_modcopy(void *pnetdst, const void *src, unsigned long size) { net_t net; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:29 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:29 -0700 Subject: [openib-general] [PATCH RFC 27/31] net: Make the xfrm sysctls per network namespace. In-Reply-To: References: Message-ID: <11697516392951-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted In particular, I moved: /proc/sys/net/core/xfrm_aevent_etime /proc/sys/net/core/xfrm_aevent_rseqth Signed-off-by: Eric W. Biederman --- include/net/xfrm.h | 4 ++-- net/core/sysctl_net_core.c | 37 ++++++++++++++++++------------------- net/xfrm/xfrm_state.c | 8 ++++---- net/xfrm/xfrm_user.c | 10 ++++++---- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e476541..9b2e727 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -24,8 +24,8 @@ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) extern struct sock *xfrm_nl; -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; +DECLARE_PER_NET(u32, sysctl_xfrm_aevent_etime); +DECLARE_PER_NET(u32, sysctl_xfrm_aevent_rseqth); extern struct mutex xfrm_cfg_mutex; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 76f7a29..90f2a39 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -88,24 +88,6 @@ ctl_table core_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, -#ifdef CONFIG_XFRM - { - .ctl_name = NET_CORE_AEVENT_ETIME, - .procname = "xfrm_aevent_etime", - .data =
&sysctl_xfrm_aevent_etime, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_CORE_AEVENT_RSEQTH, - .procname = "xfrm_aevent_rseqth", - .data = &sysctl_xfrm_aevent_rseqth, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { .ctl_name = NET_CORE_SOMAXCONN, @@ -127,6 +109,23 @@ ctl_table core_table[] = { }; DEFINE_PER_NET(struct ctl_table, multi_core_table[]) = { - /* Stub for holding per network namespace sysctls */ +#ifdef CONFIG_XFRM + { + .ctl_name = NET_CORE_AEVENT_ETIME, + .procname = "xfrm_aevent_etime", + .data = &__per_net_base(sysctl_xfrm_aevent_etime), + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_CORE_AEVENT_RSEQTH, + .procname = "xfrm_aevent_rseqth", + .data = &__per_net_base(sysctl_xfrm_aevent_rseqth), + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#endif /* CONFIG_XFRM */ {} }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fdb08d9..3304a2d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -27,11 +27,11 @@ struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); -u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); +DEFINE_PER_NET(u32, sysctl_xfrm_aevent_etime) = XFRM_AE_ETIME; +EXPORT_PER_NET_SYMBOL(sysctl_xfrm_aevent_etime); -u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); +DEFINE_PER_NET(u32, sysctl_xfrm_aevent_rseqth) = XFRM_AE_SEQT_SIZE; +EXPORT_PER_NET_SYMBOL(sysctl_xfrm_aevent_rseqth); /* Each xfrm_state may be linked to two tables: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 55affa7..15e962b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -375,7 +375,8 @@ error: return err; } -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, +static struct xfrm_state 
*xfrm_state_construct(net_t net, + struct xfrm_usersa_info *p, struct rtattr **xfrma, int *errp) { @@ -411,9 +412,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = per_net(sysctl_xfrm_aevent_rseqth, net); /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->replay_maxage = (per_net(sysctl_xfrm_aevent_etime, net)*HZ)/XFRM_AE_ETH_M; x->preplay.bitmap = 0; x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; @@ -437,6 +438,7 @@ error_no_put: static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct rtattr **xfrma) { + net_t net = skb->sk->sk_net; struct xfrm_usersa_info *p = NLMSG_DATA(nlh); struct xfrm_state *x; int err; @@ -446,7 +448,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - x = xfrm_state_construct(p, xfrma, &err); + x = xfrm_state_construct(net, p, xfrma, &err); if (!x) return err; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:19 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:19 -0700 Subject: [openib-general] [PATCH RFC 17/31] net: Factor out __dev_alloc_name from dev_alloc_name In-Reply-To: References: Message-ID: <11697516361051-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted When forcibly changing the network namespace of a device I need something that can generate a name for the device in the new namespace without overwriting the old name. __dev_alloc_name provides me that functionality. Signed-off-by: Eric W. 
Biederman --- net/core/dev.c | 44 +++++++++++++++++++++++++++++++++----------- 1 files changed, 33 insertions(+), 11 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 32fe905..fc0d2af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -655,9 +655,10 @@ int dev_valid_name(const char *name) } /** - * dev_alloc_name - allocate a name for a device - * @dev: device + * __dev_alloc_name - allocate a name for a device + * @net: network namespace to allocate the device name in * @name: name format string + * @buf: scratch buffer and result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -668,18 +669,13 @@ int dev_valid_name(const char *name) * Returns the number of the unit assigned or a negative errno code. */ -int dev_alloc_name(struct net_device *dev, const char *name) +static int __dev_alloc_name(net_t net, const char *name, char buf[IFNAMSIZ]) { int i = 0; - char buf[IFNAMSIZ]; const char *p; const int max_netdevices = 8*PAGE_SIZE; long *inuse; struct net_device *d; - net_t net; - - BUG_ON(null_net(dev->nd_net)); - net = dev->nd_net; p = strnchr(name, IFNAMSIZ-1, '%'); if (p) { @@ -713,10 +709,8 @@ int dev_alloc_name(struct net_device *dev, const char *name) } snprintf(buf, sizeof(buf), name, i); - if (!__dev_get_by_name(net, buf)) { - strlcpy(dev->name, buf, IFNAMSIZ); + if (!__dev_get_by_name(net, buf)) return i; - } /* It is possible to run out of possible slots * when the name is long and there isn't enough space left @@ -725,6 +719,34 @@ int dev_alloc_name(struct net_device *dev, const char *name) return -ENFILE; } +/** + * dev_alloc_name - allocate a name for a device + * @dev: device + * @name: name format string + * + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. 
The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. + */ + +int dev_alloc_name(struct net_device *dev, const char *name) +{ + char buf[IFNAMSIZ]; + net_t net; + int ret; + + BUG_ON(null_net(dev->nd_net)); + net = dev->nd_net; + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + /** * dev_change_name - change name of a device -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:22 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:22 -0700 Subject: [openib-general] [PATCH RFC 20/31] net: Implement CONFIG_NET_NS In-Reply-To: References: Message-ID: <1169751637792-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Add the config option to enable multiple network namespaces. Signed-off-by: Eric W. Biederman --- net/Kconfig | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/net/Kconfig b/net/Kconfig index 7dfc949..4671398 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -27,6 +27,13 @@ if NET menu "Networking options" +config NET_NS + bool "Network namespace support" + depends on EXPERIMENTAL + help + Support what appear to user space as multiple instances of the + network stack. + config NETDEBUG bool "Network packet debugging" help -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:31 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:31 -0700 Subject: [openib-general] [PATCH RFC 29/31] net: Make AF_PACKET handle multiple network namespaces In-Reply-To: References: Message-ID: <1169751639937-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This is done by making all of the relevant global variables per network namespace. 
Signed-off-by: Eric W. Biederman --- net/packet/af_packet.c | 125 +++++++++++++++++++++++++++++++----------------- 1 files changed, 81 insertions(+), 44 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4ac9f9f..c772491 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -152,8 +152,8 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) */ /* List of all packet sockets. */ -static HLIST_HEAD(packet_sklist); -static DEFINE_RWLOCK(packet_sklist_lock); +static DEFINE_PER_NET(rwlock_t, packet_sklist_lock); +static DEFINE_PER_NET(struct hlist_head, packet_sklist); static atomic_t packet_socks_nr; @@ -264,9 +264,6 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct packet_type *pt, struct n struct sock *sk; struct sockaddr_pkt *spkt; - if (!net_eq(dev->nd_net, init_net())) - goto out; - /* * When we registered the protocol we saved the socket in the data * field for just this event. @@ -288,6 +285,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct packet_type *pt, struct n if (skb->pkt_type == PACKET_LOOPBACK) goto out; + if (!net_eq(dev->nd_net, sk->sk_net)) + goto out; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto oom; @@ -359,7 +359,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(init_net(), saddr->spkt_device); + dev = dev_get_by_name(sk->sk_net, saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -475,15 +475,15 @@ static int packet_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_de int skb_len = skb->len; unsigned snaplen; - if (!net_eq(dev->nd_net, init_net())) - goto drop; - if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); + if (!net_eq(dev->nd_net, sk->sk_net)) + goto drop; + skb->dev = dev; if (dev->hard_header) { @@ -583,15 +583,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct packet_type *pt, 
struct net_d unsigned short macoff, netoff; struct sk_buff *copy_skb = NULL; - if (!net_eq(dev->nd_net, init_net())) - goto drop; - if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); + if (!net_eq(dev->nd_net, sk->sk_net)) + goto drop; + if (dev->hard_header) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb->mac.raw); @@ -744,7 +744,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, } - dev = dev_get_by_index(init_net(), ifindex); + dev = dev_get_by_index(sk->sk_net, ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -817,15 +817,17 @@ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; + net_t net; if (!sk) return 0; + net = sk->sk_net; po = pkt_sk(sk); - write_lock_bh(&packet_sklist_lock); + write_lock_bh(&per_net(packet_sklist_lock, net)); sk_del_node_init(sk); - write_unlock_bh(&packet_sklist_lock); + write_unlock_bh(&per_net(packet_sklist_lock, net)); /* * Unhook packet receive handler. 
@@ -943,7 +945,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(init_net(), name); + dev = dev_get_by_name(sk->sk_net, name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); @@ -971,7 +973,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(init_net(), sll->sll_ifindex); + dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex); if (dev == NULL) goto out; } @@ -1000,9 +1002,6 @@ static int packet_create(net_t net, struct socket *sock, int protocol) __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; - if (!net_eq(net, init_net())) - return -EAFNOSUPPORT; - if (!capable(CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW @@ -1052,9 +1051,9 @@ static int packet_create(net_t net, struct socket *sock, int protocol) po->running = 1; } - write_lock_bh(&packet_sklist_lock); - sk_add_node(sk, &packet_sklist); - write_unlock_bh(&packet_sklist_lock); + write_lock_bh(&per_net(packet_sklist_lock, net)); + sk_add_node(sk, &per_net(packet_sklist, net)); + write_unlock_bh(&per_net(packet_sklist_lock, net)); return(0); out: return err; @@ -1158,7 +1157,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(init_net(), pkt_sk(sk)->ifindex); + dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1184,7 +1183,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(init_net(), po->ifindex); + dev = dev_get_by_index(sk->sk_net, po->ifindex); if (dev) { sll->sll_hatype = dev->type; 
sll->sll_halen = dev->addr_len; @@ -1237,7 +1236,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(init_net(), mreq->mr_ifindex); + dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex); if (!dev) goto done; @@ -1291,7 +1290,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(init_net(), ml->ifindex); + dev = dev_get_by_index(sk->sk_net, ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1319,7 +1318,7 @@ static void packet_flush_mclist(struct sock *sk) struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(init_net(), ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } @@ -1438,12 +1437,10 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct sock *sk; struct hlist_node *node; struct net_device *dev = (struct net_device*)data; + net_t net = dev->nd_net; - if (!net_eq(dev->nd_net, init_net())) - return NOTIFY_DONE; - - read_lock(&packet_sklist_lock); - sk_for_each(sk, node, &packet_sklist) { + read_lock(&per_net(packet_sklist_lock, net)); + sk_for_each(sk, node, &per_net(packet_sklist, net)) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -1483,7 +1480,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void break; } } - read_unlock(&packet_sklist_lock); + read_unlock(&per_net(packet_sklist_lock, net)); return NOTIFY_DONE; } @@ -1851,12 +1848,12 @@ static struct notifier_block packet_netdev_notifier = { }; #ifdef CONFIG_PROC_FS -static inline struct sock *packet_seq_idx(loff_t off) +static inline struct sock *packet_seq_idx(net_t net, loff_t off) { struct sock *s; struct hlist_node *node; - sk_for_each(s, node, &packet_sklist) { + sk_for_each(s, node, &per_net(packet_sklist, 
net)) { if (!off--) return s; } @@ -1865,21 +1862,24 @@ static inline struct sock *packet_seq_idx(loff_t off) static void *packet_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&packet_sklist_lock); - return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; + net_t net = net_from_voidp(seq->private); + read_lock(&per_net(packet_sklist_lock, net)); + return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + net_t net = net_from_voidp(seq->private); ++*pos; return (v == SEQ_START_TOKEN) - ? sk_head(&packet_sklist) + ? sk_head(&per_net(packet_sklist, net)) : sk_next((struct sock*)v) ; } static void packet_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&packet_sklist_lock); + net_t net = net_from_voidp(seq->private); + read_unlock(&per_net(packet_sklist_lock, net)); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -1915,7 +1915,22 @@ static struct seq_operations packet_seq_ops = { static int packet_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &packet_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &packet_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = net_to_voidp(get_net(PROC_NET(inode))); + } + return res; +} + +static int packet_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq= file->private_data; + net_t net = net_from_voidp(seq->private); + put_net(net); + return seq_release(inode, file); } static struct file_operations packet_seq_fops = { @@ -1923,15 +1938,37 @@ static struct file_operations packet_seq_fops = { .open = packet_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = packet_seq_release, }; #endif +static int packet_net_init(net_t net) +{ + rwlock_init(&per_net(packet_sklist_lock, net)); + INIT_HLIST_HEAD(&per_net(packet_sklist, net)); + + if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) 
+ return -ENOMEM; + + return 0; +} + +static void packet_net_exit(net_t net) +{ + proc_net_remove(net, "packet"); +} + +static struct pernet_operations packet_net_ops = { + .init = packet_net_init, + .exit = packet_net_exit, +}; + + static void __exit packet_exit(void) { - proc_net_remove(init_net(), "packet"); unregister_netdevice_notifier(&packet_netdev_notifier); + unregister_pernet_subsys(&packet_net_ops); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); } @@ -1944,8 +1981,8 @@ static int __init packet_init(void) goto out; sock_register(&packet_family_ops); + register_pernet_subsys(&packet_net_ops); register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create(init_net(), "packet", 0, &packet_seq_fops); out: return rc; } -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:13 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:13 -0700 Subject: [openib-general] [PATCH RFC 11/31] net: Initialize the network namespace of network devices. In-Reply-To: References: Message-ID: <11697516352135-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Except for carefully selected pseudo devices all network interfaces should start out in the initial network namespace. Ultimately it will be register_netdev that examines what dev->nd_net is set to and places a device in a network namespace. This patch modifies alloc_netdev to initialize the network namespace a device is in with the initial network namespace. This gets it right for the vast majority of devices so their drivers need not be modified and for those few pseudo devices that need something different they can change this parameter before calling register_netdevice. The network namespace parameter on a network device is not reference counted as the devices are inside of a network namespace and cannot remain in that namespace past the lifetime of the network namespace. Signed-off-by: Eric W. 
Biederman --- drivers/net/loopback.c | 1 + net/core/dev.c | 1 + 2 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 2b739fd..22b672d 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -231,6 +231,7 @@ struct net_device loopback_dev = { /* Setup and register the loopback device. */ static int __init loopback_init(void) { + loopback_dev.nd_net = init_net(); return register_netdev(&loopback_dev); }; diff --git a/net/core/dev.c b/net/core/dev.c index 90e4c0e..a3ee150 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3192,6 +3192,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; + dev->nd_net = init_net(); if (sizeof_priv) dev->priv = netdev_priv(dev); -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:04 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:04 -0700 Subject: [openib-general] [PATCH RFC 2/31] net: Implement a place holder network namespace In-Reply-To: References: Message-ID: <11697516332484-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Many of the changes to the network stack will simply be adding a network namespace parameter to function calls or moving variables from globals to being per network namespace. When those variables have initializers that cannot statically compute the proper value, a function that runs at the creation and destruction of network namespaces will need to be registered, and the logic will need to be changed to accommodate that. Adding unconditional support for these functions ensures that even when everything else is compiled out the modified network stack logic will continue to run correctly. This patch adds struct pernet_operations that has an init (constructor) and an exit (destructor) method. 
When registered the init method is called for every existing namespace, and when unregistered the exit method is called for every existing namespace. When a new network namespace is created all of the init methods are called in the order in which they were registered, and when a network namespace is destroyed the exit methods are called in the reverse order in which they were registered. There are two distinct types of pernet_operations recognized: subsys and device. At creation all subsys init functions are called before device init functions, and at destruction all device exit functions are called before subsys exit function. For other ordering the preservation of the order of registration combined with the various kinds of kernel initcalls should be sufficient. Signed-off-by: Eric W. Biederman --- include/net/net_namespace.h | 62 ++++++++++++++++++ net/core/Makefile | 2 +- net/core/net_namespace.c | 149 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+), 1 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h new file mode 100644 index 0000000..06a9ba1 --- /dev/null +++ b/include/net/net_namespace.h @@ -0,0 +1,62 @@ +/* + * Operations on the network namespace + */ +#ifndef __NET_NET_NAMESPACE_H +#define __NET_NET_NAMESPACE_H + +#include +#include +#include +#include + +/* How many bytes in each network namespace should we allocate + * for use by modules when they are loaded. 
+ */ +#ifdef CONFIG_MODULES +# define PER_NET_MODULE_RESERVE 2048 +#else +# define PER_NET_MODULE_RESERVE 0 +#endif + +struct net_namespace_head { + atomic_t count; /* To decided when the network namespace + * should go + */ + atomic_t use_count; /* For references we destroy on demand */ + struct list_head list; + struct work_struct work; +}; + +static inline net_t get_net(net_t net) { return net; } +static inline void put_net(net_t net) {} +static inline net_t hold_net(net_t net) { return net; } +static inline void release_net(net_t net) {} + +#define __per_net_start ((char *)0) +#define __per_net_end ((char *)0) + +static inline int copy_net(int flags, struct task_struct *tsk) { return 0; } + +/* Don't let the list of network namespaces change */ +static inline void net_lock(void) {} +static inline void net_unlock(void) {} + +#define for_each_net(VAR) if (1) + +extern net_t net_template; + +#define NET_CREATE 0x0001 /* A network namespace has been created */ +#define NET_DESTROY 0x0002 /* A network namespace is being destroyed */ + +struct pernet_operations { + struct list_head list; + int (*init)(net_t net); + void (*exit)(net_t net); +}; + +extern int register_pernet_subsys(struct pernet_operations *); +extern void unregister_pernet_subsys(struct pernet_operations *); +extern int register_pernet_device(struct pernet_operations *); +extern void unregister_pernet_device(struct pernet_operations *); + +#endif /* __NET_NET_NAMESPACE_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 73272d5..554dbdc 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o + gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c new file mode 100644 index 0000000..4ae266d --- /dev/null +++ b/net/core/net_namespace.c @@ -0,0 +1,149 @@ +#include +#include + 
+/* + * Our network namespace constructor/destructor lists + */ + +static LIST_HEAD(pernet_list); +static struct list_head *first_device = &pernet_list; +static DEFINE_MUTEX(net_mutex); +net_t net_template; + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + net_t net, undo_net; + int error; + + error = 0; + list_add_tail(&ops->list, list); + for_each_net(net) { + if (ops->init) { + error = ops->init(net); + if (error) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* If I have an error cleanup all namespaces I initialized */ + list_del(&ops->list); + for_each_net(undo_net) { + if (net_eq(undo_net, net)) + goto undone; + if (ops->exit) + ops->exit(undo_net); + } +undone: + goto out; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + net_t net; + + list_del(&ops->list); + for_each_net(net) + if (ops->exit) + ops->exit(net); +} + +/** + * register_pernet_subsys - register a network namespace subsystem + * @ops: pernet operations structure for the subsystem + * + * Register a subsystem which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. 
+ */ +int register_pernet_subsys(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(first_device, ops); + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_subsys); + +/** + * unregister_pernet_subsys - unregister a network namespace subsystem + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_subsys(struct pernet_operations *module) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(module); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_subsys); + +/** + * register_pernet_device - register a network namespace device + * @ops: pernet operations structure for the subsystem + * + * Register a device which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. 
+ */ +int register_pernet_device(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(&pernet_list, ops); + if (!error && (first_device == &pernet_list)) + first_device = &ops->list; + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_device); + +/** + * unregister_pernet_device - unregister a network namespace netdevice + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_device(struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_device); + -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:11 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:11 -0700 Subject: [openib-general] [PATCH RFC 9/31] net: Implement the per network namespace sysctl infrastructure In-Reply-To: References: Message-ID: <11697516353775-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted The user interface is: register_net_sysctl_table and unregister_net_sysctl_table. Very much like the current interface except there is a network namespace parameter. With this, any sysctl in the net_root_table and its subdirectories that is registered with register_net_sysctl_table shows up only to tasks in the same network namespace. All other sysctls continue to be globally visible. Signed-off-by: Eric W. 
Biederman --- include/linux/sysctl.h | 7 ++++ include/net/sock.h | 1 + kernel/sysctl.c | 71 ++++++++++++++++++++++++++++++++++++++++++- net/core/sysctl_net_core.c | 5 +++ net/sysctl_net.c | 20 ++++++++++++ 5 files changed, 102 insertions(+), 2 deletions(-) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 8eba2d2..286e723 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1044,6 +1044,13 @@ struct ctl_table_header * register_sysctl_table(ctl_table * table); void unregister_sysctl_table(struct ctl_table_header * table); +#ifdef CONFIG_NET +#include +extern struct ctl_table_header *register_net_sysctl_table(net_t net, struct ctl_table *table); +extern void unregister_net_sysctl_table(struct ctl_table_header *header); +DECLARE_PER_NET(struct ctl_table, net_root_table[]); +#endif + #else /* __KERNEL__ */ #endif /* __KERNEL__ */ diff --git a/include/net/sock.h b/include/net/sock.h index 5bf6bb5..01a2781 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1414,6 +1414,7 @@ extern void sk_init(void); #ifdef CONFIG_SYSCTL extern struct ctl_table core_table[]; +DECLARE_PER_NET(struct ctl_table, multi_core_table[]); #endif extern int sysctl_optmem_max; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7da313e..ae6a424 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -135,6 +136,10 @@ static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +#ifdef CONFIG_NET +static DEFINE_PER_NET(struct ctl_table_header, net_table_header); +#endif + static ctl_table root_table[]; static struct ctl_table_header root_table_header = { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; @@ -1059,6 +1064,7 @@ struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) { struct ctl_table_header *head; struct list_head *tmp; + net_t net = current->nsproxy->net_ns; 
spin_lock(&sysctl_lock); if (prev) { tmp = &prev->ctl_entry; @@ -1076,6 +1082,10 @@ struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) next: tmp = tmp->next; if (tmp == &root_table_header.ctl_entry) +#ifdef CONFIG_NET + tmp = &per_net(net_table_header, net).ctl_entry; + else if (tmp == &per_net(net_table_header, net).ctl_entry) +#endif break; } spin_unlock(&sysctl_lock); @@ -1290,7 +1300,8 @@ int do_sysctl_strategy (ctl_table *table, * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ -struct ctl_table_header *register_sysctl_table(ctl_table * table) +static struct ctl_table_header *__register_sysctl_table( + struct ctl_table_header *root, ctl_table * table) { struct ctl_table_header *tmp; tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); @@ -1301,11 +1312,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table) tmp->used = 0; tmp->unregistering = NULL; spin_lock(&sysctl_lock); - list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + list_add_tail(&tmp->ctl_entry, &root->ctl_entry); spin_unlock(&sysctl_lock); return tmp; } +struct ctl_table_header *register_sysctl_table(ctl_table *table) +{ + return __register_sysctl_table(&root_table_header, table); +} + /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl_table @@ -1322,6 +1338,57 @@ void unregister_sysctl_table(struct ctl_table_header * header) kfree(header); } +#ifdef CONFIG_NET + +static void *fixup_per_net_addr(net_t net, void *addr) +{ + char *ptr = addr; + if ((ptr >= __per_net_start) && (ptr < __per_net_end)) + ptr += __per_net_offset(net); + return ptr; +} + +static void sysctl_net_table_fixup(net_t net, struct ctl_table *table) +{ + for (; table->ctl_name || table->procname; table++) { + table->child = fixup_per_net_addr(net, table->child); + table->data = fixup_per_net_addr(net, table->data); + table->extra1 = 
fixup_per_net_addr(net, table->extra1); + table->extra2 = fixup_per_net_addr(net, table->extra2); + + /* Whee recursive functions on the kernel stack */ + if (table->child) + sysctl_net_table_fixup(net, table->child); + } +} + +static void sysctl_net_init(net_t net) +{ + struct ctl_table *table = per_net(net_root_table, net); + + sysctl_net_table_fixup(net, table); + per_net(net_table_header, net).ctl_table = table; + + INIT_LIST_HEAD(&per_net(net_table_header, net).ctl_entry); +} + +struct ctl_table_header *register_net_sysctl_table(net_t net, ctl_table *table) +{ + if (!per_net(net_table_header, net).ctl_table) + sysctl_net_init(net); + sysctl_net_table_fixup(net, table); + return __register_sysctl_table(&per_net(net_table_header, net), table); +} +EXPORT_SYMBOL_GPL(register_net_sysctl_table); + +void unregister_net_sysctl_table(struct ctl_table_header *header) +{ + return unregister_sysctl_table(header); +} +EXPORT_SYMBOL_GPL(unregister_net_sysctl_table); +#endif + + #else /* !CONFIG_SYSCTL */ struct ctl_table_header * register_sysctl_table(ctl_table * table, int insert_at_head) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 176ad08..76f7a29 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -125,3 +125,8 @@ ctl_table core_table[] = { }, { .ctl_name = 0 } }; + +DEFINE_PER_NET(struct ctl_table, multi_core_table[]) = { + /* Stub for holding per network namespace sysctls */ + {} +}; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index cd4eafb..359c163 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -54,3 +54,23 @@ struct ctl_table net_table[] = { #endif { 0 }, }; + +DEFINE_PER_NET(struct ctl_table, multi_net_table[]) = { + { + .ctl_name = NET_CORE, + .procname = "core", + .mode = 0555, + .child = __per_net_base(multi_core_table), + }, + {}, +}; + +DEFINE_PER_NET(struct ctl_table, net_root_table[]) = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = 
__per_net_base(multi_net_table), + }, + {}, +}; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:26 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:26 -0700 Subject: [openib-general] [PATCH RFC 24/31] net: Make rtnetlink network namespace aware In-Reply-To: References: Message-ID: <1169751638963-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted After this patch none of the netlink callbacks support anything except the initial network namespace, but the rtnetlink infrastructure now handles multiple network namespaces. Signed-off-by: Eric W. Biederman --- include/linux/rtnetlink.h | 8 ++-- net/bridge/br_netlink.c | 4 +- net/core/fib_rules.c | 4 +- net/core/neighbour.c | 4 +- net/core/rtnetlink.c | 74 +++++++++++++++++++++++++++++++++++--------- net/core/wireless.c | 5 ++- net/decnet/dn_dev.c | 4 +- net/decnet/dn_route.c | 2 +- net/decnet/dn_table.c | 4 +- net/ipv4/devinet.c | 4 +- net/ipv4/fib_semantics.c | 4 +- net/ipv4/ipmr.c | 4 +- net/ipv4/route.c | 2 +- net/ipv6/addrconf.c | 14 ++++---- net/ipv6/route.c | 6 ++-- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 4 +- 17 files changed, 98 insertions(+), 51 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4a629ea..6c8281d 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -581,11 +581,11 @@ struct rtnetlink_link }; extern struct rtnetlink_link * rtnetlink_links[NPROTO]; -extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); -extern int rtnl_unicast(struct sk_buff *skb, u32 pid); -extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, +extern int rtnetlink_send(struct sk_buff *skb, net_t net, u32 pid, u32 group, int echo); +extern int rtnl_unicast(struct sk_buff *skb, net_t net, u32 pid); +extern int rtnl_notify(struct sk_buff *skb, net_t net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags); -extern void rtnl_set_sk_err(u32 group, int error); 
+extern void rtnl_set_sk_err(net_t net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, u32 ts, u32 tsage, long expires, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 85165a1..372fb18 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -94,10 +94,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) /* failure implies BUG in br_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, init_net(), 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + rtnl_set_sk_err(init_net(), RTNLGRP_LINK, err); } /* diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 00b4148..5f65973 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -418,10 +418,10 @@ static void notify_rule_change(int event, struct fib_rule *rule, /* failure implies BUG in fib_rule_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); + err = rtnl_notify(skb, init_net(), pid, ops->nlgroup, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(ops->nlgroup, err); + rtnl_set_sk_err(init_net(), ops->nlgroup, err); } static void attach_rules(struct list_head *rules, struct net_device *dev) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d89c6fe..6f61207 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2453,10 +2453,10 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) /* failure implies BUG in neigh_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, init_net(), 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_NEIGH, err); + rtnl_set_sk_err(init_net(), RTNLGRP_NEIGH, err); } void neigh_app_ns(struct neighbour *n) 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9be586c..29a81bf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -58,7 +58,7 @@ #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ static DEFINE_MUTEX(rtnl_mutex); -static struct sock *rtnl; +static DEFINE_PER_NET(struct sock *, rtnl); void rtnl_lock(void) { @@ -72,9 +72,17 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { + net_t net; mutex_unlock(&rtnl_mutex); - if (rtnl && rtnl->sk_receive_queue.qlen) - rtnl->sk_data_ready(rtnl, 0); + + net_lock(); + for_each_net(net) { + struct sock *rtnl = per_net(rtnl, net); + if (rtnl && rtnl->sk_receive_queue.qlen) + rtnl->sk_data_ready(rtnl, 0); + } + net_unlock(); + netdev_run_todo(); } @@ -151,8 +159,9 @@ size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size) return ret; } -int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int rtnetlink_send(struct sk_buff *skb, net_t net, u32 pid, unsigned group, int echo) { + struct sock *rtnl = per_net(rtnl, net); int err = 0; NETLINK_CB(skb).dst_group = group; @@ -164,14 +173,17 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) return err; } -int rtnl_unicast(struct sk_buff *skb, u32 pid) +int rtnl_unicast(struct sk_buff *skb, net_t net, u32 pid) { + struct sock *rtnl = per_net(rtnl, net); + return nlmsg_unicast(rtnl, skb, pid); } -int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, +int rtnl_notify(struct sk_buff *skb, net_t net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { + struct sock *rtnl = per_net(rtnl, net); int report = 0; if (nlh) @@ -180,8 +192,10 @@ int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, return nlmsg_notify(rtnl, skb, pid, group, report, flags); } -void rtnl_set_sk_err(u32 group, int error) +void rtnl_set_sk_err(net_t net, u32 group, int error) { + struct sock *rtnl = per_net(rtnl, net); + netlink_set_err(rtnl, 0, group, error); } @@ -649,7 +663,7 @@ static int rtnl_getlink(struct sk_buff *skb, 
struct nlmsghdr* nlh, void *arg) /* failure impilies BUG in if_nlmsg_size or wireless_rtnetlink_get */ BUG_ON(err < 0); - err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); errout: kfree(iw_buf); dev_put(dev); @@ -698,10 +712,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) /* failure implies BUG in if_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + err = rtnl_notify(skb, init_net(), 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + rtnl_set_sk_err(init_net(), RTNLGRP_LINK, err); } /* Protected by RTNL sempahore. */ @@ -713,6 +727,7 @@ static int rtattr_max; static __inline__ int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) { + net_t net = skb->sk->sk_net; struct rtnetlink_link *link; struct rtnetlink_link *link_tab; int sz_idx, kind; @@ -767,7 +782,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) if (link->dumpit == NULL) goto err_inval; - if ((*errp = netlink_dump_start(rtnl, skb, nlh, + if ((*errp = netlink_dump_start(per_net(rtnl, net), skb, nlh, link->dumpit, NULL)) != 0) { return -1; } @@ -875,6 +890,36 @@ static struct notifier_block rtnetlink_dev_notifier = { .notifier_call = rtnetlink_event, }; + +static int rtnetlink_net_init(net_t net) +{ + struct sock *sk; + sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, THIS_MODULE); + if (!sk) + return -ENOMEM; + + /* Don't hold an extra reference on the namespace */ + put_net(sk->sk_net); + per_net(rtnl, net) = sk; + return 0; +} + +static void rtnetlink_net_exit(net_t net) +{ + /* At the last minute lie and say this is a socket for the + * initial network namespace. So the socket will be safe to + * free. 
+ */ + per_net(rtnl, net)->sk_net = get_net(init_net()); + sock_put(per_net(rtnl, net)); +} + +static struct pernet_operations rtnetlink_net_ops = { + .init = rtnetlink_net_init, + .exit = rtnetlink_net_exit, +}; + void __init rtnetlink_init(void) { int i; @@ -887,10 +932,9 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(init_net(), NETLINK_ROUTE, RTNLGRP_MAX, - rtnetlink_rcv, THIS_MODULE); - if (rtnl == NULL) - panic("rtnetlink_init: cannot initialize rtnetlink\n"); + if (register_pernet_subsys(&rtnetlink_net_ops)) + panic("rtnetlink_init: cannot initialize rtnetlink\n"); + netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table; diff --git a/net/core/wireless.c b/net/core/wireless.c index d1418bf..9036359 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -1935,7 +1935,7 @@ static void wireless_nlevent_process(unsigned long data) struct sk_buff *skb; while ((skb = skb_dequeue(&wireless_nlevent_queue))) - rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + rtnl_notify(skb, init_net(), 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); } static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); @@ -1992,6 +1992,9 @@ static inline void rtmsg_iwinfo(struct net_device * dev, struct sk_buff *skb; int size = NLMSG_GOODSIZE; + if (!net_eq(dev->nd_net, init_net())) + return; + skb = alloc_skb(size, GFP_ATOMIC); if (!skb) return; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index a09275b..bad972d 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -788,10 +788,10 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) /* failure implies BUG in dn_ifaddr_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); + err = rtnl_notify(skb, init_net(), 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); 
errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); + rtnl_set_sk_err(init_net(), RTNLGRP_DECnet_IFADDR, err); } static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index d942ea0..4b353d4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1604,7 +1604,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) goto out_free; } - return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, init_net(), NETLINK_CB(in_skb).pid); out_free: kfree_skb(skb); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 3ff151c..4090ab5 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -371,10 +371,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, /* failure implies BUG in dn_fib_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + err = rtnl_notify(skb, init_net(), pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); + rtnl_set_sk_err(init_net(), RTNLGRP_DECnet_ROUTE, err); } static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7769b1c..59acce2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1241,10 +1241,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, /* failure implies BUG in inet_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + err = rtnl_notify(skb, init_net(), pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); + rtnl_set_sk_err(init_net(), RTNLGRP_IPV4_IFADDR, err); } static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 76218e5..8c64334 
100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -317,11 +317,11 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, /* failure implies BUG in fib_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, + err = rtnl_notify(skb, init_net(), info->pid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); + rtnl_set_sk_err(init_net(), RTNLGRP_IPV4_ROUTE, err); } /* Return the first fib alias matching TOS with diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d2e7e55..15e0eb4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -314,7 +314,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, NETLINK_CB(skb).pid); + rtnl_unicast(skb, init_net(), NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -527,7 +527,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, NETLINK_CB(skb).pid); + rtnl_unicast(skb, init_net(), NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 509bfb1..5f8592e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2802,7 +2802,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, init_net(), NETLINK_CB(in_skb).pid); errout: return err; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 83b7312..597bc10 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3362,7 +3362,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, /* failure implies BUG in inet6_ifaddr_msgsize() */ BUG_ON(err < 0); - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, init_net(), NETLINK_CB(in_skb).pid); errout_ifa: 
in6_ifa_put(ifa); errout: @@ -3382,10 +3382,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) /* failure implies BUG in inet6_ifaddr_msgsize() */ BUG_ON(err < 0); - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, init_net(), 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(init_net(), RTNLGRP_IPV6_IFADDR, err); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3539,10 +3539,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) /* failure implies BUG in inet6_if_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, init_net(), 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(init_net(), RTNLGRP_IPV6_IFADDR, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -3604,10 +3604,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, /* failure implies BUG in inet6_prefix_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, init_net(), 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); + rtnl_set_sk_err(init_net(), RTNLGRP_IPV6_PREFIX, err); } static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 02fd8ae..cf568f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2210,7 +2210,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) goto errout; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, init_net(), NETLINK_CB(in_skb).pid); errout: return err; } @@ -2237,10 +2237,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) /* failure 
implies BUG in rt6_nlmsg_size() */ BUG_ON(err < 0); - err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + err = rtnl_notify(skb, init_net(), pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(init_net(), RTNLGRP_IPV6_ROUTE, err); } /* diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 09a3ec8..c69b4fc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -369,7 +369,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, init_net(), pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct tcf_dump_args diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7e33f73..ae55988 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -850,7 +850,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, } if (skb->len) - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, init_net(), pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -1082,7 +1082,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, init_net(), pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:20 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:20 -0700 Subject: [openib-general] [PATCH RFC 18/31] net: Implment network device movement between namespaces In-Reply-To: References: Message-ID: <11697516372179-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This patch introduces NETIF_F_NETNS_LOCAL a flag to indicate a network device is local to a single network namespace and should never be moved. 
This flag is useful for pseudo devices of which we need an instance in each network namespace (like the loopback device), and for any device we find that cannot handle multiple network namespaces, so that we may trap it in the initial network namespace. This patch also introduces dev_change_net_namespace, a function used to move a network device from one network namespace to another. To the network device nothing special appears to happen; to the components of the network stack it appears as if the network device was unregistered in the network namespace it was in, and a new device was registered in the network namespace the device was moved to. This patch also sets up a namespace device destructor that, upon the exit of a network namespace, moves all of the movable network devices to the initial network namespace so they are not lost. Signed-off-by: Eric W. Biederman --- drivers/net/loopback.c | 3 +- include/linux/netdevice.h | 3 + net/core/dev.c | 222 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 201 insertions(+), 27 deletions(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index e9abf3f..7d15de0 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -225,7 +225,8 @@ DEFINE_PER_NET(struct net_device, loopback_dev) = { | NETIF_F_TSO #endif | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA - | NETIF_F_LLTX, + | NETIF_F_LLTX + | NETIF_F_NETNS_LOCAL, .ethtool_ops = &loopback_ethtool_ops, }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0b4a4dc..3fcaf60 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -324,6 +324,7 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. 
*/ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -710,6 +711,8 @@ extern int dev_ethtool(net_t net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); +extern int dev_change_net_namespace(struct net_device *, net_t, + const char *); extern int dev_set_mtu(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); diff --git a/net/core/dev.c b/net/core/dev.c index fc0d2af..52994e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -198,6 +198,52 @@ static inline struct hlist_head *dev_index_hash(net_t net, int ifindex) return &per_net(dev_index_head, net)[ifindex & ((1<nd_net; + + ASSERT_RTNL(); + + dev->next = NULL; + write_lock_bh(&per_net(dev_base_lock, net)); + *per_net(dev_tail, net) = dev; + per_net(dev_tail, net) = &dev->next; + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + write_unlock_bh(&per_net(dev_base_lock, net)); + return 0; +} + +/* Device list removal */ +static int unlist_netdevice(struct net_device *dev) +{ + struct net_device *d, **dp; + net_t net = dev->nd_net; + + ASSERT_RTNL(); + + /* Unlink dev from the device chain */ + for (dp = &per_net(dev_base, net); (d = *dp) != NULL; dp = &d->next) { + if (d == dev) { + write_lock_bh(&per_net(dev_base_lock, net)); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + if (per_net(dev_tail, net) == &dev->next) + per_net(dev_tail, net) = dp; + *dp = d->next; + write_unlock_bh(&per_net(dev_base_lock, net)); + break; + } + } + if (!d) { + printk(KERN_ERR "unlist net_device: '%s' not found\n", + dev->name); + return -ENODEV; + } + return 0; +} + /* * Our notifier list */ @@ -3054,15 +3100,9 
@@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); - dev->next = NULL; dev_init_scheduler(dev); - write_lock_bh(&per_net(dev_base_lock, net)); - *per_net(dev_tail, net) = dev; - per_net(dev_tail, net) = &dev->next; - hlist_add_head(&dev->name_hlist, head); - hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); dev_hold(dev); - write_unlock_bh(&per_net(dev_base_lock, net)); + list_netdevice(dev); /* Notify protocols, that a new device appeared. */ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); @@ -3327,9 +3367,6 @@ void synchronize_net(void) int unregister_netdevice(struct net_device *dev) { - struct net_device *d, **dp; - net_t net = dev->nd_net; - BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3347,23 +3384,8 @@ int unregister_netdevice(struct net_device *dev) dev_close(dev); /* And unlink it from device chain. */ - for (dp = &per_net(dev_base, net); (d = *dp) != NULL; dp = &d->next) { - if (d == dev) { - write_lock_bh(&per_net(dev_base_lock, net)); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - if (per_net(dev_tail, net) == &dev->next) - per_net(dev_tail, net) = dp; - *dp = d->next; - write_unlock_bh(&per_net(dev_base_lock, net)); - break; - } - } - if (!d) { - printk(KERN_ERR "unregister net_device: '%s' not found\n", - dev->name); + if (unlist_netdevice(dev)) return -ENODEV; - } dev->reg_state = NETREG_UNREGISTERING; @@ -3419,6 +3441,120 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); +/** + * dev_change_net_namespace - move device to different nethost namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a netagive errno code is returned. 
+ * + * Callers must hold the rtnl semaphore. + */ + +int dev_change_net_namespace(struct net_device *dev, net_t net, const char *pat) +{ + char buf[IFNAMSIZ]; + const char *destname; + int err; + + ASSERT_RTNL(); + + /* Don't allow namespace local devices to be moved. */ + err = -EINVAL; + if (dev->features & NETIF_F_NETNS_LOCAL) + goto out; + + /* Ensure the device has been registrered */ + err = -EINVAL; + if (dev->reg_state != NETREG_REGISTERED) + goto out; + + /* Get out if there is nothing todo */ + err = 0; + if (net_eq(dev->nd_net, net)) + goto out; + + /* Pick the destination device name, and ensure + * we can use it in the destination network namespace. + */ + err = -EEXIST; + destname = dev->name; + if (__dev_get_by_name(net, destname) && pat) { + /* We get here if we can't use the current device name */ + if (!dev_valid_name(pat)) + goto out; + if (strchr(pat, '%')) { + if (__dev_alloc_name(net, pat, buf) < 0) + goto out; + destname = buf; + } else + destname = pat; + if (__dev_get_by_name(net, destname)) + goto out; + } + + /* + * And now a mini version of register_netdevice unregister_netdevice. + */ + + /* If device is running close it first. */ + if (dev->flags & IFF_UP) + dev_close(dev); + + /* And unlink it from device chain */ + err = -ENODEV; + if (unlist_netdevice(dev)) + goto out; + + synchronize_net(); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. 
+ */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); + + /* Actually switch the network namespace */ + dev->nd_net = net; + + /* Assign the new device name */ + if (destname != dev->name) + strcpy(dev->name, destname); + + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) { + int iflink = (dev->iflink == dev->ifindex); + dev->ifindex = dev_new_index(net); + if (iflink) + dev->iflink = dev->ifindex; + } + + /* Fixup sysfs */ + class_device_rename(&dev->class_dev, dev->name); + + /* Add the device back in the hashes */ + list_netdevice(dev); + + /* Notify protocols, that a new device appeared. */ + call_netdevice_notifiers(NETDEV_REGISTER, dev); + + synchronize_net(); + err = 0; +out: + return err; +} + static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3561,6 +3697,37 @@ static struct pernet_operations netdev_net_ops = { .init = netdev_init, }; +static void default_device_exit(net_t net) +{ + struct net_device *dev, *next; + /* + * Push all migratable of the network devices back to the + * initial network namespace + */ + rtnl_lock(); + for (dev = per_net(dev_base, net); dev; dev = next) { + int err; + next = dev->next; + + /* Ignore unmoveable devices (i.e. loopback) */ + if (dev->features & NETIF_F_NETNS_LOCAL) + continue; + + /* Push remaing network devices to init_net */ + err = dev_change_net_namespace(dev, init_net(), "dev%d"); + if (err) { + printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + __func__, dev->name, err); + unregister_netdevice(dev); + } + } + rtnl_unlock(); +} + +static struct pernet_operations default_device_ops = { + .exit = default_device_exit, +}; + /* * Initialize the DEV module. 
At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -3591,6 +3758,9 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; + if (register_pernet_device(&default_device_ops)) + goto out; + /* * Initialise the packet receive queues. */ -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:16 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:16 -0700 Subject: [openib-general] [PATCH RFC 14/31] net: Support multiple network namespaces with netlink In-Reply-To: References: Message-ID: <11697516361090-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Each netlink socket will live in exactly one network namespace; this includes the controlling kernel sockets. This patch updates all of the existing netlink protocols to only support the initial network namespace. Requests by clients in other namespaces will get -ECONNREFUSED, as they would if the kernel did not have support for that netlink protocol compiled in. As each netlink protocol is updated to be multiple network namespace safe, it can register multiple kernel sockets to acquire a presence in the rest of the network namespaces. The implementation in af_netlink is a simple filter implementation at hash table insertion and hash table lookup time. Signed-off-by: Eric W. 
Biederman --- drivers/scsi/scsi_netlink.c | 2 +- drivers/scsi/scsi_transport_iscsi.c | 2 +- include/linux/netlink.h | 3 +- kernel/audit.c | 4 +- lib/kobject_uevent.c | 4 +- net/bridge/netfilter/ebt_ulog.c | 5 +- net/core/rtnetlink.c | 4 +- net/decnet/netfilter/dn_rtmsg.c | 3 +- net/ipv4/fib_frontend.c | 3 +- net/ipv4/inet_diag.c | 4 +- net/ipv4/netfilter/ip_queue.c | 6 +- net/ipv4/netfilter/ipt_ULOG.c | 4 +- net/ipv6/netfilter/ip6_queue.c | 4 +- net/netfilter/nfnetlink.c | 2 +- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 3 +- net/netlink/af_netlink.c | 104 ++++++++++++++++++++++++++--------- net/netlink/genetlink.c | 4 +- net/xfrm/xfrm_user.c | 2 +- 19 files changed, 112 insertions(+), 54 deletions(-) diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 1b59b27..02c2c1e 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -167,7 +167,7 @@ scsi_netlink_init(void) return; } - scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, + scsi_nl_sock = netlink_kernel_create(init_net(), NETLINK_SCSITRANSPORT, SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE); if (!scsi_nl_sock) { printk(KERN_ERR "%s: register of recieve handler failed\n", diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 9c22f13..1ad22c2 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1435,7 +1435,7 @@ static __init int iscsi_transport_init(void) if (err) goto unregister_conn_class; - nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, + nls = netlink_kernel_create(init_net(), NETLINK_ISCSI, 1, iscsi_if_rx, THIS_MODULE); if (!nls) { err = -ENOBUFS; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b3b9b60..9dacd00 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -151,7 +151,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock 
*netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); +extern struct sock *netlink_kernel_create(net_t net, int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); @@ -188,6 +188,7 @@ struct netlink_callback struct netlink_notify { + net_t net; int pid; int protocol; }; diff --git a/kernel/audit.c b/kernel/audit.c index d9b690a..b0c5c61 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -696,8 +696,8 @@ static int __init audit_init(void) printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, - THIS_MODULE); + audit_sock = netlink_kernel_create(init_net(), NETLINK_AUDIT, 0, + audit_receive, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); else diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 84272ed..9a5d4ca 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -292,8 +292,8 @@ EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, - THIS_MODULE); + uevent_sock = netlink_kernel_create(init_net(), NETLINK_KOBJECT_UEVENT, 1, + NULL, THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index c1af68b..abf2be7 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -301,8 +301,9 @@ static int __init ebt_ulog_init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, - 
NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(init_net(), NETLINK_NFLOG, + EBT_ULOG_MAXNLGROUPS, NULL, + THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7841e89..8f3dda8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -870,8 +870,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - THIS_MODULE); + rtnl = netlink_kernel_create(init_net(), NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 8b99bd3..14089ed 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -137,7 +137,8 @@ static int __init dn_rtmsg_init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg = netlink_kernel_create(init_net(), + NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, dnrmg_receive_user_sk, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 049c370..d1859ff 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -817,7 +817,8 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); + netlink_kernel_create(init_net(), NETLINK_FIB_LOOKUP, 0, nl_fib_input, + THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 77761ac..bdf3064 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -913,8 +913,8 @@ static int __init inet_diag_init(void) if 
(!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, - THIS_MODULE); + idiagnl = netlink_kernel_create(init_net(), NETLINK_INET_DIAG, 0, + inet_diag_rcv, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 8650a57..d1c42b5 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -589,7 +589,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if (net_eq(n->net, init_net()) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -681,8 +681,8 @@ static int __init ip_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - THIS_MODULE); + ipqnl = netlink_kernel_create(init_net(), NETLINK_FIREWALL, 0, + ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index dbd3478..8071d15 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -395,8 +395,8 @@ static int __init ipt_ulog_init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, - THIS_MODULE); + nflognl = netlink_kernel_create(init_net(), NETLINK_NFLOG, + ULOG_MAXNLGROUPS, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index f6e108c..02589b2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -579,7 +579,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW && n->pid) { 
write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if (net_eq(n->net, init_net()) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -671,7 +671,7 @@ static int __init ip6_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, + ipqnl = netlink_kernel_create(init_net(), NETLINK_IP6_FW, 0, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 52fdfa2..180353f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -356,7 +356,7 @@ static int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnl = netlink_kernel_create(init_net(), NETLINK_NETFILTER, NFNLGRP_MAX, nfnetlink_rcv, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d1505dd..1bdf9af 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -741,7 +741,8 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { UDEBUG("node = %p\n", inst); - if (n->pid == inst->peer_pid) + if (net_eq(n->net, init_net()) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 59bf595..8e0c289 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -765,7 +765,8 @@ nfqnl_rcv_nl_event(struct notifier_block *this, struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if (n->pid == inst->peer_pid) + if (net_eq(n->net, init_net()) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } diff --git 
a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7433e71..37b6c28 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -200,7 +200,7 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) +static __inline__ struct sock *netlink_lookup(net_t net, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; @@ -210,7 +210,7 @@ static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if (nlk_sk(sk)->pid == pid) { + if (net_eq(sk->sk_net, net) && (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -315,7 +315,7 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, u32 pid) +static int netlink_insert(struct sock *sk, net_t net, u32 pid) { struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; @@ -328,7 +328,7 @@ static int netlink_insert(struct sock *sk, u32 pid) head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) + if (net_eq(osk->sk_net, net) && (nlk_sk(osk)->pid == pid)) break; len++; } @@ -400,9 +400,6 @@ static int netlink_create(net_t net, struct socket *sock, int protocol) unsigned int groups; int err = 0; - if (!net_eq(net, init_net())) - return -EAFNOSUPPORT; - sock->state = SS_UNCONNECTED; if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) @@ -469,6 +466,7 @@ static int netlink_release(struct socket *sock) if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { + .net = sk->sk_net, .protocol = sk->sk_protocol, .pid = nlk->pid, }; @@ -497,6 +495,7 @@ static int netlink_release(struct socket *sock) static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; + net_t net = sk->sk_net; struct 
nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; @@ -510,6 +509,8 @@ retry: netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { + if (!net_eq(osk->sk_net, net)) + continue; if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. */ pid = rover--; @@ -521,7 +522,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, pid); + err = netlink_insert(sk, net, pid); if (err == -EADDRINUSE) goto retry; @@ -575,6 +576,7 @@ static int netlink_alloc_groups(struct sock *sk) static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; + net_t net = sk->sk_net; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -598,7 +600,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return -EINVAL; } else { err = nladdr->nl_pid ? - netlink_insert(sk, nladdr->nl_pid) : + netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) return err; @@ -682,10 +684,12 @@ static void netlink_overrun(struct sock *sk) static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { int protocol = ssk->sk_protocol; + net_t net; struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(protocol, pid); + net = ssk->sk_net; + sock = netlink_lookup(net, protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); @@ -858,6 +862,7 @@ static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff struct netlink_broadcast_data { struct sock *exclude_sk; + net_t net; u32 pid; u32 group; int failure; @@ -880,6 +885,9 @@ static inline int do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if (!net_eq(sk->sk_net, p->net)) + goto out; + if (p->failure) { netlink_overrun(sk); goto out; @@ -918,6 +926,7 @@ out: int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 
group, gfp_t allocation) { + net_t net = ssk->sk_net; struct netlink_broadcast_data info; struct hlist_node *node; struct sock *sk; @@ -925,6 +934,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; + info.net = net; info.pid = pid; info.group = group; info.failure = 0; @@ -973,6 +983,9 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; + if (!net_eq(sk->sk_net, p->exclude_sk->sk_net)) + goto out; + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1267,7 +1280,7 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, unsigned int groups, +netlink_kernel_create(net_t net, int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module) { @@ -1284,7 +1297,7 @@ netlink_kernel_create(int unit, unsigned int groups, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(init_net(), sock, unit) < 0) + if (__netlink_create(net, sock, unit) < 0) goto out_sock_release; if (groups < 32) @@ -1299,17 +1312,19 @@ netlink_kernel_create(int unit, unsigned int groups, if (input) nlk_sk(sk)->data_ready = input; - if (netlink_insert(sk, 0)) + if (netlink_insert(sk, net, 0)) goto out_sock_release; nlk = nlk_sk(sk); nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); - nl_table[unit].groups = groups; - nl_table[unit].listeners = listeners; - nl_table[unit].module = module; - nl_table[unit].registered = 1; + if (!nl_table[unit].registered) { + nl_table[unit].groups = groups; + nl_table[unit].listeners = listeners; + nl_table[unit].module = module; + nl_table[unit].registered = 1; + } netlink_table_ungrab(); return sk; @@ -1410,7 +1425,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_protocol, 
NETLINK_CB(skb).pid); + sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1447,7 +1462,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!skb) { struct sock *sk; - sk = netlink_lookup(in_skb->sk->sk_protocol, + sk = netlink_lookup(in_skb->sk->sk_net, + in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { sk->sk_err = ENOBUFS; @@ -1585,6 +1601,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, #ifdef CONFIG_PROC_FS struct nl_seq_iter { + net_t net; int link; int hash_idx; }; @@ -1602,6 +1619,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { + if (!net_eq(iter->net, s->sk_net)) + continue; if (off == pos) { iter->link = i; iter->hash_idx = j; @@ -1630,12 +1649,15 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); - - s = sk_next(v); + + iter = seq->private; + s = v; + do { + s = sk_next(s); + } while (s && !net_eq(iter->net, s->sk_net)); if (s) return s; - iter = seq->private; i = iter->link; j = iter->hash_idx + 1; @@ -1644,6 +1666,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); + while (s && !net_eq(iter->net, s->sk_net)) + s = sk_next(s); if (s) { iter->link = i; iter->hash_idx = j; @@ -1714,15 +1738,24 @@ static int netlink_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = iter; + iter->net = get_net(PROC_NET(inode)); return 0; } +static int netlink_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct nl_seq_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static struct file_operations 
netlink_seq_fops = { .owner = THIS_MODULE, .open = netlink_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = netlink_seq_release, }; #endif @@ -1764,6 +1797,27 @@ static struct net_proto_family netlink_family_ops = { .owner = THIS_MODULE, /* for consistency 8) */ }; +static int netlink_net_init(net_t net) +{ +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + return -ENOMEM; +#endif + return 0; +} + +static void netlink_net_exit(net_t net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "netlink"); +#endif +} + +static struct pernet_operations netlink_net_ops = { + .init = netlink_net_init, + .exit = netlink_net_exit, +}; + static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; @@ -1809,9 +1863,7 @@ static int __init netlink_proto_init(void) } sock_register(&netlink_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create(init_net(), "netlink", 0, &netlink_seq_fops); -#endif + register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. 
*/ rtnetlink_init(); out: diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 548e4e6..d6717e8 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -585,8 +585,8 @@ static int __init genl_init(void) goto errout_register; netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); - genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, - genl_rcv, THIS_MODULE); + genl_sock = netlink_kernel_create(init_net(), NETLINK_GENERIC, + GENL_MAX_ID, genl_rcv, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 82f36d3..55affa7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2293,7 +2293,7 @@ static int __init xfrm_user_init(void) printk(KERN_INFO "Initializing XFRM netlink socket\n"); - nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(init_net(), NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:12 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:12 -0700 Subject: [openib-general] [PATCH RFC 10/31] net: Make socket creation namespace safe. In-Reply-To: References: Message-ID: <11697516351593-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This patch passes in the namespace a new socket should be created in and has the socket code do the appropriate reference counting. By virtue of this all socket create methods are touched. In addition the socket create methods are modified so that they will fail if you attempt to create a socket in a non-default network namespace. 
Failing if we attempt to create a socket outside of the default network namespace ensures that as we incrementally make the network stack network-namespace aware we will not export functionality that someone has not audited and made certain is network-namespace safe. This allows us to partially enable network namespaces before all of the exotic protocols are supported. Any protocol layers I have missed will fail to compile because I now pass an extra parameter into the socket creation code. Signed-off-by: Eric W. Biederman --- drivers/net/pppoe.c | 4 ++-- drivers/net/pppox.c | 7 +++++-- include/linux/if_pppox.h | 2 +- include/linux/net.h | 3 ++- include/net/llc_conn.h | 2 +- include/net/sock.h | 4 +++- net/appletalk/ddp.c | 7 +++++-- net/atm/common.c | 4 ++-- net/atm/common.h | 2 +- net/atm/pvc.c | 7 +++++-- net/atm/svc.c | 11 +++++++---- net/ax25/af_ax25.c | 9 ++++++--- net/bluetooth/af_bluetooth.c | 7 +++++-- net/bluetooth/bnep/sock.c | 4 ++-- net/bluetooth/cmtp/sock.c | 4 ++-- net/bluetooth/hci_sock.c | 4 ++-- net/bluetooth/hidp/sock.c | 4 ++-- net/bluetooth/l2cap.c | 10 +++++----- net/bluetooth/rfcomm/sock.c | 10 +++++----- net/bluetooth/sco.c | 10 +++++----- net/core/sock.c | 6 ++++-- net/decnet/af_decnet.c | 13 ++++++++----- net/econet/af_econet.c | 7 +++++-- net/ipv4/af_inet.c | 7 +++++-- net/ipv6/af_inet6.c | 7 +++++-- net/ipx/af_ipx.c | 7 +++++-- net/irda/af_irda.c | 11 +++++++---- net/key/af_key.c | 7 +++++-- net/llc/af_llc.c | 7 +++++-- net/llc/llc_conn.c | 6 +++--- net/netlink/af_netlink.c | 13 ++++++++----- net/netrom/af_netrom.c | 9 ++++++--- net/packet/af_packet.c | 7 +++++-- net/rose/af_rose.c | 9 ++++++--- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/socket.c | 8 ++++---- net/tipc/socket.c | 9 ++++++--- net/unix/af_unix.c | 13 ++++++++----- net/wanrouter/af_wanpipe.c | 15 +++++++++------ net/x25/af_x25.c | 13 ++++++++----- 41 files changed, 182 insertions(+), 111 deletions(-) diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 
d34fe16..d09334d 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -475,12 +475,12 @@ static struct proto pppoe_sk_proto = { * Initialize a new struct sock. * **********************************************************************/ -static int pppoe_create(struct socket *sock) +static int pppoe_create(net_t net, struct socket *sock) { int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); if (!sk) goto out; diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c index 9315046..0d5c7bc 100644 --- a/drivers/net/pppox.c +++ b/drivers/net/pppox.c @@ -106,10 +106,13 @@ int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(pppox_ioctl); -static int pppox_create(struct socket *sock, int protocol) +static int pppox_create(net_t net, struct socket *sock, int protocol) { int rc = -EPROTOTYPE; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (protocol < 0 || protocol > PX_MAX_PROTO) goto out; @@ -118,7 +121,7 @@ static int pppox_create(struct socket *sock, int protocol) !try_module_get(pppox_protos[protocol]->owner)) goto out; - rc = pppox_protos[protocol]->create(sock); + rc = pppox_protos[protocol]->create(net, sock); module_put(pppox_protos[protocol]->owner); out: diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 4fab3d0..f6ffd83 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -148,7 +148,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po) struct module; struct pppox_proto { - int (*create)(struct socket *sock); + int (*create)(net_t net, struct socket *sock); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; diff --git a/include/linux/net.h b/include/linux/net.h index f28d8a2..4136768 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -19,6 +19,7 @@ #define _LINUX_NET_H #include +#include #include 
struct poll_table_struct; @@ -169,7 +170,7 @@ struct proto_ops { struct net_proto_family { int family; - int (*create)(struct socket *sock, int protocol); + int (*create)(net_t net, struct socket *sock, int protocol); struct module *owner; }; diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 00730d2..e4f7104 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -93,7 +93,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) return skb->cb[sizeof(skb->cb) - 1]; } -extern struct sock *llc_sk_alloc(int family, gfp_t priority, +extern struct sock *llc_sk_alloc(net_t net, int family, gfp_t priority, struct proto *prot); extern void llc_sk_free(struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index 01a2781..ebcaa7f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -55,6 +55,7 @@ #include #include #include +#include /* * This structure really needs to be cleaned up. @@ -784,7 +785,7 @@ extern void FASTCALL(release_sock(struct sock *sk)); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern struct sock *sk_alloc(int family, +extern struct sock *sk_alloc(net_t net, int family, gfp_t priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); @@ -1013,6 +1014,7 @@ static inline void sock_copy(struct sock *nsk, const struct sock *osk) #endif memcpy(nsk, osk, osk->sk_prot->obj_size); + get_net(nsk->sk_net); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 5b8a8ce..e08367b 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1026,11 +1026,14 @@ static struct proto ddp_proto = { * Create a socket. Initialise the socket, blank the addresses * set the state. 
*/ -static int atalk_create(struct socket *sock, int protocol) +static int atalk_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + /* * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do * and gives you the full ELAP frame. Should be handy for CAP 8) @@ -1038,7 +1041,7 @@ static int atalk_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); if (!sk) goto out; rc = 0; diff --git a/net/atm/common.c b/net/atm/common.c index fbabff4..c4329f0 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -132,7 +132,7 @@ static struct proto vcc_proto = { .obj_size = sizeof(struct atm_vcc), }; -int vcc_create(struct socket *sock, int protocol, int family) +int vcc_create(net_t net, struct socket *sock, int protocol, int family) { struct sock *sk; struct atm_vcc *vcc; @@ -140,7 +140,7 @@ int vcc_create(struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff --git a/net/atm/common.h b/net/atm/common.h index a422da7..c7101c7 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -10,7 +10,7 @@ #include /* for poll_table */ -int vcc_create(struct socket *sock, int protocol, int family); +int vcc_create(net_t net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index b2148b4..13bf58e 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ 
-124,10 +124,13 @@ static const struct proto_ops pvc_proto_ops = { }; -static int pvc_create(struct socket *sock,int protocol) +static int pvc_create(net_t net, struct socket *sock,int protocol) { + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + sock->ops = &pvc_proto_ops; - return vcc_create(sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC); } diff --git a/net/atm/svc.c b/net/atm/svc.c index 3a180cf..e78d9f7 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -33,7 +33,7 @@ #endif -static int svc_create(struct socket *sock,int protocol); +static int svc_create(net_t net, struct socket *sock,int protocol); /* @@ -335,7 +335,7 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) lock_sock(sk); - error = svc_create(newsock,0); + error = svc_create(sk->sk_net, newsock,0); if (error) goto out; @@ -636,12 +636,15 @@ static const struct proto_ops svc_proto_ops = { }; -static int svc_create(struct socket *sock,int protocol) +static int svc_create(net_t net, struct socket *sock,int protocol) { int error; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + sock->ops = &svc_proto_ops; - error = vcc_create(sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; ATM_SD(sock)->remote.sas_family = AF_ATMSVC; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e60af4e..cdbf3f6 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -781,11 +781,14 @@ static struct proto ax25_proto = { .obj_size = sizeof(struct sock), }; -static int ax25_create(struct socket *sock, int protocol) +static int ax25_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; ax25_cb *ax25; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + switch (sock->type) { case SOCK_DGRAM: if (protocol == 0 || protocol == PF_AX25) @@ -831,7 +834,7 @@ static int ax25_create(struct socket *sock, int protocol) return 
-ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) return -ENOMEM; ax25 = sk->sk_protinfo = ax25_create_cb(); @@ -856,7 +859,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 67df99e..7110360 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -95,10 +95,13 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); -static int bt_sock_create(struct socket *sock, int proto) +static int bt_sock_create(net_t net, struct socket *sock, int proto) { int err; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; @@ -113,7 +116,7 @@ static int bt_sock_create(struct socket *sock, int proto) read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { - err = bt_proto[proto]->create(sock, proto); + err = bt_proto[proto]->create(net, sock, proto); module_put(bt_proto[proto]->owner); } diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5563db1..dc9b1ef 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -205,7 +205,7 @@ static struct proto bnep_proto = { .obj_size = sizeof(struct bt_sock) }; -static int bnep_sock_create(struct socket *sock, int protocol) +static int bnep_sock_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; @@ -214,7 +214,7 @@ static int bnep_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); + sk = 
sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 53295d3..107dbfe 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -196,7 +196,7 @@ static struct proto cmtp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int cmtp_sock_create(struct socket *sock, int protocol) +static int cmtp_sock_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; @@ -205,7 +205,7 @@ static int cmtp_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index dbf98c4..3a15a31 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -610,7 +610,7 @@ static struct proto hci_sk_proto = { .obj_size = sizeof(struct hci_pinfo) }; -static int hci_sock_create(struct socket *sock, int protocol) +static int hci_sock_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; @@ -621,7 +621,7 @@ static int hci_sock_create(struct socket *sock, int protocol) sock->ops = &hci_sock_ops; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 407fba4..647f85e 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -247,7 +247,7 @@ static struct proto hidp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int hidp_sock_create(struct socket *sock, int protocol) +static int hidp_sock_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; @@ -256,7 +256,7 @@ static int hidp_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - 
sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 29a8fa4..13e9b5b 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -517,11 +517,11 @@ static struct proto l2cap_proto = { .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *l2cap_sock_alloc(net_t net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); if (!sk) return NULL; @@ -542,7 +542,7 @@ static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) return sk; } -static int l2cap_sock_create(struct socket *sock, int protocol) +static int l2cap_sock_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; @@ -559,7 +559,7 @@ static int l2cap_sock_create(struct socket *sock, int protocol) sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -1412,7 +1412,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto response; } - sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC); + sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC); if (!sk) goto response; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index cb7e855..12ff829 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -282,12 +282,12 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(net_t net, struct socket *sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct 
sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1); if (!sk) return NULL; @@ -323,7 +323,7 @@ static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio return sk; } -static int rfcomm_sock_create(struct socket *sock, int protocol) +static int rfcomm_sock_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; @@ -336,7 +336,7 @@ static int rfcomm_sock_create(struct socket *sock, int protocol) sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -868,7 +868,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC); if (!sk) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 5d13d4f..6d424ea 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -414,11 +414,11 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(net_t net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); if (!sk) return NULL; @@ -439,7 +439,7 @@ static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) return sk; } -static int sco_sock_create(struct socket *sock, int protocol) +static int sco_sock_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; @@ -452,7 +452,7 @@ static int sco_sock_create(struct socket *sock, int protocol) sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return 
-ENOMEM; @@ -807,7 +807,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); - sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC); + sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); goto done; diff --git a/net/core/sock.c b/net/core/sock.c index 5555364..e42f7df 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -825,7 +825,7 @@ static void inline sock_lock_init(struct sock *sk) * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, gfp_t priority, +struct sock *sk_alloc(net_t net, int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -846,6 +846,7 @@ struct sock *sk_alloc(int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); + sk->sk_net = get_net(net); } if (security_sk_alloc(sk, family, priority)) @@ -885,6 +886,7 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + put_net(sk->sk_net); if (sk->sk_prot_creator->slab != NULL) kmem_cache_free(sk->sk_prot_creator->slab, sk); else @@ -894,7 +896,7 @@ void sk_free(struct sock *sk) struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); if (newsk != NULL) { struct sk_filter *filter; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 77cd802..f1553fa 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -471,10 +471,10 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(net_t net, struct socket *sock, gfp_t gfp) { struct dn_scp *scp; - struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); + 
struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); if (!sk) goto out; @@ -675,10 +675,13 @@ char *dn_addr2asc(__u16 addr, char *buf) -static int dn_create(struct socket *sock, int protocol) +static int dn_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + switch(sock->type) { case SOCK_SEQPACKET: if (protocol != DNPROTO_NSP) @@ -691,7 +694,7 @@ static int dn_create(struct socket *sock, int protocol) } - if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -1088,7 +1091,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 4d66aac..a0b3fc5 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -609,12 +609,15 @@ static struct proto econet_proto = { * Create an Econet socket */ -static int econet_create(struct socket *sock, int protocol) +static int econet_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; struct econet_sock *eo; int err; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + /* Econet only provides datagram services. 
*/ if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; @@ -622,7 +625,7 @@ static int econet_create(struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1); + sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); if (sk == NULL) goto out; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8640096..cb07cb6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -221,7 +221,7 @@ out: * Create an inet socket. */ -static int inet_create(struct socket *sock, int protocol) +static int inet_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; struct list_head *p; @@ -233,6 +233,9 @@ static int inet_create(struct socket *sock, int protocol) int try_loading_module = 0; int err; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ @@ -295,7 +298,7 @@ lookup_protocol: BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0e0e426..00bd55a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -86,7 +86,7 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct socket *sock, int protocol) +static int inet6_create(net_t net, struct socket *sock, int protocol) { struct inet_sock *inet; struct ipv6_pinfo *np; @@ -99,6 +99,9 @@ static int inet6_create(struct socket *sock, int protocol) int try_loading_module = 0; int err; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + /* Look for the requested type/protocol pair. 
*/ answer = NULL; lookup_protocol: @@ -159,7 +162,7 @@ lookup_protocol: BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 76c6615..2ec4a3c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1358,11 +1358,14 @@ static struct proto ipx_proto = { .obj_size = sizeof(struct ipx_sock), }; -static int ipx_create(struct socket *sock, int protocol) +static int ipx_create(net_t net, struct socket *sock, int protocol) { int rc = -ESOCKTNOSUPPORT; struct sock *sk; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + /* * SPX support is not anymore in the kernel sources. If you want to * ressurrect it, completing it and making it understand shared skbs, @@ -1373,7 +1376,7 @@ static int ipx_create(struct socket *sock, int protocol) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); if (!sk) goto out; #ifdef IPX_REFCNT_DEBUG diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 7e1aea8..e3344c3 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -60,7 +60,7 @@ #include -static int irda_create(struct socket *sock, int protocol); +static int irda_create(net_t net, struct socket *sock, int protocol); static const struct proto_ops irda_stream_ops; static const struct proto_ops irda_seqpacket_ops; @@ -844,7 +844,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) IRDA_ASSERT(self != NULL, return -1;); - err = irda_create(newsock, sk->sk_protocol); + err = irda_create(sk->sk_net, newsock, sk->sk_protocol); if (err) return err; @@ -1085,13 +1085,16 @@ static struct proto irda_proto = { * Create IrDA socket * */ -static int irda_create(struct socket *sock, int protocol) +static int irda_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; 
struct irda_sock *self; IRDA_DEBUG(2, "%s()\n", __FUNCTION__); + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + /* Check for valid socket type */ switch (sock->type) { case SOCK_STREAM: /* For TTP connections with SAR disabled */ @@ -1103,7 +1106,7 @@ static int irda_create(struct socket *sock, int protocol) } /* Allocate networking socket */ - sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1); + sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); if (sk == NULL) return -ENOMEM; diff --git a/net/key/af_key.c b/net/key/af_key.c index c79f9c4..244ab5b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -137,11 +137,14 @@ static struct proto key_proto = { .obj_size = sizeof(struct pfkey_sock), }; -static int pfkey_create(struct socket *sock, int protocol) +static int pfkey_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; int err; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) @@ -150,7 +153,7 @@ static int pfkey_create(struct socket *sock, int protocol) return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); if (sk == NULL) goto out; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 190bb3e..6bc0fff 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -150,14 +150,17 @@ static struct proto llc_proto = { * socket type we have available. * Returns 0 upon success, negative upon failure. 
*/ -static int llc_ui_create(struct socket *sock, int protocol) +static int llc_ui_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c761c15..49f8703 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -700,7 +700,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *saddr, struct llc_addr *daddr) { - struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC, sk->sk_prot); struct llc_sock *newllc, *llc = llc_sk(sk); @@ -867,9 +867,9 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. 
Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(net_t net, int family, gfp_t priority, struct proto *prot) { - struct sock *sk = sk_alloc(family, priority, prot, 1); + struct sock *sk = sk_alloc(net, family, priority, prot, 1); if (!sk) goto out; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3c00f48..7433e71 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -371,14 +371,14 @@ static struct proto netlink_proto = { .obj_size = sizeof(struct netlink_sock), }; -static int __netlink_create(struct socket *sock, int protocol) +static int __netlink_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) return -ENOMEM; @@ -393,13 +393,16 @@ static int __netlink_create(struct socket *sock, int protocol) return 0; } -static int netlink_create(struct socket *sock, int protocol) +static int netlink_create(net_t net, struct socket *sock, int protocol) { struct module *module = NULL; struct netlink_sock *nlk; unsigned int groups; int err = 0; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + sock->state = SS_UNCONNECTED; if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) @@ -422,7 +425,7 @@ static int netlink_create(struct socket *sock, int protocol) groups = nl_table[protocol].groups; netlink_unlock_table(); - if ((err = __netlink_create(sock, protocol)) < 0) + if ((err = __netlink_create(net, sock, protocol)) < 0) goto out_module; nlk = nlk_sk(sock->sk); @@ -1281,7 +1284,7 @@ netlink_kernel_create(int unit, unsigned int groups, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(sock, unit) < 0) + if (__netlink_create(init_net(), sock, unit) < 0) 
goto out_sock_release; if (groups < 32) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 601d58c..3fa3f1a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -409,15 +409,18 @@ static struct proto nr_proto = { .obj_size = sizeof(struct nr_sock), }; -static int nr_create(struct socket *sock, int protocol) +static int nr_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; struct nr_sock *nr; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) return -ENOMEM; nr = nr_sk(sk); @@ -459,7 +462,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; nr = nr_sk(sk); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 04e295a..ca371ea 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -981,13 +981,16 @@ static struct proto packet_proto = { * Create a packet of type SOCK_PACKET. 
*/ -static int packet_create(struct socket *sock, int protocol) +static int packet_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; struct packet_sock *po; __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW @@ -1000,7 +1003,7 @@ static int packet_create(struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); if (sk == NULL) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5532340..7d5e593 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -499,15 +499,18 @@ static struct proto rose_proto = { .obj_size = sizeof(struct rose_sock), }; -static int rose_create(struct socket *sock, int protocol) +static int rose_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; struct rose_sock *rose; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return -ENOMEM; rose = rose_sk(sk); @@ -545,7 +548,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return NULL; rose = rose_sk(sk); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ef36be0..0217546 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -622,7 +622,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = 
sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1); + newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ea94951..9461a10 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -540,7 +540,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1); + struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; diff --git a/net/socket.c b/net/socket.c index 4e39631..0d0c92b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1053,7 +1053,7 @@ call_kill: return 0; } -static int __sock_create(int family, int type, int protocol, +static int __sock_create(net_t net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1129,7 +1129,7 @@ static int __sock_create(int family, int type, int protocol, /* Now protected by module ref count */ rcu_read_unlock(); - err = pf->create(sock, protocol); + err = pf->create(net, sock, protocol); if (err < 0) goto out_module_put; @@ -1168,12 +1168,12 @@ out_release: int sock_create(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 0); + return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } int sock_create_kern(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 1); + return __sock_create(init_net(), family, type, protocol, res, 1); } asmlinkage long sys_socket(int family, int type, int protocol) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2a6a5a6..cf02a0c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -162,13 +162,16 @@ static void advance_queue(struct tipc_sock *tsock) * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct socket *sock, 
int protocol) +static int tipc_create(net_t net, struct socket *sock, int protocol) { struct tipc_sock *tsock; struct tipc_port *port; struct sock *sk; u32 ref; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -198,7 +201,7 @@ static int tipc_create(struct socket *sock, int protocol) return -EPROTOTYPE; } - sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); if (!sk) { tipc_deleteport(ref); return -ENOMEM; @@ -1363,7 +1366,7 @@ static int accept(struct socket *sock, struct socket *newsock, int flags) } buf = skb_peek(&sock->sk->sk_receive_queue); - res = tipc_create(newsock, 0); + res = tipc_create(sock->sk->sk_net, newsock, 0); if (!res) { struct tipc_sock *new_tsock = tipc_sk(newsock->sk); struct tipc_portid id; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 30855e1..8015a03 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -569,7 +569,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock * unix_create1(struct socket *sock) +static struct sock * unix_create1(net_t net, struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; @@ -577,7 +577,7 @@ static struct sock * unix_create1(struct socket *sock) if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; - sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); if (!sk) goto out; @@ -602,8 +602,11 @@ out: return sk; } -static int unix_create(struct socket *sock, int protocol) +static int unix_create(net_t net, struct socket *sock, int protocol) { + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -629,7 +632,7 @@ static int unix_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - return unix_create1(sock) ? 
0 : -ENOMEM; + return unix_create1(net, sock) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -980,7 +983,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(NULL); + newsk = unix_create1(sk->sk_net, NULL); if (newsk == NULL) goto out; diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index c205973..542c737 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -191,7 +191,7 @@ struct net_device *wanpipe_find_free_dev(sdla_t *card); static void wanpipe_unlink_card (struct sock *); static int wanpipe_link_card (struct sock *); static struct sock *wanpipe_make_new(struct sock *); -static struct sock *wanpipe_alloc_socket(void); +static struct sock *wanpipe_alloc_socket(net_t net); static inline int get_atomic_device(struct net_device *dev); static int wanpipe_exec_cmd(struct sock *, int, unsigned int); static int get_ioctl_cmd (struct sock *, void *); @@ -455,7 +455,7 @@ static struct sock *wanpipe_make_new(struct sock *osk) if (osk->sk_type != SOCK_RAW) return NULL; - if ((sk = wanpipe_alloc_socket()) == NULL) + if ((sk = wanpipe_alloc_socket(osk->sk_net)) == NULL) return NULL; sk->sk_type = osk->sk_type; @@ -498,12 +498,12 @@ static struct proto wanpipe_proto = { * *===========================================================*/ -static struct sock *wanpipe_alloc_socket(void) +static struct sock *wanpipe_alloc_socket(net_t net) { struct sock *sk; struct wanpipe_opt *wan_opt; - if ((sk = sk_alloc(PF_WANPIPE, GFP_ATOMIC, &wanpipe_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_WANPIPE, GFP_ATOMIC, &wanpipe_proto, 1)) == NULL) return NULL; if ((wan_opt = kzalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) { @@ -1498,10 +1498,13 @@ struct net_device *wanpipe_find_free_dev(sdla_t *card) * Crates AF_WANPIPE socket. 
*===========================================================*/ -static int wanpipe_create(struct socket *sock, int protocol) +static int wanpipe_create(net_t net, socket *sock, int protocol) { struct sock *sk; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + //FIXME: This checks for root user, SECURITY ? //if (!capable(CAP_NET_RAW)) // return -EPERM; @@ -1511,7 +1514,7 @@ static int wanpipe_create(struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; - if ((sk = wanpipe_alloc_socket()) == NULL) + if ((sk = wanpipe_alloc_socket(net)) == NULL) return -ENOBUFS; sk->sk_reuse = 1; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index b5c80b1..6602a34 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -465,10 +465,10 @@ static struct proto x25_proto = { .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(void) +static struct sock *x25_alloc_socket(net_t net) { struct x25_sock *x25; - struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); if (!sk) goto out; @@ -484,17 +484,20 @@ out: return sk; } -static int x25_create(struct socket *sock, int protocol) +static int x25_create(net_t net, struct socket *sock, int protocol) { struct sock *sk; struct x25_sock *x25; int rc = -ESOCKTNOSUPPORT; + if (!net_eq(net, init_net())) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol) goto out; rc = -ENOMEM; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(net)) == NULL) goto out; x25 = x25_sk(sk); @@ -542,7 +545,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(osk->sk_net)) == NULL) goto out; x25 = x25_sk(sk); -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:23 2007 From: ebiederm at xmission.com (Eric W. 
Biederman) Date: Thu, 25 Jan 2007 12:00:23 -0700 Subject: [openib-general] [PATCH RFC 21/31] net: Implement the guts of the network namespace infrastructure In-Reply-To: References: Message-ID: <11697516373346-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted Support is added for the .data.pernet section where all of the variables that have a single instance in each network namespace will live. Every architecture's linker script is modified, so it should work. Summarizing the functions: net_ns_init creates a slab and allocates the template and the initial network namespace. pernet_modcopy keeps the network namespaces in sync with the loaded modules, initializing new data variables as they are added. Because the last reference can come from interrupt context, network namespace destruction queues itself for later with schedule_work. Then we alert everyone the network namespace is disappearing. If a buggy user is still holding a reference to the network namespace we print a nasty message and leak the network namespace. The rest are just light-weight wrapper functions to make things more convenient. A little should probably be said about net_head, the variable at the start of my network namespace structure. It is the only variable with a location decided by the C code instead of the linker and I string them together in a linked list so I can iterate. Probably more interesting is that it looks like it is saner not to directly use a pointer to my network namespace but instead to use an offset. All of the references to data in my network namespace are coming from per_net(...) which takes the address of the variable in the .data.pernet section and then adds my magic offset. If I used a pointer I would have to subtract an additional value and export an extra symbol.
Not good for performance or maintenance :) The expected usage of network namespace variables is to replace sequences like: &loopback_dev with &per_net(loopback_dev, net) where net is some network namespace reference. In my preliminary tests only a single additional addition is inserted, so it appears to be an efficient idiom. Hopefully it is also easy to comprehend and use. Signed-off-by: Eric W. Biederman --- arch/alpha/kernel/vmlinux.lds.S | 2 + arch/arm/kernel/vmlinux.lds.S | 3 + arch/arm26/kernel/vmlinux-arm26-xip.lds.in | 3 + arch/arm26/kernel/vmlinux-arm26.lds.in | 3 + arch/avr32/kernel/vmlinux.lds.c | 3 + arch/cris/arch-v10/vmlinux.lds.S | 2 + arch/cris/arch-v32/vmlinux.lds.S | 2 + arch/frv/kernel/vmlinux.lds.S | 2 + arch/h8300/kernel/vmlinux.lds.S | 3 + arch/i386/kernel/vmlinux.lds.S | 3 + arch/ia64/kernel/vmlinux.lds.S | 2 + arch/m32r/kernel/vmlinux.lds.S | 3 + arch/m68k/kernel/vmlinux-std.lds | 3 + arch/m68k/kernel/vmlinux-sun3.lds | 3 + arch/m68knommu/kernel/vmlinux.lds.S | 3 + arch/mips/kernel/vmlinux.lds.S | 3 + arch/parisc/kernel/vmlinux.lds.S | 3 + arch/powerpc/kernel/vmlinux.lds.S | 2 + arch/ppc/kernel/vmlinux.lds.S | 2 + arch/s390/kernel/vmlinux.lds.S | 3 + arch/sh/kernel/vmlinux.lds.S | 3 + arch/sh64/kernel/vmlinux.lds.S | 3 + arch/sparc/kernel/vmlinux.lds.S | 3 + arch/sparc64/kernel/vmlinux.lds.S | 3 + arch/v850/kernel/vmlinux.lds.S | 6 +- arch/x86_64/kernel/vmlinux.lds.S | 3 + arch/xtensa/kernel/vmlinux.lds.S | 2 + include/asm-generic/vmlinux.lds.h | 8 + include/asm-um/common.lds.S | 4 +- include/linux/module.h | 3 + include/linux/net_namespace_type.h | 63 ++++++++- include/net/net_namespace.h | 49 ++++++- kernel/module.c | 211 ++++++++++++++++++++++++- net/core/net_namespace.c | 232 ++++++++++++++++++++++++++++ 34 files changed, 631 insertions(+), 15 deletions(-) diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 76bf071..ad20077 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++
b/arch/alpha/kernel/vmlinux.lds.S @@ -72,6 +72,8 @@ SECTIONS .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + DATA_PER_NET + . = ALIGN(2*8192); __init_end = .; /* Freed after init ends here */ diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index a8fa75e..5b003f9 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -61,6 +61,9 @@ SECTIONS __per_cpu_start = .; *(.data.percpu) __per_cpu_end = .; + + DATA_PER_NET + #ifndef CONFIG_XIP_KERNEL __init_begin = _stext; *(.init.data) diff --git a/arch/arm26/kernel/vmlinux-arm26-xip.lds.in b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in index ca61ec8..69d5772 100644 --- a/arch/arm26/kernel/vmlinux-arm26-xip.lds.in +++ b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in @@ -50,6 +50,9 @@ SECTIONS __initramfs_start = .; usr/built-in.o(.init.ramfs) __initramfs_end = .; + + DATA_PER_NET + . = ALIGN(32768); __init_end = .; } diff --git a/arch/arm26/kernel/vmlinux-arm26.lds.in b/arch/arm26/kernel/vmlinux-arm26.lds.in index d1d3418..473a5b4 100644 --- a/arch/arm26/kernel/vmlinux-arm26.lds.in +++ b/arch/arm26/kernel/vmlinux-arm26.lds.in @@ -51,6 +51,9 @@ SECTIONS __initramfs_start = .; usr/built-in.o(.init.ramfs) __initramfs_end = .; + + DATA_PER_NET + . = ALIGN(32768); __init_end = .; } diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c index 5c4424e..dee3715 100644 --- a/arch/avr32/kernel/vmlinux.lds.c +++ b/arch/avr32/kernel/vmlinux.lds.c @@ -50,6 +50,9 @@ SECTIONS __initramfs_start = .; *(.init.ramfs) __initramfs_end = .; + + DATA_PER_NET + . 
= ALIGN(4096); __init_end = .; } diff --git a/arch/cris/arch-v10/vmlinux.lds.S b/arch/cris/arch-v10/vmlinux.lds.S index 689729a..f1c890c 100644 --- a/arch/cris/arch-v10/vmlinux.lds.S +++ b/arch/cris/arch-v10/vmlinux.lds.S @@ -83,6 +83,8 @@ SECTIONS } SECURITY_INIT + DATA_PER_NET + .init.ramfs : { __initramfs_start = .; *(.init.ramfs) diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S index 472d4b3..eb08771 100644 --- a/arch/cris/arch-v32/vmlinux.lds.S +++ b/arch/cris/arch-v32/vmlinux.lds.S @@ -95,6 +95,8 @@ SECTIONS .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + DATA_PER_NET + .init.ramfs : { __initramfs_start = .; *(.init.ramfs) diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 9c1fb12..f383c83 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -61,6 +61,8 @@ SECTIONS .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + DATA_PER_NET + . = ALIGN(4096); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index f05288b..5d5fda5 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -130,6 +130,9 @@ SECTIONS ___initramfs_start = .; *(.init.ramfs) ___initramfs_end = .; + + DATA_PER_NET + . = ALIGN(0x4) ; ___init_end = .; __edata = . ; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index a53c8b1..1aae8b4 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -193,6 +193,9 @@ SECTIONS *(.data.percpu) __per_cpu_end = .; } + + DATA_PER_NET + . = ALIGN(4096); /* freed after init ends here */ diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index d6083a0..28dd9eb 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -118,6 +118,8 @@ SECTIONS __initramfs_end = .; } + DATA_PER_NET + . 
= ALIGN(16); .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 358b9ce..3e8c624 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -107,6 +107,9 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + + DATA_PER_NET + . = ALIGN(4096); __init_end = .; /* freed after init ends here */ diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index d279445..d60cb7e 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -65,6 +65,9 @@ SECTIONS __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; + + DATA_PER_NET + . = ALIGN(8192); __init_end = .; diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 8c7eccb..101ec12 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -59,6 +59,9 @@ __init_begin = .; __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; + + DATA_PER_NET + . = ALIGN(8192); __init_end = .; .data.init.task : { *(.data.init_task) } diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index 2b2a10d..e713614 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -153,6 +153,9 @@ SECTIONS { __initramfs_start = .; *(.init.ramfs) __initramfs_end = .; + + DATA_PER_NET + . = ALIGN(4096); __init_end = .; } > INIT diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index cecff24..a5cfeef 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -121,6 +121,9 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + + DATA_PER_NET + . 
= ALIGN(_PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 7b943b4..2cf241b 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -181,6 +181,9 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + + DATA_PER_NET + . = ALIGN(ASM_PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 04b8e71..bdd4f05 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -150,6 +150,8 @@ SECTIONS __per_cpu_end = .; } + DATA_PER_NET + . = ALIGN(8); .machine.desc : { __machine_desc_start = . ; diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index 6192126..59c5e6c 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -135,6 +135,8 @@ SECTIONS .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + DATA_PER_NET + . = ALIGN(4096); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index fe0f2e9..bcdd353 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -99,6 +99,9 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + + DATA_PER_NET + . 
= ALIGN(4096); __init_end = .; /* freed after init ends here */ diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index f34bdcc..0a4249d 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -86,6 +86,9 @@ SECTIONS __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; + + DATA_PER_NET + __machvec_start = .; .init.machvec : { *(.init.machvec) } __machvec_end = .; diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S index 95c4d75..0c1a30e 100644 --- a/arch/sh64/kernel/vmlinux.lds.S +++ b/arch/sh64/kernel/vmlinux.lds.S @@ -118,6 +118,9 @@ SECTIONS __initramfs_start = .; .init.ramfs : C_PHYS(.init.ramfs) { *(.init.ramfs) } __initramfs_end = .; + + DATA_PER_NET + . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index b73e6b9..c1ff7de 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -65,6 +65,9 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + + DATA_PER_NET + . = ALIGN(4096); __init_end = .; . = ALIGN(32); diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 4a6063f..24e6b7f 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -89,6 +89,9 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + + DATA_PER_NET + . = ALIGN(8192); __init_end = .; __bss_start = .; diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S index 3a5fd07..b87a4cb 100644 --- a/arch/v850/kernel/vmlinux.lds.S +++ b/arch/v850/kernel/vmlinux.lds.S @@ -163,7 +163,8 @@ *(.text.init) /* 2.4 convention */ \ *(.data.init) \ INITCALL_CONTENTS \ - INITRAMFS_CONTENTS + INITRAMFS_CONTENTS \ + DATA_PER_NET /* The contents of `init' section for a ROM-resident kernel which should go into RAM. 
*/ @@ -183,7 +184,8 @@ _einittext = .; \ *(.text.init) /* 2.4 convention */ \ INITCALL_CONTENTS \ - INITRAMFS_CONTENTS + INITRAMFS_CONTENTS \ + DATA_PER_NET /* A root filesystem image, for kernels with an embedded root filesystem. */ #define ROOT_FS_CONTENTS \ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 1e54ddf..38061b2 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -200,6 +200,9 @@ SECTIONS __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; + + DATA_PER_NET + . = ALIGN(4096); __init_end = .; diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index a36c104..e77ed43 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -203,6 +203,8 @@ SECTIONS .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + DATA_PER_NET + . = ALIGN(4096); __initramfs_start =.; .init.ramfs : { *(.init.ramfs) } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9fcc8d9..298ed43 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -229,3 +229,11 @@ *(.initcall7.init) \ *(.initcall7s.init) +#define DATA_PER_NET \ + .data.pernet : AT(ADDR(.data.pernet) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_net_start) = .; \ + *(.data.pernet.head) \ + *(.data.pernet) \ + VMLINUX_SYMBOL(__per_net_end) = .; \ + } + diff --git a/include/asm-um/common.lds.S b/include/asm-um/common.lds.S index f045451..1208960 100644 --- a/include/asm-um/common.lds.S +++ b/include/asm-um/common.lds.S @@ -39,7 +39,9 @@ __per_cpu_start = . ; .data.percpu : { *(.data.percpu) } __per_cpu_end = . 
; - + + DATA_PER_NET + __initcall_start = .; .initcall.init : { INITCALLS diff --git a/include/linux/module.h b/include/linux/module.h index 10f771a..755f1b5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -353,6 +353,9 @@ struct module /* Per-cpu data. */ void *percpu; + /* Per-net data. */ + void *pernet; + /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; diff --git a/include/linux/net_namespace_type.h b/include/linux/net_namespace_type.h index 8173f59..5075199 100644 --- a/include/linux/net_namespace_type.h +++ b/include/linux/net_namespace_type.h @@ -7,14 +7,70 @@ #define __pernetname(name) per_net__##name +#ifdef CONFIG_NET_NS + +typedef struct { + unsigned long offset; +} net_t; + +#define __data_pernet __attribute__((__section__(".data.pernet"))) + +static inline unsigned long __per_net_offset(net_t net) { return net.offset; } + +/* Like per_net but returns a pseudo variable address that must be offset + * __per_net_offset() bytes before it will point to a real variable. + * Useful for static initializers. 
+ */ +#define __per_net_base(name) __pernetname(name) + +/* Get the network namespace reference from a per_net variable address */ +#define net_of(ptr, name) \ +({ \ + net_t net = { .offset = 0 }; \ + char *__ptr = (void *)(ptr); \ + if (__ptr) \ + net.offset = __ptr - ((char *)&__per_net_base(name)); \ + net; \ +}) + +/* Look up a per network namespace variable */ +#define per_net(var, net) (*( \ + RELOC_HIDE(&__per_net_base(var), __per_net_offset(net)))) + +/* A more efficient form if gcc doesn't overoptimize it */ +#ifndef per_net +#define per_net(var, net) (*( \ + (typeof(__pernetname(var)) *) \ + (((char *)&__per_net_base(var)) + __per_net_offset(net)))) +#endif + + +/* Are the two network namespaces the same */ +static inline int net_eq(net_t a, net_t b) { return a.offset == b.offset; } + +/* Get an unsigned value appropriate for hashing the network namespace */ +static inline unsigned int net_hval(net_t net) { return net.offset; } + +/* Convert to and from to and from void pointers */ +static inline void *net_to_voidp(net_t net) { return (void *)net.offset; } +static inline net_t net_from_voidp(void *ptr) +{ + net_t r; + r.offset = (unsigned long)ptr; + return r; +} + +static inline int null_net(net_t net) { return net.offset == 0; } + +#else /* CONFIG_NET_NS */ + typedef struct {} net_t; #define __data_pernet -/* Look up a per network namespace variable */ static inline unsigned long __per_net_offset(net_t net) { return 0; } -/* Like per_net but returns a pseudo variable address that must be moved +/* Like per_net but returns a pseudo variable address that must be offset * __per_net_offset() bytes before it will point to a real variable. * Useful for static initializers. 
*/ @@ -38,6 +94,9 @@ static inline net_t net_from_voidp(void *ptr) { net_t net; return net; } static inline int null_net(net_t net) { return 0; } +#endif /* CONFIG_NET_NS */ + + #define DEFINE_PER_NET(type, name) \ __data_pernet __typeof__(type) __pernetname(name) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b64568f..a2042ac 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -24,7 +24,8 @@ struct net_namespace_head { * should go */ atomic_t use_count; /* For references we destroy on demand */ - struct list_head list; + net_t next; + net_t prev; struct work_struct work; }; @@ -34,6 +35,50 @@ static inline net_t init_net(void) return init_nsproxy.net_ns; } +#ifdef CONFIG_NET_NS + +DECLARE_PER_NET(struct net_namespace_head, net_head); + +extern void pernet_modcopy(void *, const void *, unsigned long); +extern int copy_net(int, struct task_struct *); +extern void __put_net(net_t net); + +static inline net_t get_net(net_t net) +{ + atomic_inc(&per_net(net_head, net).count); + return net; +} + +static inline void put_net(net_t net) +{ + if (atomic_dec_and_test(&per_net(net_head, net).count)) + __put_net(net); +} + +static inline net_t hold_net(net_t net) +{ + atomic_inc(&per_net(net_head, net).use_count); + return net; +} + +static inline void release_net(net_t net) +{ + atomic_dec(&per_net(net_head, net).use_count); +} + +/* Created by linker magic */ +extern char __per_net_start[], __per_net_end[]; + +extern void net_lock(void); +extern void net_unlock(void); + +#define for_each_net(VAR) \ + for ( (VAR) = init_net(); !null_net((VAR)); \ + (VAR) = per_net(net_head, (VAR)).next) + + +#else /* CONFIG_NET_NS */ + static inline net_t get_net(net_t net) { return net; } static inline void put_net(net_t net) {} static inline net_t hold_net(net_t net) { return net; } @@ -50,6 +95,8 @@ static inline void net_unlock(void) {} #define for_each_net(VAR) if (1) +#endif /* CONFIG_NET_NS */ + extern net_t net_template; #define 
NET_CREATE 0x0001 /* A network namespace has been created */ diff --git a/kernel/module.c b/kernel/module.c index d0f2260..6f45090 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -44,6 +44,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -304,7 +305,7 @@ static unsigned int pcpu_num_used, pcpu_num_allocated; /* Size of each block. -ve means used. */ static int *pcpu_size; -static int split_block(unsigned int i, unsigned short size) +static int pcpu_split_block(unsigned int i, unsigned short size) { /* Reallocation required? */ if (pcpu_num_used + 1 > pcpu_num_allocated) { @@ -329,7 +330,7 @@ static int split_block(unsigned int i, unsigned short size) return 1; } -static inline unsigned int block_size(int val) +static inline unsigned int pcpu_block_size(int val) { if (val < 0) return -val; @@ -353,7 +354,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, } ptr = __per_cpu_start; - for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { + for (i = 0; i < pcpu_num_used; ptr += pcpu_block_size(pcpu_size[i]), i++) { /* Extra for alignment requirement. */ extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; BUG_ON(i == 0 && extra != 0); @@ -371,7 +372,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, /* Split block if warranted */ if (pcpu_size[i] - size > sizeof(unsigned long)) - if (!split_block(i, size)) + if (!pcpu_split_block(i, size)) return NULL; /* Mark allocated */ @@ -387,10 +388,10 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, static void percpu_modfree(void *freeme) { unsigned int i; - void *ptr = __per_cpu_start + block_size(pcpu_size[0]); + void *ptr = __per_cpu_start + pcpu_block_size(pcpu_size[0]); /* First entry is core kernel percpu data. 
*/ - for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { + for (i = 1; i < pcpu_num_used; ptr += pcpu_block_size(pcpu_size[i]), i++) { if (ptr == freeme) { pcpu_size[i] = -pcpu_size[i]; goto free; @@ -465,6 +466,169 @@ static inline void percpu_modcopy(void *pcpudst, const void *src, } #endif /* CONFIG_SMP */ +#ifdef CONFIG_NET_NS +/* Number of blocks used and allocated. */ +static unsigned int pnet_num_used, pnet_num_allocated; +/* Size of each block. -ve means used. */ +static int *pnet_size; + +static int pnet_split_block(unsigned int i, unsigned short size) +{ + /* Reallocation required? */ + if (pnet_num_used + 1 > pnet_num_allocated) { + int *new = kmalloc(sizeof(new[0]) * pnet_num_allocated*2, + GFP_KERNEL); + if (!new) + return 0; + + memcpy(new, pnet_size, sizeof(new[0])*pnet_num_allocated); + pnet_num_allocated *= 2; + kfree(pnet_size); + pnet_size = new; + } + + /* Insert a new subblock */ + memmove(&pnet_size[i+1], &pnet_size[i], + sizeof(pnet_size[0]) * (pnet_num_used - i)); + pnet_num_used++; + + pnet_size[i+1] -= size; + pnet_size[i] = size; + return 1; +} + +static inline unsigned int pnet_block_size(int val) +{ + if (val < 0) + return -val; + return val; +} + +static void *pernet_modalloc(unsigned long size, unsigned long align, + const char *name) +{ + unsigned long extra; + unsigned int i; + void *ptr; + + if (align > SMP_CACHE_BYTES) { + printk(KERN_WARNING "%s: per-net alignment %li > %i\n", + name, align, SMP_CACHE_BYTES); + align = SMP_CACHE_BYTES; + } + + ptr = __per_net_start; + for (i = 0; i < pnet_num_used; ptr += pnet_block_size(pnet_size[i]), i++) { + /* Extra for alignment requirement. */ + extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; + BUG_ON(i == 0 && extra != 0); + + if (pnet_size[i] < 0 || pnet_size[i] < extra + size) + continue; + + /* Transfer extra to previous block. 
*/ + if (pnet_size[i-1] < 0) + pnet_size[i-1] -= extra; + else + pnet_size[i-1] += extra; + pnet_size[i] -= extra; + ptr += extra; + + /* Split block if warranted */ + if (pnet_size[i] - size > sizeof(unsigned long)) + if (!pnet_split_block(i, size)) + return NULL; + + /* Mark allocated */ + pnet_size[i] = -pnet_size[i]; + return ptr; + } + + printk(KERN_WARNING "Could not allocate %lu bytes pernet data\n", + size); + return NULL; +} + +static void pernet_modfree(void *freeme) +{ + unsigned int i; + void *ptr = __per_net_start + pnet_block_size(pnet_size[0]); + + /* First entry is core kernel pernet data. */ + for (i = 1; i < pnet_num_used; ptr += pnet_block_size(pnet_size[i]), i++) { + if (ptr == freeme) { + pnet_size[i] = -pnet_size[i]; + goto free; + } + } + BUG(); + + free: + /* Merge with previous? */ + if (pnet_size[i-1] >= 0) { + pnet_size[i-1] += pnet_size[i]; + pnet_num_used--; + memmove(&pnet_size[i], &pnet_size[i+1], + (pnet_num_used - i) * sizeof(pnet_size[0])); + i--; + } + /* Merge with next? */ + if (i+1 < pnet_num_used && pnet_size[i+1] >= 0) { + pnet_size[i] += pnet_size[i+1]; + pnet_num_used--; + memmove(&pnet_size[i+1], &pnet_size[i+2], + (pnet_num_used - (i+1)) * sizeof(pnet_size[0])); + } +} + +static unsigned int find_pnetsec(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings) +{ + return find_sec(hdr, sechdrs, secstrings, ".data.pernet"); +} + +static int pernet_modinit(void) +{ + pnet_num_used = 2; + pnet_num_allocated = 2; + pnet_size = kmalloc(sizeof(pnet_size[0]) *pnet_num_allocated, + GFP_KERNEL); + /* Static in-kernel pernet data (used). */ + pnet_size[0] = -ALIGN(__per_net_end-__per_net_start, SMP_CACHE_BYTES); + /* Free room. */ + pnet_size[1] = PER_NET_MODULE_RESERVE; + if (pnet_size[1] <= 0) { + printk(KERN_ERR "No per-net room for modules.\n"); + pnet_num_used = 1 ; + } + return 0; +} +__initcall(pernet_modinit); +#else /* ... 
!CONFIG_NET_NS */ +static inline void *pernet_modalloc(unsigned long size, unsigned long align, + const char *name) +{ + return NULL; +} +static inline void pernet_modfree(void *pnetptr) +{ + BUG(); +} +static inline unsigned int find_pnetsec(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings) +{ + return 0; +} +static inline void pernet_modcopy(void *pnetdst, const void *src, + unsigned long size) +{ + /* pnetsec should be 0, and size of that section should be 0. */ + BUG_ON(size != 0); +} +#endif /* CONFIG_NET_NS */ + #define MODINFO_ATTR(field) \ static void setup_modinfo_##field(struct module *mod, const char *s) \ { \ @@ -1198,6 +1362,8 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); + if (mod->pernet) + pernet_modfree(mod->pernet); if (mod->percpu) percpu_modfree(mod->percpu); @@ -1263,6 +1429,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int versindex, unsigned int pcpuindex, + unsigned int pnetindex, struct module *mod) { Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; @@ -1308,6 +1475,9 @@ static int simplify_symbols(Elf_Shdr *sechdrs, /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) secbase = (unsigned long)mod->percpu; + /* Divert to pernet allocation if a pernet var. 
*/ + else if (sym[i].st_shndx == pnetindex) + secbase = (unsigned long)mod->pernet; else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1554,6 +1724,7 @@ static struct module *load_module(void __user *umod, unsigned int gplcrcindex; unsigned int versindex; unsigned int pcpuindex; + unsigned int pnetindex; unsigned int gplfutureindex; unsigned int gplfuturecrcindex; unsigned int unwindex = 0; @@ -1563,7 +1734,7 @@ static struct module *load_module(void __user *umod, unsigned int unusedgplcrcindex; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + void *percpu = NULL, *pernet = NULL, *ptr = NULL; /* Stops spurious gcc warning */ struct exception_table_entry *extable; mm_segment_t old_fs; @@ -1654,6 +1825,7 @@ static struct module *load_module(void __user *umod, versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); + pnetindex = find_pnetsec(hdr, sechdrs, secstrings); #ifdef ARCH_UNWIND_SECTION_NAME unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); #endif @@ -1719,6 +1891,20 @@ static struct module *load_module(void __user *umod, mod->percpu = percpu; } + if (pnetindex) { + /* We have a special allocation for this section */ + pernet = pernet_modalloc(sechdrs[pnetindex].sh_size, + sechdrs[pnetindex].sh_addralign, + mod->name); + + if (!pernet) { + err = -ENOMEM; + goto free_percpu; + } + sechdrs[pnetindex].sh_flags &= ~(unsigned long)SHF_ALLOC; + mod->pernet = pernet; + } + /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any special cases for the architectures. 
*/ @@ -1728,7 +1914,7 @@ static struct module *load_module(void __user *umod, ptr = module_alloc(mod->core_size); if (!ptr) { err = -ENOMEM; - goto free_percpu; + goto free_pernet; } memset(ptr, 0, mod->core_size); mod->module_core = ptr; @@ -1781,7 +1967,7 @@ static struct module *load_module(void __user *umod, /* Fix up syms, so that st_value is a pointer to location. */ err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, - mod); + pnetindex, mod); if (err < 0) goto cleanup; @@ -1860,6 +2046,10 @@ static struct module *load_module(void __user *umod, percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); + /* Copy pernet area over. */ + pernet_modcopy(mod->pernet, (void *)sechdrs[pnetindex].sh_addr, + sechdrs[pnetindex].sh_size); + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); err = module_finalize(hdr, sechdrs, mod); @@ -1924,6 +2114,9 @@ static struct module *load_module(void __user *umod, cleanup: module_unload_free(mod); module_free(mod, mod->module_init); + free_pernet: + if (pernet) + pernet_modfree(pernet); free_core: module_free(mod, mod->module_core); free_percpu: diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4ae266d..93e3879 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -1,4 +1,9 @@ +#include #include +#include +#include +#include +#include #include /* @@ -10,6 +15,233 @@ static struct list_head *first_device = &pernet_list; static DEFINE_MUTEX(net_mutex); net_t net_template; +#ifdef CONFIG_NET_NS + +static DEFINE_MUTEX(net_list_mutex); + +static net_t net_tail; +static struct kmem_cache *net_cachep; +static size_t net_size; + +/* By using a special section for the first variable in the + * per net sectionI get several advantages. + * - I can align the entire network namespace structure easily + * to any desired alignment without needing an alignment directive + * in the linker script. 
In the worst case the section will start + * with some padding I will never see. + * - The code is C so I don't need linker script or header file tricks + * to make the alignment SMP_CACHE_BYTES + * - I am guaranteed what the first structure in the network namespace is. + * This allows things like container_of to work and be useful. + */ +__attribute__((section(".data.pernet.head"), aligned(SMP_CACHE_BYTES))) +struct net_namespace_head __pernetname(net_head) = { + .count = ATOMIC_INIT(1), + .use_count = ATOMIC_INIT(0), +}; +EXPORT_PER_NET_SYMBOL_GPL(net_head); + +void net_lock(void) +{ + mutex_lock(&net_list_mutex); +} + +void net_unlock(void) +{ + mutex_unlock(&net_list_mutex); +} + +static void net_list_remove(net_t net) +{ + net_t next, prev; + BUG_ON(net_eq(net, init_net())); + + next = per_net(net_head, net).next; + prev = per_net(net_head, net).prev; + + per_net(net_head, prev).next = next; + if (null_net(next)) { + net_tail = prev; + } else { + per_net(net_head, next).prev = prev; + } +} + +static void net_list_append(net_t net) +{ + + per_net(net_head, net_tail).next = net; + per_net(net_head, net).prev = net_tail; + net_tail = net; +} + +static net_t net_alloc(void) +{ + return net_of(kmem_cache_alloc(net_cachep, GFP_KERNEL), net_head); +} + +static void net_free(net_t net) +{ + struct net_namespace_head *head; + if (null_net(net)) + return; + + head = &per_net(net_head, net); + + if (unlikely(atomic_read(&head->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&head->use_count)); + return; + } + + kmem_cache_free(net_cachep, head); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct list_head *ptr; + net_t net; + + net = net_of(work, net_head.work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. 
*/ + net_lock(); + net_list_remove(net); + net_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_prev(ptr, &pernet_list) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + + +void __put_net(net_t net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&per_net(net_head, net).work, cleanup_net); + schedule_work(&per_net(net_head, net).work); +} +EXPORT_SYMBOL_GPL(__put_net); + +/* + * setup_net runs the initializers for the network namespace object. + */ +static int setup_net(net_t net) +{ + /* Must be called with net_mutex held */ + struct pernet_operations *ops; + struct list_head *ptr; + int error; + + /* First initialize the data from the template */ + memcpy(&per_net(net_head, net), &per_net(net_head, net_template), net_size); + + error = 0; + list_for_each(ptr, &pernet_list) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->init) { + error = ops->init(net); + if (error < 0) + goto out_undo; + } + } +out: + return error; +out_undo: + /* Walk through the list backwards calling the exit functions + * for the pernet modules whose init functions did not fail. 
+ */ + for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->exit) + ops->exit(net); + } + goto out; +} + +void pernet_modcopy(void *pnetdst, const void *src, unsigned long size) +{ + net_t net; + + mutex_lock(&net_mutex); + memcpy(pnetdst + __per_net_offset(net_template), src, size); + for_each_net(net) + memcpy(pnetdst + __per_net_offset(net), src, size); + mutex_unlock(&net_mutex); +} + +static int __init net_ns_init(void) +{ + size_t init_size; + net_t init_net; + int err; + + /* Compute the size of the init section */ + init_size = __per_net_end - __per_net_start; + + /* Compute how large my net namespace structure will be */ + net_size = ALIGN(init_size, SMP_CACHE_BYTES); + net_size += PER_NET_MODULE_RESERVE; + net_size = ALIGN(net_size, SMP_CACHE_BYTES); + + printk(KERN_INFO "net_namespace: %zd bytes\n", net_size); + net_cachep = kmem_cache_create("net_namespace", net_size, + SMP_CACHE_BYTES, + SLAB_PANIC, NULL, NULL); + + /* Allocate my template */ + net_template = net_alloc(); + if (null_net(net_template)) + panic("Could not allocate network namespace template"); + + /* Initialize my template */ + memset(&per_net(net_head, net_template), '\0', net_size); + memcpy(&per_net(net_head, net_template), + &__pernetname(net_head), + init_size); + + /* Setup the initial network namespace */ + init_net = net_alloc(); + if (null_net(init_net)) + panic("Could not allocate initial network namespace"); + + mutex_lock(&net_mutex); + err = setup_net(init_net); + + net_lock(); + net_tail = init_net; + net_unlock(); + + mutex_unlock(&net_mutex); + if (err) + panic("Could not setup the initial network namespace"); + + /* Initialze the init_nsproxy */ + init_nsproxy.net_ns = init_net; + + return 0; +} + +pure_initcall(net_ns_init); + +#endif /* CONFIG_NET_NS */ + static int register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { -- 1.4.4.1.g278f From ebiederm at 
xmission.com Thu Jan 25 11:00:14 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:14 -0700 Subject: [openib-general] [PATCH RFC 12/31] net: Make packet reception network namespace safe In-Reply-To: References: Message-ID: <1169751635362-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This patch modifies every packet receive function registered with dev_add_pack() to drop packets if they are not from the initial network namespace. In addition, to ensure consistency of argument passing, the unnecessary device parameter is removed. This should ensure that the various network stacks do not receive packets in anything but the initial network namespace until the code has been converted and is ready for them. Anything I may have missed will generate a compiler error, as the function prototype has changed, preventing us from overlooking something by accident. Signed-off-by: Eric W. Biederman --- drivers/block/aoe/aoenet.c | 7 ++++++- drivers/net/bonding/bond_3ad.c | 7 ++++++- drivers/net/bonding/bond_3ad.h | 2 +- drivers/net/bonding/bond_alb.c | 6 +++++- drivers/net/bonding/bond_main.c | 6 +++++- drivers/net/hamradio/bpqether.c | 8 ++++++-- drivers/net/pppoe.c | 8 ++++++-- drivers/net/wan/hdlc.c | 10 +++++++++- drivers/net/wan/lapbether.c | 6 +++++- drivers/net/wan/syncppp.c | 14 ++++++++++---- include/linux/netdevice.h | 1 - include/net/ax25.h | 2 +- include/net/datalink.h | 2 +- include/net/ip.h | 2 +- include/net/ipv6.h | 1 - include/net/llc.h | 4 +--- include/net/p8022.h | 1 - include/net/psnap.h | 2 +- include/net/x25.h | 2 +- net/802/p8022.c | 1 - net/802/psnap.c | 5 ++--- net/8021q/vlan.h | 2 +- net/8021q/vlan_dev.c | 8 +++++++- net/appletalk/aarp.c | 6 +++++- net/appletalk/ddp.c | 15 ++++++++++++--- net/ax25/ax25_in.c | 8 +++++++- net/bridge/br_private.h | 2 +- net/bridge/br_stp_bpdu.c | 8 ++++++-- net/core/dev.c | 6 +++--- net/decnet/af_decnet.c | 2 +- net/decnet/dn_route.c | 6 +++++- 
net/econet/af_econet.c | 6 +++++- net/ipv4/arp.c | 6 +++++- net/ipv4/ip_input.c | 7 +++++-- net/ipv4/ipconfig.c | 16 ++++++++++++---- net/ipv6/ip6_input.c | 8 +++++++- net/ipx/af_ipx.c | 6 +++++- net/irda/irlap_frame.c | 7 +++++-- net/irda/irmod.c | 2 +- net/llc/llc_core.c | 1 - net/llc/llc_input.c | 10 +++++++--- net/packet/af_packet.c | 18 +++++++++++++++--- net/tipc/eth_media.c | 9 ++++++++- net/x25/x25_dev.c | 6 +++++- 44 files changed, 195 insertions(+), 67 deletions(-) diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 9626e0f..9b72a58 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "aoe.h" #define NECODES 5 @@ -108,11 +109,15 @@ aoenet_xmit(struct sk_buff *sl) * (1) len doesn't include the header by default. I want this. */ static int -aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) +aoenet_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *ifp = skb->dev; struct aoe_hdr *h; u32 n; + if (!net_eq(skb->dev->nd_net, init_net())) + goto exit; + skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) return 0; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 3fb354d..eea4f11 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "bonding.h" #include "bond_3ad.h" @@ -2443,12 +2444,16 @@ out: return 0; } -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct packet_type* ptype, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct bonding *bond = dev->priv; struct slave *slave = NULL; int ret = NET_RX_DROP; + if (!net_eq(skb->dev->nd_net, init_net())) + goto out; + if 
(!(dev->flags & IFF_MASTER)) goto out; diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index 6ad5ad6..1f2d7d2 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -282,7 +282,7 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave); void bond_3ad_handle_link_change(struct slave *slave, char link); int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev); +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct packet_type* ptype, struct net_device *orig_dev); int bond_3ad_set_carrier(struct bonding *bond); #endif //__BOND_3AD_H__ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 3292316..be780a8 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -336,12 +336,16 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) _unlock_rx_hashtbl(bond); } -static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) +static int rlb_arp_recv(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev) { + struct net_device *bond_dev = skb->dev; struct bonding *bond = bond_dev->priv; struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; + if (!net_eq(skb->dev->nd_net, init_net())) + goto out; + if (!(bond_dev->flags & IFF_MASTER)) goto out; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9b3bf4e..9c70568 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2475,14 +2475,18 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip } } -static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct 
packet_type *pt, struct net_device *orig_dev) +static int bond_arp_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct arphdr *arp; struct slave *slave; struct bonding *bond; unsigned char *arp_ptr; u32 sip, tip; + if (!net_eq(skb->dev->nd_net, init_net())) + goto out; + if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) goto out; diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 9fc92ad..c513e90 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -93,7 +93,7 @@ static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; static char bpq_eth_addr[6]; -static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); +static int bpq_rcv(struct sk_buff *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); static const char *bpq_print_ethaddr(const unsigned char *); @@ -166,13 +166,17 @@ static inline int dev_is_ethdev(struct net_device *dev) /* * Receive an AX.25 frame via an ethernet interface. 
*/ -static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) +static int bpq_rcv(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; int len; char * ptr; struct ethhdr *eth; struct bpqdev *bpq; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index d09334d..caf8ca3 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -376,7 +376,6 @@ abort_kfree: * ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) @@ -384,6 +383,9 @@ static int pppoe_rcv(struct sk_buff *skb, struct pppoe_hdr *ph; struct pppox_sock *po; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; @@ -408,7 +410,6 @@ out: * ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) @@ -416,6 +417,9 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct pppoe_hdr *ph; struct pppox_sock *po; + if (!net_eq(skb->dev->nd_net, init_net())) + goto abort; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index db354e0..f3bf160 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -36,6 +36,7 @@ #include #include #include +#include static const char* version = "HDLC support module revision 1.20"; @@ -62,10 +63,17 @@ static struct net_device_stats *hdlc_get_stats(struct net_device *dev) -static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, +static int hdlc_rcv(struct sk_buff *skb, struct packet_type 
*p, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct hdlc_device_desc *desc = dev_to_desc(dev); + + if (!net_eq(skb->dev->nd_net, init_net())) { + kfree_skb(skb); + return 0; + } + if (desc->netif_rx) return desc->netif_rx(skb); diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 6c302e9..c1de21e 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -86,11 +86,15 @@ static __inline__ int dev_is_ethdev(struct net_device *dev) /* * Receive a LAPB frame via an ethernet interface. */ -static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) +static int lapbeth_rcv(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; int len, err; struct lapbethdev *lapbeth; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index 218f7b5..25137fc 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -51,6 +51,7 @@ #include #include +#include #include #include @@ -220,13 +221,13 @@ static void sppp_clear_timeout(struct sppp *p) * here. */ -static void sppp_input (struct net_device *dev, struct sk_buff *skb) +static void sppp_input (struct sk_buff *skb) { + struct net_device *dev = skb->dev; struct ppp_header *h; struct sppp *sp = (struct sppp *)sppp_of(dev); unsigned long flags; - skb->dev=dev; skb->mac.raw=skb->data; if (dev->flags & IFF_RUNNING) @@ -1443,11 +1444,16 @@ static void sppp_print_bytes (u_char *p, u16 len) * after interrupt servicing to process frames queued via netif_rx. 
*/ -static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) +static int sppp_rcv(struct sk_buff *skb, struct packet_type *p, struct net_device *orig_dev) { + if (!net_eq(skb->dev->nd_net, init_net())) { + kfree_skb(skb); + return 0; + } + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; - sppp_input(dev,skb); + sppp_input(skb); return 0; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a1579d..9e28671 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -558,7 +558,6 @@ struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ int (*func) (struct sk_buff *, - struct net_device *, struct packet_type *, struct net_device *); struct sk_buff *(*gso_segment)(struct sk_buff *skb, diff --git a/include/net/ax25.h b/include/net/ax25.h index 5ae10dd..a2ad59a 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -360,7 +360,7 @@ extern int ax25_protocol_is_registered(unsigned int); /* ax25_in.c */ extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); +extern int ax25_kiss_rcv(struct sk_buff *, struct packet_type *, struct net_device *); /* ax25_ip.c */ extern int ax25_hard_header(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); diff --git a/include/net/datalink.h b/include/net/datalink.h index deb7ca7..133d55e 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -8,7 +8,7 @@ struct datalink_proto { unsigned short header_length; - int (*rcvfunc)(struct sk_buff *, struct net_device *, + int (*rcvfunc)(struct sk_buff *, struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, unsigned char *); diff --git a/include/net/ip.h b/include/net/ip.h index 053f02b..c0c0dfd 
100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -88,7 +88,7 @@ extern int igmp_mc_proc_init(void); extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options *opt); -extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, +extern int ip_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 00328b7..0b1d1a9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -438,7 +438,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add */ extern int ipv6_rcv(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); diff --git a/include/net/llc.h b/include/net/llc.h index f502458..dae09b9 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -48,7 +48,6 @@ struct llc_sap { unsigned char f_bit; atomic_t refcnt; int (*rcv_func)(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); struct llc_addr laddr; @@ -67,7 +66,7 @@ extern struct list_head llc_sap_list; extern rwlock_t llc_sap_list_lock; extern unsigned char llc_station_mac_sa[ETH_ALEN]; -extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, +extern int llc_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev); extern int llc_mac_hdr_init(struct sk_buff *skb, @@ -81,7 +80,6 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); extern struct llc_sap *llc_sap_open(unsigned char lsap, int (*rcv)(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)); static inline void llc_sap_hold(struct llc_sap *sap) diff --git a/include/net/p8022.h b/include/net/p8022.h index 42e9fac..545c15e 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -3,7 +3,6 
@@ extern struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); diff --git a/include/net/psnap.h b/include/net/psnap.h index b2e01cc..e935d50 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,7 +1,7 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H -extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); +extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct packet_type *, struct net_device *orig_dev)); extern void unregister_snap_client(struct datalink_proto *proto); #endif diff --git a/include/net/x25.h b/include/net/x25.h index e47fe44..e3d4cfb 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -184,7 +184,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); -extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); +extern int x25_lapb_receive_frame(struct sk_buff *, struct packet_type *, struct net_device *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); diff --git a/net/802/p8022.c b/net/802/p8022.c index 2530f35..1c7022d 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -34,7 +34,6 @@ static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, struct datalink_proto *register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)) { diff --git a/net/802/psnap.c b/net/802/psnap.c index 270b9d2..59ac0c5 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -46,7 +46,7 @@ static 
struct datalink_proto *find_snap_client(unsigned char *desc) /* * A SNAP packet has arrived */ -static int snap_rcv(struct sk_buff *skb, struct net_device *dev, +static int snap_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { int rc = 1; @@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, /* Pass the frame on. */ skb->h.raw += 5; skb_pull_rcsum(skb, 5); - rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); + rc = proto->rcvfunc(skb, &snap_packet_type, orig_dev); } else { skb->sk = NULL; kfree_skb(skb); @@ -117,7 +117,6 @@ module_exit(snap_exit); */ struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, - struct net_device *, struct packet_type *, struct net_device *)) { diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 9ae3a14..9207999 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -50,7 +50,7 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev, /* found in vlan_dev.c */ int vlan_dev_rebuild_header(struct sk_buff *skb); -int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, +int vlan_skb_recv(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev); int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 60a508e..9fce3a8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -112,9 +112,10 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) * been commented out now... 
--Ben * */ -int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, +int vlan_skb_recv(struct sk_buff *skb, struct packet_type* ptype, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; unsigned char *rawp = NULL; struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); unsigned short vid; @@ -122,6 +123,11 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vlan_TCI; __be16 proto; + if (!net_eq(skb->dev->nd_net, init_net())) { + kfree_skb(skb); + return 0; + } + /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index b51a010..85c4dbc 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -697,9 +697,10 @@ static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a, * This is called by the SNAP driver whenever we see an AARP SNAP * frame. We currently only support Ethernet. */ -static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, +static int aarp_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct elapaarp *ea = aarp_hdr(skb); int hash, ret = 0; __u16 function; @@ -707,6 +708,9 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; + if (!net_eq(skb->dev->nd_net, init_net())) + goto out0; + /* We only do Ethernet SNAP AARP. */ if (dev->type != ARPHRD_ETHER) goto out0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index e08367b..f4ff8aa 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1393,9 +1393,10 @@ free_it: * extracted. PPP should probably pass frames marked as for this layer. 
* [ie ARPHRD_ETHERTALK] */ -static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, +static int atalk_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct ddpehdr *ddp; struct sock *sock; struct atalk_iface *atif; @@ -1403,6 +1404,9 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, int origlen; __u16 len_hops; + if (!net_eq(skb->dev->nd_net, init_net())) + goto freeit; + /* Don't mangle buffer if shared */ if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; @@ -1482,9 +1486,14 @@ freeit: * Caller must provide enough headroom on the packet to pull the short * header and append a long one. */ -static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, +static int ltalk_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; + + if (!net_eq(skb->dev->nd_net, init_net())) + goto freeit; + /* Expand any short form frames */ if (skb->mac.raw[2] == 1) { struct ddpehdr *ddp; @@ -1526,7 +1535,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, } skb->h.raw = skb->data; - return atalk_rcv(skb, dev, pt, orig_dev); + return atalk_rcv(skb, pt, orig_dev); freeit: kfree_skb(skb); return 0; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index e9d9429..8c9b0dd 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -444,12 +444,18 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, /* * Receive an AX.25 frame via a SLIP interface. */ -int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, +int ax25_kiss_rcv(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! 
*/ + if (!net_eq(skb->dev->nd_net, init_net())) { + kfree_skb(skb); + return 0; + } + if ((*skb->data & 0x0F) != 0) { kfree_skb(skb); /* Not a KISS data frame */ return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3a534e9..f1712b9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -223,7 +223,7 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p, extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ -extern int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, +extern int br_stp_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev); /* br_stp_timer.c */ diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 068d8af..7f9f8b4 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -129,15 +130,18 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) * * NO locks, but rcu_read_lock (preempt_disabled) */ -int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, +int br_stp_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = rcu_dereference(dev->br_port); + struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); struct net_bridge *br; const unsigned char *buf; + if (!net_eq(skb->dev->nd_net, init_net())) + goto err; + if (!p) goto err; diff --git a/net/core/dev.c b/net/core/dev.c index a3ee150..d8aa534 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1094,7 +1094,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype, skb->dev); + ptype->func(skb2, ptype, skb->dev); } } rcu_read_unlock(); @@ -1693,7 +1693,7 @@ static __inline__ int 
deliver_skb(struct sk_buff *skb, struct net_device *orig_dev) { atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + return pt_prev->func(skb, pt_prev, orig_dev); } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) @@ -1841,7 +1841,7 @@ ncls: } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + ret = pt_prev->func(skb, pt_prev, orig_dev); } else { kfree_skb(skb); /* Jamal, now you will not able to escape explaining diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index f1553fa..5e8042f 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2104,7 +2104,7 @@ static struct notifier_block dn_dev_notifier = { .notifier_call = dn_device_event, }; -extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); +extern int dn_route_rcv(struct sk_buff *, struct packet_type *, struct net_device *); static struct packet_type dn_dix_packet_type = { .type = __constant_htons(ETH_P_DNA_RT), diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0d657eb..4263cd9 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -575,14 +575,18 @@ static int dn_route_ptp_hello(struct sk_buff *skb) return NET_RX_SUCCESS; } -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +int dn_route_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct dn_skb_cb *cb; unsigned char flags = 0; __u16 len = dn_ntohs(*(__le16 *)skb->data); struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; + if (!net_eq(skb->dev->nd_net, init_net())) + goto dump_it; + if (dn == NULL) goto dump_it; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index a0b3fc5..0baffda 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1057,12 +1057,16 @@ release: * Receive an Econet frame from a device. 
*/ -static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int econet_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct ec_framehdr *hdr; struct sock *sk; struct ec_device *edev = dev->ec_ptr; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + if (skb->pkt_type == PACKET_OTHERHOST) goto drop; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index e3b89a7..95a34c7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -928,11 +928,15 @@ static void parp_redo(struct sk_buff *skb) * Receive an arp request from the device layer. */ -static int arp_rcv(struct sk_buff *skb, struct net_device *dev, +static int arp_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct arphdr *arp; + if (!net_eq(skb->dev->nd_net, init_net())) + goto freeskb; + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull(skb, (sizeof(struct arphdr) + (2 * dev->addr_len) + diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 212734c..77dddce 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -370,11 +370,14 @@ drop: /* * Main IP Receive routine. */ -int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +int ip_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { struct iphdr *iph; u32 len; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. 
*/ @@ -431,7 +434,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, ip_rcv_finish); inhdr_error: diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 8b649c5..91b5729 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -397,7 +397,7 @@ static int __init ic_defaults(void) #ifdef IPCONFIG_RARP -static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); +static int ic_rarp_recv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type rarp_packet_type __initdata = { .type = __constant_htons(ETH_P_RARP), @@ -418,14 +418,18 @@ static inline void ic_rarp_cleanup(void) * Process received RARP packet. */ static int __init -ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +ic_rarp_recv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct arphdr *rarp; unsigned char *rarp_ptr; __be32 sip, tip; unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -559,7 +563,7 @@ struct bootp_pkt { /* BOOTP packet format */ #define DHCPRELEASE 7 #define DHCPINFORM 8 -static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); +static int ic_bootp_recv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type bootp_packet_type __initdata = { .type = __constant_htons(ETH_P_IP), @@ -827,13 +831,17 @@ static void 
__init ic_do_bootp_ext(u8 *ext) /* * Receive BOOTP reply. */ -static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int __init ic_bootp_recv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct bootp_pkt *b; struct iphdr *h; struct ic_device *d; int len, ext_len; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + /* Perform verifications before taking the lock. */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index ad0b8ab..ac366b9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -56,12 +56,18 @@ inline int ip6_rcv_finish( struct sk_buff *skb) return dst_input(skb); } -int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +int ipv6_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; + if (!net_eq(skb->dev->nd_net, init_net())) { + kfree_skb(skb); + return 0; + } + if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 2ec4a3c..5c5f2cd 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1637,14 +1637,18 @@ out: return rc; } -static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int ipx_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; /* NULL here for pt means the packet was looped back */ struct ipx_interface *intrfc; struct ipxhdr *ipx; u16 ipx_pktsize; int rc = 0; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + /* Not ours */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c 
index dba349c..3252be7 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1306,7 +1306,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb, * LMP level in irlmp.c. * Jean II */ -int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, +int irlap_driver_rcv(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev) { struct irlap_info info; @@ -1314,8 +1314,11 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, int command; __u8 control; + if (!net_eq(skb->dev->nd_net, init_net())) + goto out; + /* FIXME: should we get our own field? */ - self = (struct irlap_cb *) dev->atalk_ptr; + self = (struct irlap_cb *) skb->dev->atalk_ptr; /* If the net device is down, then IrLAP is gone! */ if (!self || self->magic != LAP_MAGIC) { diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 2869b16..6b1989c 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -52,7 +52,7 @@ extern void irda_sysctl_unregister(void); extern int irsock_init(void); extern void irsock_cleanup(void); /* irlap_frame.c */ -extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, +extern int irlap_driver_rcv(struct sk_buff *, struct packet_type *, struct net_device *); /* diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index d12413c..f438c38 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -112,7 +112,6 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) */ struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, - struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)) { diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index db82aff..cecb4a9 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -12,6 +12,7 @@ * See the GNU General Public License for more details. 
*/ #include +#include #include #include #include @@ -136,15 +137,18 @@ static inline int llc_fixup_skb(struct sk_buff *skb) * the frame is related to a busy connection (a connection is sending * data now), it queues this frame in the connection's backlog. */ -int llc_rcv(struct sk_buff *skb, struct net_device *dev, +int llc_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { struct llc_sap *sap; struct llc_pdu_sn *pdu; int dest; - int (*rcv)(struct sk_buff *, struct net_device *, + int (*rcv)(struct sk_buff *, struct packet_type *, struct net_device *); + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + /* * When the interface is in promisc. mode, drop all the crap that it * receives, do not try to analyse it. @@ -175,7 +179,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, if (rcv) { struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC); if (cskb) - rcv(cskb, dev, pt, orig_dev); + rcv(cskb, pt, orig_dev); } dest = llc_pdu_type(skb); if (unlikely(!dest || !llc_type_handlers[dest - 1])) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ca371ea..aa298c3 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -258,11 +258,15 @@ static const struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET static const struct proto_ops packet_ops_spkt; -static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int packet_rcv_spkt(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct sock *sk; struct sockaddr_pkt *spkt; + if (!net_eq(dev->nd_net, init_net())) + goto out; + /* * When we registered the protocol we saved the socket in the data * field for just this event. @@ -461,8 +465,9 @@ static inline int run_filter(struct sk_buff *skb, struct sock *sk, we will not harm anyone. 
*/ -static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int packet_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct sock *sk; struct sockaddr_ll *sll; struct packet_sock *po; @@ -470,6 +475,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet int skb_len = skb->len; unsigned snaplen; + if (!net_eq(dev->nd_net, init_net())) + goto drop; + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -561,8 +569,9 @@ drop: } #ifdef CONFIG_PACKET_MMAP -static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int tpacket_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; @@ -574,6 +583,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe unsigned short macoff, netoff; struct sk_buff *copy_skb = NULL; + if (!net_eq(dev->nd_net, init_net())) + goto drop; + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 682da4a..b181cf9 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -38,6 +38,7 @@ #include #include #include +#include #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI @@ -91,12 +92,18 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, * and ensures message size matches actual length */ -static int recv_msg(struct sk_buff *buf, struct net_device *dev, +static int recv_msg(struct sk_buff *buf, struct packet_type *pt, struct net_device *orig_dev) { + struct net_device *dev = buf->dev; struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; + if (!net_eq(buf->dev->nd_net, init_net())) { + kfree_skb(buf); + return 0; + } + if 
(likely(eb_ptr->bearer)) { if (likely(!dev->promiscuity) || !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) || diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 47b68a3..0f63415 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -79,12 +79,16 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) return 0; } -int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, +int x25_lapb_receive_frame(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev) { + struct net_device *dev = skb->dev; struct sk_buff *nskb; struct x25_neigh *nb; + if (!net_eq(skb->dev->nd_net, init_net())) + goto drop; + nskb = skb_copy(skb, GFP_ATOMIC); if (!nskb) goto drop; -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:18 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:18 -0700 Subject: [openib-general] [PATCH RFC 16/31] net: Make the device list and device lookups per namespace. In-Reply-To: References: Message-ID: <11697516363099-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted This patch makes most of the generic device layer network namespace safe. This patch makes dev_base, dev_base_lock per network namespace variables, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anything in the core of the network stack that was affected by the change of dev_base and dev_base_lock was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use init_net() the initial network namespace.
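This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundamentally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman --- arch/s390/appldata/appldata_net_sum.c | 6 +- arch/sparc64/solaris/ioctl.c | 6 +- drivers/atm/idt77252.c | 2 +- drivers/block/aoe/aoecmd.c | 7 +- drivers/net/bonding/bond_main.c | 6 +- drivers/net/bonding/bond_sysfs.c | 3 +- drivers/net/eql.c | 9 +- drivers/net/pppoe.c | 2 +- drivers/net/shaper.c | 3 +- drivers/net/tun.c | 3 +- drivers/net/wan/dlci.c | 4 +- drivers/net/wan/sbni.c | 2 +- drivers/net/wireless/strip.c | 8 +- drivers/parisc/led.c | 6 +- include/linux/if_bridge.h | 2 +- include/linux/if_vlan.h | 2 +- include/linux/netdevice.h | 24 ++-- include/net/iw_handler.h | 2 +- net/802/tr.c | 2 +- net/8021q/vlan.c | 10 +- net/8021q/vlan_dev.c | 12 +- net/8021q/vlanproc.c | 8 +- net/appletalk/ddp.c | 6 +- net/atm/mpc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bridge/br_if.c | 6 +- net/bridge/br_ioctl.c | 7 +- net/bridge/br_netlink.c | 9 +- net/bridge/br_private.h | 2 +- net/core/dev.c | 282 +++++++++++++++++++++------------ net/core/dev_mcast.c | 46 +++++- net/core/ethtool.c | 4 +- net/core/fib_rules.c | 4 +- net/core/link_watch.c | 5 +- net/core/neighbour.c | 6 +- net/core/net-sysfs.c | 27 ++-- net/core/netpoll.c | 2 +- net/core/pktgen.c | 2 +- net/core/rtnetlink.c | 24 ++-- net/core/sock.c | 3 +- net/core/wireless.c | 43 +++++- net/decnet/af_decnet.c | 6 +- net/decnet/dn_dev.c | 32 ++-- net/decnet/dn_fib.c | 12 +- net/decnet/dn_route.c | 14 +- net/decnet/sysctl_net_decnet.c | 4 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 +- net/ipv4/devinet.c | 36 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 4 +- net/ipv4/igmp.c | 12 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_gre.c | 4 +- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipconfig.c | 2 +- net/ipv4/ipip.c | 4 +- net/ipv4/ipmr.c | 4 +- net/ipv4/ipvs/ip_vs_sync.c | 10 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/route.c | 4 +- net/ipv6/addrconf.c | 44 +++--- net/ipv6/af_inet6.c | 2 +- net/ipv6/anycast.c | 20 ++-- net/ipv6/datagram.c | 2 +- net/ipv6/ip6_tunnel.c | 6 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/mcast.c | 20 ++-- net/ipv6/raw.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/route.c | 4 +- net/ipv6/sit.c | 4 +- net/ipx/af_ipx.c | 6 +- net/llc/af_llc.c | 4 +- net/llc/llc_core.c | 5 +- net/netrom/nr_route.c | 14 +- net/packet/af_packet.c | 18 +- net/rose/rose_route.c | 20 ++-- net/sched/act_mirred.c | 2 +- net/sched/cls_api.c | 4 +- net/sched/em_meta.c | 2 +- net/sched/sch_api.c | 14 +- net/sctp/ipv6.c | 4 +- net/sctp/protocol.c | 6 +- net/socket.c | 22 ++- net/tipc/eth_media.c | 2 +- net/wanrouter/af_wanpipe.c | 24 ++-- net/x25/x25_route.c | 2 +- 88 files changed, 597 insertions(+), 433 deletions(-) diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 075e619..4a32370 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -106,8 +106,8 @@ static void appldata_get_net_sum_data(void *data) rx_dropped = 0; tx_dropped = 0; collisions = 0; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { if (dev->get_stats == NULL) { continue; } @@ -123,7 +123,7 @@ static void appldata_get_net_sum_data(void *data) collisions += stats->collisions; i++; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net()));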
net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 +- net/ipv4/devinet.c | 36 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 4 +- net/ipv4/igmp.c | 12 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_gre.c | 4 +- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipconfig.c | 2 +- net/ipv4/ipip.c | 4 +- net/ipv4/ipmr.c | 4 +- net/ipv4/ipvs/ip_vs_sync.c | 10 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/route.c | 4 +- net/ipv6/addrconf.c | 44 +++--- net/ipv6/af_inet6.c | 2 +- net/ipv6/anycast.c | 20 ++-- net/ipv6/datagram.c | 2 +- net/ipv6/ip6_tunnel.c | 6 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/mcast.c | 20 ++-- net/ipv6/raw.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/route.c | 4 +- net/ipv6/sit.c | 4 +- net/ipx/af_ipx.c | 6 +- net/llc/af_llc.c | 4 +- net/llc/llc_core.c | 5 +- net/netrom/nr_route.c | 14 +- net/packet/af_packet.c | 18 +- net/rose/rose_route.c | 20 ++-- net/sched/act_mirred.c | 2 +- net/sched/cls_api.c | 4 +- net/sched/em_meta.c | 2 +- net/sched/sch_api.c | 14 +- net/sctp/ipv6.c | 4 +- net/sctp/protocol.c | 6 +- net/socket.c | 22 ++- net/tipc/eth_media.c | 2 +- net/wanrouter/af_wanpipe.c | 24 ++-- net/x25/x25_route.c | 2 +- 88 files changed, 597 insertions(+), 433 deletions(-) diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 075e619..4a32370 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -106,8 +106,8 @@ static void appldata_get_net_sum_data(void *data) rx_dropped = 0; tx_dropped = 0; collisions = 0; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { if (dev->get_stats == NULL) { continue; } @@ -123,7 +123,7 @@ static void appldata_get_net_sum_data(void *data) collisions += stats->collisions; i++; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); 
net_data->nr_interfaces = i; net_data->rx_packets = rx_packets; net_data->tx_packets = tx_packets; diff --git a/arch/sparc64/solaris/ioctl.c b/arch/sparc64/solaris/ioctl.c index 330743c..1ecf4ab 100644 --- a/arch/sparc64/solaris/ioctl.c +++ b/arch/sparc64/solaris/ioctl.c @@ -685,9 +685,9 @@ static inline int solaris_i(unsigned int fd, unsigned int cmd, u32 arg) struct net_device *d; int i = 0; - read_lock_bh(&dev_base_lock); - for (d = dev_base; d; d = d->next) i++; - read_unlock_bh(&dev_base_lock); + read_lock_bh(&per_net(dev_base_lock, init_net())); + for (d = per_net(dev_base, init_net()); d; d = d->next) i++; + read_unlock_bh(&per_net(dev_base_lock, init_net())); if (put_user (i, (int __user *)A(arg))) return -EFAULT; diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index f407861..3e75e0e 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3569,7 +3569,7 @@ init_card(struct atm_dev *dev) * XXX: */ sprintf(tname, "eth%d", card->index); - tmp = dev_get_by_name(tname); /* jhs: was "tmp = dev_get(tname);" */ + tmp = dev_get_by_name(init_net(), tname); /* jhs: was "tmp = dev_get(tname);" */ if (tmp) { memcpy(card->atmdev->esi, tmp->dev_addr, 6); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index bb022ed..9678169 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "aoe.h" @@ -192,8 +193,8 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) sl = sl_tail = NULL; - read_lock(&dev_base_lock); - for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (ifp = per_net(dev_base, init_net()); ifp; dev_put(ifp), ifp = ifp->next) { dev_hold(ifp); if (!is_aoe_netif(ifp)) continue; @@ -221,7 +222,7 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) skb->next = sl; sl = skb; } - read_unlock(&dev_base_lock); + 
read_unlock(&per_net(dev_base_lock, init_net())); if (tail != NULL) *tail = sl_tail; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3e04f58..2963004 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2932,7 +2932,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) int i; /* make sure the bond won't be taken away */ - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); read_lock_bh(&bond->lock); if (*pos == 0) { @@ -2968,7 +2968,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v) struct bonding *bond = seq->private; read_unlock_bh(&bond->lock); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static void bond_info_show_master(struct seq_file *seq) @@ -3742,7 +3742,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd } down_write(&(bonding_rwsem)); - slave_dev = dev_get_by_name(ifr->ifr_slave); + slave_dev = dev_get_by_name(init_net(), ifr->ifr_slave); dprintk("slave_dev=%p: \n", slave_dev); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index ced9ed8..561707c 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -36,6 +36,7 @@ #include #include #include +#include /* #define BONDING_DEBUG 1 */ #include "bonding.h" @@ -298,7 +299,7 @@ static ssize_t bonding_store_slaves(struct class_device *cd, const char *buffer, read_unlock_bh(&bond->lock); printk(KERN_INFO DRV_NAME ": %s: Adding slave %s.\n", bond->dev->name, ifname); - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(init_net(), ifname); if (!dev) { printk(KERN_INFO DRV_NAME ": %s: Interface %s does not exist!\n", diff --git a/drivers/net/eql.c b/drivers/net/eql.c index a93700e..ceae8a0 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -116,6 +116,7 @@ #include #include #include +#include #include #include @@ -412,7 +413,7 @@ static 
int eql_enslave(struct net_device *master_dev, slaving_request_t __user * if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; - slave_dev = dev_get_by_name(srq.slave_name); + slave_dev = dev_get_by_name(init_net(), srq.slave_name); if (slave_dev) { if ((master_dev->flags & IFF_UP) == IFF_UP) { /* slave is not a master & not already a slave: */ @@ -460,7 +461,7 @@ static int eql_emancipate(struct net_device *master_dev, slaving_request_t __use if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; - slave_dev = dev_get_by_name(srq.slave_name); + slave_dev = dev_get_by_name(init_net(), srq.slave_name); ret = -EINVAL; if (slave_dev) { spin_lock_bh(&eql->queue.lock); @@ -493,7 +494,7 @@ static int eql_g_slave_cfg(struct net_device *dev, slave_config_t __user *scp) if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; - slave_dev = dev_get_by_name(sc.slave_name); + slave_dev = dev_get_by_name(init_net(), sc.slave_name); if (!slave_dev) return -ENODEV; @@ -528,7 +529,7 @@ static int eql_s_slave_cfg(struct net_device *dev, slave_config_t __user *scp) if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; - slave_dev = dev_get_by_name(sc.slave_name); + slave_dev = dev_get_by_name(init_net(), sc.slave_name); if (!slave_dev) return -ENODEV; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 3618862..3c8b0a7 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -586,7 +586,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, /* Don't re-bind if sid==0 */ if (sp->sa_addr.pppoe.sid != 0) { - dev = dev_get_by_name(sp->sa_addr.pppoe.dev); + dev = dev_get_by_name(init_net(), sp->sa_addr.pppoe.dev); error = -ENODEV; if (!dev) diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index e886e8d..b852055 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -86,6 +86,7 @@ #include #include +#include struct shaper_cb { unsigned long shapeclock; /* Time 
it should go out */ @@ -488,7 +489,7 @@ static int shaper_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { case SHAPER_SET_DEV: { - struct net_device *them=__dev_get_by_name(ss->ss_name); + struct net_device *them=__dev_get_by_name(init_net(), ss->ss_name); if(them==NULL) return -ENODEV; if(sh->dev) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 151a2e1..efa1db8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -464,7 +465,7 @@ static int tun_set_iff(struct file *file, struct ifreq *ifr) current->euid != tun->owner && !capable(CAP_NET_ADMIN)) return -EPERM; } - else if (__dev_get_by_name(ifr->ifr_name)) + else if (__dev_get_by_name(init_net(), ifr->ifr_name)) return -EINVAL; else { char *name; diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index f826494..1e3b73b 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -361,7 +361,7 @@ static int dlci_add(struct dlci_add *dlci) /* validate slave device */ - slave = dev_get_by_name(dlci->devname); + slave = dev_get_by_name(init_net(), dlci->devname); if (!slave) return -ENODEV; @@ -427,7 +427,7 @@ static int dlci_del(struct dlci_add *dlci) int err; /* validate slave device */ - master = __dev_get_by_name(dlci->devname); + master = __dev_get_by_name(init_net(), dlci->devname); if (!master) return(-ENODEV); diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index fc5c0c6..ae8f941 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -1367,7 +1367,7 @@ sbni_ioctl( struct net_device *dev, struct ifreq *ifr, int cmd ) if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name )) return -EFAULT; - slave_dev = dev_get_by_name( slave_name ); + slave_dev = dev_get_by_name(init_net(), slave_name ); if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { printk( KERN_ERR "%s: trying to enslave non-active " "device %s\n", dev->name, slave_name ); diff --git 
a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 6c27ff2..6c2ada3 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -1971,8 +1971,8 @@ static struct net_device *get_strip_dev(struct strip *strip_info) && memcmp(&strip_info->true_dev_addr, zero_address.c, sizeof(zero_address))) { struct net_device *dev; - read_lock_bh(&dev_base_lock); - dev = dev_base; + read_lock_bh(&per_net(dev_base_lock, init_net())); + dev = per_net(dev_base, init_net()); while (dev) { if (dev->type == strip_info->dev->type && !memcmp(dev->dev_addr, @@ -1981,12 +1981,12 @@ static struct net_device *get_strip_dev(struct strip *strip_info) printk(KERN_INFO "%s: Transferred packet ownership to %s.\n", strip_info->dev->name, dev->name); - read_unlock_bh(&dev_base_lock); + read_unlock_bh(&per_net(dev_base_lock, init_net())); return (dev); } dev = dev->next; } - read_unlock_bh(&dev_base_lock); + read_unlock_bh(&per_net(dev_base_lock, init_net())); } return (strip_info->dev); } diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index 8dac2ba..62662f3 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -365,9 +365,9 @@ static __inline__ int led_get_net_activity(void) /* we are running as a workqueue task, so locking dev_base * for reading should be OK */ - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); rcu_read_lock(); - for (dev = dev_base; dev; dev = dev->next) { + for (dev = per_net(dev_base, init_net()); dev; dev = dev->next) { struct net_device_stats *stats; struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) @@ -381,7 +381,7 @@ static __inline__ int led_get_net_activity(void) tx_total += stats->tx_packets; } rcu_read_unlock(); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); retval = 0; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index fd1b6eb..3b74be1 100644 --- a/include/linux/if_bridge.h +++ 
b/include/linux/if_bridge.h @@ -104,7 +104,7 @@ struct __fdb_entry #include -extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *)); +extern void brioctl_set(int (*ioctl_hook)(net_t, unsigned int, void __user *)); extern int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); extern int (*br_should_route_hook)(struct sk_buff **pskb); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 35cb385..8ee195a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -62,7 +62,7 @@ struct vlan_hdr { #define VLAN_VID_MASK 0xfff /* found in socket.c */ -extern void vlan_ioctl_set(int (*hook)(void __user *)); +extern void vlan_ioctl_set(int (*hook)(net_t, void __user *)); #define VLAN_NAME "vlan" diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 73931a0..0b4a4dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -571,21 +571,21 @@ struct packet_type { #include DECLARE_PER_NET(struct net_device, loopback_dev); /* The loopback */ -extern struct net_device *dev_base; /* All devices */ -extern rwlock_t dev_base_lock; /* Device list lock */ +DECLARE_PER_NET(struct net_device *, dev_base); /* All devices */ +DECLARE_PER_NET(rwlock_t, dev_base_lock); /* Device list lock */ extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); -extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); -extern struct net_device *dev_getfirstbyhwtype(unsigned short type); +extern struct net_device *dev_getbyhwaddr(net_t net, unsigned short type, char *hwaddr); +extern struct net_device *dev_getfirstbyhwtype(net_t net, unsigned short type); extern void dev_add_pack(struct packet_type *pt); extern void dev_remove_pack(struct packet_type *pt); extern void __dev_remove_pack(struct packet_type *pt); -extern struct net_device *dev_get_by_flags(unsigned short flags, +extern struct net_device 
*dev_get_by_flags(net_t net, unsigned short flags, unsigned short mask); -extern struct net_device *dev_get_by_name(const char *name); -extern struct net_device *__dev_get_by_name(const char *name); +extern struct net_device *dev_get_by_name(net_t net, const char *name); +extern struct net_device *__dev_get_by_name(net_t net, const char *name); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); extern int dev_close(struct net_device *dev); @@ -597,8 +597,8 @@ extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); -extern struct net_device *dev_get_by_index(int ifindex); -extern struct net_device *__dev_get_by_index(int ifindex); +extern struct net_device *dev_get_by_index(net_t net, int ifindex); +extern struct net_device *__dev_get_by_index(net_t net, int ifindex); extern int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP extern int netpoll_trap(void); @@ -705,8 +705,8 @@ extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern int dev_valid_name(const char *name); -extern int dev_ioctl(unsigned int cmd, void __user *); -extern int dev_ethtool(struct ifreq *); +extern int dev_ioctl(net_t net, unsigned int cmd, void __user *); +extern int dev_ethtool(net_t net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); @@ -982,7 +982,7 @@ extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ -extern void dev_load(const char 
*name); +extern void dev_load(net_t net, const char *name); extern void dev_mcast_init(void); extern int netdev_max_backlog; extern int weight_p; diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 10559e9..f274eca 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -434,7 +434,7 @@ extern int dev_get_wireless_info(char * buffer, char **start, off_t offset, int length); /* Handle IOCTLs, called in net/core/dev.c */ -extern int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd); +extern int wireless_process_ioctl(net_t net, struct ifreq *ifr, unsigned int cmd); /* Handle RtNetlink requests, called in net/core/rtnetlink.c */ extern int wireless_rtnetlink_set(struct net_device * dev, diff --git a/net/802/tr.c b/net/802/tr.c index 3324fa6..7a8cfbe 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -531,7 +531,7 @@ static int rif_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "if TR address TTL rcf routing segments\n"); else { - struct net_device *dev = dev_get_by_index(entry->iface); + struct net_device *dev = dev_get_by_index(init_net(), entry->iface); long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout) - (long) jiffies; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f80cfdd..e03d7de 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -51,7 +51,7 @@ static char vlan_copyright[] = "Ben Greear "; static char vlan_buggyright[] = "David S. 
Miller "; static int vlan_device_event(struct notifier_block *, unsigned long, void *); -static int vlan_ioctl_handler(void __user *); +static int vlan_ioctl_handler(net_t net, void __user *); static int unregister_vlan_dev(struct net_device *, unsigned short ); static struct notifier_block vlan_notifier_block = { @@ -118,7 +118,7 @@ static void __exit vlan_cleanup_devices(void) struct net_device *dev, *nxt; rtnl_lock(); - for (dev = dev_base; dev; dev = nxt) { + for (dev = per_net(dev_base, init_net()); dev; dev = nxt) { nxt = dev->next; if (dev->priv_flags & IFF_802_1Q_VLAN) { unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, @@ -279,7 +279,7 @@ static int unregister_vlan_device(const char *vlan_IF_name) int ret; - dev = dev_get_by_name(vlan_IF_name); + dev = dev_get_by_name(init_net(), vlan_IF_name); ret = -EINVAL; if (dev) { if (dev->priv_flags & IFF_802_1Q_VLAN) { @@ -390,7 +390,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, goto out_ret_null; /* find the device relating to eth_IF_name. */ - real_dev = dev_get_by_name(eth_IF_name); + real_dev = dev_get_by_name(init_net(), eth_IF_name); if (!real_dev) goto out_ret_null; @@ -678,7 +678,7 @@ out: * o execute requested action or pass command to the device driver * arg is really a struct vlan_ioctl_args __user *. 
*/ -static int vlan_ioctl_handler(void __user *arg) +static int vlan_ioctl_handler(net_t net, void __user *arg) { int err = 0; unsigned short vid = 0; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9fce3a8..fa2186d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -539,7 +539,7 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); + struct net_device *dev = dev_get_by_name(init_net(), dev_name); if (dev) { if (dev->priv_flags & IFF_802_1Q_VLAN) { @@ -556,7 +556,7 @@ int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_pri int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); + struct net_device *dev = dev_get_by_name(init_net(), dev_name); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; @@ -596,7 +596,7 @@ int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio /* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. 
*/ int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val) { - struct net_device *dev = dev_get_by_name(dev_name); + struct net_device *dev = dev_get_by_name(init_net(), dev_name); if (dev) { if (dev->priv_flags & IFF_802_1Q_VLAN) { @@ -632,7 +632,7 @@ int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val) int vlan_dev_get_realdev_name(const char *dev_name, char* result) { - struct net_device *dev = dev_get_by_name(dev_name); + struct net_device *dev = dev_get_by_name(init_net(), dev_name); int rv = 0; if (dev) { if (dev->priv_flags & IFF_802_1Q_VLAN) { @@ -650,7 +650,7 @@ int vlan_dev_get_realdev_name(const char *dev_name, char* result) int vlan_dev_get_vid(const char *dev_name, unsigned short* result) { - struct net_device *dev = dev_get_by_name(dev_name); + struct net_device *dev = dev_get_by_name(init_net(), dev_name); int rv = 0; if (dev) { if (dev->priv_flags & IFF_802_1Q_VLAN) { @@ -821,7 +821,7 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCETHTOOL: - err = dev_ethtool(&ifrr); + err = dev_ethtool(real_dev->nd_net, &ifrr); } if (!err) diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index abcf58c..0e93991 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -253,12 +253,12 @@ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) struct net_device *dev; loff_t i = 1; - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); if (*pos == 0) return SEQ_START_TOKEN; - for (dev = vlan_skip(dev_base); dev && i < *pos; + for (dev = vlan_skip(per_net(dev_base, init_net())); dev && i < *pos; dev = vlan_skip(dev->next), ++i); return (i == *pos) ? dev : NULL; @@ -269,13 +269,13 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return vlan_skip((v == SEQ_START_TOKEN) - ? dev_base + ? 
per_net(dev_base, init_net()) : ((struct net_device *)v)->next); } static void vlan_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static int vlan_seq_show(struct seq_file *seq, void *v) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 61f36b1..4cdcae3 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -677,7 +677,7 @@ static int atif_ioctl(int cmd, void __user *arg) if (copy_from_user(&atreq, arg, sizeof(atreq))) return -EFAULT; - dev = __dev_get_by_name(atreq.ifr_name); + dev = __dev_get_by_name(init_net(), atreq.ifr_name); if (!dev) return -ENODEV; @@ -901,7 +901,7 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) return -EFAULT; name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(init_net(), name); if (!dev) return -ENODEV; } @@ -1273,7 +1273,7 @@ static __inline__ int is_ip_over_ddp(struct sk_buff *skb) static int handle_ip_over_ddp(struct sk_buff *skb) { - struct net_device *dev = __dev_get_by_name("ipddp0"); + struct net_device *dev = __dev_get_by_name(init_net(), "ipddp0"); struct net_device_stats *stats; /* This needs to be able to handle ipddp"N" devices */ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 4fdb1af..e17c10b 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -244,7 +244,7 @@ static struct net_device *find_lec_by_itfnum(int itf) char name[IFNAMSIZ]; sprintf(name, "lec%d", itf); - dev = dev_get_by_name(name); + dev = dev_get_by_name(init_net(), name); return dev; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 8c187a6..e2f6fed 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -632,7 +632,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(devname); + dev = dev_get_by_name(init_net(), devname); if (dev == NULL) { res = -ENODEV; break; diff --git a/net/bridge/br_if.c 
b/net/bridge/br_if.c index 55bb263..22509f1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -45,7 +45,7 @@ static int port_cost(struct net_device *dev) old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ethtool(&ifr); + err = dev_ethtool(dev->nd_net, &ifr); set_fs(old_fs); if (!err) { @@ -328,7 +328,7 @@ int br_del_bridge(const char *name) int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(init_net(), name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -483,7 +483,7 @@ void __exit br_cleanup_bridges(void) struct net_device *dev, *nxt; rtnl_lock(); - for (dev = dev_base; dev; dev = nxt) { + for (dev = per_net(dev_base, init_net()); dev; dev = nxt) { nxt = dev->next; if (dev->priv_flags & IFF_EBRIDGE) del_br(dev->priv); diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 4c61a7e..2be1c2d 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "br_private.h" @@ -27,7 +28,7 @@ static int get_bridge_ifindices(int *indices, int num) struct net_device *dev; int i = 0; - for (dev = dev_base; dev && i < num; dev = dev->next) { + for (dev = per_net(dev_base, init_net()); dev && i < num; dev = dev->next) { if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } @@ -88,7 +89,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(init_net(), ifindex); if (dev == NULL) return -EINVAL; @@ -362,7 +363,7 @@ static int old_deviceless(void __user *uarg) return -EOPNOTSUPP; } -int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +int br_ioctl_deviceless_stub(net_t net, unsigned int cmd, void __user *uarg) { switch (cmd) { case SIOCGIFBR: diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a913968..119b97d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c 
@@ -13,6 +13,7 @@ #include #include #include +#include #include "br_private.h" static inline size_t br_nlmsg_size(void) @@ -106,8 +107,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int idx; - read_lock(&dev_base_lock); - for (dev = dev_base, idx = 0; dev; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()), idx = 0; dev; dev = dev->next) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -119,7 +120,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) skip: ++idx; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); cb->args[0] = idx; @@ -153,7 +154,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(init_net(), ifm->ifi_index); if (!dev) return -ENODEV; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f1712b9..1d60ee3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -189,7 +189,7 @@ extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); +extern int br_ioctl_deviceless_stub(net_t net, unsigned int cmd, void __user *arg); /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/core/dev.c b/net/core/dev.c index d8aa534..32fe905 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -175,26 +175,27 @@ static spinlock_t net_dma_event_lock; * unregister_netdevice(), which must be called with the rtnl * semaphore held. 
*/ -struct net_device *dev_base; -static struct net_device **dev_tail = &dev_base; -DEFINE_RWLOCK(dev_base_lock); +DEFINE_PER_NET(struct net_device *, dev_base); +static DEFINE_PER_NET(struct net_device **, dev_tail); +DEFINE_PER_NET(rwlock_t, dev_base_lock); -EXPORT_SYMBOL(dev_base); -EXPORT_SYMBOL(dev_base_lock); +EXPORT_PER_NET_SYMBOL(dev_base); +EXPORT_PER_NET_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 -static struct hlist_head dev_name_head[1<next) { - nb->notifier_call(nb, NETDEV_REGISTER, dev); + net_t net; + for_each_net(net) { + for (dev = per_net(dev_base, net); dev; dev = dev->next) { + nb->notifier_call(nb, NETDEV_REGISTER, dev); - if (dev->flags & IFF_UP) - nb->notifier_call(nb, NETDEV_UP, dev); + if (dev->flags & IFF_UP) + nb->notifier_call(nb, NETDEV_UP, dev); + } } } rtnl_unlock(); @@ -1991,7 +2002,7 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf) * match. --pb */ -static int dev_ifname(struct ifreq __user *arg) +static int dev_ifname(net_t net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; @@ -2003,15 +2014,15 @@ static int dev_ifname(struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifr.ifr_ifindex); + read_lock(&per_net(dev_base_lock, net)); + dev = __dev_get_by_index(net, ifr.ifr_ifindex); if (!dev) { - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, net)); return -ENODEV; } strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, net)); if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; @@ -2024,7 +2035,7 @@ static int dev_ifname(struct ifreq __user *arg) * Thus we will need a 'compatibility mode'. 
*/ -static int dev_ifconf(char __user *arg) +static int dev_ifconf(net_t net, char __user *arg) { struct ifconf ifc; struct net_device *dev; @@ -2048,7 +2059,7 @@ static int dev_ifconf(char __user *arg) */ total = 0; - for (dev = dev_base; dev; dev = dev->next) { + for (dev = per_net(dev_base, net); dev; dev = dev->next) { for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2080,31 +2091,35 @@ static int dev_ifconf(char __user *arg) * This is invoked by the /proc filesystem handler to display a device * in detail. */ -static __inline__ struct net_device *dev_get_idx(loff_t pos) +static __inline__ struct net_device *dev_get_idx(net_t net, loff_t pos) { struct net_device *dev; loff_t i; - for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); + for (i = 0, dev = per_net(dev_base, net); dev && i < pos; ++i, dev = dev->next); return i == pos ? dev : NULL; } void *dev_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&dev_base_lock); - return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; + net_t net = net_from_voidp(seq->private); + + read_lock(&per_net(dev_base_lock, net)); + return *pos ? dev_get_idx(net, *pos - 1) : SEQ_START_TOKEN; } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + net_t net = net_from_voidp(seq->private); ++*pos; - return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; + return v == SEQ_START_TOKEN ? 
per_net(dev_base, net) : ((struct net_device *)v)->next; } void dev_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&dev_base_lock); + net_t net = net_from_voidp(seq->private); + read_unlock(&per_net(dev_base_lock, net)); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) @@ -2198,7 +2213,22 @@ static struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = net_to_voidp(get_net(PROC_NET(inode))); + } + return res; +} + +static int dev_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + net_t net = net_from_voidp(seq->private); + put_net(net); + return seq_release(inode, file); } static struct file_operations dev_seq_fops = { @@ -2206,7 +2236,7 @@ static struct file_operations dev_seq_fops = { .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_seq_release, }; static struct seq_operations softnet_seq_ops = { @@ -2235,23 +2265,44 @@ extern int wireless_proc_init(void); #define wireless_proc_init() 0 #endif -static int __init dev_proc_init(void) +static int dev_proc_net_init(net_t net) { int rc = -ENOMEM; - if (!proc_net_fops_create(init_net(), "dev", S_IRUGO, &dev_seq_fops)) + if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) goto out; - if (!proc_net_fops_create(init_net(), "softnet_stat", S_IRUGO, &softnet_seq_fops)) + if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; - if (wireless_proc_init()) - goto out_softnet; rc = 0; out: return rc; -out_softnet: - proc_net_remove(init_net(), "softnet_stat"); out_dev: - proc_net_remove(init_net(), "dev"); + proc_net_remove(net, "dev"); + goto out; +} + +static void dev_proc_net_exit(net_t net) +{ + proc_net_remove(net, 
"softnet_stat"); + proc_net_remove(net, "dev"); +} + +static struct pernet_operations dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int __init dev_proc_init(void) +{ + int rc; + if ((rc = register_pernet_subsys(&dev_proc_ops))) + goto out; + if ((rc = wireless_proc_init())) + goto out_softnet; +out: + return rc; +out_softnet: + unregister_pernet_subsys(&dev_proc_ops); goto out; } #else @@ -2485,10 +2536,10 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) /* * Perform the SIOCxIFxxx calls. */ -static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +static int dev_ifsioc(net_t net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) return -ENODEV; @@ -2641,7 +2692,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) * positive or a negative errno code on error. */ -int dev_ioctl(unsigned int cmd, void __user *arg) +int dev_ioctl(net_t net, unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; @@ -2654,12 +2705,12 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCGIFCONF) { rtnl_lock(); - ret = dev_ifconf((char __user *) arg); + ret = dev_ifconf(net, (char __user *) arg); rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) - return dev_ifname((struct ifreq __user *)arg); + return dev_ifname(net, (struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -2689,10 +2740,10 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(ifr.ifr_name); - read_lock(&dev_base_lock); - ret = dev_ifsioc(&ifr, cmd); - read_unlock(&dev_base_lock); + dev_load(net, ifr.ifr_name); + read_lock(&per_net(dev_base_lock, net)); + ret = dev_ifsioc(net, &ifr, cmd); + read_unlock(&per_net(dev_base_lock, net)); if (!ret) { if (colon) *colon = ':'; @@ 
-2703,9 +2754,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) return ret; case SIOCETHTOOL: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(&ifr); + ret = dev_ethtool(net, &ifr); rtnl_unlock(); if (!ret) { if (colon) @@ -2727,9 +2778,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCSIFNAME: if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret) { if (colon) @@ -2768,9 +2819,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); return ret; @@ -2790,9 +2841,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCWANDEV || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -2810,10 +2861,10 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (!capable(CAP_NET_ADMIN)) return -EPERM; } - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); /* Follow me in net/core/wireless.c */ - ret = wireless_process_ioctl(&ifr, cmd); + ret = wireless_process_ioctl(net, &ifr, cmd); rtnl_unlock(); if (IW_IS_GET(cmd) && copy_to_user(arg, &ifr, @@ -2834,13 +2885,13 @@ int dev_ioctl(unsigned int cmd, void __user *arg) * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. 
*/ -static int dev_new_index(void) +static int dev_new_index(net_t net) { static int ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; - if (!__dev_get_by_index(ifindex)) + if (!__dev_get_by_index(net, ifindex)) return ifindex; } } @@ -2880,6 +2931,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; + net_t net; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -2888,6 +2940,8 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + BUG_ON(null_net(dev->nd_net)); + net = dev->nd_net; spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -2913,12 +2967,12 @@ int register_netdevice(struct net_device *dev) goto out; } - dev->ifindex = dev_new_index(); + dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; /* Check for existence of name */ - head = dev_name_hash(dev->name); + head = dev_name_hash(net, dev->name); hlist_for_each(p, head) { struct net_device *d = hlist_entry(p, struct net_device, name_hlist); @@ -2980,13 +3034,13 @@ int register_netdevice(struct net_device *dev) dev->next = NULL; dev_init_scheduler(dev); - write_lock_bh(&dev_base_lock); - *dev_tail = dev; - dev_tail = &dev->next; + write_lock_bh(&per_net(dev_base_lock, net)); + *per_net(dev_tail, net) = dev; + per_net(dev_tail, net) = &dev->next; hlist_add_head(&dev->name_hlist, head); - hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); dev_hold(dev); - write_unlock_bh(&dev_base_lock); + write_unlock_bh(&per_net(dev_base_lock, net)); /* Notify protocols, that a new device appeared. 
*/ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); @@ -3252,6 +3306,7 @@ void synchronize_net(void) int unregister_netdevice(struct net_device *dev) { struct net_device *d, **dp; + net_t net = dev->nd_net; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3270,15 +3325,15 @@ int unregister_netdevice(struct net_device *dev) dev_close(dev); /* And unlink it from device chain. */ - for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { + for (dp = &per_net(dev_base, net); (d = *dp) != NULL; dp = &d->next) { if (d == dev) { - write_lock_bh(&dev_base_lock); + write_lock_bh(&per_net(dev_base_lock, net)); hlist_del(&dev->name_hlist); hlist_del(&dev->index_hlist); - if (dev_tail == &dev->next) - dev_tail = dp; + if (per_net(dev_tail, net) == &dev->next) + per_net(dev_tail, net) = dp; *dp = d->next; - write_unlock_bh(&dev_base_lock); + write_unlock_bh(&per_net(dev_base_lock, net)); break; } } @@ -3464,6 +3519,26 @@ static int __init netdev_dma_register(void) static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ +/* Initialize per network namespace state */ +static int netdev_init(net_t net) +{ + int i; + per_net(dev_tail, net) = &per_net(dev_base, net); + rwlock_init(&per_net(dev_base_lock, net)); + + for (i = 0; i < ARRAY_SIZE(per_net(dev_name_head, net)); i++) + INIT_HLIST_HEAD(&per_net(dev_name_head, net)[i]); + + for (i = 0; i < ARRAY_SIZE(per_net(dev_index_head, net)); i++) + INIT_HLIST_HEAD(&per_net(dev_index_head, net)[i]); + + return 0; +} + +static struct pernet_operations netdev_net_ops = { + .init = netdev_init, +}; + /* * Initialize the DEV module. 
At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -3491,11 +3566,8 @@ static int __init net_dev_init(void) for (i = 0; i < 16; i++) INIT_LIST_HEAD(&ptype_base[i]); - for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) - INIT_HLIST_HEAD(&dev_name_head[i]); - - for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) - INIT_HLIST_HEAD(&dev_index_head[i]); + if (register_pernet_subsys(&netdev_net_ops)) + goto out; /* * Initialise the packet receive queues. diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 623e606..131746b 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -221,11 +221,12 @@ void dev_mc_discard(struct net_device *dev) #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { + net_t net = net_from_voidp(seq->private); struct net_device *dev; loff_t off = 0; - read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + read_lock(&per_net(dev_base_lock, net)); + for (dev = per_net(dev_base, net); dev; dev = dev->next) { if (off++ == *pos) return dev; } @@ -241,7 +242,8 @@ static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void dev_mc_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&dev_base_lock); + net_t net = net_from_voidp(seq->private); + read_unlock(&per_net(dev_base_lock, net)); } @@ -275,7 +277,22 @@ static struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_mc_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_mc_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = net_to_voidp(get_net(PROC_NET(inode))); + } + return res; +} + +static int dev_mc_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + net_t net = net_from_voidp(seq->private); + put_net(net); + return seq_release(inode, file); } static struct 
file_operations dev_mc_seq_fops = { @@ -283,14 +300,31 @@ static struct file_operations dev_mc_seq_fops = { .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_mc_seq_release, }; #endif +static int dev_mc_net_init(net_t net) +{ + if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void dev_mc_net_exit(net_t net) +{ + proc_net_remove(net, "dev_mcast"); +} + +static struct pernet_operations dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + void __init dev_mcast_init(void) { - proc_net_fops_create(init_net(), "dev_mcast", 0, &dev_mc_seq_fops); + register_pernet_subsys(&dev_mc_net_ops); } EXPORT_SYMBOL(dev_mc_add); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 87dc556..d142377 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -798,9 +798,9 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) /* The main entry point in this file. 
Called from net/core/dev.c */ -int dev_ethtool(struct ifreq *ifr) +int dev_ethtool(net_t net, struct ifreq *ifr) { - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ffc31c1..2fa2708 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -12,6 +12,7 @@ #include #include #include +#include #include static LIST_HEAD(rules_ops); @@ -155,6 +156,7 @@ EXPORT_SYMBOL_GPL(fib_rules_lookup); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + net_t net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -188,7 +190,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->ifindex = -1; nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(rule->ifname); + dev = __dev_get_by_name(net, rule->ifname); if (dev) rule->ifindex = dev->ifindex; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 549a2ce..8e317cf 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -63,12 +63,13 @@ static unsigned char default_operstate(const struct net_device *dev) static void rfc2863_policy(struct net_device *dev) { + net_t net = dev->nd_net; unsigned char operstate = default_operstate(dev); if (operstate == dev->operstate) return; - write_lock_bh(&dev_base_lock); + write_lock_bh(&per_net(dev_base_lock, net)); switch(dev->link_mode) { case IF_LINK_MODE_DORMANT: @@ -83,7 +84,7 @@ static void rfc2863_policy(struct net_device *dev) dev->operstate = operstate; - write_unlock_bh(&dev_base_lock); + write_unlock_bh(&per_net(dev_base_lock, net)); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 90e1d2e..f5d4f92 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1438,6 +1438,7 @@ int 
neigh_table_clear(struct neigh_table *tbl) int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1453,7 +1454,7 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1503,6 +1504,7 @@ out: int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1519,7 +1521,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b08c1be..1be6f94 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -38,12 +38,13 @@ static ssize_t netdev_show(const struct class_device *cd, char *buf, ssize_t (*format)(const struct net_device *, char *)) { struct net_device *dev = to_net_dev(cd); + net_t net = dev->nd_net; ssize_t ret = -EINVAL; - - read_lock(&dev_base_lock); + + read_lock(&per_net(dev_base_lock, net)); if (dev_isalive(dev)) ret = (*format)(dev, buf); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, net)); return ret; } @@ -109,12 +110,13 @@ static ssize_t format_addr(char *buf, const unsigned char *addr, int len) static ssize_t show_address(struct class_device *cd, char *buf) { struct net_device *dev = to_net_dev(cd); + net_t net = dev->nd_net; ssize_t ret = -EINVAL; - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, net)); if (dev_isalive(dev)) ret = format_addr(buf, dev->dev_addr, dev->addr_len); - read_unlock(&dev_base_lock); + 
read_unlock(&per_net(dev_base_lock, net)); return ret; } @@ -158,13 +160,14 @@ static const char *operstates[] = { static ssize_t show_operstate(struct class_device *cd, char *buf) { const struct net_device *dev = to_net_dev(cd); + net_t net = dev->nd_net; unsigned char operstate; - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, net)); operstate = dev->operstate; if (!netif_running(dev)) operstate = IF_OPER_DOWN; - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, net)); if (operstate >= ARRAY_SIZE(operstates)) return -EINVAL; /* should not happen */ @@ -248,6 +251,7 @@ static ssize_t netstat_show(const struct class_device *cd, char *buf, unsigned long offset) { struct net_device *dev = to_net_dev(cd); + net_t net = dev->nd_net; struct net_device_stats *stats; ssize_t ret = -EINVAL; @@ -255,13 +259,13 @@ static ssize_t netstat_show(const struct class_device *cd, char *buf, offset % sizeof(unsigned long) != 0) WARN_ON(1); - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, net)); if (dev_isalive(dev) && dev->get_stats && (stats = (*dev->get_stats)(dev))) ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, net)); return ret; } @@ -338,10 +342,11 @@ static ssize_t wireless_show(struct class_device *cd, char *buf, char *)) { struct net_device *dev = to_net_dev(cd); + net_t net = dev->nd_net; const struct iw_statistics *iw = NULL; ssize_t ret = -EINVAL; - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, net)); if (dev_isalive(dev)) { if(dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats) @@ -349,7 +354,7 @@ static ssize_t wireless_show(struct class_device *cd, char *buf, if (iw != NULL) ret = (*format)(iw, buf); } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, net)); return ret; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 823215d..f2c7c07 100644 --- 
a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -621,7 +621,7 @@ int netpoll_setup(struct netpoll *np) int err; if (np->dev_name) - ndev = dev_get_by_name(np->dev_name); + ndev = dev_get_by_name(init_net(), np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7796b39..a415efb 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1928,7 +1928,7 @@ static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev) pkt_dev->odev = NULL; } - odev = dev_get_by_name(pkt_dev->ifname); + odev = dev_get_by_name(init_net(), pkt_dev->ifname); if (!odev) { printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8f3dda8..5ac07a0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -235,6 +235,7 @@ EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); static void set_operstate(struct net_device *dev, unsigned char transition) { + net_t net = dev->nd_net; unsigned char operstate = dev->operstate; switch(transition) { @@ -253,9 +254,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition) }; if (dev->operstate != operstate) { - write_lock_bh(&dev_base_lock); + write_lock_bh(&per_net(dev_base_lock, net)); dev->operstate = operstate; - write_unlock_bh(&dev_base_lock); + write_unlock_bh(&per_net(dev_base_lock, net)); netdev_state_change(dev); } } @@ -389,12 +390,13 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + net_t net = skb->sk->sk_net; int idx; int s_idx = cb->args[0]; struct net_device *dev; - read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + read_lock(&per_net(dev_base_lock, net)); + for (dev=per_net(dev_base, net), idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, @@ -402,7 +404,7 @@ static int 
rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) break; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, net)); cb->args[0] = idx; return skb->len; @@ -420,6 +422,7 @@ static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + net_t net = skb->sk->sk_net; struct ifinfomsg *ifm; struct net_device *dev; int err, send_addr_notify = 0, modified = 0; @@ -438,9 +441,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index >= 0) - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(net, ifname); else goto errout; @@ -566,9 +569,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) { - write_lock_bh(&dev_base_lock); + write_lock_bh(&per_net(dev_base_lock, net)); dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - write_unlock_bh(&dev_base_lock); + write_unlock_bh(&per_net(dev_base_lock, net)); } err = 0; @@ -590,6 +593,7 @@ errout: static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + net_t net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -604,7 +608,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ifm = nlmsg_data(nlh); if (ifm->ifi_index >= 0) { - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); if (dev == NULL) return -ENODEV; } else diff --git a/net/core/sock.c b/net/core/sock.c index e42f7df..737838c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -343,6 +343,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, char 
__user *optval, int optlen) { struct sock *sk=sock->sk; + net_t net = sk->sk_net; struct sk_filter *filter; int val; int valbool; @@ -579,7 +580,7 @@ set_rcvbuf: if (devname[0] == '\0') { sk->sk_bound_dev_if = 0; } else { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(net, devname); if (!dev) { ret = -ENODEV; break; diff --git a/net/core/wireless.c b/net/core/wireless.c index faa242f..d1418bf 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -672,7 +672,22 @@ static struct seq_operations wireless_seq_ops = { static int wireless_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &wireless_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &wireless_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = net_to_voidp(get_net(PROC_NET(inode))); + } + return res; +} + +static int wireless_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + net_t net = net_from_voidp(seq->private); + put_net(net); + return seq_release(inode, file); } static struct file_operations wireless_seq_fops = { @@ -680,17 +695,33 @@ static struct file_operations wireless_seq_fops = { .open = wireless_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = wireless_seq_release, }; -int __init wireless_proc_init(void) +static int wireless_proc_net_init(net_t net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create(init_net(), "wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; return 0; } + +static void wireless_proc_net_exit(net_t net) +{ + proc_net_remove(net, "wireless"); +} + +static struct pernet_operations wireless_proc_ops = { + .init = wireless_proc_net_init, + .exit = wireless_proc_net_exit, +}; + +int __init wireless_proc_init(void) +{ + return register_pernet_subsys(&wireless_proc_ops); +} + #endif 
/* CONFIG_PROC_FS */ /************************** IOCTL SUPPORT **************************/ @@ -1066,7 +1097,7 @@ static inline int ioctl_private_call(struct net_device * dev, * (dev_ioctl() in net/core/dev.c). * Check the type of IOCTL and call the appropriate wrapper... */ -int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) +int wireless_process_ioctl(net_t net, struct ifreq *ifr, unsigned int cmd) { struct net_device *dev; iw_handler handler; @@ -1075,7 +1106,7 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) * The copy_to/from_user() of ifr is also dealt with in there */ /* Make sure the device exist */ - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) return -ENODEV; /* A bunch of special cases, then the generic case... diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index b27b2ac..1cc502a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -749,14 +749,14 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(saddr->sdn_flags & SDF_WILD)) { if (dn_ntohs(saddr->sdn_nodeaddrl)) { - read_lock(&dev_base_lock); - for(dev = dev_base; dev; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for(dev = per_net(dev_base, init_net()); dev; dev = dev->next) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) break; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); if (dev == NULL) return -EADDRNOTAVAIL; } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index dbaf001..c83c8d1 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -514,7 +514,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) ifr->ifr_name[IFNAMSIZ-1] = 0; #ifdef CONFIG_KMOD - dev_load(ifr->ifr_name); + dev_load(init_net(), ifr->ifr_name); #endif switch(cmd) { @@ -532,7 +532,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); - if ((dev 
= __dev_get_by_name(ifr->ifr_name)) == NULL) { + if ((dev = __dev_get_by_name(init_net(), ifr->ifr_name)) == NULL) { ret = -ENODEV; goto done; } @@ -630,7 +630,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) { struct net_device *dev; struct dn_dev *dn_dev = NULL; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(init_net(), ifindex); if (dev) { dn_dev = dev->dn_ptr; dev_put(dev); @@ -695,7 +695,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -EINVAL; ifm = nlmsg_data(nlh); - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) + if ((dev = __dev_get_by_index(init_net(), ifm->ifa_index)) == NULL) return -ENODEV; if ((dn_db = dev->dn_ptr) == NULL) { @@ -796,8 +796,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_ndevs = cb->args[0]; skip_naddr = cb->args[1]; - read_lock(&dev_base_lock); - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()), idx = 0; dev; dev = dev->next, idx++) { if (idx < skip_ndevs) continue; else if (idx > skip_ndevs) { @@ -821,7 +821,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) } } done: - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); cb->args[0] = idx; cb->args[1] = dn_idx; @@ -862,9 +862,9 @@ int dn_dev_bind_default(__le16 *addr) dev = dn_dev_get_default(); last_chance: if (dev) { - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); rv = dn_dev_get_first(dev, addr); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); dev_put(dev); if (rv == 0 || dev == &per_net(loopback_dev, init_net())) return rv; @@ -1296,7 +1296,7 @@ void dn_dev_devices_off(void) struct net_device *dev; rtnl_lock(); - for(dev = dev_base; dev; dev = dev->next) + for(dev = per_net(dev_base, init_net()); dev; dev = dev->next) dn_dev_down(dev); 
rtnl_unlock(); @@ -1307,7 +1307,7 @@ void dn_dev_devices_on(void) struct net_device *dev; rtnl_lock(); - for(dev = dev_base; dev; dev = dev->next) { + for(dev = per_net(dev_base, init_net()); dev; dev = dev->next) { if (dev->flags & IFF_UP) dn_dev_up(dev); } @@ -1338,7 +1338,7 @@ static struct net_device *dn_dev_get_idx(struct seq_file *seq, loff_t pos) { struct net_device *dev; - dev = dev_base; + dev = per_net(dev_base, init_net()); if (dev && !dev->dn_ptr) dev = dn_dev_get_next(seq, dev); if (pos) { @@ -1352,10 +1352,10 @@ static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos) { struct net_device *dev; - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); dev = dn_dev_get_idx(seq, *pos - 1); if (dev == NULL) - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return dev; } return SEQ_START_TOKEN; @@ -1371,7 +1371,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) } else { dev = dn_dev_get_next(seq, dev); if (dev == NULL) - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } ++*pos; return dev; @@ -1380,7 +1380,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void dn_dev_seq_stop(struct seq_file *seq, void *v) { if (v && v != SEQ_START_TOKEN) - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static char *dn_type2asc(char type) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 1cf0101..cc2ab1f 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -212,7 +212,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct return -EINVAL; if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(init_net(), nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -255,7 +255,7 @@ out: if 
(nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - dev = __dev_get_by_index(nh->nh_oif); + dev = __dev_get_by_index(init_net(), nh->nh_oif); if (dev == NULL || dev->dn_ptr == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) @@ -352,7 +352,7 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(init_net(), fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -598,8 +598,8 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) ASSERT_RTNL(); /* Scan device list */ - read_lock(&dev_base_lock); - for(dev = dev_base; dev; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for(dev = per_net(dev_base, init_net()); dev; dev = dev->next) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; @@ -610,7 +610,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) } } } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); if (found_it == 0) { fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b553cd4..9669e50 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -849,7 +849,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int int best_match = 0; int ret; - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_scope > scope) continue; @@ -863,7 +863,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int if (best_match == 0) saddr = ifa->ifa_local; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return saddr; } @@ -908,7 +908,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old /* If we have an 
output interface, verify its a DECnet device */ if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(init_net(), oldflp->oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -928,8 +928,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old dev_put(dev_out); goto out; } - read_lock(&dev_base_lock); - for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for(dev_out = per_net(dev_base, init_net()); dev_out; dev_out = dev_out->next) { if (!dev_out->dn_ptr) continue; if (!dn_dev_islocal(dev_out, oldflp->fld_src)) @@ -940,7 +940,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old continue; break; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); if (dev_out == NULL) goto out; dev_hold(dev_out); @@ -1554,7 +1554,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) if (fl.iif) { struct net_device *dev; - if ((dev = dev_get_by_index(fl.iif)) == NULL) { + if ((dev = dev_get_by_index(init_net(), fl.iif)) == NULL) { kfree_skb(skb); return -ENODEV; } diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 37fff9a..778b8a5 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -259,7 +259,7 @@ static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, devname[newlen] = 0; - dev = dev_get_by_name(devname); + dev = dev_get_by_name(init_net(), devname); if (dev == NULL) return -ENODEV; @@ -299,7 +299,7 @@ static int dn_def_dev_handler(ctl_table *table, int write, devname[*lenp] = 0; strip_it(devname); - dev = dev_get_by_name(devname); + dev = dev_get_by_name(init_net(), devname); if (dev == NULL) return -ENODEV; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index cbf87f4..cd5336b 100644 --- a/net/econet/af_econet.c +++ 
b/net/econet/af_econet.c @@ -663,7 +663,7 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = dev_get_by_name(init_net(), ifr.ifr_name)) == NULL) return -ENODEV; sec = (struct sockaddr_ec *)&ifr.ifr_addr; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 0d23fb2..39d2ac4 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -983,7 +983,7 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); + dev = dev_getbyhwaddr(init_net(), r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; } @@ -1170,7 +1170,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - if ((dev = __dev_get_by_name(r.arp_dev)) == NULL) + if ((dev = __dev_get_by_name(init_net(), r.arp_dev)) == NULL) goto out; /* Mmmm... It is wrong... 
ARPHRD_NETROM==0 */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 201442c..b0d12ec 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -419,11 +419,11 @@ struct in_device *inetdev_by_index(int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + read_lock(&per_net(dev_base_lock, init_net())); + dev = __dev_get_by_index(init_net(), ifindex); if (dev) in_dev = in_dev_get(dev); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return in_dev; } @@ -504,7 +504,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) goto errout; - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(init_net(), ifm->ifa_index); if (dev == NULL) { err = -ENODEV; goto errout; @@ -627,7 +627,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) *colon = 0; #ifdef CONFIG_KMOD - dev_load(ifr.ifr_name); + dev_load(init_net(), ifr.ifr_name); #endif switch(cmd) { @@ -668,7 +668,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(init_net(), ifr.ifr_name)) == NULL) goto done; if (colon) @@ -906,9 +906,9 @@ no_in_dev: in this case. It is importnat that lo is the first interface in dev_base list. 
*/ - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); rcu_read_lock(); - for (dev = dev_base; dev; dev = dev->next) { + for (dev = per_net(dev_base, init_net()); dev; dev = dev->next) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -921,7 +921,7 @@ no_in_dev: } endfor_ifa(in_dev); } out_unlock_both: - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); rcu_read_unlock(); out: return addr; @@ -985,9 +985,9 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, return addr; } - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); rcu_read_lock(); - for (dev = dev_base; dev; dev = dev->next) { + for (dev = per_net(dev_base, init_net()); dev; dev = dev->next) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -995,7 +995,7 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, } } rcu_read_unlock(); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return addr; } @@ -1180,8 +1180,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) int s_ip_idx, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; - read_lock(&dev_base_lock); - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()), idx = 0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; if (idx > s_idx) @@ -1207,7 +1207,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) } done: - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); cb->args[0] = idx; cb->args[1] = ip_idx; @@ -1258,8 +1258,8 @@ void inet_forward_change(void) ipv4_devconf.accept_redirects = !on; ipv4_devconf_dflt.forwarding = on; - read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + 
read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev; dev = dev->next) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -1267,7 +1267,7 @@ void inet_forward_change(void) in_dev->cnf.forwarding = on; rcu_read_unlock(); } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); rt_cache_flush(0); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d1859ff..449f42d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -337,7 +337,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt, colon = strchr(devname, ':'); if (colon) *colon = 0; - dev = __dev_get_by_name(devname); + dev = __dev_get_by_name(init_net(), devname); if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e63b8a9..76218e5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -530,7 +530,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(init_net(), nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -807,7 +807,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(init_net(), fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 92624cc..0455935 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2262,7 +2262,7 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) struct ip_mc_list *im = NULL; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - for (state->dev = dev_base, 
state->in_dev = NULL; + for (state->dev = per_net(dev_base, init_net()), state->in_dev = NULL; state->dev; state->dev = state->dev->next) { struct in_device *in_dev; @@ -2315,7 +2315,7 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos) static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2339,7 +2339,7 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v) state->in_dev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static int igmp_mc_seq_show(struct seq_file *seq, void *v) @@ -2424,7 +2424,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) struct ip_mc_list *im = NULL; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL, state->im = NULL; + for (state->dev = per_net(dev_base, init_net()), state->idev = NULL, state->im = NULL; state->dev; state->dev = state->dev->next) { struct in_device *idev; @@ -2493,7 +2493,7 @@ static struct ip_sf_list *igmp_mcf_get_idx(struct seq_file *seq, loff_t pos) static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); return *pos ? 
igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2521,7 +2521,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static int igmp_mcf_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8ce00d3..078eed3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -292,7 +292,7 @@ static void ip_expire(unsigned long arg) if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { struct sk_buff *head = qp->fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if ((head->dev = dev_get_by_index(qp->iif)) != NULL) { + if ((head->dev = dev_get_by_index(init_net(), qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); dev_put(head->dev); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 476cb60..a21688c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -266,7 +266,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int int i; for (i=1; i<100; i++) { sprintf(name, "gre%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(init_net(), name) == NULL) break; } if (i==100) @@ -1196,7 +1196,7 @@ static int ipgre_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(init_net(), tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 57d4bae..95094c5 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -597,7 +597,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, dev_put(dev); } } else - dev = __dev_get_by_index(mreq.imr_ifindex); + dev = __dev_get_by_index(init_net(), mreq.imr_ifindex); err = -EADDRNOTAVAIL; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 
ee77938..2606b8c 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -195,7 +195,7 @@ static int __init ic_open_devs(void) if (dev_change_flags(lo, lo->flags | IFF_UP) < 0) printk(KERN_ERR "IP-Config: Failed to open %s\n", lo->name); - for (dev = dev_base; dev; dev = dev->next) { + for (dev = per_net(dev_base, init_net()); dev; dev = dev->next) { if (dev == lo) continue; if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9d719d6..4e75691 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -232,7 +232,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c int i; for (i=1; i<100; i++) { sprintf(name, "tunl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(init_net(), name) == NULL) break; } if (i==100) @@ -827,7 +827,7 @@ static int ipip_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(init_net(), tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9afaa13..d2e7e55 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -125,7 +125,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) { struct net_device *dev; - dev = __dev_get_by_name("tunl0"); + dev = __dev_get_by_name(init_net(), "tunl0"); if (dev) { int err; @@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev = NULL; - if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { + if (err == 0 && (dev = __dev_get_by_name(init_net(), p.name)) != NULL) { dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 7ea2d98..fd6d1ca 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -382,7 +382,7 @@ static int set_mcast_if(struct sock *sk, char *ifname) struct net_device 
*dev; struct inet_sock *inet = inet_sk(sk); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(init_net(), ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -407,7 +407,7 @@ static int set_sync_mesg_maxlen(int sync_state) int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(init_net(), ip_vs_master_mcast_ifn)) == NULL) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - @@ -418,7 +418,7 @@ static int set_sync_mesg_maxlen(int sync_state) IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(init_net(), ip_vs_backup_mcast_ifn)) == NULL) return -ENODEV; sync_recv_mesg_maxlen = dev->mtu - @@ -446,7 +446,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(init_net(), ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -467,7 +467,7 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(init_net(), ifname)) == NULL) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 779e2c6..02003ff 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -430,7 +430,7 @@ checkentry(const char *tablename, return 0; } - dev = dev_get_by_name(e->ip.iniface); + dev = dev_get_by_name(init_net(), e->ip.iniface); 
if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d23a0d7..509bfb1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2436,7 +2436,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(init_net(), oldflp->oif); err = -ENODEV; if (dev_out == NULL) goto out; @@ -2761,7 +2761,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(init_net(), iif); if (dev == NULL) { err = -ENODEV; goto errout_free; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c9fa27a..7afe698 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -477,8 +477,8 @@ static void addrconf_forward_change(void) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for (dev=dev_base; dev; dev=dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev=per_net(dev_base, init_net()); dev; dev=dev->next) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -489,7 +489,7 @@ static void addrconf_forward_change(void) } rcu_read_unlock(); } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } #endif @@ -919,10 +919,10 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, memset(&hiscore, 0, sizeof(hiscore)); - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); rcu_read_lock(); - for (dev = dev_base; dev; dev=dev->next) { + for (dev = per_net(dev_base, init_net()); dev; dev=dev->next) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -1151,7 +1151,7 @@ record_it: read_unlock_bh(&idev->lock); } rcu_read_unlock(); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); if (!ifa_result) return -EADDRNOTAVAIL; @@ 
-1853,7 +1853,7 @@ int addrconf_set_dstaddr(void __user *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = __dev_get_by_index(ireq.ifr6_ifindex); + dev = __dev_get_by_index(init_net(), ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -1884,7 +1884,7 @@ int addrconf_set_dstaddr(void __user *arg) if (err == 0) { err = -ENOBUFS; - if ((dev = __dev_get_by_name(p.name)) == NULL) + if ((dev = __dev_get_by_name(init_net(), p.name)) == NULL) goto err_exit; err = dev_open(dev); } @@ -1913,7 +1913,7 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(init_net(), ifindex)) == NULL) return -ENODEV; if ((idev = addrconf_add_dev(dev)) == NULL) @@ -1956,7 +1956,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) struct inet6_dev *idev; struct net_device *dev; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(init_net(), ifindex)) == NULL) return -ENODEV; if ((idev = __in6_dev_get(dev)) == NULL) @@ -2051,7 +2051,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) return; } - for (dev = dev_base; dev != NULL; dev = dev->next) { + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2198,12 +2198,12 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev) /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && - (link_dev = __dev_get_by_index(idev->dev->iflink))) { + (link_dev = __dev_get_by_index(init_net(), idev->dev->iflink))) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } /* then try to inherit it from any device */ - for (link_dev = dev_base; link_dev; link_dev = link_dev->next) { + for (link_dev = per_net(dev_base, 
init_net()); link_dev; link_dev = link_dev->next) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -3032,7 +3032,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) valid_lft = INFINITY_LIFE_TIME; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(init_net(), ifm->ifa_index); if (dev == NULL) return -ENODEV; @@ -3208,9 +3208,9 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + for (dev = per_net(dev_base, init_net()), idx = 0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; if (idx > s_idx) @@ -3270,7 +3270,7 @@ done: read_unlock_bh(&idev->lock); in6_dev_put(idev); } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); cb->args[0] = idx; cb->args[1] = ip_idx; return skb->len; @@ -3318,7 +3318,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(init_net(), ifm->ifa_index); if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; @@ -3477,8 +3477,8 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev=per_net(dev_base, init_net()), idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; if ((idev = in6_dev_get(dev)) == NULL) @@ -3489,7 +3489,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (err <= 0) break; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); cb->args[0] = idx; return 
skb->len; @@ -4116,7 +4116,7 @@ void __exit addrconf_cleanup(void) * clean dev list. */ - for (dev=dev_base; dev; dev=dev->next) { + for (dev=per_net(dev_base, init_net()); dev; dev=dev->next) { if ((idev = __in6_dev_get(dev)) == NULL) continue; addrconf_ifdown(dev, 1); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 00bd55a..84f0623 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -302,7 +302,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out; } - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index c42bad9..906ef0f 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -113,10 +113,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags(init_net(), IFF_UP, IFF_UP|IFF_LOOPBACK); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(init_net(), ifindex); if (dev == NULL) { err = -ENODEV; @@ -197,7 +197,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(init_net(), pac->acl_ifindex); if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); dev_put(dev); @@ -225,7 +225,7 @@ void ipv6_sock_ac_close(struct sock *sk) if (pac->acl_ifindex != prev_index) { if (dev) dev_put(dev); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(init_net(), pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -427,11 +427,11 @@ int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) { if (dev) return ipv6_chk_acast_dev(dev, addr); - read_lock(&dev_base_lock); - for (dev=dev_base; dev; dev=dev->next) + 
read_lock(&per_net(dev_base_lock, init_net())); + for (dev=per_net(dev_base, init_net()); dev; dev=dev->next) if (ipv6_chk_acast_dev(dev, addr)) break; - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return dev != 0; } @@ -449,7 +449,7 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct ifacaddr6 *im = NULL; struct ac6_iter_state *state = ac6_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL; + for (state->dev = per_net(dev_base, init_net()), state->idev = NULL; state->dev; state->dev = state->dev->next) { struct inet6_dev *idev; @@ -502,7 +502,7 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); return ac6_get_idx(seq, *pos); } @@ -521,7 +521,7 @@ static void ac6_seq_stop(struct seq_file *seq, void *v) read_unlock_bh(&state->idev->lock); in6_dev_put(state->idev); } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static int ac6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5c94fea..c5dfb4e 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -536,7 +536,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, if (!src_info->ipi6_ifindex) return -EINVAL; else { - dev = dev_get_by_index(src_info->ipi6_ifindex); + dev = dev_get_by_index(init_net(), src_info->ipi6_ifindex); if (!dev) return -ENODEV; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8d91834..9006cbf 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -231,7 +231,7 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p) int i; for (i = 1; i < IP6_TNL_MAX; i++) { sprintf(name, "ip6tnl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(init_net(), name) == NULL) break; } if (i == IP6_TNL_MAX) @@ 
-505,7 +505,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(init_net(), p->link); if ((ipv6_addr_is_multicast(&p->laddr) || likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && @@ -621,7 +621,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(init_net(), p->link); if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) printk(KERN_WARNING diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 352690e..65d9b82 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -548,7 +548,7 @@ done: if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; - if (__dev_get_by_index(val) == NULL) { + if (__dev_get_by_index(init_net(), val) == NULL) { retv = -ENODEV; break; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 2759571..da45f46 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -215,7 +215,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(init_net(), ifindex); if (dev == NULL) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -266,7 +266,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { + if ((dev = dev_get_by_index(init_net(), mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -301,7 +301,7 @@ static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(init_net(), ifindex); if (!dev) return NULL; @@ -332,7 +332,7 @@ void ipv6_sock_mc_close(struct sock *sk) 
np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(mc_lst->ifindex); + dev = dev_get_by_index(init_net(), mc_lst->ifindex); if (dev) { struct inet6_dev *idev = in6_dev_get(dev); @@ -2334,7 +2334,7 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct ifmcaddr6 *im = NULL; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL; + for (state->dev = per_net(dev_base, init_net()), state->idev = NULL; state->dev; state->dev = state->dev->next) { struct inet6_dev *idev; @@ -2388,7 +2388,7 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos) static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); return igmp6_mc_get_idx(seq, *pos); } @@ -2409,7 +2409,7 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static int igmp6_mc_seq_show(struct seq_file *seq, void *v) @@ -2478,7 +2478,7 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) struct ifmcaddr6 *im = NULL; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL, state->im = NULL; + for (state->dev = per_net(dev_base, init_net()), state->idev = NULL, state->im = NULL; state->dev; state->dev = state->dev->next) { struct inet6_dev *idev; @@ -2547,7 +2547,7 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos) static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&dev_base_lock); + read_lock(&per_net(dev_base_lock, init_net())); return *pos ? 
igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2575,7 +2575,7 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2e1825c..5a68e2d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -256,7 +256,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 6f9a904..0441380 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -301,7 +301,7 @@ static void ip6_frag_expire(unsigned long data) fq_kill(fq); - dev = dev_get_by_index(fq->iif); + dev = dev_get_by_index(init_net(), fq->iif); if (!dev) goto out; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6805c39..4519006 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1045,7 +1045,7 @@ int ip6_route_add(struct fib6_config *cfg) #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(cfg->fc_ifindex); + dev = dev_get_by_index(init_net(), cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -2168,7 +2168,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(init_net(), iif); if (!dev) { err = -ENODEV; goto errout; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 77b7b09..8f97692 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -173,7 +173,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int int i; for (i=1; i<100; i++) { sprintf(name, "sit%d", i); - if (__dev_get_by_name(name) == NULL) + if 
(__dev_get_by_name(init_net(), name) == NULL) break; } if (i==100) @@ -759,7 +759,7 @@ static int ipip6_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(init_net(), tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f2674fe..0e63fd2 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -987,7 +987,7 @@ static int ipxitf_create(struct ipx_interface_definition *idef) if (intrfc) ipxitf_put(intrfc); - dev = dev_get_by_name(idef->ipx_device); + dev = dev_get_by_name(init_net(), idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1095,7 +1095,7 @@ static int ipxitf_delete(struct ipx_interface_definition *idef) if (!dlink_type) goto out; - dev = __dev_get_by_name(idef->ipx_device); + dev = __dev_get_by_name(init_net(), idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1190,7 +1190,7 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(ifr))) break; sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; - dev = __dev_get_by_name(ifr.ifr_name); + dev = __dev_get_by_name(init_net(), ifr.ifr_name); rc = -ENODEV; if (!dev) break; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6bc0fff..ac380ac 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -252,7 +252,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) if (!sock_flag(sk, SOCK_ZAPPED)) goto out; rc = -ENODEV; - llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd); + llc->dev = dev_getfirstbyhwtype(init_net(), addr->sllc_arphrd); if (!llc->dev) goto out; rc = -EUSERS; @@ -303,7 +303,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out; rc = -ENODEV; rtnl_lock(); - llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); + llc->dev = dev_getbyhwaddr(init_net(), addr->sllc_arphrd, 
addr->sllc_mac); rtnl_unlock(); if (!llc->dev) goto out; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index f438c38..24a5739 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include LIST_HEAD(llc_sap_list); @@ -159,8 +160,8 @@ static struct packet_type llc_tr_packet_type = { static int __init llc_init(void) { - if (dev_base->next) - memcpy(llc_station_mac_sa, dev_base->next->dev_addr, ETH_ALEN); + if (per_net(dev_base, init_net())->next) + memcpy(llc_station_mac_sa, per_net(dev_base, init_net())->next->dev_addr, ETH_ALEN); else memset(llc_station_mac_sa, 0, ETH_ALEN); dev_add_pack(&llc_packet_type); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 8f88964..5bfd12e 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -581,7 +581,7 @@ static struct net_device *nr_ax25_dev_get(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(init_net(), devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -598,15 +598,15 @@ struct net_device *nr_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } if (first) dev_hold(first); - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return first; } @@ -618,15 +618,15 @@ struct net_device *nr_dev_get(ax25_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { if ((dev->flags & 
IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } out: - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return dev; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6e3b947..4ac9f9f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -359,7 +359,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); + dev = dev_get_by_name(init_net(), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -744,7 +744,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, } - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(init_net(), ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -943,7 +943,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(name); + dev = dev_get_by_name(init_net(), name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); @@ -971,7 +971,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(sll->sll_ifindex); + dev = dev_get_by_index(init_net(), sll->sll_ifindex); if (dev == NULL) goto out; } @@ -1158,7 +1158,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(pkt_sk(sk)->ifindex); + dev = dev_get_by_index(init_net(), pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1184,7 +1184,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(po->ifindex); + dev = 
dev_get_by_index(init_net(), po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -1237,7 +1237,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(mreq->mr_ifindex); + dev = __dev_get_by_index(init_net(), mreq->mr_ifindex); if (!dev) goto done; @@ -1291,7 +1291,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(ml->ifindex); + dev = dev_get_by_index(init_net(), ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1319,7 +1319,7 @@ static void packet_flush_mclist(struct sock *sk) struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(init_net(), ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 8028c0d..92343be 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -579,7 +579,7 @@ static struct net_device *rose_ax25_dev_get(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(init_net(), devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -596,13 +596,13 @@ struct net_device *rose_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return first; } @@ -614,15 +614,15 @@ struct net_device *rose_dev_get(rose_address *addr) { 
struct net_device *dev; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } out: - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return dev; } @@ -630,13 +630,13 @@ static int rose_dev_exists(rose_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev != NULL; dev = dev->next) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } out: - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); return dev != NULL; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 4838972..2c3e4af 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -85,7 +85,7 @@ static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); if (parm->ifindex) { - dev = __dev_get_by_index(parm->ifindex); + dev = __dev_get_by_index(init_net(), parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index edb8fc9..19935f9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -164,7 +164,7 @@ replay: /* Find head of filter chain. 
*/ /* Find link */ - if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(init_net(), t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ @@ -397,7 +397,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) return skb->len; read_lock(&qdisc_tree_lock); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 45d47d3..5df7cdf 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -291,7 +291,7 @@ META_COLLECTOR(var_sk_bound_if) } else { struct net_device *dev; - dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), skb->sk->sk_bound_dev_if); *err = var_dev(dev, dst); if (dev) dev_put(dev); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index da7e1eb..912e8e1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -586,7 +586,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -653,7 +653,7 @@ replay: clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -858,8 +858,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; - read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev=per_net(dev_base, init_net()), idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; if (idx > s_idx) @@ -882,7 +882,7 @@ static int 
tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) } done: - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); cb->args[0] = idx; cb->args[1] = q_idx; @@ -912,7 +912,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) return -ENODEV; /* @@ -1095,7 +1095,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(init_net(), tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0217546..10b748c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -844,7 +844,7 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(init_net(), addr->v6.sin6_scope_id); if (!dev) return 0; dev_put(dev); @@ -871,7 +871,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(init_net(), addr->v6.sin6_scope_id); if (!dev) return 0; dev_put(dev); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9461a10..05e2bb5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -170,14 +170,14 @@ static void sctp_get_local_addr_list(void) struct list_head *pos; struct sctp_af *af; - read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + read_lock(&per_net(dev_base_lock, init_net())); + for (dev = per_net(dev_base, init_net()); dev; dev = dev->next) { 
__list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); } } - read_unlock(&dev_base_lock); + read_unlock(&per_net(dev_base_lock, init_net())); } /* Free the existing local addresses. */ diff --git a/net/socket.c b/net/socket.c index 0d0c92b..7371654 100644 --- a/net/socket.c +++ b/net/socket.c @@ -772,9 +772,9 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (net_t, unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook) (unsigned int, void __user *)) +void brioctl_set(int (*hook) (net_t, unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@ -784,9 +784,9 @@ void brioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook) (void __user *arg); +static int (*vlan_ioctl_hook) (net_t, void __user *arg); -void vlan_ioctl_set(int (*hook) (void __user *)) +void vlan_ioctl_set(int (*hook) (net_t, void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; @@ -815,16 +815,20 @@ EXPORT_SYMBOL(dlci_ioctl_set); static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; + struct sock *sk; void __user *argp = (void __user *)arg; int pid, err; + net_t net; sock = file->private_data; + sk = sock->sk; + net = sk->sk_net; if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #ifdef CONFIG_WIRELESS_EXT if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #endif /* CONFIG_WIRELESS_EXT */ switch (cmd) { @@ -850,7 +854,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 
mutex_lock(&br_ioctl_mutex); if (br_ioctl_hook) - err = br_ioctl_hook(cmd, argp); + err = br_ioctl_hook(net, cmd, argp); mutex_unlock(&br_ioctl_mutex); break; case SIOCGIFVLAN: @@ -861,7 +865,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) mutex_lock(&vlan_ioctl_mutex); if (vlan_ioctl_hook) - err = vlan_ioctl_hook(argp); + err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; case SIOCADDDLCI: @@ -884,7 +888,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) * to the NIC driver. */ if (err == -ENOIOCTLCMD) - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); break; } return err; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index c6f64de..ba207ba 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -127,7 +127,7 @@ static int recv_msg(struct sk_buff *buf, static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = dev_base; + struct net_device *dev = per_net(dev_base, init_net()); struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index f9b896c..397e876 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -586,7 +586,7 @@ static int wanpipe_sendmsg(struct kiocb *iocb, struct socket *sock, addr = saddr->sll_addr; } - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(init_net(), ifindex); if (dev == NULL){ printk(KERN_INFO "wansock: Send failed, dev index: %i\n",ifindex); return -ENXIO; @@ -769,7 +769,7 @@ static int execute_command(struct sock *sk, unsigned char cmd, unsigned int fla int err=0; DECLARE_WAITQUEUE(wait, current); - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (dev == NULL){ printk(KERN_INFO "wansock: Exec failed no dev %i\n", sk->sk_bound_dev_if); @@ 
-878,7 +878,7 @@ static void wanpipe_unlink_driver (struct sock *sk) sk->sk_state = WANSOCK_DISCONNECTED; wp_sk(sk)->dev = NULL; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (!dev){ printk(KERN_INFO "wansock: No dev on release\n"); return; @@ -974,7 +974,7 @@ static int wanpipe_release(struct socket *sock) if (wp->num == htons(X25_PROT) && sk->sk_state != WANSOCK_DISCONNECTED && sock_flag(sk, SOCK_ZAPPED)) { - struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if); + struct net_device *dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); wanpipe_common_t *chan; if (dev){ chan=dev->priv; @@ -1153,7 +1153,7 @@ static void wanpipe_kill_sock_timer (unsigned long data) if (wp_sk(sk)->num == htons(X25_PROT) && sk->sk_state != WANSOCK_DISCONNECTED) { - struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if); + struct net_device *dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); wanpipe_common_t *chan; if (dev){ chan=dev->priv; @@ -1396,7 +1396,7 @@ static int wanpipe_bind(struct socket *sock, struct sockaddr *uaddr, int addr_le * This is used by PVC mostly */ strlcpy(name,sll->sll_device,sizeof(name)); - dev = dev_get_by_name(name); + dev = dev_get_by_name(init_net(), name); if (dev == NULL){ printk(KERN_INFO "wansock: Failed to get Dev from name: %s,\n", name); @@ -1641,7 +1641,7 @@ static void wanpipe_wakeup_driver(struct sock *sk) struct net_device *dev = NULL; wanpipe_common_t *chan=NULL; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (!dev) return; @@ -1680,7 +1680,7 @@ static int wanpipe_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_WANPIPE; sll->sll_ifindex = sk->sk_bound_dev_if; sll->sll_protocol = wp_sk(sk)->num; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = 
dev->addr_len; @@ -1898,7 +1898,7 @@ static int wanpipe_debug (struct sock *origsk, void *arg) return err; if (sk->sk_bound_dev_if) { - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (!dev) continue; @@ -2009,7 +2009,7 @@ static int set_ioctl_cmd (struct sock *sk, void *arg) if (!wp_sk(sk)->mbox) { void *mbox_ptr; - struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if); + struct net_device *dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); if (!dev) return -ENODEV; @@ -2352,7 +2352,7 @@ static int wanpipe_exec_cmd(struct sock *sk, int cmd, unsigned int flags) static int check_driver_busy (struct sock *sk) { - struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if); + struct net_device *dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if); wanpipe_common_t *chan; if (!dev) @@ -2507,7 +2507,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr if (addr->sll_family != AF_WANPIPE) return -EINVAL; - if ((dev = dev_get_by_index(sk->sk_bound_dev_if)) == NULL) + if ((dev = dev_get_by_index(init_net(), sk->sk_bound_dev_if)) == NULL) return -ENETUNREACH; dev_put(dev); diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 2a3fe98..091b96e 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -126,7 +126,7 @@ void x25_route_device_down(struct net_device *dev) */ struct net_device *x25_dev_get(char *devname) { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(init_net(), devname); if (dev && (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25 -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:09 2007 From: ebiederm at xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:09 -0700 Subject: [openib-general] [PATCH RFC 7/31] net: Make /proc/net per network namespace In-Reply-To: References: Message-ID: <11697516343724-git-send-email-ebiederm@xmission.com> From: Eric W. 
Biederman - unquoted This patch makes /proc/net per network namespace. It modifies the global variables proc_net and proc_net_stat to be per network namespace. The proc_net file helpers are modified to take a network namespace argument, and all of their callers are fixed to pass init_net() for that argument. This ensures that all of the /proc/net files are only visible and usable in the initial network namespace until the code behind them has been updated to handle multiple network namespaces. Making /proc/net per namespace is necessary as at least some files in /proc/net depend upon the set of network devices which is per network namespace, and even more files in /proc/net have contents that are relevant to a single network namespace. Signed-off-by: Eric W. Biederman --- drivers/isdn/divert/divert_procfs.c | 7 +- drivers/isdn/hardware/eicon/diva_didd.c | 5 +- drivers/isdn/hysdn/hysdn_procconf.c | 4 +- drivers/net/bonding/bond_main.c | 7 +- drivers/net/hamradio/bpqether.c | 5 +- drivers/net/hamradio/scc.c | 4 +- drivers/net/hamradio/yam.c | 5 +- drivers/net/ibmveth.c | 6 +- drivers/net/pppoe.c | 5 +- drivers/net/tc35815.c | 1 - drivers/net/tokenring/lanstreamer.c | 4 +- drivers/net/tokenring/olympic.c | 9 +- drivers/net/wireless/hostap/hostap_main.c | 7 +- drivers/net/wireless/strip.c | 5 +- fs/proc/Makefile | 1 + fs/proc/internal.h | 5 + fs/proc/proc_net.c | 126 ++++++++++++++++++++ fs/proc/root.c | 8 +- include/linux/proc_fs.h | 28 +++-- include/net/net_namespace.h | 11 ++ net/802/tr.c | 3 +- net/8021q/vlanproc.c | 5 +- net/appletalk/atalk_proc.c | 7 +- net/atm/proc.c | 5 +- net/ax25/af_ax25.c | 13 +- net/core/dev.c | 9 +- net/core/dev_mcast.c | 3 +- net/core/neighbour.c | 3 +- net/core/pktgen.c | 9 +- net/core/sock.c | 3 +- net/core/wireless.c | 3 +- net/dccp/probe.c | 7 +- net/decnet/af_decnet.c | 5 +- net/decnet/dn_dev.c | 5 +- net/decnet/dn_neigh.c | 5 +- net/decnet/dn_route.c | 5 +- net/ieee80211/ieee80211_module.c | 6 +- net/ipv4/arp.c | 3 +- 
net/ipv4/fib_hash.c | 5 +- net/ipv4/fib_trie.c | 17 ++-- net/ipv4/igmp.c | 5 +- net/ipv4/ipconfig.c | 3 +- net/ipv4/ipmr.c | 5 +- net/ipv4/ipvs/ip_vs_app.c | 5 +- net/ipv4/ipvs/ip_vs_conn.c | 5 +- net/ipv4/ipvs/ip_vs_ctl.c | 9 +- net/ipv4/ipvs/ip_vs_lblcr.c | 4 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 16 ++-- net/ipv4/netfilter/ip_queue.c | 7 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 3 +- net/ipv4/netfilter/ipt_recent.c | 5 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 17 ++-- net/ipv4/proc.c | 11 +- net/ipv4/raw.c | 5 +- net/ipv4/route.c | 7 +- net/ipv4/tcp_ipv4.c | 5 +- net/ipv4/tcp_probe.c | 6 +- net/ipv4/udp.c | 5 +- net/ipv6/addrconf.c | 7 +- net/ipv6/anycast.c | 5 +- net/ipv6/ip6_flowlabel.c | 5 +- net/ipv6/mcast.c | 9 +- net/ipv6/netfilter/ip6_queue.c | 7 +- net/ipv6/proc.c | 17 ++-- net/ipv6/raw.c | 5 +- net/ipv6/route.c | 9 +- net/ipx/ipx_proc.c | 7 +- net/irda/irproc.c | 5 +- net/key/af_key.c | 5 +- net/llc/llc_proc.c | 7 +- net/netfilter/core.c | 3 +- net/netfilter/nf_conntrack_standalone.c | 19 ++-- net/netfilter/x_tables.c | 17 ++-- net/netfilter/xt_hashlimit.c | 11 +- net/netlink/af_netlink.c | 3 +- net/netrom/af_netrom.c | 13 +- net/packet/af_packet.c | 5 +- net/rose/af_rose.c | 17 ++-- net/rxrpc/proc.c | 7 +- net/sched/sch_api.c | 3 +- net/sctp/protocol.c | 5 +- net/sunrpc/stats.c | 5 +- net/unix/af_unix.c | 5 +- net/wanrouter/wanproc.c | 7 +- net/x25/x25_proc.c | 7 +- 85 files changed, 462 insertions(+), 250 deletions(-) diff --git a/drivers/isdn/divert/divert_procfs.c b/drivers/isdn/divert/divert_procfs.c index 06967da..6517dd5 100644 --- a/drivers/isdn/divert/divert_procfs.c +++ b/drivers/isdn/divert/divert_procfs.c @@ -18,6 +18,7 @@ #include #endif #include +#include #include "isdn_divert.h" @@ -285,12 +286,12 @@ divert_dev_init(void) init_waitqueue_head(&rd_queue); #ifdef CONFIG_PROC_FS - isdn_proc_entry = proc_mkdir("net/isdn", NULL); + isdn_proc_entry = proc_mkdir("isdn", per_net(proc_net, init_net())); if (!isdn_proc_entry) 
return (-1); isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry); if (!isdn_divert_entry) { - remove_proc_entry("net/isdn", NULL); + remove_proc_entry("isdn", per_net(proc_net, init_net())); return (-1); } isdn_divert_entry->proc_fops = &isdn_fops; @@ -310,7 +311,7 @@ divert_dev_deinit(void) #ifdef CONFIG_PROC_FS remove_proc_entry("divert", isdn_proc_entry); - remove_proc_entry("net/isdn", NULL); + remove_proc_entry("isdn", per_net(proc_net, init_net())); #endif /* CONFIG_PROC_FS */ return (0); diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c index 14298b8..1b7c0f9 100644 --- a/drivers/isdn/hardware/eicon/diva_didd.c +++ b/drivers/isdn/hardware/eicon/diva_didd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "platform.h" #include "di_defs.h" @@ -86,7 +87,7 @@ proc_read(char *page, char **start, off_t off, int count, int *eof, static int DIVA_INIT_FUNCTION create_proc(void) { - proc_net_eicon = proc_mkdir("net/eicon", NULL); + proc_net_eicon = proc_mkdir("eicon", per_net(proc_net, init_net())); if (proc_net_eicon) { if ((proc_didd = @@ -102,7 +103,7 @@ static int DIVA_INIT_FUNCTION create_proc(void) static void DIVA_EXIT_FUNCTION remove_proc(void) { remove_proc_entry(DRIVERLNAME, proc_net_eicon); - remove_proc_entry("net/eicon", NULL); + remove_proc_entry("eicon", per_net(proc_net, init_net())); } static int DIVA_INIT_FUNCTION divadidd_init(void) diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c index 94a9350..b634e67 100644 --- a/drivers/isdn/hysdn/hysdn_procconf.c +++ b/drivers/isdn/hysdn/hysdn_procconf.c @@ -392,7 +392,7 @@ hysdn_procconf_init(void) hysdn_card *card; unsigned char conf_name[20]; - hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, proc_net); + hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, per_net(proc_net, init_net())); if (!hysdn_proc_entry) { printk(KERN_ERR "HYSDN: unable to create hysdn subdir\n"); return (-1); @@ 
-437,5 +437,5 @@ hysdn_procconf_release(void) card = card->next; /* point to next card */ } - remove_proc_entry(PROC_SUBDIR_NAME, proc_net); + remove_proc_entry(PROC_SUBDIR_NAME, per_net(proc_net, init_net())); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6482aed..9b3bf4e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -75,6 +75,7 @@ #include #include #include +#include #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -3169,7 +3170,7 @@ static void bond_create_proc_dir(void) { int len = strlen(DRV_NAME); - for (bond_proc_dir = proc_net->subdir; bond_proc_dir; + for (bond_proc_dir = per_net(proc_net, init_net())->subdir; bond_proc_dir; bond_proc_dir = bond_proc_dir->next) { if ((bond_proc_dir->namelen == len) && !memcmp(bond_proc_dir->name, DRV_NAME, len)) { @@ -3178,7 +3179,7 @@ static void bond_create_proc_dir(void) } if (!bond_proc_dir) { - bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); + bond_proc_dir = proc_mkdir(DRV_NAME, per_net(proc_net, init_net())); if (bond_proc_dir) { bond_proc_dir->owner = THIS_MODULE; } else { @@ -3213,7 +3214,7 @@ static void bond_destroy_proc_dir(void) bond_proc_dir->owner = NULL; } } else { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, per_net(proc_net, init_net())); bond_proc_dir = NULL; } } diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 5b788d8..9fc92ad 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -83,6 +83,7 @@ #include #include +#include #include @@ -594,7 +595,7 @@ static int bpq_device_event(struct notifier_block *this,unsigned long event, voi static int __init bpq_init_driver(void) { #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create("bpqether", S_IRUGO, &bpq_info_fops)) { + if (!proc_net_fops_create(init_net(), "bpqether", S_IRUGO, &bpq_info_fops)) { printk(KERN_ERR "bpq: cannot create /proc/net/bpqether entry.\n"); return 
-ENOENT; @@ -618,7 +619,7 @@ static void __exit bpq_cleanup_driver(void) unregister_netdevice_notifier(&bpq_dev_notifier); - proc_net_remove("bpqether"); + proc_net_remove(init_net(), "bpqether"); rtnl_lock(); while (!list_empty(&bpq_devices)) { diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 2ce047e..2000597 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -2114,7 +2114,7 @@ static int __init scc_init_driver (void) } rtnl_unlock(); - proc_net_fops_create("z8530drv", 0, &scc_net_seq_fops); + proc_net_fops_create(init_net(), "z8530drv", 0, &scc_net_seq_fops); return 0; } @@ -2169,7 +2169,7 @@ static void __exit scc_cleanup_driver(void) if (Vector_Latch) release_region(Vector_Latch, 1); - proc_net_remove("z8530drv"); + proc_net_remove(init_net(), "z8530drv"); } MODULE_AUTHOR("Joerg Reuter "); diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 6d74f08..3e92f3b 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -1147,7 +1148,7 @@ static int __init yam_init_driver(void) yam_timer.expires = jiffies + HZ / 100; add_timer(&yam_timer); - proc_net_fops_create("yam", S_IRUGO, &yam_info_fops); + proc_net_fops_create(init_net(), "yam", S_IRUGO, &yam_info_fops); return 0; error: while (--i >= 0) { @@ -1179,7 +1180,7 @@ static void __exit yam_cleanup_driver(void) kfree(p); } - proc_net_remove("yam"); + proc_net_remove(init_net(), "yam"); } /* --------------------------------------------------------------------- */ diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 99343b5..d8b0ba8 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -97,7 +97,7 @@ static inline void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); static struct kobj_type ktype_veth_pool; #ifdef CONFIG_PROC_FS -#define IBMVETH_PROC_DIR "net/ibmveth" +#define IBMVETH_PROC_DIR "ibmveth" static struct 
proc_dir_entry *ibmveth_proc_dir; #endif @@ -1073,7 +1073,7 @@ static int __devexit ibmveth_remove(struct vio_dev *dev) #ifdef CONFIG_PROC_FS static void ibmveth_proc_register_driver(void) { - ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, NULL); + ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, per_net(proc_net, init_net())); if (ibmveth_proc_dir) { SET_MODULE_OWNER(ibmveth_proc_dir); } @@ -1081,7 +1081,7 @@ static void ibmveth_proc_register_driver(void) static void ibmveth_proc_unregister_driver(void) { - remove_proc_entry(IBMVETH_PROC_DIR, NULL); + remove_proc_entry(IBMVETH_PROC_DIR, per_net(proc_net, init_net())); } static void *ibmveth_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 315d5c3..d34fe16 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -72,6 +72,7 @@ #include #include +#include #include #include @@ -1055,7 +1056,7 @@ static int __init pppoe_proc_init(void) { struct proc_dir_entry *p; - p = create_proc_entry("net/pppoe", S_IRUGO, NULL); + p = create_proc_entry("pppoe", S_IRUGO, per_net(proc_net, init_net())); if (!p) return -ENOMEM; @@ -1126,7 +1127,7 @@ static void __exit pppoe_exit(void) dev_remove_pack(&pppoes_ptype); dev_remove_pack(&pppoed_ptype); unregister_netdevice_notifier(&pppoe_notifier); - remove_proc_entry("net/pppoe", NULL); + remove_proc_entry("pppoe", per_net(proc_net, init_net())); proto_unregister(&pppoe_sk_proto); } diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index 81ed82f..1f26c29 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -61,7 +61,6 @@ static const char *version = * io regions, irqs and dma channels */ static const char* cardname = "TC35815CF"; -#define TC35815_PROC_ENTRY "net/tc35815" #define TC35815_MODULE_NAME "TC35815CF" #define TX_TIMEOUT (4*HZ) diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c index e999feb..b382ef3 100644 --- a/drivers/net/tokenring/lanstreamer.c +++ 
b/drivers/net/tokenring/lanstreamer.c @@ -250,7 +250,7 @@ static int __devinit streamer_init_one(struct pci_dev *pdev, #if STREAMER_NETWORK_MONITOR #ifdef CONFIG_PROC_FS if (!dev_streamer) - create_proc_read_entry("net/streamer_tr", 0, 0, + create_proc_read_entry("streamer_tr", 0, per_net(proc_net, init_net()), streamer_proc_info, NULL); streamer_priv->next = dev_streamer; dev_streamer = streamer_priv; @@ -423,7 +423,7 @@ static void __devexit streamer_remove_one(struct pci_dev *pdev) } } if (!dev_streamer) - remove_proc_entry("net/streamer_tr", NULL); + remove_proc_entry("streamer_tr", per_net(proc_net, init_net())); } #endif #endif diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c index 8f4ecc1..6b74c3b 100644 --- a/drivers/net/tokenring/olympic.c +++ b/drivers/net/tokenring/olympic.c @@ -101,6 +101,7 @@ #include #include +#include #include #include @@ -268,9 +269,9 @@ static int __devinit olympic_probe(struct pci_dev *pdev, const struct pci_device printk("Olympic: %s registered as: %s\n",olympic_priv->olympic_card_name,dev->name); if (olympic_priv->olympic_network_monitor) { /* Must go after register_netdev as we need the device name */ char proc_name[20] ; - strcpy(proc_name,"net/olympic_") ; + strcpy(proc_name,"olympic_") ; strcat(proc_name,dev->name) ; - create_proc_read_entry(proc_name,0,NULL,olympic_proc_info,(void *)dev) ; + create_proc_read_entry(proc_name,0,per_net(proc_net, init_net()),olympic_proc_info,(void *)dev) ; printk("Olympic: Network Monitor information: /proc/%s\n",proc_name); } return 0 ; @@ -1750,9 +1751,9 @@ static void __devexit olympic_remove_one(struct pci_dev *pdev) if (olympic_priv->olympic_network_monitor) { char proc_name[20] ; - strcpy(proc_name,"net/olympic_") ; + strcpy(proc_name,"olympic_") ; strcat(proc_name,dev->name) ; - remove_proc_entry(proc_name,NULL); + remove_proc_entry(proc_name,per_net(proc_net, init_net())); } unregister_netdev(dev) ; iounmap(olympic_priv->olympic_mmio) ; diff --git 
a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index 04c19ce..69b56d6 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1093,8 +1094,8 @@ struct proc_dir_entry *hostap_proc; static int __init hostap_init(void) { - if (proc_net != NULL) { - hostap_proc = proc_mkdir("hostap", proc_net); + if (per_net(proc_net, init_net()) != NULL) { + hostap_proc = proc_mkdir("hostap", per_net(proc_net, init_net())); if (!hostap_proc) printk(KERN_WARNING "Failed to mkdir " "/proc/net/hostap\n"); @@ -1109,7 +1110,7 @@ static void __exit hostap_exit(void) { if (hostap_proc != NULL) { hostap_proc = NULL; - remove_proc_entry("hostap", proc_net); + remove_proc_entry("hostap", per_net(proc_net, init_net())); } } diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index ce3a8ba..6c27ff2 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -107,6 +107,7 @@ static const char StripVersion[] = "1.3A-STUART.CHESHIRE"; #include #include #include +#include #include #include @@ -2789,7 +2790,7 @@ static int __init strip_init_driver(void) /* * Register the status file with /proc */ - proc_net_fops_create("strip", S_IFREG | S_IRUGO, &strip_seq_fops); + proc_net_fops_create(init_net(), "strip", S_IFREG | S_IRUGO, &strip_seq_fops); return status; } @@ -2811,7 +2812,7 @@ static void __exit strip_exit_driver(void) } /* Unregister with the /proc/net file here. 
*/ - proc_net_remove("strip"); + proc_net_remove(init_net(), "strip"); if ((i = tty_unregister_ldisc(N_STRIP))) printk(KERN_ERR "STRIP: can't unregister line discipline (err = %d)\n", i); diff --git a/fs/proc/Makefile b/fs/proc/Makefile index a6b3a8f..63cc3ce 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -10,6 +10,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ proc_tty.o proc_misc.o proc_sysctl.o +proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3c9a305..f916252 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -12,6 +12,11 @@ #include extern int proc_sys_init(void); +#ifdef CONFIG_NET +extern int proc_net_init(void); +#else +static inline int proc_net_init(void) { return 0; } +#endif struct vmalloc_info { unsigned long used; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c new file mode 100644 index 0000000..022dd9a --- /dev/null +++ b/fs/proc/proc_net.c @@ -0,0 +1,126 @@ +/* + * linux/fs/proc/net.c + * + * Copyright (C) 2007 + * + * Author: Eric Biederman + * + * proc net directory handling functions + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +static struct proc_dir_entry *proc_net_shadow; +DEFINE_PER_NET(struct proc_dir_entry *, proc_net); +DEFINE_PER_NET(struct proc_dir_entry *, proc_net_stat); +EXPORT_PER_NET_SYMBOL(proc_net); +EXPORT_PER_NET_SYMBOL(proc_net_stat); + +static DEFINE_PER_NET(struct proc_dir_entry, proc_net_root); + +static struct dentry *proc_net_shadow_dentry(struct dentry *parent, + struct proc_dir_entry *de) +{ + struct dentry *shadow = NULL; + struct inode *inode; + if (!de) + goto out; + inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de); + if (!inode) + goto out; + 
shadow = d_alloc_name(parent, de->name); + if (!shadow) + goto out_iput; + shadow->d_op = parent->d_op; /* proc_dentry_operations */ + d_instantiate(shadow, inode); +out: + return shadow; +out_iput: + iput(inode); + goto out; +} + +static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd) +{ + net_t net = current->nsproxy->net_ns; + struct dentry *shadow; + shadow = proc_net_shadow_dentry(parent, per_net(proc_net, net)); + if (!shadow) + return ERR_PTR(-ENOENT); + + dput(nd->dentry); + /* My dentry count is 1 and that should be enough as the + * shadow dentry is thrown away immediately. + */ + nd->dentry = shadow; + return NULL; +} + +static const struct file_operations proc_net_dir_operations = { + .read = generic_read_dir, +}; + +static struct inode_operations proc_net_dir_inode_operations = { + .follow_link = proc_net_follow_link, +}; + + +static int proc_net_ns_init(net_t net) +{ + struct proc_dir_entry *netd, *net_statd; + + netd = proc_mkdir("net", &per_net(proc_net_root, net)); + if (!netd) + return -EEXIST; + + net_statd = proc_mkdir("stat", netd); + if (!net_statd) { + remove_proc_entry("net", &per_net(proc_net_root, net)); + return -EEXIST; + } + + netd->data = net_to_voidp(net); + net_statd->data = net_to_voidp(net); + per_net(proc_net_root, net).data = net_to_voidp(net); + + per_net(proc_net, net) = netd; + per_net(proc_net_stat, net) = net_statd; + + return 0; +} + +static void proc_net_ns_exit(net_t net) +{ + remove_proc_entry("stat", per_net(proc_net, net)); + remove_proc_entry("net", &per_net(proc_net_root, net)); + +} + +struct pernet_operations proc_net_ns_ops = { + .init = proc_net_ns_init, + .exit = proc_net_ns_exit, +}; + +int proc_net_init(void) +{ + proc_net_shadow = proc_mkdir("net", NULL); + proc_net_shadow->proc_iops = &proc_net_dir_inode_operations; + proc_net_shadow->proc_fops = &proc_net_dir_operations; + + return register_pernet_subsys(&proc_net_ns_ops); +} diff --git a/fs/proc/root.c b/fs/proc/root.c index 
4d42406..7c3939c 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -21,7 +21,7 @@ #include "internal.h" -struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; +struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) @@ -61,8 +61,8 @@ void __init proc_root_init(void) return; } proc_misc_init(); - proc_net = proc_mkdir("net", NULL); - proc_net_stat = proc_mkdir("net/stat", NULL); + + proc_net_init(); #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); @@ -161,7 +161,5 @@ EXPORT_SYMBOL(create_proc_entry); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); EXPORT_SYMBOL(proc_root_fs); -EXPORT_SYMBOL(proc_net); -EXPORT_SYMBOL(proc_net_stat); EXPORT_SYMBOL(proc_bus); EXPORT_SYMBOL(proc_root_driver); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2969913..c1b958d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -85,8 +86,8 @@ struct vmcore { extern struct proc_dir_entry proc_root; extern struct proc_dir_entry *proc_root_fs; -extern struct proc_dir_entry *proc_net; -extern struct proc_dir_entry *proc_net_stat; +DECLARE_PER_NET(struct proc_dir_entry *, proc_net); +DECLARE_PER_NET(struct proc_dir_entry *, proc_net_stat); extern struct proc_dir_entry *proc_bus; extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; @@ -183,24 +184,25 @@ static inline struct proc_dir_entry *create_proc_info_entry(const char *name, return res; } -static inline struct proc_dir_entry *proc_net_create(const char *name, - mode_t mode, get_info_t *get_info) +static inline struct proc_dir_entry *proc_net_create(net_t net, + const char *name, mode_t mode, get_info_t *get_info) { - return create_proc_info_entry(name,mode,proc_net,get_info); + return 
create_proc_info_entry(name,mode,per_net(proc_net, net),get_info); } -static inline struct proc_dir_entry *proc_net_fops_create(const char *name, - mode_t mode, const struct file_operations *fops) +static inline struct proc_dir_entry *proc_net_fops_create(net_t net, + const char *name, mode_t mode, const struct file_operations *fops) { - struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net); + struct proc_dir_entry *res = + create_proc_entry(name, mode, per_net(proc_net, net)); if (res) res->proc_fops = fops; return res; } -static inline void proc_net_remove(const char *name) +static inline void proc_net_remove(net_t net, const char *name) { - remove_proc_entry(name,proc_net); + remove_proc_entry(name, per_net(proc_net, net)); } #else @@ -209,9 +211,9 @@ static inline void proc_net_remove(const char *name) #define proc_net NULL #define proc_bus NULL -#define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; }) -#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) -static inline void proc_net_remove(const char *name) {} +#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) +#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; }) +static inline void proc_net_remove(net_t net, const char *name) {} static inline void proc_flush_task(struct task_struct *task) { } diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 9208e2e..b64568f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -8,6 +8,7 @@ #include #include #include +#include /* How many bytes in each network namespace should we allocate * for use by modules when they are loaded. 
@@ -65,4 +66,14 @@ extern void unregister_pernet_subsys(struct pernet_operations *); extern int register_pernet_device(struct pernet_operations *); extern void unregister_pernet_device(struct pernet_operations *); +static inline net_t PDE_NET(struct proc_dir_entry *pde) +{ + return net_from_voidp(pde->parent->data); +} + +static inline net_t PROC_NET(const struct inode *inode) +{ + return PDE_NET(PDE(inode)); +} + #endif /* __NET_NET_NAMESPACE_H */ diff --git a/net/802/tr.c b/net/802/tr.c index 829deb4..3324fa6 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -36,6 +36,7 @@ #include #include #include +#include static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev); static void rif_check_expire(unsigned long dummy); @@ -636,7 +637,7 @@ static int __init rif_init(void) rif_timer.function = rif_check_expire; add_timer(&rif_timer); - proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops); + proc_net_fops_create(init_net(), "tr_rif", S_IRUGO, &rif_seq_fops); return 0; } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index a8fc0de..abcf58c 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "vlanproc.h" #include "vlan.h" @@ -143,7 +144,7 @@ void vlan_proc_cleanup(void) remove_proc_entry(name_conf, proc_vlan_dir); if (proc_vlan_dir) - proc_net_remove(name_root); + proc_net_remove(init_net(), name_root); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... 
@@ -156,7 +157,7 @@ void vlan_proc_cleanup(void) int __init vlan_proc_init(void) { - proc_vlan_dir = proc_mkdir(name_root, proc_net); + proc_vlan_dir = proc_mkdir(name_root, per_net(proc_net, init_net())); if (proc_vlan_dir) { proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR, diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 7ae4916..0e77c68 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -13,6 +13,7 @@ #include #include #include +#include static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos) @@ -271,7 +272,7 @@ int __init atalk_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - atalk_proc_dir = proc_mkdir("atalk", proc_net); + atalk_proc_dir = proc_mkdir("atalk", per_net(proc_net, init_net())); if (!atalk_proc_dir) goto out; atalk_proc_dir->owner = THIS_MODULE; @@ -306,7 +307,7 @@ out_socket: out_route: remove_proc_entry("interface", atalk_proc_dir); out_interface: - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", per_net(proc_net, init_net())); goto out; } @@ -316,5 +317,5 @@ void __exit atalk_proc_exit(void) remove_proc_entry("route", atalk_proc_dir); remove_proc_entry("socket", atalk_proc_dir); remove_proc_entry("arp", atalk_proc_dir); - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", per_net(proc_net, init_net())); } diff --git a/net/atm/proc.c b/net/atm/proc.c index 739866b..8b0299d 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -22,6 +22,7 @@ #include #include #include /* for __init */ +#include #include #include #include @@ -475,7 +476,7 @@ static void atm_proc_dirs_remove(void) if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - remove_proc_entry("net/atm", NULL); + remove_proc_entry("atm", per_net(proc_net, init_net())); } int __init atm_proc_init(void) @@ -483,7 +484,7 @@ int __init atm_proc_init(void) static struct atm_proc_entry *e; int ret; - atm_proc_root = proc_mkdir("net/atm",NULL); + 
atm_proc_root = proc_mkdir("atm", per_net(proc_net, init_net())); if (!atm_proc_root) goto err_out; for (e = atm_proc_ents; e->name; e++) { diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 42233df..e60af4e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -48,6 +48,7 @@ #include #include #include +#include @@ -2000,9 +2001,9 @@ static int __init ax25_init(void) register_netdevice_notifier(&ax25_dev_notifier); ax25_register_sysctl(); - proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_net_fops_create(init_net(), "ax25_route", S_IRUGO, &ax25_route_fops); + proc_net_fops_create(init_net(), "ax25", S_IRUGO, &ax25_info_fops); + proc_net_fops_create(init_net(), "ax25_calls", S_IRUGO, &ax25_uid_fops); out: return rc; } @@ -2016,9 +2017,9 @@ MODULE_ALIAS_NETPROTO(PF_AX25); static void __exit ax25_exit(void) { - proc_net_remove("ax25_route"); - proc_net_remove("ax25"); - proc_net_remove("ax25_calls"); + proc_net_remove(init_net(), "ax25_route"); + proc_net_remove(init_net(), "ax25"); + proc_net_remove(init_net(), "ax25_calls"); ax25_rt_free(); ax25_uid_free(); ax25_dev_free(); diff --git a/net/core/dev.c b/net/core/dev.c index 17c07f3..90e4c0e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -116,6 +116,7 @@ #include #include #include +#include /* * The list of packet types we will receive (as opposed to discard) @@ -2238,9 +2239,9 @@ static int __init dev_proc_init(void) { int rc = -ENOMEM; - if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) + if (!proc_net_fops_create(init_net(), "dev", S_IRUGO, &dev_seq_fops)) goto out; - if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) + if (!proc_net_fops_create(init_net(), "softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; if (wireless_proc_init()) goto out_softnet; @@ -2248,9 +2249,9 @@ static int __init dev_proc_init(void) out: return 
rc; out_softnet: - proc_net_remove("softnet_stat"); + proc_net_remove(init_net(), "softnet_stat"); out_dev: - proc_net_remove("dev"); + proc_net_remove(init_net(), "dev"); goto out; } #else diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index b22648d..623e606 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -47,6 +47,7 @@ #include #include #include +#include /* @@ -289,7 +290,7 @@ static struct file_operations dev_mc_seq_fops = { void __init dev_mcast_init(void) { - proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); + proc_net_fops_create(init_net(), "dev_mcast", 0, &dev_mc_seq_fops); } EXPORT_SYMBOL(dev_mc_add); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8437678..90e1d2e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -34,6 +34,7 @@ #include #include #include +#include #define NEIGH_DEBUG 1 @@ -1348,7 +1349,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + tbl->pde = create_proc_entry(tbl->id, 0, per_net(proc_net_stat, init_net())); if (!tbl->pde) panic("cannot create neighbour proc dir entry"); tbl->pde->proc_fops = &neigh_stat_seq_fops; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 04d4b93..ab48533 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -152,6 +152,7 @@ #include #include #include +#include #include #include #include @@ -3565,7 +3566,7 @@ static int __init pg_init(void) printk(version); - pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, per_net(proc_net, init_net())); if (!pg_proc_dir) return -ENODEV; pg_proc_dir->owner = THIS_MODULE; @@ -3574,7 +3575,7 @@ static int __init pg_init(void) if (pe == NULL) { printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(init_net(), PG_PROC_DIR); return -EINVAL; } @@ -3597,7 +3598,7 @@ 
static int __init pg_init(void) printk("pktgen: ERROR: Initialization failed for all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(init_net(), PG_PROC_DIR); return -ENODEV; } @@ -3624,7 +3625,7 @@ static void __exit pg_cleanup(void) /* Clean up proc file system */ remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(init_net(), PG_PROC_DIR); } module_init(pg_init); diff --git a/net/core/sock.c b/net/core/sock.c index 0ed5b4f..5555364 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -123,6 +123,7 @@ #include #include #include +#include #include @@ -1922,7 +1923,7 @@ static struct file_operations proto_seq_fops = { static int __init proto_init(void) { /* register /proc/net/protocols */ - return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; + return proc_net_fops_create(init_net(), "protocols", S_IRUGO, &proto_seq_fops) == NULL ? 
-ENOBUFS : 0; } subsys_initcall(proto_init); diff --git a/net/core/wireless.c b/net/core/wireless.c index f69ab7b..faa242f 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -94,6 +94,7 @@ #include /* Pretty obvious */ #include /* New driver API */ #include +#include #include /* copy_to_user() */ @@ -685,7 +686,7 @@ static struct file_operations wireless_seq_fops = { int __init wireless_proc_init(void) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_net_fops_create(init_net(), "wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; return 0; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index f81e37d..7c1c1ef 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "dccp.h" #include "ccid.h" @@ -165,7 +166,7 @@ static __init int dccpprobe_init(void) if (IS_ERR(dccpw.fifo)) return PTR_ERR(dccpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_net_fops_create(init_net(), procname, S_IRUSR, &dccpprobe_fops)) goto err0; ret = register_jprobe(&dccp_send_probe); @@ -175,7 +176,7 @@ static __init int dccpprobe_init(void) pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(init_net(), procname); err0: kfifo_free(dccpw.fifo); return ret; @@ -185,7 +186,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(dccpw.fifo); - proc_net_remove(procname); + proc_net_remove(init_net(), procname); unregister_jprobe(&dccp_send_probe); } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 21f20f2..77cd802 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -131,6 +131,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include +#include #include #include #include @@ -2396,7 +2397,7 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); 
register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops); + proc_net_fops_create(init_net(), "decnet", S_IRUGO, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2424,7 +2425,7 @@ static void __exit decnet_exit(void) dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove("decnet"); + proc_net_remove(init_net(), "decnet"); proto_unregister(&dn_proto); } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 913e25a..19b1469 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1483,7 +1484,7 @@ void __init dn_dev_init(void) rtnetlink_links[PF_DECnet] = dnet_rtnetlink_table; - proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_net_fops_create(init_net(), "decnet_dev", S_IRUGO, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { @@ -1506,7 +1507,7 @@ void __exit dn_dev_cleanup(void) } #endif /* CONFIG_SYSCTL */ - proc_net_remove("decnet_dev"); + proc_net_remove(init_net(), "decnet_dev"); dn_dev_devices_off(); } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 7322bb3..fd99aca 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -611,11 +612,11 @@ static struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_net_fops_create(init_net(), "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove("decnet_neigh"); + proc_net_remove(init_net(), "decnet_neigh"); neigh_table_clear(&dn_neigh_table); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9881933..0d657eb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include 
#include @@ -1811,7 +1812,7 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_net_fops_create(init_net(), "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); } void __exit dn_route_cleanup(void) @@ -1819,6 +1820,6 @@ void __exit dn_route_cleanup(void) del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove("decnet_cache"); + proc_net_remove(init_net(), "decnet_cache"); } diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index b1c6d1f..23539f6 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -263,7 +263,7 @@ static int __init ieee80211_init(void) struct proc_dir_entry *e; ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); + ieee80211_proc = proc_mkdir(DRV_NAME, per_net(proc_net, init_net())); if (ieee80211_proc == NULL) { IEEE80211_ERROR("Unable to create " DRV_NAME " proc directory\n"); @@ -272,7 +272,7 @@ static int __init ieee80211_init(void) e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, ieee80211_proc); if (!e) { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, per_net(proc_net, init_net())); ieee80211_proc = NULL; return -EIO; } @@ -292,7 +292,7 @@ static void __exit ieee80211_exit(void) #ifdef CONFIG_IEEE80211_DEBUG if (ieee80211_proc) { remove_proc_entry("debug_level", ieee80211_proc); - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, per_net(proc_net, init_net())); ieee80211_proc = NULL; } #endif /* CONFIG_IEEE80211_DEBUG */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 3981e8b..e3b89a7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -110,6 +110,7 @@ #include #include #include +#include #include #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) #include @@ -1400,7 +1401,7 @@ static struct file_operations arp_seq_fops = { static int __init arp_proc_init(void) { - if 
(!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops)) + if (!proc_net_fops_create(init_net(), "arp", S_IRUGO, &arp_seq_fops)) return -ENOMEM; return 0; } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 648f47c..42ea992 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "fib_lookup.h" @@ -1067,13 +1068,13 @@ static struct file_operations fib_seq_fops = { int __init fib_proc_init(void) { - if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops)) + if (!proc_net_fops_create(init_net(), "route", S_IRUGO, &fib_seq_fops)) return -ENOMEM; return 0; } void __init fib_proc_exit(void) { - proc_net_remove("route"); + proc_net_remove(init_net(), "route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 13307c0..94598b3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include "fib_lookup.h" @@ -2494,30 +2495,30 @@ static struct file_operations fib_route_fops = { int __init fib_proc_init(void) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + if (!proc_net_fops_create(init_net(), "fib_trie", S_IRUGO, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + if (!proc_net_fops_create(init_net(), "fib_triestat", S_IRUGO, &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + if (!proc_net_fops_create(init_net(), "route", S_IRUGO, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove("fib_triestat"); + proc_net_remove(init_net(), "fib_triestat"); out2: - proc_net_remove("fib_trie"); + proc_net_remove(init_net(), "fib_trie"); out1: return -ENOMEM; } void __init fib_proc_exit(void) { - proc_net_remove("fib_trie"); - proc_net_remove("fib_triestat"); - proc_net_remove("route"); + proc_net_remove(init_net(), "fib_trie"); + proc_net_remove(init_net(), 
"fib_triestat"); + proc_net_remove(init_net(), "route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 0017ccb..92624cc 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #ifdef CONFIG_IP_MROUTE #include @@ -2585,8 +2586,8 @@ static struct file_operations igmp_mcf_seq_fops = { int __init igmp_mc_proc_init(void) { - proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + proc_net_fops_create(init_net(), "igmp", S_IRUGO, &igmp_mc_seq_fops); + proc_net_fops_create(init_net(), "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); return 0; } #endif diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index afa60b9..8b649c5 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -1252,7 +1253,7 @@ static int __init ip_auto_config(void) __be32 addr; #ifdef CONFIG_PROC_FS - proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops); + proc_net_fops_create(init_net(), "pnp", S_IRUGO, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ecb5422..af50394 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -63,6 +63,7 @@ #include #include #include +#include #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) #define CONFIG_IP_PIMSM 1 @@ -1906,7 +1907,7 @@ void __init ip_mr_init(void) ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); + proc_net_fops_create(init_net(),"ip_mr_vif", 0, &ipmr_vif_fops); + proc_net_fops_create(init_net(),"ip_mr_cache", 0, &ipmr_mfc_fops); #endif } diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 6c40899..4f44452 100644 --- 
a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -618,12 +619,12 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, int ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops); + proc_net_fops_create(init_net(), "ip_vs_app", 0, &ip_vs_app_fops); return 0; } void ip_vs_app_cleanup(void) { - proc_net_remove("ip_vs_app"); + proc_net_remove(init_net(), "ip_vs_app"); } diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 8086787..0764e0f 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -923,7 +924,7 @@ int ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(init_net(), "ip_vs_conn", 0, &ip_vs_conn_fops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); @@ -939,6 +940,6 @@ void ip_vs_conn_cleanup(void) /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove("ip_vs_conn"); + proc_net_remove(init_net(), "ip_vs_conn"); vfree(ip_vs_conn_tab); } diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index c4e4237..d4bf160 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -2356,8 +2357,8 @@ int ip_vs_control_init(void) return ret; } - proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); + proc_net_fops_create(init_net(), "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(init_net(), "ip_vs_stats",0, &ip_vs_stats_fops); sysctl_header = register_sysctl_table(vs_root_table); @@ -2389,8 +2390,8 @@ void ip_vs_control_cleanup(void) 
cancel_rearming_delayed_work(&defense_work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); - proc_net_remove("ip_vs_stats"); - proc_net_remove("ip_vs"); + proc_net_remove(init_net(), "ip_vs_stats"); + proc_net_remove(init_net(), "ip_vs"); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 22004f8..f8491e7 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -843,7 +843,7 @@ static int __init ip_vs_lblcr_init(void) INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_table(lblcr_root_table); #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); + proc_net_create(init_net(), "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); #endif return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); } @@ -852,7 +852,7 @@ static int __init ip_vs_lblcr_init(void) static void __exit ip_vs_lblcr_cleanup(void) { #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_remove("ip_vs_lblcr"); + proc_net_remove(init_net(), "ip_vs_lblcr"); #endif unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 9d89469..d04cbb0 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -828,14 +828,14 @@ static int __init ip_conntrack_standalone_init(void) #ifdef CONFIG_PROC_FS ret = -ENOMEM; - proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(init_net(), "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(init_net(), "ip_conntrack_expect", 0440, &exp_file_ops); if (!proc_exp) goto cleanup_proc; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); + 
proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, per_net(proc_net_stat, init_net())); if (!proc_stat) goto cleanup_proc_exp; @@ -864,11 +864,11 @@ static int __init ip_conntrack_standalone_init(void) #endif cleanup_proc_stat: #ifdef CONFIG_PROC_FS - remove_proc_entry("ip_conntrack", proc_net_stat); + remove_proc_entry("ip_conntrack", per_net(proc_net_stat, init_net())); cleanup_proc_exp: - proc_net_remove("ip_conntrack_expect"); + proc_net_remove(init_net(), "ip_conntrack_expect"); cleanup_proc: - proc_net_remove("ip_conntrack"); + proc_net_remove(init_net(), "ip_conntrack"); cleanup_init: #endif /* CONFIG_PROC_FS */ ip_conntrack_cleanup(); @@ -884,8 +884,8 @@ static void __exit ip_conntrack_standalone_fini(void) nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops)); #ifdef CONFIG_PROC_FS remove_proc_entry("ip_conntrack", proc_net_stat); - proc_net_remove("ip_conntrack_expect"); - proc_net_remove("ip_conntrack"); + proc_net_remove(init_net(), "ip_conntrack_expect"); + proc_net_remove(init_net(), "ip_conntrack"); #endif /* CONFIG_PROC_FS */ ip_conntrack_cleanup(); } diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 3446d4a..aae660c 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -38,6 +38,7 @@ #include #include #include +#include #define IPQ_QMAX_DEFAULT 1024 #define IPQ_PROC_FS_NAME "ip_queue" @@ -684,7 +685,7 @@ static int __init ip_queue_init(void) goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(init_net(), IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -705,7 +706,7 @@ static int __init ip_queue_init(void) cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(init_net(), IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); @@ -725,7 +726,7 @@ static 
void __exit ip_queue_fini(void) unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(init_net(), IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index b1c1116..779e2c6 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -736,7 +737,7 @@ static int __init ipt_clusterip_init(void) goto cleanup_target; #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); + clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", per_net(proc_net, init_net())); if (!clusterip_procdir) { printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); ret = -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 4db0e73..4bfa2f9 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -483,7 +484,7 @@ static int __init ipt_recent_init(void) #ifdef CONFIG_PROC_FS if (err) return err; - proc_dir = proc_mkdir("ipt_recent", proc_net); + proc_dir = proc_mkdir("ipt_recent", per_net(proc_net, init_net())); if (proc_dir == NULL) { ipt_unregister_match(&recent_match); err = -ENOMEM; @@ -497,7 +498,7 @@ static void __exit ipt_recent_exit(void) BUG_ON(!list_empty(&tables)); ipt_unregister_match(&recent_match); #ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_recent", proc_net); + remove_proc_entry("ipt_recent", per_net(proc_net, init_net())); #endif } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3b31bc6..ebdb56e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -11,6 +11,7 @@ 
#include #include #include +#include #include #include @@ -378,16 +379,16 @@ int __init nf_conntrack_ipv4_compat_init(void) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(init_net(), "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(init_net(), "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, per_net(proc_net_stat, init_net())); if (!proc_stat) goto err3; @@ -397,16 +398,16 @@ int __init nf_conntrack_ipv4_compat_init(void) return 0; err3: - proc_net_remove("ip_conntrack_expect"); + proc_net_remove(init_net(), "ip_conntrack_expect"); err2: - proc_net_remove("ip_conntrack"); + proc_net_remove(init_net(), "ip_conntrack"); err1: return -ENOMEM; } void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", proc_net_stat); - proc_net_remove("ip_conntrack_expect"); - proc_net_remove("ip_conntrack"); + remove_proc_entry("ip_conntrack", per_net(proc_net_stat, init_net())); + proc_net_remove(init_net(), "ip_conntrack_expect"); + proc_net_remove(init_net(), "ip_conntrack"); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index cd873da..c9c5601 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -44,6 +44,7 @@ #include #include #include +#include static int fold_prot_inuse(struct proto *proto) { @@ -372,20 +373,20 @@ int __init ip_misc_proc_init(void) { int rc = 0; - if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_net_fops_create(init_net(), "netstat", S_IRUGO, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_net_fops_create(init_net(), "snmp", S_IRUGO, &snmp_seq_fops)) goto out_snmp; - if 
(!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_net_fops_create(init_net(), "sockstat", S_IRUGO, &sockstat_seq_fops)) goto out_sockstat; out: return rc; out_sockstat: - proc_net_remove("snmp"); + proc_net_remove(init_net(), "snmp"); out_snmp: - proc_net_remove("netstat"); + proc_net_remove(init_net(), "netstat"); out_netstat: rc = -ENOMEM; goto out; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a6c63bb..38fe668 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -926,13 +927,13 @@ static struct file_operations raw_seq_fops = { int __init raw_proc_init(void) { - if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops)) + if (!proc_net_fops_create(init_net(), "raw", S_IRUGO, &raw_seq_fops)) return -ENOMEM; return 0; } void __init raw_proc_exit(void) { - proc_net_remove("raw"); + proc_net_remove(init_net(), "raw"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2daa0dc..8be7506 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -105,6 +105,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -3178,15 +3179,15 @@ int __init ip_rt_init(void) #ifdef CONFIG_PROC_FS { struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ - if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || + if (!proc_net_fops_create(init_net(), "rt_cache", S_IRUGO, &rt_cache_seq_fops) || !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, - proc_net_stat))) { + per_net(proc_net_stat, init_net())))) { return -ENOMEM; } rtstat_pde->proc_fops = &rt_cpu_seq_fops; } #ifdef CONFIG_NET_CLS_ROUTE - create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); + create_proc_read_entry("rt_acct", 0, per_net(proc_net, init_net()), ip_rt_acct_read, NULL); #endif #endif #ifdef CONFIG_XFRM diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 12de90a..ee4306f 100644 --- 
a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -2252,7 +2253,7 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo) afinfo->seq_fops->llseek = seq_lseek; afinfo->seq_fops->release = seq_release_private; - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(init_net(), afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -2264,7 +2265,7 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(init_net(), afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f230eee..e8a3d96 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -159,7 +159,7 @@ static __init int tcpprobe_init(void) if (IS_ERR(tcpw.fifo)) return PTR_ERR(tcpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_net_fops_create(init_net(), procname, S_IRUSR, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_send_probe); @@ -169,7 +169,7 @@ static __init int tcpprobe_init(void) pr_info("TCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(init_net(), procname); err0: kfifo_free(tcpw.fifo); return ret; @@ -179,7 +179,7 @@ module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { kfifo_free(tcpw.fifo); - proc_net_remove(procname); + proc_net_remove(init_net(), procname); unregister_jprobe(&tcp_send_probe); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cfff930..7527183 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -101,6 +101,7 @@ #include #include #include +#include #include "udp_impl.h" /* @@ -1643,7 +1644,7 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo) afinfo->seq_fops->llseek = seq_lseek; afinfo->seq_fops->release = seq_release_private; - p = proc_net_fops_create(afinfo->name, 
S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(init_net(), afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -1655,7 +1656,7 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(init_net(), afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6aded83..52bd4dd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include @@ -2780,14 +2781,14 @@ static struct file_operations if6_fops = { int __init if6_proc_init(void) { - if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) + if (!proc_net_fops_create(init_net(), "if_inet6", S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } void if6_proc_exit(void) { - proc_net_remove("if_inet6"); + proc_net_remove(init_net(), "if_inet6"); } #endif /* CONFIG_PROC_FS */ @@ -4143,6 +4144,6 @@ void __exit addrconf_cleanup(void) rtnl_unlock(); #ifdef CONFIG_PROC_FS - proc_net_remove("if_inet6"); + proc_net_remove(init_net(), "if_inet6"); #endif } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index a960476..c42bad9 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -575,7 +576,7 @@ static struct file_operations ac6_seq_fops = { int __init ac6_proc_init(void) { - if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_net_fops_create(init_net(), "anycast6", S_IRUGO, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -583,7 +584,7 @@ int __init ac6_proc_init(void) void ac6_proc_exit(void) { - proc_net_remove("anycast6"); + proc_net_remove(init_net(), "anycast6"); } #endif diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 624fae2..350aedb 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -22,6 +22,7 @@ #include #include +#include #include 
#include @@ -690,7 +691,7 @@ static struct file_operations ip6fl_seq_fops = { void ip6_flowlabel_init(void) { #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + proc_net_fops_create(init_net(), "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); #endif } @@ -698,6 +699,6 @@ void ip6_flowlabel_cleanup(void) { del_timer(&ip6_fl_gc_timer); #ifdef CONFIG_PROC_FS - proc_net_remove("ip6_flowlabel"); + proc_net_remove(init_net(), "ip6_flowlabel"); #endif } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a1c231a..2759571 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -51,6 +51,7 @@ #include #include +#include #include #include @@ -2661,8 +2662,8 @@ int __init igmp6_init(struct net_proto_family *ops) np->hop_limit = 1; #ifdef CONFIG_PROC_FS - proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops); - proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + proc_net_fops_create(init_net(), "igmp6", S_IRUGO, &igmp6_mc_seq_fops); + proc_net_fops_create(init_net(), "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); #endif return 0; @@ -2674,7 +2675,7 @@ void igmp6_cleanup(void) igmp6_socket = NULL; /* for safety */ #ifdef CONFIG_PROC_FS - proc_net_remove("mcfilter6"); - proc_net_remove("igmp6"); + proc_net_remove(init_net(), "mcfilter6"); + proc_net_remove(init_net(), "igmp6"); #endif } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index e774be7..45b64a5 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -674,7 +675,7 @@ static int __init ip6_queue_init(void) goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(init_net(), IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -695,7 +696,7 @@ static int __init ip6_queue_init(void) cleanup_sysctl: 
unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(init_net(), IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); @@ -715,7 +716,7 @@ static void __exit ip6_queue_fini(void) unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(init_net(), IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 35249d8..1827885 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_net_devsnmp6; @@ -244,22 +245,22 @@ int __init ipv6_misc_proc_init(void) { int rc = 0; - if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_net_fops_create(init_net(), "snmp6", S_IRUGO, &snmp6_seq_fops)) goto proc_snmp6_fail; - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); + proc_net_devsnmp6 = proc_mkdir("dev_snmp6", per_net(proc_net, init_net())); if (!proc_net_devsnmp6) goto proc_dev_snmp6_fail; - if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops)) + if (!proc_net_fops_create(init_net(), "sockstat6", S_IRUGO, &sockstat6_seq_fops)) goto proc_sockstat6_fail; out: return rc; proc_sockstat6_fail: - proc_net_remove("dev_snmp6"); + proc_net_remove(init_net(), "dev_snmp6"); proc_dev_snmp6_fail: - proc_net_remove("snmp6"); + proc_net_remove(init_net(), "snmp6"); proc_snmp6_fail: rc = -ENOMEM; goto out; @@ -267,9 +268,9 @@ proc_snmp6_fail: void ipv6_misc_proc_exit(void) { - proc_net_remove("sockstat6"); - proc_net_remove("dev_snmp6"); - proc_net_remove("snmp6"); + proc_net_remove(init_net(), "sockstat6"); + proc_net_remove(init_net(), "dev_snmp6"); + proc_net_remove(init_net(), "snmp6"); } #else /* CONFIG_PROC_FS */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 
4ae1b19..2e1825c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,6 +50,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_MIP6 #include #endif @@ -1274,13 +1275,13 @@ static struct file_operations raw6_seq_fops = { int __init raw6_proc_init(void) { - if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_net_fops_create(init_net(), "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; return 0; } void raw6_proc_exit(void) { - proc_net_remove("raw6"); + proc_net_remove(init_net(), "raw6"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8c3d568..8c9fef9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -2458,11 +2459,11 @@ void __init ip6_route_init(void) SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); fib6_init(); #ifdef CONFIG_PROC_FS - p = proc_net_create("ipv6_route", 0, rt6_proc_info); + p = proc_net_create(init_net(), "ipv6_route", 0, rt6_proc_info); if (p) p->owner = THIS_MODULE; - proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_net_fops_create(init_net(), "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); #endif #ifdef CONFIG_XFRM xfrm6_init(); @@ -2478,8 +2479,8 @@ void ip6_route_cleanup(void) fib6_rules_cleanup(); #endif #ifdef CONFIG_PROC_FS - proc_net_remove("ipv6_route"); - proc_net_remove("rt6_stats"); + proc_net_remove(init_net(), "ipv6_route"); + proc_net_remove(init_net(), "rt6_stats"); #endif #ifdef CONFIG_XFRM xfrm6_fini(); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index b7463df..bda8775 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -353,7 +354,7 @@ int __init ipx_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - ipx_proc_dir = proc_mkdir("ipx", proc_net); + ipx_proc_dir = proc_mkdir("ipx", per_net(proc_net, init_net())); if (!ipx_proc_dir) goto out; @@ -381,7 +382,7 @@ out_socket: out_route: 
remove_proc_entry("interface", ipx_proc_dir); out_interface: - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", per_net(proc_net, init_net())); goto out; } @@ -390,7 +391,7 @@ void __exit ipx_proc_exit(void) remove_proc_entry("interface", ipx_proc_dir); remove_proc_entry("route", ipx_proc_dir); remove_proc_entry("socket", ipx_proc_dir); - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", per_net(proc_net, init_net())); } #else /* CONFIG_PROC_FS */ diff --git a/net/irda/irproc.c b/net/irda/irproc.c index 88b9c43..0af0f55 100644 --- a/net/irda/irproc.c +++ b/net/irda/irproc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -66,7 +67,7 @@ void __init irda_proc_register(void) int i; struct proc_dir_entry *d; - proc_irda = proc_mkdir("irda", proc_net); + proc_irda = proc_mkdir("irda", per_net(proc_net, init_net())); if (proc_irda == NULL) return; proc_irda->owner = THIS_MODULE; @@ -92,7 +93,7 @@ void __exit irda_proc_unregister(void) for (i=0; i #include #include +#include #include @@ -3292,7 +3293,7 @@ static struct xfrm_mgr pfkeyv2_mgr = static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); - remove_proc_entry("net/pfkey", NULL); + remove_proc_entry("pfkey", per_net(proc_net, init_net())); sock_unregister(PF_KEY); proto_unregister(&key_proto); } @@ -3309,7 +3310,7 @@ static int __init ipsec_pfkey_init(void) goto out_unregister_key_proto; #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL) + if (create_proc_read_entry("pfkey", 0, per_net(proc_net, init_net()), pfkey_read_proc, NULL) == NULL) goto out_sock_unregister; #endif err = xfrm_register_km(&pfkeyv2_mgr); diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 19308fe..4d0a804 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -231,7 +232,7 @@ int __init llc_proc_init(void) 
int rc = -ENOMEM; struct proc_dir_entry *p; - llc_proc_dir = proc_mkdir("llc", proc_net); + llc_proc_dir = proc_mkdir("llc", per_net(proc_net, init_net())); if (!llc_proc_dir) goto out; llc_proc_dir->owner = THIS_MODULE; @@ -254,7 +255,7 @@ out: out_core: remove_proc_entry("socket", llc_proc_dir); out_socket: - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", per_net(proc_net, init_net())); goto out; } @@ -262,5 +263,5 @@ void llc_proc_exit(void) { remove_proc_entry("socket", llc_proc_dir); remove_proc_entry("core", llc_proc_dir); - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", per_net(proc_net, init_net())); } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 291b8c6..cafa00c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "nf_internals.h" @@ -269,7 +270,7 @@ void __init netfilter_init(void) } #ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", proc_net); + proc_net_netfilter = proc_mkdir("netfilter", per_net(proc_net, init_net())); if (!proc_net_netfilter) panic("cannot create netfilter proc entry"); #endif diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 2587b49..314dc2c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -430,14 +431,14 @@ static int __init nf_conntrack_standalone_init(void) return ret; #ifdef CONFIG_PROC_FS - proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(init_net(), "nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(init_net(), "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc_exp) goto cleanup_proc; - proc_stat = create_proc_entry("nf_conntrack", 
S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, per_net(proc_net_stat, init_net())); if (!proc_stat) goto cleanup_proc_exp; @@ -458,11 +459,11 @@ static int __init nf_conntrack_standalone_init(void) cleanup_proc_stat: #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); + remove_proc_entry("nf_conntrack", per_net(proc_net_stat, init_net())); cleanup_proc_exp: - proc_net_remove("nf_conntrack_expect"); + proc_net_remove(init_net(), "nf_conntrack_expect"); cleanup_proc: - proc_net_remove("nf_conntrack"); + proc_net_remove(init_net(), "nf_conntrack"); cleanup_init: #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); @@ -475,9 +476,9 @@ static void __exit nf_conntrack_standalone_fini(void) unregister_sysctl_table(nf_ct_sysctl_header); #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); - proc_net_remove("nf_conntrack_expect"); - proc_net_remove("nf_conntrack"); + remove_proc_entry("nf_conntrack", per_net(proc_net_stat, init_net())); + proc_net_remove(init_net(), "nf_conntrack_expect"); + proc_net_remove(init_net(), "nf_conntrack"); #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8996584..9fb3491 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -800,7 +801,7 @@ int xt_proto_init(int af) #ifdef CONFIG_PROC_FS strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(init_net(), buf, 0440, &xt_file_ops); if (!proc) goto out; proc->data = (void *) ((unsigned long) af | (TABLE << 16)); @@ -808,14 +809,14 @@ int xt_proto_init(int af) strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = 
proc_net_fops_create(init_net(), buf, 0440, &xt_file_ops); if (!proc) goto out_remove_tables; proc->data = (void *) ((unsigned long) af | (MATCH << 16)); strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(init_net(), buf, 0440, &xt_file_ops); if (!proc) goto out_remove_matches; proc->data = (void *) ((unsigned long) af | (TARGET << 16)); @@ -827,12 +828,12 @@ int xt_proto_init(int af) out_remove_matches: strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(init_net(), buf); out_remove_tables: strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(init_net(), buf); out: return -1; #endif @@ -846,15 +847,15 @@ void xt_proto_fini(int af) strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(init_net(), buf); strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(init_net(), buf); strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(init_net(), buf); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f28bf69..21c07df 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -737,13 +738,13 @@ static int __init xt_hashlimit_init(void) printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; } - hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net); + hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", per_net(proc_net, init_net())); if (!hashlimit_procdir4) { 
printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); goto err3; } - hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); + hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", per_net(proc_net, init_net())); if (!hashlimit_procdir6) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); @@ -751,7 +752,7 @@ static int __init xt_hashlimit_init(void) } return 0; err4: - remove_proc_entry("ipt_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", per_net(proc_net, init_net())); err3: kmem_cache_destroy(hashlimit_cachep); err2: @@ -763,8 +764,8 @@ err1: static void __exit xt_hashlimit_fini(void) { - remove_proc_entry("ipt_hashlimit", proc_net); - remove_proc_entry("ip6t_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", per_net(proc_net, init_net())); + remove_proc_entry("ip6t_hashlimit", per_net(proc_net, init_net())); kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 383dd4e..3c00f48 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -60,6 +60,7 @@ #include #include #include +#include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -1806,7 +1807,7 @@ static int __init netlink_proto_init(void) sock_register(&netlink_family_ops); #ifdef CONFIG_PROC_FS - proc_net_fops_create("netlink", 0, &netlink_seq_fops); + proc_net_fops_create(init_net(), "netlink", 0, &netlink_seq_fops); #endif /* The netlink device handler may be needed early. 
*/ rtnetlink_init(); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 43bbe2c..601d58c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -41,6 +41,7 @@ #include #include #include +#include #include static int nr_ndevs = 4; @@ -1442,9 +1443,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_net_fops_create("nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_net_fops_create(init_net(), "nr", S_IRUGO, &nr_info_fops); + proc_net_fops_create(init_net(), "nr_neigh", S_IRUGO, &nr_neigh_fops); + proc_net_fops_create(init_net(), "nr_nodes", S_IRUGO, &nr_nodes_fops); out: return rc; fail: @@ -1472,9 +1473,9 @@ static void __exit nr_exit(void) { int i; - proc_net_remove("nr"); - proc_net_remove("nr_neigh"); - proc_net_remove("nr_nodes"); + proc_net_remove(init_net(), "nr"); + proc_net_remove(init_net(), "nr_neigh"); + proc_net_remove(init_net(), "nr_nodes"); nr_loopback_clear(); nr_rt_free(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index da73e8a..04e295a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -1911,7 +1912,7 @@ static struct file_operations packet_seq_fops = { static void __exit packet_exit(void) { - proc_net_remove("packet"); + proc_net_remove(init_net(), "packet"); unregister_netdevice_notifier(&packet_netdev_notifier); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); @@ -1926,7 +1927,7 @@ static int __init packet_init(void) sock_register(&packet_family_ops); register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create("packet", 0, &packet_seq_fops); + proc_net_fops_create(init_net(), "packet", 0, &packet_seq_fops); out: return rc; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9e27946..5532340 100644 --- a/net/rose/af_rose.c +++ 
b/net/rose/af_rose.c @@ -45,6 +45,7 @@ #include #include #include +#include static int rose_ndevs = 10; @@ -1550,10 +1551,10 @@ static int __init rose_proto_init(void) rose_add_loopback_neigh(); - proc_net_fops_create("rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops); + proc_net_fops_create(init_net(), "rose", S_IRUGO, &rose_info_fops); + proc_net_fops_create(init_net(), "rose_neigh", S_IRUGO, &rose_neigh_fops); + proc_net_fops_create(init_net(), "rose_nodes", S_IRUGO, &rose_nodes_fops); + proc_net_fops_create(init_net(), "rose_routes", S_IRUGO, &rose_routes_fops); out: return rc; fail: @@ -1580,10 +1581,10 @@ static void __exit rose_exit(void) { int i; - proc_net_remove("rose"); - proc_net_remove("rose_neigh"); - proc_net_remove("rose_nodes"); - proc_net_remove("rose_routes"); + proc_net_remove(init_net(), "rose"); + proc_net_remove(init_net(), "rose_neigh"); + proc_net_remove(init_net(), "rose_nodes"); + proc_net_remove(init_net(), "rose_routes"); rose_loopback_clear(); rose_rt_free(); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 29975d9..e7bd87b 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -133,7 +134,7 @@ int rxrpc_proc_init(void) { struct proc_dir_entry *p; - proc_rxrpc = proc_mkdir("rxrpc", proc_net); + proc_rxrpc = proc_mkdir("rxrpc", per_net(proc_net, init_net())); if (!proc_rxrpc) goto error; proc_rxrpc->owner = THIS_MODULE; @@ -169,7 +170,7 @@ int rxrpc_proc_init(void) error_calls: remove_proc_entry("calls", proc_rxrpc); error_proc: - remove_proc_entry("rxrpc", proc_net); + remove_proc_entry("rxrpc", per_net(proc_net, init_net())); error: return -ENOMEM; } /* end rxrpc_proc_init() */ @@ -185,7 +186,7 @@ void rxrpc_proc_cleanup(void) remove_proc_entry("connections", 
proc_rxrpc); remove_proc_entry("calls", proc_rxrpc); - remove_proc_entry("rxrpc", proc_net); + remove_proc_entry("rxrpc", per_net(proc_net, init_net())); } /* end rxrpc_proc_cleanup() */ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 65825f4..da7e1eb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -36,6 +36,7 @@ #include #include +#include #include #include @@ -1296,7 +1297,7 @@ static int __init pktsched_init(void) register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); - proc_net_fops_create("psched", 0, &psched_fops); + proc_net_fops_create(init_net(), "psched", 0, &psched_fops); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 225f39b..ea94951 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -59,6 +59,7 @@ #include #include #include +#include /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; @@ -93,7 +94,7 @@ static __init int sctp_proc_init(void) { if (!proc_net_sctp) { struct proc_dir_entry *ent; - ent = proc_mkdir("net/sctp", NULL); + ent = proc_mkdir("sctp", per_net(proc_net, init_net())); if (ent) { ent->owner = THIS_MODULE; proc_net_sctp = ent; @@ -126,7 +127,7 @@ static void sctp_proc_exit(void) if (proc_net_sctp) { proc_net_sctp = NULL; - remove_proc_entry("net/sctp", NULL); + remove_proc_entry("sctp", per_net(proc_net, init_net())); } } diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index bd98124..996b71c 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -22,6 +22,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_MISC @@ -266,7 +267,7 @@ rpc_proc_init(void) dprintk("RPC: registering /proc/net/rpc\n"); if (!proc_net_rpc) { struct proc_dir_entry *ent; - ent = proc_mkdir("rpc", proc_net); + ent = proc_mkdir("rpc", per_net(proc_net, init_net())); if (ent) { ent->owner = THIS_MODULE; proc_net_rpc = ent; @@ -280,7 +281,7 @@ rpc_proc_exit(void) dprintk("RPC: unregistering /proc/net/rpc\n"); if (proc_net_rpc) { 
proc_net_rpc = NULL; - remove_proc_entry("net/rpc", NULL); + remove_proc_entry("rpc", per_net(proc_net, init_net())); } } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2f208c7..30855e1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,6 +116,7 @@ #include #include #include +#include int sysctl_unix_max_dgram_qlen __read_mostly = 10; @@ -2072,7 +2073,7 @@ static int __init af_unix_init(void) sock_register(&unix_family_ops); #ifdef CONFIG_PROC_FS - proc_net_fops_create("unix", 0, &unix_seq_fops); + proc_net_fops_create(init_net(), "unix", 0, &unix_seq_fops); #endif unix_sysctl_register(); out: @@ -2083,7 +2084,7 @@ static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); unix_sysctl_unregister(); - proc_net_remove("unix"); + proc_net_remove(init_net(), "unix"); proto_unregister(&unix_proto); } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 930ea59..1fcb0b8 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -28,6 +28,7 @@ #include /* WAN router API definitions */ #include #include +#include #include @@ -287,7 +288,7 @@ static struct file_operations wandev_fops = { int __init wanrouter_proc_init(void) { struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, proc_net); + proc_router = proc_mkdir(ROUTER_NAME, per_net(proc_net, init_net())); if (!proc_router) goto fail; @@ -303,7 +304,7 @@ int __init wanrouter_proc_init(void) fail_stat: remove_proc_entry("config", proc_router); fail_config: - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, per_net(proc_net, init_net())); fail: return -ENOMEM; } @@ -316,7 +317,7 @@ void wanrouter_proc_cleanup(void) { remove_proc_entry("config", proc_router); remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, per_net(proc_net, init_net())); } /* diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index a11837d..7bcf98d 100644 --- a/net/x25/x25_proc.c +++ 
b/net/x25/x25_proc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -212,7 +213,7 @@ int __init x25_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - x25_proc_dir = proc_mkdir("x25", proc_net); + x25_proc_dir = proc_mkdir("x25", per_net(proc_net, init_net())); if (!x25_proc_dir) goto out; @@ -231,7 +232,7 @@ out: out_socket: remove_proc_entry("route", x25_proc_dir); out_route: - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", per_net(proc_net, init_net())); goto out; } @@ -239,7 +240,7 @@ void __exit x25_proc_exit(void) { remove_proc_entry("route", x25_proc_dir); remove_proc_entry("socket", x25_proc_dir); - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", per_net(proc_net, init_net())); } #else /* CONFIG_PROC_FS */ -- 1.4.4.1.g278f From or.gerlitz at gmail.com Thu Jan 25 11:01:49 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Thu, 25 Jan 2007 21:01:49 +0200 Subject: [openib-general] [PATCH 1/2] rdma_cm: add support to join IPOIB multicast groups In-Reply-To: <000001c740a4$b1501c20$ff0da8c0@amr.corp.intel.com> References: <45B8CF69.8080708@voltaire.com> <000001c740a4$b1501c20$ff0da8c0@amr.corp.intel.com> Message-ID: <15ddcffd0701251101p9211f67g37c3967a73426609@mail.gmail.com> On 1/25/07, Sean Hefty wrote: > >The only missing piece here, as we agreed yesterday is to allow using > >PS_IPOIB IDs for unicast traffic over librdmacm, i guess this should be > >fairly simple to add. > I'm adding this now. I would like to include all of these changes as part of > the multicast code push for OFED/upstream. I hope to test this today. Cool, just to make sure... the push to OFED should include both the kernel and librdmacm changes... i did not see a commit of the librdmacm patch to your librdmacm git tree. thanks for all your help and responsiveness Or. From ebiederm at xmission.com Thu Jan 25 11:00:33 2007 From: ebiederm at xmission.com (Eric W. 
Biederman) Date: Thu, 25 Jan 2007 12:00:33 -0700 Subject: [openib-general] [PATCH RFC 31/31] net: Add etun driver In-Reply-To: References: Message-ID: <11697516404048-git-send-email-ebiederm@xmission.com> From: Eric W. Biederman - unquoted etun is a simple two headed tunnel driver that at the link layer looks like ethernet. Its target audience is communicating between network namespaces but it is general enough it may have other uses as well. Signed-off-by: Eric W. Biederman --- drivers/net/Kconfig | 14 ++ drivers/net/Makefile | 1 + drivers/net/etun.c | 470 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 485 insertions(+), 0 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8aa8dd0..969d3df 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -119,6 +119,20 @@ config TUN If you don't know what to use this for, you don't need it. +config ETUN + tristate "Ethernet tunnel device driver support" + depends on SYSFS + ---help--- + ETUN provices a pair of network devices that can be used for + configuring interesting topolgies. What one devices transmits + the other receives and vice versa. The link level framing + is ethernet for wide compatibility with network stacks. + + To compile this driver as a module, choose M here: the module + will be called etun. + + If you don't know what to use this for, you don't need it.
+ config NET_SB1000 tristate "General Instruments Surfboard 1000" depends on PNP diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 4c0d4e5..396af4f 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -185,6 +185,7 @@ obj-$(CONFIG_MACSONIC) += macsonic.o obj-$(CONFIG_MACMACE) += macmace.o obj-$(CONFIG_MAC89x0) += mac89x0.o obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_ETUN) += etun.o obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_DL2K) += dl2k.o obj-$(CONFIG_R8169) += r8169.o diff --git a/drivers/net/etun.c b/drivers/net/etun.c new file mode 100644 index 0000000..1dd8cd8 --- /dev/null +++ b/drivers/net/etun.c @@ -0,0 +1,470 @@ +/* + * ETUN - Universal ETUN device driver. + * Copyright (C) 2006 Linux Networx + * + */ + +#define DRV_NAME "etun" +#define DRV_VERSION "1.0" +#define DRV_DESCRIPTION "Ethernet pseudo tunnel device driver" +#define DRV_COPYRIGHT "(C) 2007 Linux Networx" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Device cheksum strategy. + * + * etun is designed to a be a pair of virutal devices + * connecting two network stack instances. + * + * Typically it will either be used with ethernet bridging or + * it will be used to route packets between the two stacks. + * + * The only checksum offloading I can do is to completely + * skip the checksumming step all together. + * + * When used for ethernet bridging I don't believe any + * checksum off loading is safe. + * - If my source is an external interface the checksum may be + * invalid so I don't want to report I have already checked it. + * - If my destination is an external interface I don't want to put + * a packet on the wire with someone computing the checksum. + * + * When used for routing between two stacks checksums should + * be as unnecessary as they are on the loopback device. 
+ * + * So by default I am safe and disable checksumming and + * other advanced features like SG and TSO. + * + * However because I think these features could be useful + * I provide the ethtool functions to and enable/disable + * them at runtime. + * + * If you think you can correctly enable these go ahead. + * For checksums both the transmitter and the receiver must + * agree before the are actually disabled. + */ + +#define ETUN_NUM_STATS 1 +static struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[ETUN_NUM_STATS] = { + { "partner_ifindex" }, +}; + +struct etun_info { + struct net_device *rx_dev; + unsigned ip_summed; + struct net_device_stats stats; + struct list_head list; + struct net_device *dev; +}; + +/* + * I have to hold the rtnl_lock during device delete. + * So I use the rtnl_lock to protect my list manipulations + * as well. Crude but simple. + */ +static LIST_HEAD(etun_list); + +/* + * The higher levels take care of making this non-reentrant (it's + * called with bh's disabled). 
+ */ +static int etun_xmit(struct sk_buff *skb, struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + struct etun_info *rx_info = rx_dev->priv; + + tx_info->stats.tx_packets++; + tx_info->stats.tx_bytes += skb->len; + + /* Drop the skb state that was needed to get here */ + skb_orphan(skb); + if (skb->dst) + skb->dst = dst_pop(skb->dst); /* Allow for smart routing */ + + /* Switch to the receiving device */ + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, rx_dev); + skb->dev = rx_dev; + skb->ip_summed = CHECKSUM_NONE; + + /* If both halves agree no checksum is needed */ + if (tx_dev->features & NETIF_F_NO_CSUM) + skb->ip_summed = rx_info->ip_summed; + + rx_dev->last_rx = jiffies; + rx_info->stats.rx_packets++; + rx_info->stats.rx_bytes += skb->len; + netif_rx(skb); + + return 0; +} + +static struct net_device_stats *etun_get_stats(struct net_device *dev) +{ + struct etun_info *info = dev->priv; + return &info->stats; +} + +/* ethtool interface */ +static int etun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + cmd->supported = 0; + cmd->advertising = 0; + cmd->speed = SPEED_10000; /* Memory is fast! 
*/ + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_TP; + cmd->phy_address = 0; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_DISABLE; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + return 0; +} + +static void etun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + strcpy(info->fw_version, "N/A"); +} + +static void etun_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + switch(stringset) { + case ETH_SS_STATS: + memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys)); + break; + case ETH_SS_TEST: + default: + break; + } +} + +static int etun_get_stats_count(struct net_device *dev) +{ + return ETUN_NUM_STATS; +} + +static void etun_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct etun_info *info = dev->priv; + + data[0] = info->rx_dev->ifindex; +} + +static u32 etun_get_rx_csum(struct net_device *dev) +{ + struct etun_info *info = dev->priv; + return info->ip_summed == CHECKSUM_UNNECESSARY; +} + +static int etun_set_rx_csum(struct net_device *dev, u32 data) +{ + struct etun_info *info = dev->priv; + + info->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; + + return 0; +} + +static u32 etun_get_tx_csum(struct net_device *dev) +{ + return (dev->features & NETIF_F_NO_CSUM) != 0; +} + +static int etun_set_tx_csum(struct net_device *dev, u32 data) +{ + dev->features &= NETIF_F_NO_CSUM; + if (data) + dev->features |= NETIF_F_NO_CSUM; + + return 0; +} + +static struct ethtool_ops etun_ethtool_ops = { + .get_settings = etun_get_settings, + .get_drvinfo = etun_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_rx_csum = etun_get_rx_csum, + .set_rx_csum = etun_set_rx_csum, + .get_tx_csum = etun_get_tx_csum, + .set_tx_csum = etun_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, +#if 0 /* Does just setting the bit successfuly emulate tso?
*/ + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +#endif + .get_strings = etun_get_strings, + .get_stats_count = etun_get_stats_count, + .get_ethtool_stats = etun_get_ethtool_stats, + .get_perm_addr = ethtool_op_get_perm_addr, +}; + +static int etun_open(struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + if (rx_dev->flags & IFF_UP) { + netif_carrier_on(tx_dev); + netif_carrier_on(rx_dev); + } + netif_start_queue(tx_dev); + return 0; +} + +static int etun_stop(struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + netif_stop_queue(tx_dev); + if (netif_carrier_ok(tx_dev)) { + netif_carrier_off(tx_dev); + netif_carrier_off(rx_dev); + } + return 0; +} + +static void etun_set_multicast_list(struct net_device *dev) +{ + /* Nothing sane I can do here */ + return; +} + +static int etun_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + return -EOPNOTSUPP; +} + +/* Only allow letters and numbers in an etun device name */ +static int is_valid_name(const char *name) +{ + const char *ptr; + for (ptr = name; *ptr; ptr++) { + if (!isalnum(*ptr)) + return 0; + } + return 1; +} + +static struct net_device *etun_alloc(net_t net, const char *name) +{ + struct net_device *dev; + struct etun_info *info; + int err; + + if (!name || !is_valid_name(name)) + return ERR_PTR(-EINVAL); + + dev = alloc_netdev(sizeof(struct etun_info), name, ether_setup); + if (!dev) + return ERR_PTR(-ENOMEM); + + info = dev->priv; + info->dev = dev; + dev->nd_net = net; + + random_ether_addr(dev->dev_addr); + dev->tx_queue_len = 0; /* A queue is silly for a loopback device */ + dev->hard_start_xmit = etun_xmit; + dev->get_stats = etun_get_stats; + dev->open = etun_open; + dev->stop = etun_stop; + dev->set_multicast_list = etun_set_multicast_list; + dev->do_ioctl = etun_ioctl; + dev->features = NETIF_F_FRAGLIST + | NETIF_F_HIGHDMA + | 
NETIF_F_LLTX; + dev->flags = IFF_BROADCAST | IFF_MULTICAST |IFF_PROMISC; + dev->ethtool_ops = &etun_ethtool_ops; + dev->destructor = free_netdev; + err = register_netdev(dev); + if (err) { + free_netdev(dev); + dev = ERR_PTR(err); + goto out; + } + netif_carrier_off(dev); +out: + return dev; +} + +static int etun_alloc_pair(net_t net, const char *name0, const char *name1) +{ + struct net_device *dev0, *dev1; + struct etun_info *info0, *info1; + + dev0 = etun_alloc(net, name0); + if (IS_ERR(dev0)) { + return PTR_ERR(dev0); + } + info0 = dev0->priv; + + dev1 = etun_alloc(net, name1); + if (IS_ERR(dev1)) { + unregister_netdev(dev0); + return PTR_ERR(dev1); + } + info1 = dev1->priv; + + dev_hold(dev0); + dev_hold(dev1); + info0->rx_dev = dev1; + info1->rx_dev = dev0; + + /* Only place one member of the pair on the list + * so I don't confuse list_for_each_entry_safe, + * by deleting two list entries at once. + */ + rtnl_lock(); + list_add(&info0->list, &etun_list); + INIT_LIST_HEAD(&info1->list); + rtnl_unlock(); + + return 0; +} + +static int etun_unregister_pair(struct net_device *dev0) +{ + struct etun_info *info0, *info1; + struct net_device *dev1; + + ASSERT_RTNL(); + + if (!dev0) + return -ENODEV; + + info0 = dev0->priv; + dev1 = info0->rx_dev; + info1 = dev1->priv; + + /* Drop the cross device references */ + dev_put(dev0); + dev_put(dev1); + + /* Remove from the etun list */ + if (!list_empty(&info0->list)) + list_del_init(&info0->list); + if (!list_empty(&info1->list)) + list_del_init(&info1->list); + + unregister_netdevice(dev0); + unregister_netdevice(dev1); + return 0; +} + +static int etun_noget(char *buffer, struct kernel_param *kp) +{ + return 0; +} + +static int etun_newif(const char *val, struct kernel_param *kp) +{ + char name0[IFNAMSIZ], name1[IFNAMSIZ]; + const char *mid; + int len, len0, len1; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Avoid frustration by removing trailing whitespace */ + len = strlen(val); + while (isspace(val[len - 
1])) + len--; + + /* Split the string into 2 names */ + mid = memchr(val, ',', len); + if (!mid) + return -EINVAL; + + /* Get the first device name */ + len0 = mid - val; + if (len0 > sizeof(name0) - 1) + len = sizeof(name0) - 1; + strncpy(name0, val, len0); + name0[len0] = '\0'; + + /* And the second device name */ + len1 = len - (len0 + 1); + if (len1 > sizeof(name1) - 1) + len1 = sizeof(name1) - 1; + strncpy(name1, mid + 1, len1); + name1[len1] = '\0'; + + return etun_alloc_pair(current->nsproxy->net_ns, name0, name1); +} + +static int etun_delif(const char *val, struct kernel_param *kp) +{ + char name[IFNAMSIZ]; + int len; + struct net_device *dev; + int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Avoid frustration by removing trailing whitespace */ + len = strlen(val); + while (isspace(val[len - 1])) + len--; + + /* Get the device name */ + if (len > sizeof(name) - 1) + return -EINVAL; + strncpy(name, val, len); + name[len] = '\0'; + + /* Double check I don't have strange characters in my device name */ + if (!is_valid_name(name)) + return -EINVAL; + + rtnl_lock(); + err = -ENODEV; + dev = __dev_get_by_name(current->nsproxy->net_ns, name); + err = etun_unregister_pair(dev); + rtnl_unlock(); + return err; +} + +static int __init etun_init(void) +{ + printk(KERN_INFO "etun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); + printk(KERN_INFO "etun: %s\n", DRV_COPYRIGHT); + + return 0; +} + +static void etun_cleanup(void) +{ + struct etun_info *info, *tmp; + rtnl_lock(); + list_for_each_entry_safe(info, tmp, &etun_list, list) { + etun_unregister_pair(info->dev); + } + rtnl_unlock(); +} + +module_param_call(newif, etun_newif, etun_noget, NULL, S_IWUSR); +module_param_call(delif, etun_delif, etun_noget, NULL, S_IWUSR); +module_init(etun_init); +module_exit(etun_cleanup); +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR("Eric Biederman "); +MODULE_LICENSE("GPL"); -- 1.4.4.1.g278f From ebiederm at xmission.com Thu Jan 25 11:00:03 2007 From: ebiederm at 
xmission.com (Eric W. Biederman) Date: Thu, 25 Jan 2007 12:00:03 -0700 Subject: [openib-general] [PATCH RFC 1/31] net: Add net_namespace_type.h to allow for per network namespace variables. In-Reply-To: References: Message-ID: <11697516331146-git-send-email-ebiederm@xmission.com> The problem: To properly implement a ``level 2'' network namespace we need to move many of the networking stack global variables into the network namespace. We want to keep it explicit that the code is accessing a variable in a network namespace. We want to be able to completely compile out the network namespace support so we can do comparative performance testing, and so as not to penalize users who don't need network namespace support. Because the network stack is a moving target we want something simple that allows for the bulk of the changes to be merged before we enable network namespace support. My biggest challenge when looking into this was to find an approach that would allow the code to compile out, in a way that does not yield any performance overhead and does not make the code ugly. While playing with the different possibilities I discovered that gcc will not pass 0 byte structures that are arguments to functions and instead will simply optimize them away. This appears to be true on i386 all of the way back to gcc-2.95 and I verified that it also works with gcc 4.1 on x86_64. Since this is part of the ABI I never expect it to change. Hopefully gcc uses this nice optimization on all architectures, I suspect so as C++ allows passing function arguments of type void in certain circumstances. Using this observation I was able to come up with a network namespace implementation that allows the changes to completely compile out when we don't build the kernel with network namespace support. This patch implements my dummy network namespace support that should completely compile out. Further patches will add the real version.
Starting with the dummy gives a quick hint of where I am going and allows for dependencies to be overcome. When doing my proof of concept implementation one of the other problems I had was that as the network stack comes in so many modular pieces figuring out how to get their global variables into the network namespace structure was a challenge. The basic technique used by our per cpu variables for having the linker build and dynamically change structures for us appears applicable here and a lot less nuisance than what I did before so I am implementing a tailored version of that technique as well, and again this makes it very simple to compile the code out. Signed-off-by: Eric W. Biederman --- include/linux/net_namespace_type.h | 52 ++++++++++++++++++++++++++++++++++++ 1 files changed, 52 insertions(+), 0 deletions(-) diff --git a/include/linux/net_namespace_type.h b/include/linux/net_namespace_type.h new file mode 100644 index 0000000..8173f59 --- /dev/null +++ b/include/linux/net_namespace_type.h @@ -0,0 +1,52 @@ +/* + * Definition of the network namespace reference type + * And operations upon it. + */ +#ifndef __LINUX_NET_NAMESPACE_TYPE_H +#define __LINUX_NET_NAMESPACE_TYPE_H + +#define __pernetname(name) per_net__##name + +typedef struct {} net_t; + +#define __data_pernet + +/* Look up a per network namespace variable */ +static inline unsigned long __per_net_offset(net_t net) { return 0; } + +/* Like per_net but returns a pseudo variable address that must be moved + * __per_net_offset() bytes before it will point to a real variable. + * Useful for static initializers.
+ */ +#define __per_net_base(name) __pernetname(name) + +/* Get the network namespace reference from a per_net variable address */ +#define net_of(ptr, name) ({ net_t net; ptr; net; }) + +/* Look up a per network namespace variable */ +#define per_net(name, net) \ + (*(__per_net_offset(net), &__per_net_base(name))) + +/* Are the two network namespaces the same */ +static inline int net_eq(net_t a, net_t b) { return 1; } +/* Get an unsigned value appropriate for hashing the network namespace */ +static inline unsigned int net_hval(net_t net) { return 0; } + +/* Convert to and from to and from void pointers */ +static inline void *net_to_voidp(net_t net) { return NULL; } +static inline net_t net_from_voidp(void *ptr) { net_t net; return net; } + +static inline int null_net(net_t net) { return 0; } + +#define DEFINE_PER_NET(type, name) \ + __data_pernet __typeof__(type) __pernetname(name) + +#define DECLARE_PER_NET(type, name) \ + extern __typeof__(type) __pernetname(name) + +#define EXPORT_PER_NET_SYMBOL(var) \ + EXPORT_SYMBOL(__pernetname(var)) +#define EXPORT_PER_NET_SYMBOL_GPL(var) \ + EXPORT_SYMBOL_GPL(__pernetname(var)) + +#endif /* __LINUX_NET_NAMESPACE_TYPE_H */ -- 1.4.4.1.g278f From swise at opengridcomputing.com Thu Jan 25 11:13:23 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:23 -0600 Subject: [openib-general] [PATCH 01/12] ofed_1_2 Handle Ethernet neighbour updates during route resolution. In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191323.30934.62536.stgit@dell3.ogc.int> Handle Ethernet neighbour updates during route resolution. The IWCM uses the ib_addr services to do route resolution (neighbour discovery in the IP world). The ib_addr netevent callback routine, however, currently only acts on InfiniBand neighbour updates. It needs to act on ethernet neighbour updates as well.
This patch just removes filtering on device type altogether and will trigger on any neighbour updates where the nud_state is valid. This simplifies the code some. Signed-off-by: Steve Wise --- drivers/infiniband/core/addr.c | 3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index af93979..d2bb5a9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -360,8 +360,7 @@ static int netevent_callback(struct noti if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; - if (neigh->dev->type == ARPHRD_INFINIBAND && - (neigh->nud_state & NUD_VALID)) { + if (neigh->nud_state & NUD_VALID) { set_timeout(jiffies); } } From swise at opengridcomputing.com Thu Jan 25 11:13:21 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:21 -0600 Subject: [openib-general] [PATCH 00/12] ofed_1_2 - Neighbour update support Message-ID: <20070125191321.30934.74542.stgit@dell3.ogc.int> Michael/Vlad: Here are the backports for snooping arp packets to generate neighbour update netevents. Also included is the addr.c patch to act on all valid neigh update events. If this series looks good to you then I'll push this up and you all can pull it from my git tree. Steve.
From swise at opengridcomputing.com Thu Jan 25 11:13:27 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:27 -0600 Subject: [openib-general] [PATCH 03/12] ofed_1_2 Backport 2.6.16: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191327.30934.79593.stgit@dell3.ogc.int> Backport 2.6.16: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.16/include/src/netevent.c | 68 ++++++++++++++++++++- .../2.6.16/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- 2 files changed, 65 insertions(+), 79 deletions(-) diff --git a/kernel_addons/backport/2.6.16/include/src/netevent.c b/kernel_addons/backport/2.6.16/include/src/netevent.c index 5ffadd1..188283c 100644 --- a/kernel_addons/backport/2.6.16/include/src/netevent.c +++ b/kernel_addons/backport/2.6.16/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *dev2) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = 
__constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.16/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.16/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.16/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - From swise at opengridcomputing.com Thu Jan 25 11:13:25 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:25 -0600 Subject: [openib-general] [PATCH 02/12] ofed_1_2 Backport 2.6.17: Simulate neighbour update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: 
<20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191325.30934.69339.stgit@dell3.ogc.int> Backport 2.6.17: Simulate neighbour update events by snooping ARP packets Needed to support iWARP devices on backported kernels. This also allows using the current drivers/infiniband/core/addr.c which requires netevents as well. For each incoming ARP request or response, we add a destructor function to the skb. When the skb is freed (ie when the ARP subsystem has updated the neighbour entry if needed) our destructor function will get called and we can generate a NEIGH_UPDATE netevent. When the first consumer registers for netevents, we add an ARP packet filter to start snooping. When the last consumer unregisters, we remove the filter. Changes: - add the snoop code to the backport netevent.c file. - remove the backport patch to revert addr.c to snoop ARP packets. Signed-off-by: Steve Wise --- .../backport/2.6.17/include/src/netevent.c | 67 ++++++++++++++++++++ .../2.6.17/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- 2 files changed, 65 insertions(+), 78 deletions(-) diff --git a/kernel_addons/backport/2.6.17/include/src/netevent.c b/kernel_addons/backport/2.6.17/include/src/netevent.c index 35d02c3..26a0920 100644 --- a/kernel_addons/backport/2.6.17/include/src/netevent.c +++ b/kernel_addons/backport/2.6.17/include/src/netevent.c @@ -15,6 +15,55 @@ #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct 
net_device *dev2) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); @@ -30,8 +79,13 @@ static ATOMIC_NOTIFIER_HEAD(netevent_not int register_netevent_notifier(struct notifier_block *nb) { int err; - err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -47,7 +101,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = atomic_notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.17/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - From swise at opengridcomputing.com Thu Jan 25 11:13:29 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:29 -0600 Subject: [openib-general] [PATCH 04/12] ofed_1_2 Backport sles10: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: 
<20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191329.30934.44885.stgit@dell3.ogc.int> Backport sles10: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.16_sles10/include/src/netevent.c | 68 ++++++++++++++++++++- .../addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- 2 files changed, 65 insertions(+), 79 deletions(-) diff --git a/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c b/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c index 5ffadd1..188283c 100644 --- a/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c +++ b/kernel_addons/backport/2.6.16_sles10/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *dev2) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + 
dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.16_sles10/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.16_sles10/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.16_sles10/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - From swise at opengridcomputing.com Thu Jan 25 11:13:31 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:31 -0600 Subject: [openib-general] [PATCH 05/12] ofed_1_2 Backport 2.6.15: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: 
<20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191331.30934.15439.stgit@dell3.ogc.int> Backport 2.6.15: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.15/include/src/netevent.c | 68 ++++++++++++++++++++- .../2.6.15/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- 2 files changed, 65 insertions(+), 79 deletions(-) diff --git a/kernel_addons/backport/2.6.15/include/src/netevent.c b/kernel_addons/backport/2.6.15/include/src/netevent.c index 5ffadd1..188283c 100644 --- a/kernel_addons/backport/2.6.15/include/src/netevent.c +++ b/kernel_addons/backport/2.6.15/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *dev2) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + 
mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.15/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.15/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.15/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == 
htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - From swise at opengridcomputing.com Thu Jan 25 11:13:46 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:46 -0600 Subject: [openib-general] [PATCH 12/12] ofed_1_2 Backport sles9sp3: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191346.30934.29432.stgit@dell3.ogc.int> Backport sles9sp3: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../2.6.5_sles9_sp3/include/src/netevent.c | 68 ++++++++++++++++++++- .../addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- .../2.6.5_sles9_sp3/addr_3926_to_2_6_13.patch | 13 ---- 3 files changed, 65 insertions(+), 92 deletions(-) diff --git a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c index 5ffadd1..6a8df29 100644 --- a/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c +++ b/kernel_addons/backport/2.6.5_sles9_sp3/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct 
sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.5_sles9_sp3/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.5_sles9_sp3/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - 
Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - diff --git a/kernel_patches/backport/2.6.5_sles9_sp3/addr_3926_to_2_6_13.patch b/kernel_patches/backport/2.6.5_sles9_sp3/addr_3926_to_2_6_13.patch deleted file mode 100644 index 30c2f49..0000000 --- a/kernel_patches/backport/2.6.5_sles9_sp3/addr_3926_to_2_6_13.patch +++ /dev/null @@ 
-1,13 +0,0 @@ -Index: linux-kernel/drivers/infiniband/core/addr.c -=================================================================== ---- linux-kernel.orig/drivers/infiniband/core/addr.c 2005-11-24 14:03:07.000000000 +0200 -+++ linux-kernel/drivers/infiniband/core/addr.c 2005-11-24 14:03:08.000000000 +0200 -@@ -316,7 +316,7 @@ void ib_addr_cancel(struct ib_addr *addr - EXPORT_SYMBOL(ib_addr_cancel); - - static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -- struct packet_type *pkt, struct net_device *orig_dev) -+ struct packet_type *pkt) - { - struct arphdr *arp_hdr; - From swise at opengridcomputing.com Thu Jan 25 11:13:36 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:36 -0600 Subject: [openib-general] [PATCH 07/12] ofed_1_2 Backport 2.6.14: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191335.30934.86815.stgit@dell3.ogc.int> Backport 2.6.14: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.14/include/src/netevent.c | 68 ++++++++++++++++++++- .../2.6.14/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- 2 files changed, 65 insertions(+), 79 deletions(-) diff --git a/kernel_addons/backport/2.6.14/include/src/netevent.c b/kernel_addons/backport/2.6.14/include/src/netevent.c index 5ffadd1..188283c 100644 --- a/kernel_addons/backport/2.6.14/include/src/netevent.c +++ b/kernel_addons/backport/2.6.14/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + 
skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *dev2) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.14/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.14/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.14/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - From swise at opengridcomputing.com Thu Jan 25 11:13:40 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:40 -0600 Subject: [openib-general] [PATCH 09/12] ofed_1_2 Backport 2.6.12: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: 
<20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191340.30934.3655.stgit@dell3.ogc.int> Backport 2.6.12: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.12/include/src/netevent.c | 68 ++++++++++++++++++++- .../2.6.12/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- .../backport/2.6.12/addr_3926_to_2_6_13.patch | 13 ---- 3 files changed, 65 insertions(+), 92 deletions(-) diff --git a/kernel_addons/backport/2.6.12/include/src/netevent.c b/kernel_addons/backport/2.6.12/include/src/netevent.c index 5ffadd1..6a8df29 100644 --- a/kernel_addons/backport/2.6.12/include/src/netevent.c +++ b/kernel_addons/backport/2.6.12/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + 
dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.12/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.12/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.12/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - diff --git a/kernel_patches/backport/2.6.12/addr_3926_to_2_6_13.patch b/kernel_patches/backport/2.6.12/addr_3926_to_2_6_13.patch deleted file mode 100644 index 2e765f5..0000000 --- a/kernel_patches/backport/2.6.12/addr_3926_to_2_6_13.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: linux-2.6.13/drivers/infiniband/core/addr.c 
-=================================================================== ---- linux-2.6.13.orig/drivers/infiniband/core/addr.c 2005-11-01 13:12:09.000000000 +0200 -+++ linux-2.6.13/drivers/infiniband/core/addr.c 2005-11-01 13:47:36.000000000 +0200 -@@ -310,7 +310,7 @@ void ib_addr_cancel(struct ib_addr *addr - EXPORT_SYMBOL(ib_addr_cancel); - - static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -- struct packet_type *pkt, struct net_device *orig_dev) -+ struct packet_type *pkt) - { - struct arphdr *arp_hdr; - From swise at opengridcomputing.com Thu Jan 25 11:13:42 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:42 -0600 Subject: [openib-general] [PATCH 10/12] ofed_1_2 Backport 2.6.11: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191342.30934.58992.stgit@dell3.ogc.int> Backport 2.6.11: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.11/include/src/netevent.c | 68 ++++++++++++++++++++- .../2.6.11/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- .../backport/2.6.11/addr_3926_to_2_6_13.patch | 13 ---- 3 files changed, 65 insertions(+), 92 deletions(-) diff --git a/kernel_addons/backport/2.6.11/include/src/netevent.c b/kernel_addons/backport/2.6.11/include/src/netevent.c index 5ffadd1..6a8df29 100644 --- a/kernel_addons/backport/2.6.11/include/src/netevent.c +++ b/kernel_addons/backport/2.6.11/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + 
skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.11/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.11/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.11/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - diff --git a/kernel_patches/backport/2.6.11/addr_3926_to_2_6_13.patch b/kernel_patches/backport/2.6.11/addr_3926_to_2_6_13.patch deleted file mode 100644 index 574f433..0000000 --- a/kernel_patches/backport/2.6.11/addr_3926_to_2_6_13.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: linux-kernel/drivers/infiniband/core/addr.c 
-=================================================================== ---- linux-kernel.orig/drivers/infiniband/core/addr.c 2005-11-24 15:18:00.000000000 +0200 -+++ linux-kernel/drivers/infiniband/core/addr.c 2005-11-24 15:20:18.000000000 +0200 -@@ -310,7 +310,7 @@ void ib_addr_cancel(struct ib_addr *addr - EXPORT_SYMBOL(ib_addr_cancel); - - static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -- struct packet_type *pkt, struct net_device *orig_dev) -+ struct packet_type *pkt) - { - struct arphdr *arp_hdr; - From swise at opengridcomputing.com Thu Jan 25 11:13:33 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:33 -0600 Subject: [openib-general] [PATCH 06/12] ofed_1_2 Backport ubuntu606: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191333.30934.82627.stgit@dell3.ogc.int> Backport ubuntu606: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../2.6.15_ubuntu606/include/src/netevent.c | 68 ++++++++++++++++++++- .../addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- 2 files changed, 65 insertions(+), 79 deletions(-) diff --git a/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c b/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c index 5ffadd1..188283c 100644 --- a/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c +++ b/kernel_addons/backport/2.6.15_ubuntu606/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + 
memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *dev2) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.15_ubuntu606/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - From swise at opengridcomputing.com Thu Jan 25 11:13:38 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:38 -0600 Subject: [openib-general] [PATCH 08/12] ofed_1_2 Backport 2.6.13: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: 
<20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191338.30934.12044.stgit@dell3.ogc.int> Backport 2.6.13: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.13/include/src/netevent.c | 68 ++++++++++++++++++++- .../2.6.13/addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- .../backport/2.6.13/addr_3926_to_2_6_13.patch | 13 ---- 3 files changed, 65 insertions(+), 92 deletions(-) diff --git a/kernel_addons/backport/2.6.13/include/src/netevent.c b/kernel_addons/backport/2.6.13/include/src/netevent.c index 5ffadd1..6a8df29 100644 --- a/kernel_addons/backport/2.6.13/include/src/netevent.c +++ b/kernel_addons/backport/2.6.13/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + 
dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.13/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.13/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.13/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - diff --git a/kernel_patches/backport/2.6.13/addr_3926_to_2_6_13.patch b/kernel_patches/backport/2.6.13/addr_3926_to_2_6_13.patch deleted file mode 100644 index 2e765f5..0000000 --- a/kernel_patches/backport/2.6.13/addr_3926_to_2_6_13.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: linux-2.6.13/drivers/infiniband/core/addr.c 
-=================================================================== ---- linux-2.6.13.orig/drivers/infiniband/core/addr.c 2005-11-01 13:12:09.000000000 +0200 -+++ linux-2.6.13/drivers/infiniband/core/addr.c 2005-11-01 13:47:36.000000000 +0200 -@@ -310,7 +310,7 @@ void ib_addr_cancel(struct ib_addr *addr - EXPORT_SYMBOL(ib_addr_cancel); - - static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -- struct packet_type *pkt, struct net_device *orig_dev) -+ struct packet_type *pkt) - { - struct arphdr *arp_hdr; - From swise at opengridcomputing.com Thu Jan 25 11:13:44 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:13:44 -0600 Subject: [openib-general] [PATCH 11/12] ofed_1_2 Backport rhel4u4: Simulate neigh update events by snooping ARP packets In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <20070125191344.30934.57205.stgit@dell3.ogc.int> Backport rhel4u4: Simulate neigh update events by snooping ARP packets Signed-off-by: Steve Wise --- .../backport/2.6.9_U4/include/src/netevent.c | 68 ++++++++++++++++++++- .../addr_1_netevents_revert_to_2_6_17.patch | 76 ----------------------- .../backport/2.6.9_U4/addr_3926_to_2_6_13.patch | 13 ---- 3 files changed, 65 insertions(+), 92 deletions(-) diff --git a/kernel_addons/backport/2.6.9_U4/include/src/netevent.c b/kernel_addons/backport/2.6.9_U4/include/src/netevent.c index 5ffadd1..6a8df29 100644 --- a/kernel_addons/backport/2.6.9_U4/include/src/netevent.c +++ b/kernel_addons/backport/2.6.9_U4/include/src/netevent.c @@ -13,10 +13,57 @@ * Fixes: */ -#include -#include #include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_MUTEX(lock); +static int count; + +static void destructor(struct sk_buff *skb) +{ + struct neighbour *n; + u8 *arp_ptr; + __be32 gw; + + /* Pull the SPA */ + arp_ptr = skb->nh.raw + sizeof(struct arphdr) + 
skb->dev->addr_len; + memcpy(&gw, arp_ptr, 4); + n = neigh_lookup(&arp_tbl, &gw, skb->dev); + if (n) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + return; +} + +static int arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt) +{ + struct arphdr *arp_hdr; + u16 op; + + arp_hdr = (struct arphdr *) skb->nh.raw; + op = ntohs(arp_hdr->ar_op); + + if ((op == ARPOP_REQUEST || op == ARPOP_REPLY) && !skb->destructor) + skb->destructor = destructor; + + kfree_skb(skb); + return 0; +} + +static struct packet_type arp = { + .type = __constant_htons(ETH_P_ARP), + .func = arp_recv, + .af_packet_priv = (void *)1, +}; static struct notifier_block *netevent_notif_chain; @@ -34,6 +81,12 @@ int register_netevent_notifier(struct no int err; err = notifier_chain_register(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (count++ == 0) + dev_add_pack(&arp); + mutex_unlock(&lock); + } return err; } @@ -49,7 +102,16 @@ int register_netevent_notifier(struct no int unregister_netevent_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netevent_notif_chain, nb); + int err; + + err = notifier_chain_unregister(&netevent_notif_chain, nb); + if (!err) { + mutex_lock(&lock); + if (--count == 0) + dev_remove_pack(&arp); + mutex_unlock(&lock); + } + return err; } /** diff --git a/kernel_patches/backport/2.6.9_U4/addr_1_netevents_revert_to_2_6_17.patch b/kernel_patches/backport/2.6.9_U4/addr_1_netevents_revert_to_2_6_17.patch deleted file mode 100644 index 316d8d2..0000000 --- a/kernel_patches/backport/2.6.9_U4/addr_1_netevents_revert_to_2_6_17.patch +++ /dev/null @@ -1,76 +0,0 @@ -commit e795d092507d571d66f2ec98d3efdc7dd284bf80 -Author: Tom Tucker -Date: Sun Jul 30 20:44:19 2006 -0700 - - [NET] infiniband: Cleanup ib_addr module to use the netevents - - Signed-off-by: Tom Tucker - Signed-off-by: Steve Wise - Signed-off-by: David S. 
Miller - -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1205e80..d294bbc 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -35,7 +35,6 @@ #include - #include - #include - #include --#include - #include - - MODULE_AUTHOR("Sean Hefty"); -@@ -327,22 +326,25 @@ void rdma_addr_cancel(struct rdma_dev_ad - } - EXPORT_SYMBOL(rdma_addr_cancel); - --static int netevent_callback(struct notifier_block *self, unsigned long event, -- void *ctx) -+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pkt, struct net_device *orig_dev) - { -- if (event == NETEVENT_NEIGH_UPDATE) { -- struct neighbour *neigh = ctx; -+ struct arphdr *arp_hdr; - -- if (neigh->dev->type == ARPHRD_INFINIBAND && -- (neigh->nud_state & NUD_VALID)) { -- set_timeout(jiffies); -- } -- } -+ arp_hdr = (struct arphdr *) skb->nh.raw; -+ -+ if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || -+ arp_hdr->ar_op == htons(ARPOP_REPLY)) -+ set_timeout(jiffies); -+ -+ kfree_skb(skb); - return 0; - } - --static struct notifier_block nb = { -- .notifier_call = netevent_callback -+static struct packet_type addr_arp = { -+ .type = __constant_htons(ETH_P_ARP), -+ .func = addr_arp_recv, -+ .af_packet_priv = (void*) 1, - }; - - static int addr_init(void) -@@ -351,13 +353,13 @@ static int addr_init(void) - if (!addr_wq) - return -ENOMEM; - -- register_netevent_notifier(&nb); -+ dev_add_pack(&addr_arp); - return 0; - } - - static void addr_cleanup(void) - { -- unregister_netevent_notifier(&nb); -+ dev_remove_pack(&addr_arp); - destroy_workqueue(addr_wq); - } - - diff --git a/kernel_patches/backport/2.6.9_U4/addr_3926_to_2_6_13.patch b/kernel_patches/backport/2.6.9_U4/addr_3926_to_2_6_13.patch deleted file mode 100644 index 30c2f49..0000000 --- a/kernel_patches/backport/2.6.9_U4/addr_3926_to_2_6_13.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: linux-kernel/drivers/infiniband/core/addr.c 
-=================================================================== ---- linux-kernel.orig/drivers/infiniband/core/addr.c 2005-11-24 14:03:07.000000000 +0200 -+++ linux-kernel/drivers/infiniband/core/addr.c 2005-11-24 14:03:08.000000000 +0200 -@@ -316,7 +316,7 @@ void ib_addr_cancel(struct ib_addr *addr - EXPORT_SYMBOL(ib_addr_cancel); - - static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, -- struct packet_type *pkt, struct net_device *orig_dev) -+ struct packet_type *pkt) - { - struct arphdr *arp_hdr; - From sean.hefty at intel.com Thu Jan 25 11:30:37 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 25 Jan 2007 11:30:37 -0800 Subject: [openib-general] librdmacm code confusion wrt iWarp Message-ID: <000101c740b7$4abbc140$ff0da8c0@amr.corp.intel.com> Steve, I'm looking at rdma_create_qp() in librdmacm. There's a section of code in there: if (id->ps == RDMA_PS_UDP) ret = ucma_init_ud_qp(id_priv, qp); else ret = ucma_init_ib_qp(id_priv, qp); Both of these calls transition the QP to INIT, so that the user can post receives before trying to establish a connection. iWarp is handled the same as IB, which confuses me, since it is treated differently in the kernel. I'm assuming that the librdmacm works for you over iWarp, but I'd like to understand this better. - Sean From shemminger at linux-foundation.org Thu Jan 25 11:29:32 2007 From: shemminger at linux-foundation.org (Stephen Hemminger) Date: Thu, 25 Jan 2007 11:29:32 -0800 Subject: [openib-general] [PATCH RFC 2/31] net: Implement a place holder network namespace In-Reply-To: <11697516332484-git-send-email-ebiederm@xmission.com> References: <11697516332484-git-send-email-ebiederm@xmission.com> Message-ID: <20070125112932.3506751e@freekitty> On Thu, 25 Jan 2007 12:00:04 -0700 "Eric W. Biederman" wrote: > From: Eric W. 
Biederman - unquoted > > Many of the changes to the network stack will simply be adding a > network namespace parameter to function calls or moving variables > from globals to being per network namespace. When those variables > have initializers that cannot statically compute the proper value, > a function that runs at the creation and destruction of network > namespaces will need to be registered, and the logic will need to > be changed to accommodate that. > > Adding unconditional support for these functions ensures that even when > everything else is compiled out the modified network stack logic will > continue to run correctly. > > This patch adds struct pernet_operations that has an init (constructor) > and an exit (destructor) method. When registered the init method > is called for every existing namespace, and when unregistered the > exit method is called for every existing namespace. When a new > network namespace is created all of the init methods are called > in the order in which they were registered, and when a network namespace > is destroyed the exit methods are called in the reverse order in > which they were registered. > > There are two distinct types of pernet_operations recognized: subsys and > device. At creation all subsys init functions are called before device > init functions, and at destruction all device exit functions are called > before subsys exit function. For other ordering the preservation > of the order of registration combined with the various kinds of > kernel initcalls should be sufficient. > > Signed-off-by: Eric W.
Biederman > + > +static inline net_t get_net(net_t net) { return net; } > +static inline void put_net(net_t net) {} > +static inline net_t hold_net(net_t net) { return net; } > +static inline void release_net(net_t net) {} > + > +#define __per_net_start ((char *)0) > +#define __per_net_end ((char *)0 Don't use these use NULL > + > +static inline int copy_net(int flags, struct task_struct *tsk) { return 0; } > + > +/* Don't let the list of network namespaces change */ > +static inline void net_lock(void) {} > +static inline void net_unlock(void) {} Don't make all one line, or use #define instead. > + > +#define for_each_net(VAR) if (1) > + > +extern net_t net_template; > + > +#define NET_CREATE 0x0001 /* A network namespace has been created */ > +#define NET_DESTROY 0x0002 /* A network namespace is being destroyed */ > + > +struct pernet_operations { > + struct list_head list; > + int (*init)(net_t net); > + void (*exit)(net_t net); > +}; > + > +extern int register_pernet_subsys(struct pernet_operations *); > +extern void unregister_pernet_subsys(struct pernet_operations *); > +extern int register_pernet_device(struct pernet_operations *); > +extern void unregister_pernet_device(struct pernet_operations *); > + > +#endif /* __NET_NET_NAMESPACE_H */ > diff --git a/net/core/Makefile b/net/core/Makefile > index 73272d5..554dbdc 100644 > --- a/net/core/Makefile > +++ b/net/core/Makefile > @@ -3,7 +3,7 @@ > # > > obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ > - gen_stats.o gen_estimator.o > + gen_stats.o gen_estimator.o net_namespace.o > > obj-$(CONFIG_SYSCTL) += sysctl_net_core.o > > diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c > new file mode 100644 > index 0000000..4ae266d > --- /dev/null > +++ b/net/core/net_namespace.c > @@ -0,0 +1,149 @@ > +#include > +#include > + > +/* > + * Our network namespace constructor/destructor lists > + */ > + > +static LIST_HEAD(pernet_list); > +static struct list_head *first_device = 
&pernet_list; > +static DEFINE_MUTEX(net_mutex); > +net_t net_template; > + > +static int register_pernet_operations(struct list_head *list, > + struct pernet_operations *ops) > +{ > + net_t net, undo_net; > + int error; > + > + error = 0; > + list_add_tail(&ops->list, list); > + for_each_net(net) { > + if (ops->init) { > + error = ops->init(net); > + if (error) > + goto out_undo; > + } > + } > +out: > + return error; > + > +out_undo: > + /* If I have an error cleanup all namespaces I initialized */ > + list_del(&ops->list); > + for_each_net(undo_net) { > + if (net_eq(undo_net, net)) > + goto undone; > + if (ops->exit) > + ops->exit(undo_net); > + } > +undone: > + goto out; > +} > + > +static void unregister_pernet_operations(struct pernet_operations *ops) > +{ > + net_t net; > + > + list_del(&ops->list); > + for_each_net(net) > + if (ops->exit) > + ops->exit(net); > +} > + > You should use RCU for this because registering/unregistering network namespaces is obviously a much rarer occurrence than referencing them. -- Stephen Hemminger From changquing.tang at hp.com Thu Jan 25 11:46:21 2007 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 25 Jan 2007 19:46:21 -0000 Subject: [openib-general] git question In-Reply-To: <459D2EAA.9040204@pathscale.com> References: <459D2EAA.9040204@pathscale.com> Message-ID: <349DCDA352EACF42A0C49FA6DCEA840330097C@G3W0634.americas.hpqcorp.net> I have trouble to get the latest OFED 1.2 source because of the firewall or other reason I don't know. Is there a checkout version somewhere I can access, for example. Daily build version ? Thanks. 
--CQ > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Robert Walsh > Sent: Thursday, January 04, 2007 10:43 AM > To: openib-general at openib.org > Subject: [openib-general] git question > > Hi all, > > I was away for all of December, so imagine my surprise when I > came back and saw git up and running, svn deprecated, etc. > :-) Anyway, I poked around looking for some information on > where stuff had moved to. I found this page: > > https://wiki.openfabrics.org/tiki-index.php?page=Downloading+Code+From+the+OFA+git+Repositories > > This tells me where the userspace and kernel components have > moved to. > Is there a similar document telling me where things like the > trunk/contrib directory is? For that matter, is there a > mapping at all between https://openib.org/svn/x/y/z and > git:/something/something/something? > > BTW: I know that this could be somewhere in the month-long > backlog of email I'm still wading through, but I haven't > noticed yet and it probably should be added to that wiki page > in any case. > > Regards, > Robert. > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From dotanb at dev.mellanox.co.il Thu Jan 25 11:46:26 2007 From: dotanb at dev.mellanox.co.il (dotanb at dev.mellanox.co.il) Date: Thu, 25 Jan 2007 21:46:26 +0200 (IST) Subject: [openib-general] [RFT] [PATCH] Add ABI compatibility for apps linked against libibverbs 1.0 In-Reply-To: References: Message-ID: <4933.85.65.223.185.1169754386.squirrel@dev.mellanox.co.il> Hi Roland. > OK, if no one else is going to test this, I guess I have to... > > Anyway, I just built Open MPI 1.1.2 against libibverbs 1.0 and tried > NetPIPE 3.6.2's NPmpi with libibverbs.git master branch + the ABI > compat below.
I discovered a few problems (including a stupid problem > with a test change to configure.in that got left behind by mistake, > which would lead to the ABI compat stuff not being enabled at all). > > Updated patch is below. I would still appreciate test reports with > other apps, but now I think I'm confident enough that I will push this > out on the libibverbs.git master branch soon. > I am planning to test this issue during the next week and I will update you with the results. Dotan From swise at opengridcomputing.com Thu Jan 25 11:50:40 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 13:50:40 -0600 Subject: [openib-general] librdmacm code confusion wrt iWarp In-Reply-To: <000101c740b7$4abbc140$ff0da8c0@amr.corp.intel.com> References: <000101c740b7$4abbc140$ff0da8c0@amr.corp.intel.com> Message-ID: <1169754640.1799.19.camel@stevo-desktop> On Thu, 2007-01-25 at 11:30 -0800, Sean Hefty wrote: > Steve, > > I'm looking at rdma_create_qp() in librdmacm. There's a section of code in > there: > > if (id->ps == RDMA_PS_UDP) > ret = ucma_init_ud_qp(id_priv, qp); > else > ret = ucma_init_ib_qp(id_priv, qp); > > Both of these calls transition the QP to INIT, so that the user can post > receives before trying to establish a connection. iWarp is handled the same as > IB, which confuses me, since it is treated differently in the kernel. I'm > assuming that the librdmacm works for you over iWarp, but I'd like to understand > this better. > > - Sean I think the kernel could probably also be common and it would work. iWARP QPs don't have a separate RESET state like IB, so the qp_modify RESET->INIT really becomes a noop. iWARP QPs start out in IDLE state and it's valid to post receives in IDLE. We map iWARP IDLE -> IB INIT and IB RESET. Steve.
From mshefty at ichips.intel.com Thu Jan 25 11:58:56 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 25 Jan 2007 11:58:56 -0800 Subject: [openib-general] librdmacm code confusion wrt iWarp In-Reply-To: <1169754640.1799.19.camel@stevo-desktop> References: <000101c740b7$4abbc140$ff0da8c0@amr.corp.intel.com> <1169754640.1799.19.camel@stevo-desktop> Message-ID: <45B90C00.7060003@ichips.intel.com> > I think the kernel could probably also be common and it would work. > iWARP QPs don't have a separate RESET state like IB, so the qp_modify > RESET->INIT really becomes a noop. iWARP QPs start out in IDLE state > and its valid to post receives in IDLE. We map iWARP IDLE -> IB INIT > and IB RESET. Part of the call to transition the IB QP to INIT involves looking up the pkey_index. Is this simulated as well? My intention is to change the librdmacm to call rdma_init_qp_attr() when transitioning the IB QP from reset->init. (This is necessary to get the qkey used by the ipoib port space.) So, the userspace code should get the same qp_attr and qp_attr_mask as that used in the kernel when a QP is created. - Sean From swise at opengridcomputing.com Thu Jan 25 12:13:38 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 14:13:38 -0600 Subject: [openib-general] librdmacm code confusion wrt iWarp In-Reply-To: <45B90C00.7060003@ichips.intel.com> References: <000101c740b7$4abbc140$ff0da8c0@amr.corp.intel.com> <1169754640.1799.19.camel@stevo-desktop> <45B90C00.7060003@ichips.intel.com> Message-ID: <1169756018.1799.23.camel@stevo-desktop> On Thu, 2007-01-25 at 11:58 -0800, Sean Hefty wrote: > > I think the kernel could probably also be common and it would work. > > iWARP QPs don't have a separate RESET state like IB, so the qp_modify > > RESET->INIT really becomes a noop. iWARP QPs start out in IDLE state > > and its valid to post receives in IDLE. We map iWARP IDLE -> IB INIT > > and IB RESET.
> > Part of the call to transition the IB QP to INIT involves looking up the > pkey_index. Is this simulated as well? > Looks like it is. From rdreier at cisco.com Thu Jan 25 12:18:18 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 12:18:18 -0800 Subject: [openib-general] [RFT] [PATCH] Add ABI compatibility for apps linked against libibverbs 1.0 In-Reply-To: <4933.85.65.223.185.1169754386.squirrel@dev.mellanox.co.il> (dotanb@dev.mellanox.co.il's message of "Thu, 25 Jan 2007 21:46:26 +0200 (IST)") References: <4933.85.65.223.185.1169754386.squirrel@dev.mellanox.co.il> Message-ID: > I am planning to test this issue during the next week and i will update > you with the results. great, thanks. From shemminger at linux-foundation.org Thu Jan 25 12:30:17 2007 From: shemminger at linux-foundation.org (Stephen Hemminger) Date: Thu, 25 Jan 2007 12:30:17 -0800 Subject: [openib-general] [PATCH RFC 1/31] net: Add net_namespace_type.h to allow for per network namespace variables. In-Reply-To: <11697516331146-git-send-email-ebiederm@xmission.com> References: <11697516331146-git-send-email-ebiederm@xmission.com> Message-ID: <20070125123017.1408941e@freekitty> Can all this be a nop if a CONFIG option is not selected? > > diff --git a/include/linux/net_namespace_type.h b/include/linux/net_namespace_type.h > new file mode 100644 > index 0000000..8173f59 > --- /dev/null > +++ b/include/linux/net_namespace_type.h > @@ -0,0 +1,52 @@ > +/* > + * Definition of the network namespace reference type > + * And operations upon it. > + */ > +#ifndef __LINUX_NET_NAMESPACE_TYPE_H > +#define __LINUX_NET_NAMESPACE_TYPE_H > + > +#define __pernetname(name) per_net__##name Code obfuscation, please don't do that > +typedef struct {} net_t; No typedef for this please. 
> + > +#define __data_pernet > + > +/* Look up a per network namespace variable */ > +static inline unsigned long __per_net_offset(net_t net) { return 0; } > + > +/* Like per_net but returns a pseudo variable address that must be moved > + * __per_net_offset() bytes before it will point to a real variable. > + * Useful for static initializers. > + */ > +#define __per_net_base(name) __pernetname(name) > + > +/* Get the network namespace reference from a per_net variable address */ > +#define net_of(ptr, name) ({ net_t net; ptr; net; }) > + > +/* Look up a per network namespace variable */ > +#define per_net(name, net) \ > + (*(__per_net_offset(net), &__per_net_base(name))) > + > +/* Are the two network namespaces the same */ > +static inline int net_eq(net_t a, net_t b) { return 1; } > +/* Get an unsigned value appropriate for hashing the network namespace */ > +static inline unsigned int net_hval(net_t net) { return 0; } > + > +/* Convert to and from to and from void pointers */ > +static inline void *net_to_voidp(net_t net) { return NULL; } > +static inline net_t net_from_voidp(void *ptr) { net_t net; return net; } > + > +static inline int null_net(net_t net) { return 0; } > + > +#define DEFINE_PER_NET(type, name) \ > + __data_pernet __typeof__(type) __pernetname(name) > + > +#define DECLARE_PER_NET(type, name) \ > + extern __typeof__(type) __pernetname(name) > + > +#define EXPORT_PER_NET_SYMBOL(var) \ > + EXPORT_SYMBOL(__pernetname(var)) > +#define EXPORT_PER_NET_SYMBOL_GPL(var) \ > + EXPORT_SYMBOL_GPL(__pernetname(var)) > + > +#endif /* __LINUX_NET_NAMESPACE_TYPE_H */ -- Stephen Hemminger From ebiederm at xmission.com Thu Jan 25 12:53:11 2007 From: ebiederm at xmission.com (ebiederm at xmission.com) Date: Thu, 25 Jan 2007 13:53:11 -0700 Subject: [openib-general] [PATCH RFC 1/31] net: Add net_namespace_type.h to allow for per network namespace variables. 
In-Reply-To: <20070125123017.1408941e@freekitty> (Stephen Hemminger's message of "Thu, 25 Jan 2007 12:30:17 -0800") References: <11697516331146-git-send-email-ebiederm@xmission.com> <20070125123017.1408941e@freekitty> Message-ID: Stephen Hemminger writes: > Can all this be a nop if a CONFIG option is not selected? That is exactly what this infrastructure supports. What you see is the version that comes into effect when the CONFIG option is not selected. From using an empty structure to replace a pointer to make that a NOP to most of the rest below. >> diff --git a/include/linux/net_namespace_type.h > b/include/linux/net_namespace_type.h >> new file mode 100644 >> index 0000000..8173f59 >> --- /dev/null >> +++ b/include/linux/net_namespace_type.h >> @@ -0,0 +1,52 @@ >> +/* >> + * Definition of the network namespace reference type >> + * And operations upon it. >> + */ >> +#ifndef __LINUX_NET_NAMESPACE_TYPE_H >> +#define __LINUX_NET_NAMESPACE_TYPE_H >> + >> +#define __pernetname(name) per_net__##name > > Code obfuscation, please don't do that Single point of making the naming rules, better maintenance. The basic point is that variables that come through this path you should not access directly. Tweaking the name enforces that even in the compiled out state. >> +typedef struct {} net_t; > > No typedef for this please. Why? That is conventionally how we do opaque types in linux when someone is doing something sophisticated. You probably want to look down to patch 21 to see what the compiled in version of these look like. Eric From swise at opengridcomputing.com Thu Jan 25 12:57:25 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 25 Jan 2007 14:57:25 -0600 Subject: [openib-general] [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). Message-ID: <20070125205725.497.74172.stgit@dell3.ogc.int> Backport Chelsio to rhel5 (2.6.18_FC6).
Signed-off-by: Steve Wise --- .../backport/2.6.18_FC6/include/linux/genalloc.h | 42 +++++ .../backport/2.6.18_FC6/include/linux/interrupt.h | 17 ++ .../backport/2.6.18_FC6/include/linux/netdevice.h | 9 + .../backport/2.6.18_FC6/include/linux/random.h | 15 ++ .../backport/2.6.18_FC6/include/linux/skbuff.h | 1 .../backport/2.6.18_FC6/include/linux/workqueue.h | 9 + .../backport/2.6.18_FC6/include/src/genalloc.c | 198 +++++++++++++++++++++++ .../2.6.18_FC6/cxgb3_makefile_to_2_6_19.patch | 12 + .../2.6.18_FC6/linux_genalloc_to_2_6_20.patch | 17 ++ 9 files changed, 319 insertions(+), 1 deletions(-) diff --git a/kernel_addons/backport/2.6.18_FC6/include/linux/genalloc.h b/kernel_addons/backport/2.6.18_FC6/include/linux/genalloc.h new file mode 100644 index 0000000..3c23c68 --- /dev/null +++ b/kernel_addons/backport/2.6.18_FC6/include/linux/genalloc.h @@ -0,0 +1,42 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +/* + * General purpose special memory pool descriptor. + */ +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ +}; + +/* + * General purpose special memory pool chunk descriptor. 
+ */ +struct gen_pool_chunk { + spinlock_t lock; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +extern struct gen_pool *ib_gen_pool_create(int, int); +extern int ib_gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern void ib_gen_pool_destroy(struct gen_pool *); +extern unsigned long ib_gen_pool_alloc(struct gen_pool *, size_t); +extern void ib_gen_pool_free(struct gen_pool *, unsigned long, size_t); + +#define gen_pool_create ib_gen_pool_create +#define gen_pool_add ib_gen_pool_add +#define gen_pool_destroy ib_gen_pool_destroy +#define gen_pool_alloc ib_gen_pool_alloc +#define gen_pool_free ib_gen_pool_free diff --git a/kernel_addons/backport/2.6.18_FC6/include/linux/interrupt.h b/kernel_addons/backport/2.6.18_FC6/include/linux/interrupt.h new file mode 100644 index 0000000..66e66a9 --- /dev/null +++ b/kernel_addons/backport/2.6.18_FC6/include/linux/interrupt.h @@ -0,0 +1,17 @@ +#ifndef BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#define BACKPORT_LINUX_INTERRUPT_TO_2_6_18 +#include_next + +static inline int +backport_request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *), + unsigned long flags, const char *dev_name, void *dev_id) +{ + return request_irq(irq, + (irqreturn_t (*)(int, void *, struct pt_regs *))handler, + flags, dev_name, dev_id); +} + +#define request_irq backport_request_irq + +#endif diff --git a/kernel_addons/backport/2.6.18_FC6/include/linux/netdevice.h b/kernel_addons/backport/2.6.18_FC6/include/linux/netdevice.h new file mode 100644 index 0000000..61a6deb --- /dev/null +++ b/kernel_addons/backport/2.6.18_FC6/include/linux/netdevice.h @@ -0,0 +1,9 @@ +#ifndef BACKPORT_LINUX_NETDEVICE_TO_2_6_18 +#define BACKPORT_LINUX_NETDEVICE_TO_2_6_18 +#include_next + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) \ 
+ (netdev)->ethtool_ops = (struct ethtool_ops *)(ops) + +#endif diff --git a/kernel_addons/backport/2.6.18_FC6/include/linux/random.h b/kernel_addons/backport/2.6.18_FC6/include/linux/random.h new file mode 100644 index 0000000..2ea2e1f --- /dev/null +++ b/kernel_addons/backport/2.6.18_FC6/include/linux/random.h @@ -0,0 +1,15 @@ +#ifndef BACKPORT_LINUX_RANDOM_TO_2_6_18 +#define BACKPORT_LINUX_RANDOM_TO_2_6_18 +#include_next + +static inline u32 backport_random32(void) +{ + u32 v; + + get_random_bytes(&v, sizeof(u32)); + return v; +} + +#define random32 backport_random32 + +#endif diff --git a/kernel_addons/backport/2.6.18_FC6/include/linux/skbuff.h b/kernel_addons/backport/2.6.18_FC6/include/linux/skbuff.h index 4845283..ca5edc0 100644 --- a/kernel_addons/backport/2.6.18_FC6/include/linux/skbuff.h +++ b/kernel_addons/backport/2.6.18_FC6/include/linux/skbuff.h @@ -4,5 +4,6 @@ #define LINUX_SKBUFF_H_BACKPORT #include_next #define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW #endif diff --git a/kernel_addons/backport/2.6.18_FC6/include/linux/workqueue.h b/kernel_addons/backport/2.6.18_FC6/include/linux/workqueue.h index 330f47f..cc8b2cd 100644 --- a/kernel_addons/backport/2.6.18_FC6/include/linux/workqueue.h +++ b/kernel_addons/backport/2.6.18_FC6/include/linux/workqueue.h @@ -26,6 +26,12 @@ backport_cancel_delayed_work(struct dela return cancel_delayed_work(&work->work); } +static inline void +backport_cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) +{ + cancel_rearming_delayed_workqueue(wq, &work->work); +} + #undef INIT_WORK #define INIT_WORK(_work, _func) backport_INIT_WORK(_work, _func) @@ -33,11 +39,12 @@ #define INIT_DELAYED_WORK(_work, _func) #undef DECLARE_WORK #define DECLARE_WORK(n, f) \ - struct work_struct n = __WORK_INITIALIZER(n, f, &(n)) + struct work_struct n = __WORK_INITIALIZER(n, (void (*)(void *))f, &(n)) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = { .work = 
__WORK_INITIALIZER(n.work, f, &(n.work)) } #define queue_delayed_work backport_queue_delayed_work #define cancel_delayed_work backport_cancel_delayed_work +#define cancel_rearming_delayed_workqueue backport_cancel_rearming_delayed_workqueue #endif diff --git a/kernel_addons/backport/2.6.18_FC6/include/src/genalloc.c b/kernel_addons/backport/2.6.18_FC6/include/src/genalloc.c new file mode 100644 index 0000000..75ae68c --- /dev/null +++ b/kernel_addons/backport/2.6.18_FC6/include/src/genalloc.c @@ -0,0 +1,198 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include + + +/** + * gen_pool_create - create a new special memory pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. 
+ */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) +{ + struct gen_pool *pool; + + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + */ +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) +{ + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; + + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy - destroy a special memory pool + * @pool: pool to destroy + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. 
+ */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct list_head *_chunk, *_next_chunk; + struct gen_pool_chunk *chunk; + int order = pool->min_alloc_order; + int bit, end_bit; + + + write_lock(&pool->lock); + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + list_del(&chunk->next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + bit = find_next_bit(chunk->bits, end_bit, 0); + BUG_ON(bit < end_bit); + + kfree(chunk); + } + kfree(pool); + return; +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc - allocate special memory from the pool + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; + + if (size == 0) + return 0; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; + } + spin_unlock_irqrestore(&chunk->lock, flags); + } + 
read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); + +/** + * gen_pool_free - free allocated special memory back to the pool + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; + } + } + BUG_ON(nbits > 0); + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/kernel_patches/backport/2.6.18_FC6/cxgb3_makefile_to_2_6_19.patch b/kernel_patches/backport/2.6.18_FC6/cxgb3_makefile_to_2_6_19.patch new file mode 100644 index 0000000..ad7e7f4 --- /dev/null +++ b/kernel_patches/backport/2.6.18_FC6/cxgb3_makefile_to_2_6_19.patch @@ -0,0 +1,12 @@ +diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile +index 3434679..bb008b6 100755 +--- a/drivers/net/cxgb3/Makefile ++++ b/drivers/net/cxgb3/Makefile +@@ -1,6 +1,7 @@ + # + # Chelsio T3 driver + # ++NOSTDINC_FLAGS:= $(NOSTDINC_FLAGS) $(LINUXINCLUDE) + + obj-$(CONFIG_CHELSIO_T3) += cxgb3.o + diff --git a/kernel_patches/backport/2.6.18_FC6/linux_genalloc_to_2_6_20.patch b/kernel_patches/backport/2.6.18_FC6/linux_genalloc_to_2_6_20.patch new file mode 100644 index 0000000..93fee2b --- /dev/null +++ 
b/kernel_patches/backport/2.6.18_FC6/linux_genalloc_to_2_6_20.patch @@ -0,0 +1,17 @@ +diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile +index 163d991..2cd239f 100644 +--- a/drivers/infiniband/core/Makefile ++++ b/drivers/infiniband/core/Makefile +@@ -30,3 +30,5 @@ ib_ucm-y := ucm.o + + ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o ++ ++ib_core-y += genalloc.o +diff --git a/drivers/infiniband/core/genalloc.c b/drivers/infiniband/core/genalloc.c +new file mode 100644 +index 0000000..96a48fe +--- /dev/null ++++ b/drivers/infiniband/core/genalloc.c +@@ -0,0 +1 @@ ++#include "src/genalloc.c" From sorrillo at jlab.org Thu Jan 25 13:15:55 2007 From: sorrillo at jlab.org (Lawrence Sorrillo) Date: Thu, 25 Jan 2007 16:15:55 -0500 Subject: [openib-general] How to implement IP over IB Message-ID: <200701252115.l0PLFsbT022072@ccs15.jlab.org> HI: I am a newbie to IB. There is not a lot of good information readily available. Has anyone successfully implemented IP over IB? Lawrence Sorrillo UNIX/Linux Systems Administrator Jefferson Laboratory 12000 Jeffferson Avenue, Newport News, VA 23606 Phone: 757-269-7681 Email: sorrillo at jlab.org -------------- next part -------------- An HTML attachment was scrubbed... URL: From robert.j.woodruff at intel.com Thu Jan 25 13:45:19 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Thu, 25 Jan 2007 13:45:19 -0800 Subject: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: <45B87D7A.5000708@mellanox.co.il> Message-ID: Ok, here is an updated slide set with the comments I received from this list. Who can publish these somewhere on the website so that the general community knows who to go to for specific components ? woody -------------- next part -------------- A non-text attachment was scrubbed... 
Name: OpenFabrics Maintainers.pdf Type: application/octet-stream Size: 182979 bytes Desc: OpenFabrics Maintainers.pdf URL: From rdreier at cisco.com Thu Jan 25 13:51:09 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 13:51:09 -0800 Subject: [openib-general] How to implement IP over IB In-Reply-To: <200701252115.l0PLFsbT022072@ccs15.jlab.org> (Lawrence Sorrillo's message of "Thu, 25 Jan 2007 16:15:55 -0500") References: <200701252115.l0PLFsbT022072@ccs15.jlab.org> Message-ID: > Has anyone successfully implemented IP over IB? Yes, an implementation has been part of the Linux kernel since version 2.6.11 or so. From robert.j.woodruff at intel.com Thu Jan 25 13:52:59 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Thu, 25 Jan 2007 13:52:59 -0800 Subject: [openib-general] Current List of OFA Linux components and maintainers In-Reply-To: Message-ID: James wrote, >The list is missing the the iWARP CM and the Chelsio driver. I believe >Tom Tucker and Steve Wise are co-maintainers of both. Added iWarp CM and also amaso driver maintainers. Think I have all the feedback in this latest one. (attached) woody -------------- next part -------------- A non-text attachment was scrubbed... Name: OpenFabrics Maintainers.pdf Type: application/octet-stream Size: 185943 bytes Desc: OpenFabrics Maintainers.pdf URL: From robert.j.woodruff at intel.com Thu Jan 25 14:07:44 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Thu, 25 Jan 2007 14:07:44 -0800 Subject: [openib-general] InfiniBand Maintainers Summit/BOF at Ottawa Linux Symposium In-Reply-To: Message-ID: Roland wrote, >Since the kernel summit has been moved to Cambridge, England in >September this year, I'm unlikely to go to OLS too. >But I don't think an OLS BOF is really the best venue for an "RDMA >summit" anyway. 
A BOF is too little time to really cover everything, >and having non-core people there changes the mood -- of course it's >good to include the hoi polloi most of the time, but I think you're >really proposing an event for the core developers only. >I think a summit really needs to be a full day and be invitation only. >One idea that might work would be to have a summit the day before the >Sonoma workshop in April. This could be pretty informal, with no need >to involve the OFA board or anything cumbersome like that -- just tell >everyone to show up a day early, and get a small conference room and a >catered lunch at the hotel (money could probably be scared up to pay >for that without much trouble). >Or we could pick another day/location... I'm sure I could get a room & >food at Cisco's campus any time given a few weeks warning. > - R. Good point, perhaps an OLS BOF is not the right place to hold a maintainer's summit, I like the idea of perhaps having it adjacent to the Sonoma or some other OFA event. As for OLS, it might however be good to have an InfiniBand BOF anyway just to present the current status of OpenFabrics, what is now in the kernel and present the roadmap of what is coming in the near future and such, I think the wider Linux community might be interested in such a BOF. I guess I'd be willing to help pull together the slides for such a BOF working with the various maintainers if they were not planning on attending. Thoughts ? woody From robert.j.woodruff at intel.com Thu Jan 25 14:17:42 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Thu, 25 Jan 2007 14:17:42 -0800 Subject: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: Message-ID: Yet one more updated slide set with a couple more minor modifications.
woody -----Original Message----- From: openib-general-bounces at openib.org [mailto:openib-general-bounces at openib.org] On Behalf Of Woodruff, Robert J Sent: Thursday, January 25, 2007 1:45 PM To: EWG; openib-general; openib-promoters at openib.org Subject: Re: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers Ok, here is an updated slide set with the comments I received from this list. Who can publish these somewhere on the website so that the general community knows who to go to for specific components ? woody -------------- next part -------------- A non-text attachment was scrubbed... Name: OpenFabrics Maintainers.pdf Type: application/octet-stream Size: 184980 bytes Desc: OpenFabrics Maintainers.pdf URL: From bill.magro at intel.com Thu Jan 25 14:25:14 2007 From: bill.magro at intel.com (Magro, Bill) Date: Thu, 25 Jan 2007 14:25:14 -0800 Subject: [openib-general] [Openib-promoters] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: Message-ID: <4D97B70CF7F72144881F66DFF4BD7A12016CA389@fmsmsx413.amr.corp.intel.com> Does it make sense to break out the components that aren't part of OFA, but are OFA consumers, into a separate consumers section? Examples would be the various MPI implementations. --Bill -----Original Message----- From: openib-promoters-bounces at openib.org [mailto:openib-promoters-bounces at openib.org] On Behalf Of Woodruff, Robert J Sent: Thursday, January 25, 2007 4:18 PM To: Woodruff, Robert J; EWG; openib-general; openib-promoters at openib.org Subject: Re: [Openib-promoters] [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers Yet one more updated slide set with a couple more minor modifications.
woody -----Original Message----- From: openib-general-bounces at openib.org [mailto:openib-general-bounces at openib.org] On Behalf Of Woodruff, Robert J Sent: Thursday, January 25, 2007 1:45 PM To: EWG; openib-general; openib-promoters at openib.org Subject: Re: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers Ok, here is an updated slide set with the comments I received from this list. Who can publish these somewhere on the website so that the general community knows who to go to for specific components ? woody From robert.j.woodruff at intel.com Thu Jan 25 14:33:05 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Thu, 25 Jan 2007 14:33:05 -0800 Subject: [openib-general] [Openib-promoters] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: <4D97B70CF7F72144881F66DFF4BD7A12016CA389@fmsmsx413.amr.corp.intel.com> Message-ID: Bill Magro wrote, >Does is make sense to break out the components that aren't part of OFA, but are OFA consumers, >into a separate consumers section? Examples would be the various MPI implementations. >--Bill Probably does, but don't think that it is really needed for this go round as the important information is to get the list of maintainers published so that people know who to go to. One could also argue that since OpenMPI and MVAPICH are not really developed within OFA, just shipped by OFED, they should not even be in the list, however, I suppose it does not hurt, so I am inclined to leave them in. woody From jgunthorpe at obsidianresearch.com Thu Jan 25 16:03:19 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Thu, 25 Jan 2007 17:03:19 -0700 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets Message-ID: <20070126000319.GA12386@obsidianresearch.com> ib_init_ah_from_path contains the logic to decide when to use a GRH so call ib_init_ah_from_path instead of the hand coded version in IPOIB. 
This change along with recent opensm changes allows unicast IPOIB traffic to traverse a router. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 20 ++++++++++++-------- 1 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 705eb1d..cb54da1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -385,14 +385,18 @@ static void path_rec_completion(int status, skb_queue_head_init(&skqueue); if (!status) { - struct ib_ah_attr av = { - .dlid = be16_to_cpu(pathrec->dlid), - .sl = pathrec->sl, - .port_num = priv->port, - .static_rate = pathrec->rate - }; - - ah = ipoib_create_ah(dev, priv->pd, &av); + int ret; + struct ib_ah_attr av; + if ((ret = ib_init_ah_from_path(priv->ca,priv->port,pathrec,&av))) + ipoib_dbg(priv, "PathRec init_ah failed %d for GID " + IPOIB_GID_FMT "\n", ret, + IPOIB_GID_ARG(path->pathrec.dgid)); + else { + // FIXME: Should this be in ib_init_ah_from_path? + av.static_rate = pathrec->rate; + + ah = ipoib_create_ah(dev, priv->pd, &av); + } } spin_lock_irqsave(&priv->lock, flags); -- 1.4.4.3 From hch at lst.de Thu Jan 25 18:50:49 2007 From: hch at lst.de (Christoph Hellwig) Date: Fri, 26 Jan 2007 03:50:49 +0100 Subject: [openib-general] [PATCH 2.6.21 1/4] ehca: fix improper use of yield with spinlock held In-Reply-To: <200701240010.37540.hnguyen@linux.vnet.ibm.com> References: <200701240010.37540.hnguyen@linux.vnet.ibm.com> Message-ID: <20070126025049.GB18537@lst.de> On Wed, Jan 24, 2007 at 12:10:36AM +0100, Hoang-Nam Nguyen wrote: > Here is a patch for ehca_cq.c that fixes improper use of yield > with spinlock held. Btw, please don't forget to replace the yield call with a proper condition for 2.6.21. 
From steve.langdon at hp.com Thu Jan 25 20:17:25 2007 From: steve.langdon at hp.com (Stephen Langdon) Date: Thu, 25 Jan 2007 23:17:25 -0500 Subject: [openib-general] [Openib-promoters] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: References: Message-ID: <45B980D5.8050208@hp.com> Bill, I for one would like the OFA components clearly distinguished from things like the (non-OFA) MPI implementations which are distributed with OFED on a "grace-and-favor" basis as a convenience to users. It is my view (which may or may not be shared by others) that they are not integral parts of OFED itself. Steve Woodruff, Robert J wrote: > Bill Magro wrote, > >> Does is make sense to break out the components that aren't part of OFA, >> > but are OFA consumers, > >> into a separate consumers section? Examples would be the various MPI >> > implementations. > > >> --Bill >> > > > Probably does, but don't think that it is really needed for this > go round as the important information is to get the list of maintainers > published so that people know who to go to. > One could also argue that since OpenMPI and MVAPICH are not really > developed within OFA, just shipped by OFED, they should not even > be in the list, however, I suppose it does not hurt, so I am inclined > to leave them in. 
> > woody > > > _______________________________________________ > openib-promoters mailing list > openib-promoters at openib.org > http://openib.org/mailman/listinfo/openib-promoters > -------------- next part -------------- begin:vcard fn:Stephen Langdon n:Langdon;Stephen org:Hewlett-Packard, HPC Division;Consulting & Architecture Group adr:MRO1-3/J14;;200 Forest Street;Marlborough;MA;01451;USA email;internet:steve.langdon at hp.com title:HP Fellow tel;work:+1 508-467-3774 tel;fax:+1 978 742 1141 tel;home:+1 978-456-8177 tel;cell:+1 978-618-8599 x-mozilla-html:TRUE version:2.1 end:vcard From rdreier at cisco.com Thu Jan 25 21:05:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 21:05:58 -0800 Subject: [openib-general] [PATCH] ib_addr: Handle Ethernet neighbour updates during route resolution. In-Reply-To: <1169600597.26256.53.camel@stevo-desktop> (Steve Wise's message of "Tue, 23 Jan 2007 19:03:17 -0600") References: <1169600597.26256.53.camel@stevo-desktop> Message-ID: Thanks, queued for 2.6.21. From rdreier at cisco.com Thu Jan 25 21:15:31 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 25 Jan 2007 21:15:31 -0800 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets In-Reply-To: <20070126000319.GA12386@obsidianresearch.com> (Jason Gunthorpe's message of "Thu, 25 Jan 2007 17:03:19 -0700") References: <20070126000319.GA12386@obsidianresearch.com> Message-ID: > + if ((ret = ib_init_ah_from_path(priv->ca,priv->port,pathrec,&av))) kernel style is spaces after commas, like ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av) also, I really prefer to see code like this written as ret = ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av); if (ret) { If the call fails, do you think > + ipoib_dbg(priv, "PathRec init_ah failed %d for GID " printing the error message with ipoib_dbg() is a good idea? It means the failure will be invisible unless someone explicitly enables debugging. 
> + // FIXME: Should this be in ib_init_ah_from_path? > + av.static_rate = pathrec->rate; Yes, I think this should be in ib_init_ah_from_path() ... I don't see any reason why it shouldn't. I suspect the reason is historical, because ib_init_ah_from_path() predates the switch to having av.static_rate mean the real data rate of to use (rather than the relative inter-packet delay static rate that the IB spec for verbs talks about). BTW do SRP and iSER already work through routers? - R. From eitan at sw053.yok.mtl.com Thu Jan 25 21:36:39 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Fri, 26 Jan 2007 07:36:39 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-26:normal completion Message-ID: <200701260536.l0Q5ad7k026319@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Wed_Jan_24_14:35:07_2007 ed2146 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=409 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 29 OsmStress IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: 1 OsmStress IS1-16.topo From tziporet at mellanox.co.il Thu Jan 25 22:37:07 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Fri, 26 Jan 2007 08:37:07 +0200 Subject: [openib-general] reminder: OFED 
1.2 coordination meeting next Monday Jan-29 at 9amPST Message-ID: <45B9A193.1050301@mellanox.co.il> Hi all, I wish to remind you that we are going to have the OFED 1.2 coordination meeting next Monday (Jan-29) at 9am PST. Agenda: 1. Code readiness toward code freeze 2. Review release & packaging procedures (to be sent before the meeting). Please send more agenda items if you have any. Tziporet Bridge info: Meeting ID: 2106670 Meeting Password: Global Access Numbers: http://cisco.com/en/US/about/doing_business/conferencing/index.html US/Canada: +1.866.432.9903 United Kingdom: +44.20.8824.0117 India: +91.80.4103.3979 Germany: +49.619.6773.9002 Japan: +81.3.5763.9394 China: +86.10.8515.5666 for world-wide access numbers see: http://openib.org/pipermail/openib-general/2007-January/031282.html From mst at mellanox.co.il Thu Jan 25 23:35:35 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 26 Jan 2007 09:35:35 +0200 Subject: [openib-general] [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). In-Reply-To: <20070125205725.497.74172.stgit@dell3.ogc.int> References: <20070125205725.497.74172.stgit@dell3.ogc.int> Message-ID: <20070126073535.GG10812@mellanox.co.il> > Quoting Steve Wise : > Subject: [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). > > > Backport Chelsio to rhel5 (2.6.18_FC6). BTW, steve, is FC4 supported? I don't see a backport ... -- MST From mst at mellanox.co.il Thu Jan 25 23:41:28 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 26 Jan 2007 09:41:28 +0200 Subject: [openib-general] IPOIB CM with Non SRQ support In-Reply-To: References: Message-ID: <20070126074128.GJ10812@mellanox.co.il> > Quoting Pradeep Satyanarayana : > Subject: IPOIB CM with Non SRQ support > > > Michael, > > I am working on a prototype based on your IPOIB CM patch to incorporate support for Non SRQ as well. IPOIB CM was planned to be in OFED 1.2 if I remember correctly.
If I were to submit a patch for non SRQ support, what would be the cut off date to make it > into OFED 1.2? I think it must be ready for merge by feature freeze on Feb 1st, but at this stage it really needs to be a small patch. I can't commit to merging it before I see it. I have to warn you that I thought about this problem, and unfortunately I do not see a way to implement it in a robust fashion without complicating the code significantly. In this case, you'll just might have to maintain it as a separate patch until the code lands upstream, and propose as a separate improvement later. -- MST From vlad at lists.openfabrics.org Fri Jan 26 02:21:20 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Fri, 26 Jan 2007 02:21:20 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070126-0200 daily build status Message-ID: <20070126102121.0C910E603CC@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.19 Passed on powerpc with linux-2.6.17 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on 
powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on powerpc with linux-2.6.16 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Failed: From mst at mellanox.co.il Fri Jan 26 05:37:34 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 26 Jan 2007 15:37:34 +0200 Subject: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: References: Message-ID: <20070126133651.GM10812@mellanox.co.il> > Yet one more updated slide set with a couple more minor > modifications. I still don't see Ishai listed against SRP. Is there a problem? -- MST From mst at mellanox.co.il Fri Jan 26 05:40:58 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Fri, 26 Jan 2007 15:40:58 +0200 Subject: [openib-general] git question In-Reply-To: <349DCDA352EACF42A0C49FA6DCEA840330097C@G3W0634.americas.hpqcorp.net> References: <459D2EAA.9040204@pathscale.com> <349DCDA352EACF42A0C49FA6DCEA840330097C@G3W0634.americas.hpqcorp.net> Message-ID: <20070126134058.GN10812@mellanox.co.il> > Quoting Tang, Changqing : > Subject: Re: git question > > > I have trouble to get the latest OFED 1.2 source because of the firewall > or other reason I don't know. Is there a checkout version somewhere I > can access, for example. Daily build version ? Take a look here: http://www.openfabrics.org/~vlad/builds/ -- MST From mst at mellanox.co.il Fri Jan 26 06:50:51 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Fri, 26 Jan 2007 16:50:51 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <1169740919.13608.79933.camel@hal.voltaire.com> References: <45AE3A4E.4050501@dev.mellanox.co.il> <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> <45B7FB25.7020704@dev.mellanox.co.il> <1169734403.13608.75532.camel@hal.voltaire.com> <45B8CED6.3040201@dev.mellanox.co.il> <1169740919.13608.79933.camel@hal.voltaire.com> Message-ID: <20070126145051.GR10812@mellanox.co.il> > >>> As far as using lex/yacc, are they invoked as part of the build > >>> procedure or are the files they generate just checked in and used ? > >>> > >> When lex/yacc are invoked, they generate three files: > >> - osm_qos_parser_l.c > >> - osm_qos_parser_y.c > >> - osm_qos_parser_y.h > >> These generated files should be included in the git repository, > >> and they are the ones that are compiled by 'make' command. By the way, a disadvantage of this approach is that git log becomes quite useless when you touch one of lex/yacc sources, logs are cluttered with unreadable diffs. A general rule is to avoid keeping generated files under git. If you want to avoid dependance on yacc/lex on user's system, I think a better way would be to generate these during 'make dist', *not* have them tracked in git. 
-- MST From sashak at voltaire.com Fri Jan 26 07:07:49 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 26 Jan 2007 17:07:49 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <20070126145051.GR10812@mellanox.co.il> References: <20070118000120.GO21043@sashak.voltaire.com> <45B32872.4070006@dev.mellanox.co.il> <1169496259.23046.10310.camel@hal.voltaire.com> <45B769E6.3080905@dev.mellanox.co.il> <1169655749.29183.93022.camel@hal.voltaire.com> <45B7FB25.7020704@dev.mellanox.co.il> <1169734403.13608.75532.camel@hal.voltaire.com> <45B8CED6.3040201@dev.mellanox.co.il> <1169740919.13608.79933.camel@hal.voltaire.com> <20070126145051.GR10812@mellanox.co.il> Message-ID: <20070126150749.GD8333@sashak.voltaire.com> On 16:50 Fri 26 Jan , Michael S. Tsirkin wrote: > > >>> As far as using lex/yacc, are they invoked as part of the build > > >>> procedure or are the files they generate just checked in and used ? > > >>> > > >> When lex/yacc are invoked, they generate three files: > > >> - osm_qos_parser_l.c > > >> - osm_qos_parser_y.c > > >> - osm_qos_parser_y.h > > >> These generated files should be included in the git repository, > > >> and they are the ones that are compiled by 'make' command. > > By the way, a disadvantage of this approach is that git log becomes quite > useless when you touch one of lex/yacc sources, logs are cluttered > with unreadable diffs. > > A general rule is to avoid keeping generated files under git. Good rule. > If you want to avoid dependance on yacc/lex > on user's system, I think a better way would be to generate these > during 'make dist', *not* have them tracked in git. That seems better approach for me too. Sasha From swise at opengridcomputing.com Fri Jan 26 07:08:39 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 26 Jan 2007 09:08:39 -0600 Subject: [openib-general] [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). 
In-Reply-To: <20070126073535.GG10812@mellanox.co.il> References: <20070125205725.497.74172.stgit@dell3.ogc.int> <20070126073535.GG10812@mellanox.co.il> Message-ID: <1169824119.2996.7.camel@stevo-desktop> On Fri, 2007-01-26 at 09:35 +0200, Michael S. Tsirkin wrote: > > Quoting Steve Wise : > > Subject: [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). > > > > > > Backport Chelsio to rhel5 (2.6.18_FC6). > > BTW, steve, is FC4 supported? I don't see a backport ... > > I haven't done that one. I wasn't planning on it since it's not one of the OFED 1.2 supported distros. It's trivial to add it, but I don't have the kernel src. From bugzilla-daemon at lists.openfabrics.org Fri Jan 26 07:15:28 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Fri, 26 Jan 2007 07:15:28 -0800 (PST) Subject: [openib-general] [Bug 325] New: RDMA_CM and address translation broken on sles9sp3 Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=325 Summary: RDMA_CM and address translation broken on sles9sp3 Product: OpenFabrics Linux Version: 1.2 Platform: X86-64 OS/Version: SLES 9 Status: NEW Severity: critical Priority: P2 Component: RDMA CM AssignedTo: bugzilla at openib.org ReportedBy: swise at opengridcomputing.com rdma_translate_ip() and friends use ip_dev_find(local_ip_addr) to obtain a net_device pointer. Then the device type is used to determine if the rdma address is iwarp or infiniband. On sles9sp3, ip_dev_find(local_ip_addr) is returning the loopback device. This causes rdma_copy_addr() to fail. -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From swise at opengridcomputing.com Fri Jan 26 08:00:15 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 26 Jan 2007 10:00:15 -0600 Subject: [openib-general] [PATCH 00/12] ofed_1_2 - Neighbour update support In-Reply-To: <20070125191321.30934.74542.stgit@dell3.ogc.int> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> Message-ID: <1169827215.2996.14.camel@stevo-desktop> Shall I push these up for you to pull from my git tree? On Thu, 2007-01-25 at 13:13 -0600, Steve Wise wrote: > Michael/Vlad: > > Here are the backports for snooping arp packets to generate neighbour > update netevents. Also included is the addr.c patch to act on all valid > neigh update events. If this series looks good to you then I'll push > this up and you all can pull it from my git tree. > > > Steve. > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From ossrosch at linux.vnet.ibm.com Fri Jan 26 08:46:34 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Fri, 26 Jan 2007 17:46:34 +0100 Subject: [openib-general] [Patch ofed1.2 0/3]libehca: cleanup and adjust mmap Message-ID: <200701261746.35189.ossrosch@linux.vnet.ibm.com> Hi, These 3 patches change the libehca coding style to the kernel coding and kernel tracing style. The userspace mmap code needs to be adjusted to the changed userspace mapping introduced in kernel patch [PATCH/RFC 2.6.21 0/5] ehca: remove use of do_mmap() from kernel space. 
Stefan From ossrosch at linux.vnet.ibm.com Fri Jan 26 08:47:07 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Fri, 26 Jan 2007 17:47:07 +0100 Subject: [openib-general] [Patch ofed1.2 1/3]libehca: cleanup and adjust mmap Message-ID: <200701261747.07766.ossrosch@linux.vnet.ibm.com> Signed-off-by: Stefan Roscher --- diff -Nurp libehca_old/Makefile.am libehca_new/Makefile.am --- libehca_old/Makefile.am 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/Makefile.am 2007-01-26 14:27:43.000000000 +0100 @@ -48,6 +48,9 @@ if HAVE_IBV_DEVICE_LIBRARY_EXTENSION src_libehca_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ -Wl,--version-script=$(srcdir)/src/libehca.map \ -lpthread -libverbs -nostdlib + ehcaconfdir = $(sysconfdir)/libibverbs.d + ehcaconf_DATA = ehca.driver + else ehcalibdir = $(libdir)/infiniband ehcalib_LTLIBRARIES = src/libehca.la diff -Nurp libehca_old/src/ehca_asm.h libehca_new/src/ehca_asm.h --- libehca_old/src/ehca_asm.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_asm.h 2007-01-26 14:27:43.000000000 +0100 @@ -37,8 +37,6 @@ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * - * $Id: ehca_asm.h,v 1.1 2006/02/22 12:26:55 nguyen Exp $ */ @@ -48,9 +46,14 @@ #if defined(CONFIG_PPC_PSERIES) || defined (__PPC64__) || defined (__PPC__) #define clear_cacheline(adr) __asm__ __volatile("dcbz 0,%0"::"r"(adr)) +/* rmb() in 32-bit mode does a full sync, while we need a lwsync */ +#define lwsync() __asm__ __volatile__ ("lwsync" : : : "memory") + +#define mftb() ({ unsigned long rval; \ + asm volatile("mftb %0" : "=r" (rval)); rval; }) + +#define asm_sync_mem() __asm__ __volatile__ ("sync" : : : "memory") -#elif defined(CONFIG_ARCH_S390) -#error "unsupported yet" #else #error "invalid platform" #endif diff -Nurp libehca_old/src/ehca_everbs.h libehca_new/src/ehca_everbs.h --- libehca_old/src/ehca_everbs.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_everbs.h 2007-01-26 14:27:43.000000000 +0100 @@ -44,44 +44,47 @@ #include -/** direct access qp and send/recv completion flags (can be OR-ed) +/* + * direct access qp and send/recv completion flags (can be OR-ed) * set this option in ibv_qp_init_attr.qp_type when creating a daqp */ enum ehcau_completion_flag { - DA_SEND_COMPLETION=0x20, - DA_RECV_COMPLETION=0x40, - DA_QP_ENABLE=0x80 + DA_SEND_COMPLETION = 0x20, + DA_RECV_COMPLETION = 0x40, + DA_QP_ENABLE = 0x80 }; -/** da send/recv message size +/* + * da send/recv message size * set send/recv message size in ibv_ap_init_attr.cap.max_send/recv_sge * respectively when creating a daqp */ enum ehcau_msg_size { - DA_MSG_SIZE_128=0, - DA_MSG_SIZE_256=1, - DA_MSG_SIZE_512=2, - DA_MSG_SIZE_1024=3, - DA_MSG_SIZE_2048=4, - DA_MSG_SIZE_4096=5 + DA_MSG_SIZE_128 = 0, + DA_MSG_SIZE_256 = 1, + DA_MSG_SIZE_512 = 2, + DA_MSG_SIZE_1024 = 3, + DA_MSG_SIZE_2048 = 4, + DA_MSG_SIZE_4096 = 5 }; -/** +/* + * * ehcau_qp_attr_da - enhanced qp attr containing send/recv queue data */ struct ehcau_qp_attr_da { - /* wqe size in bytes for send */ - __u16 send_wqe_size; - /* total length of send queue */ - __u64 send_queue_length; - /* send queue pointer, i.e. 
first wqe address */ - void *send_queue_ptr; - /* wqe size in bytes for recv */ - __u16 recv_wqe_size; - /* total length of recv queue */ - __u64 recv_queue_length; - /* recv queue pointer, i.e. first wqe address */ - void *recv_queue_ptr; + /* wqe size in bytes for send */ + __u16 send_wqe_size; + /* total length of send queue */ + __u64 send_queue_length; + /* send queue pointer, i.e. first wqe address */ + void *send_queue_ptr; + /* wqe size in bytes for recv */ + __u16 recv_wqe_size; + /* total length of recv queue */ + __u64 recv_queue_length; + /* recv queue pointer, i.e. first wqe address */ + void *recv_queue_ptr; }; /* @@ -117,14 +120,18 @@ int ehcau_write_rwqe(void *wqe, struct i * returns send queue's next entry ptr */ #define GET_SQUEUE_NEXT_ENTRY_PTR(current_ptr, qp_attr) \ - (((void*)current_ptr+(qp_attr).send_wqe_size>=(qp_attr).send_queue_ptr+(qp_attr).send_queue_length) ? \ - (qp_attr).send_queue_ptr : (void*)current_ptr+(qp_attr).send_wqe_size) + (((void *)current_ptr + (qp_attr).send_wqe_size >= \ + (qp_attr).send_queue_ptr + (qp_attr).send_queue_length) ? \ + (qp_attr).send_queue_ptr : (void *)current_ptr + \ + (qp_attr).send_wqe_size) /* * returns recv queue's next entry ptr */ #define GET_RQUEUE_NEXT_ENTRY_PTR(current_ptr, qp_attr) \ - (((void*)current_ptr+(qp_attr).recv_wqe_size>=(void*)(qp_attr).recv_queue_ptr+(qp_attr).recv_queue_length) ? \ - (qp_attr).recv_queue_ptr : (void*)current_ptr+(qp_attr).recv_wqe_size) + (((void *)current_ptr + (qp_attr).recv_wqe_size >= \ + (void *)(qp_attr).recv_queue_ptr + (qp_attr).recv_queue_length) ? 
\ + (qp_attr).recv_queue_ptr : (void *)current_ptr + \ + (qp_attr).recv_wqe_size) #endif /* __EHCA_EVERBS_H__ */ diff -Nurp libehca_old/src/ehca_galpa.h libehca_new/src/ehca_galpa.h --- libehca_old/src/ehca_galpa.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_galpa.h 2007-01-26 14:27:43.000000000 +0100 @@ -37,38 +37,36 @@ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: ehca_galpa.h,v 1.1 2006/02/22 12:26:55 nguyen Exp $ */ #ifndef __EHCA_GALPA_H__ #define __EHCA_GALPA_H__ -/* eHCA page (mapped into p-memory) - resource to access eHCA register pages in CPU address space -*/ +/* + * eHCA page (mapped into p-memory) + * resource to access eHCA register pages in CPU address space + */ struct h_galpa { u64 fw_handle; /* for pSeries this is a 64bit memory address where - I/O memory is mapped into CPU address space (kv) */ + * I/O memory is mapped into CPU address space (kv) + */ }; -/** - resource to access eHCA address space registers, all types -*/ +/* resource to access eHCA address space registers, all types */ struct h_galpas { u32 pid; /*PID of userspace galpa checking */ struct h_galpa user; /* user space accessible resource, - set to 0 if unused */ + * set to 0 if unused + */ struct h_galpa kernel; /* kernel space accessible resource, - set to 0 if unused */ + * set to 0 if unused + */ }; -/** @brief store value at offset into galpa, will be inline function - */ +/* store value at offset into galpa, will be inline function */ void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value); -/** @brief return value from offset in galpa, will be inline function - */ +/* return value from offset in galpa, will be inline function */ u64 hipz_galpa_load(struct h_galpa galpa, u32 offset); -#endif /* __EHCA_GALPA_H__ */ +#endif /* __EHCA_GALPA_H__ */ diff -Nurp libehca_old/src/ehca_qes.h 
libehca_new/src/ehca_qes.h --- libehca_old/src/ehca_qes.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_qes.h 2007-01-26 14:27:43.000000000 +0100 @@ -38,36 +38,33 @@ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: ehca_qes.h,v 1.1 2006/02/22 12:26:55 nguyen Exp $ */ #ifndef _EHCA_QES_H_ #define _EHCA_QES_H_ -/** DON'T include any kernel related files here!!! +/* + * DON'T include any kernel related files here!!! * This file is used commonly in user and kernel space!!! */ -/** - * virtual scatter gather entry to specify remote adresses with length - */ +/* virtual scatter gather entry to specify remote adresses with length */ struct ehca_vsgentry { u64 vaddr; u32 lkey; u32 length; }; -#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) +#define GRH_FLAG_MASK EHCA_BMASK_IBM(7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) -/** +/* * Unreliable Datagram Address Vector Format * see IBTA Vol1 chapter 8.3 Global Routing Header */ @@ -183,8 +180,7 @@ struct ehca_wqe { } ud_avp; struct { struct ehca_ud_av ud_av; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - - 2]; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - 2]; } ud_av; struct { u64 reserved0; @@ -224,10 +220,10 @@ struct ehca_wqe { }; -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) 
-#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) -#define WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) struct ehca_cqe { u64 work_request_id; @@ -262,7 +258,7 @@ struct ehca_eqe { struct ehca_mrte { u64 starting_va; - u64 length; /* length of memory region in bytes*/ + u64 length; /* length of memory region in bytes */ u32 pd; u8 key_instance; u8 pagesize; diff -Nurp libehca_old/src/ehca_u_mrmw.c libehca_new/src/ehca_u_mrmw.c --- libehca_old/src/ehca_u_mrmw.c 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_u_mrmw.c 2007-01-26 14:27:43.000000000 +0100 @@ -38,30 +38,26 @@ * POSSIBILITY OF SUCH DAMAGE. */ - -#define DEB_PREFIX "umrw" - #include #include #include "ehca_utools.h" -/*----------------------------------------------------------------------*/ struct ibv_mr *ehcau_reg_mr(struct ibv_pd *pd, void *addr, size_t length, enum ibv_access_flags access) { - int ret = 0; - struct ibv_mr *mr = NULL; + int ret; + struct ibv_mr *mr; struct ibv_reg_mr cmd; - EDEB_EN(7, "pd=%p addr=%p length=%lx access=%x", - pd, addr, (unsigned long)length, access); + ehca_dbg(pd->context->device, "pd=%p addr=%p length=%lx access=%x", + pd, addr, (unsigned long)length, access); mr = malloc(sizeof(struct ibv_mr)); if (!mr) { - EDEB_ERR(4, "no memory"); + ehca_err(pd->context->device, "no memory"); return NULL; } @@ -78,30 +74,30 @@ struct ibv_mr *ehcau_reg_mr(struct ibv_p access, mr, &cmd, sizeof(struct ibv_reg_mr)); #endif if (ret) { - EDEB_ERR(4, "ibv_cmd_reg_mr ret=%x", ret); + ehca_err(pd->context->device, "ibv_cmd_reg_mr ret=%x", ret); free(mr); return NULL; } - EDEB_EX(7, "mr=%p", mr); + ehca_dbg(pd->context->device, "mr=%p", mr); return mr; } -/*----------------------------------------------------------------------*/ int ehcau_dereg_mr(struct ibv_mr *mr) { - int ret = 0; + int ret; - EDEB_EN(7, "mr=%p", mr); + 
ehca_dbg(mr->context->device, "mr=%p", mr); ret = ibv_cmd_dereg_mr(mr); if (ret) { - EDEB_ERR(4, "ibv_cmd_dereg_mr failed, ret=%x", ret); + ehca_err(mr->context->device, "ibv_cmd_dereg_mr failed, ret=%x", + ret); return ret; } free(mr); - EDEB_EX(7, ""); + ehca_gen_dbg("mr=%p", mr); return 0; } diff -Nurp libehca_old/src/ehca_uclasses.h libehca_new/src/ehca_uclasses.h --- libehca_old/src/ehca_uclasses.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_uclasses.h 2007-01-26 14:27:43.000000000 +0100 @@ -118,7 +118,8 @@ struct ibv_mr *ehcau_reg_mr(struct ibv_p int ehcau_dereg_mr(struct ibv_mr *mr); struct ibv_cq *ehcau_create_cq(struct ibv_context *context, int cqe, - struct ibv_comp_channel *channel, int comp_vector); + struct ibv_comp_channel *channel, + int comp_vector); int ehcau_destroy_cq(struct ibv_cq *cq); @@ -149,13 +150,13 @@ int ehcau_attach_mcast(struct ibv_qp *qp int ehcau_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid); int ehcau_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *qp_attr, - enum ibv_qp_attr_mask attr_mask, struct ibv_qp_init_attr* init_attr); + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr* init_attr); -/** +/* * resp structs from kernel space */ struct ipzu_queue_resp { - u64 queue; /* points to first queue entry */ u32 qe_size; /* queue entry size */ u32 act_nr_of_sg; u32 queue_length; /* queue length allocated in bytes */ @@ -169,7 +170,6 @@ struct ehcau_create_cq_resp { u32 cq_number; u32 token; struct ipzu_queue_resp ipz_queue; - struct h_galpas galpas; }; struct ehcau_create_qp_resp { @@ -183,7 +183,6 @@ struct ehcau_create_qp_resp { u32 dummy; /* padding for 8 byte alignment */ struct ipzu_queue_resp ipz_squeue; struct ipzu_queue_resp ipz_rqueue; - struct h_galpas galpas; }; struct ehcau_qp* ehca_cq_get_qp(struct ehcau_cq *cq, int qp_num); @@ -198,8 +197,8 @@ inline static void ehcau_lock(unsigned i " stwcx. 
%1,0,%2\n" " bne- 1b\n" " lwsync\n" - : "=&r" (temp1), "+&r" (setval) : "r" (lock_var) : "cr0", "memo\ -ry"); + : "=&r" (temp1), "+&r" (setval) : "r" (lock_var) : "cr0", + "memory"); } inline static void ehcau_unlock(unsigned int * lock_var) { diff -Nurp libehca_old/src/ehca_uinit.c libehca_new/src/ehca_uinit.c --- libehca_old/src/ehca_uinit.c 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_uinit.c 2007-01-26 14:27:43.000000000 +0100 @@ -40,20 +40,24 @@ */ #if HAVE_CONFIG_H -# include +#include #endif /* HAVE_CONFIG_H */ -#include #include #include #include #include #include #include +#ifndef HAVE_IBV_READ_SYSFS_FILE #include #include #include +#endif + +#ifdef HAVE_SYSFS_LIBSYSFS_H #include +#endif #include "ehca_uclasses.h" @@ -81,19 +85,18 @@ static struct ibv_context_ops ehcau_ctx_ .detach_mcast = ehcau_detach_mcast }; -static struct ibv_context *ehcau_alloc_context(struct ibv_device *ibdev, int cmd_fd) +static struct ibv_context *ehcau_alloc_context(struct ibv_device *ibdev, + int cmd_fd) { - struct ehcau_context *my_context = NULL; + struct ehcau_context *my_context; struct ibv_get_context cmd; struct ibv_get_context_resp resp; - EDEB_EN(7, "device=%p cmd_fd=%x", - ibdev, cmd_fd); + ehca_dbg(ibdev, "device=%p cmd_fd=%x", ibdev, cmd_fd); my_context = (struct ehcau_context *)malloc(sizeof(*my_context)); - if (my_context == NULL) { + if (!my_context) return NULL; - } memset(my_context, 0, sizeof(*my_context)); my_context->ibv_ctx.cmd_fd = cmd_fd; @@ -102,7 +105,8 @@ static struct ibv_context *ehcau_alloc_c memset(&resp, 0, sizeof(resp)); if (ibv_cmd_get_context(&my_context->ibv_ctx, &cmd, sizeof(cmd), &resp, sizeof(resp))) { - EDEB_ERR(4, "ibv_cmd_get_context() failed device=%p", ibdev); + ehca_err(ibdev, "ibv_cmd_get_context() failed device=%p", + ibdev); goto alloc_context_exit0; } @@ -110,42 +114,42 @@ static struct ibv_context *ehcau_alloc_c my_context->ibv_ctx.device = ibdev; my_context->ibv_ctx.ops = ehcau_ctx_ops; - EDEB_EX(7, 
"retcode=context=%p device=%p", &my_context->ibv_ctx, ibdev); + ehca_dbg(ibdev, "retcode=context=%p device=%p", + &my_context->ibv_ctx, ibdev); return &my_context->ibv_ctx; alloc_context_exit0: free(my_context); - EDEB_EX(7, "retcode=context=NULL device=%p", ibdev); + ehca_dbg(ibdev, "retcode=context=NULL device=%p", ibdev); return NULL; } static void ehcau_free_context(struct ibv_context *ibctx) { - struct ehcau_context *my_context = NULL; - EDEB_EN(7, "context=%p", ibctx); - if (ibctx == NULL) { - EDEB_ERR(4, "context pointer is NULL"); - } else { + struct ehcau_context *my_context; + ehca_dbg(ibctx->device, "context=%p", ibctx); + if (!ibctx) + ehca_err(ibctx->device, "context pointer is NULL"); + else { my_context = container_of(ibctx, struct ehcau_context, ibv_ctx); free(my_context); } - EDEB_EX(7, "context=%p", ibctx); + ehca_dbg(ibctx->device, "context=%p", ibctx); } static char *str_strip(char *str) { - char *last=NULL; + char *last; /* strip left white spaces */ - while (*str==' ' || *str=='\t' || *str=='\n' || *str=='\r') { + while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r') str++; - } /* strip right white spaces */ - last=str+(strlen(str)-1); + last = str + (strlen(str) - 1); while (last>=str && - (*last==' ' || *last=='\t' || *last=='\n' || *last=='\r')) { + (*last == ' ' || *last == '\t' || + *last == '\n' || *last == '\r')) last--; - } - *(last+1)=0; + *(last + 1) = 0; return str; } @@ -185,19 +189,19 @@ static int ibv_read_sysfs_file(const cha #endif /* HAVE_IBV_READ_SYSFS_FILE */ static struct ibv_device *ehca_driver_init(const char *uverbs_sys_path, - int abi_version) + int abi_version) { struct ehcau_device *my_dev = NULL; char value[64]; int num_ports = 0; - EDEB_EN(7, ""); + ehca_gen_dbg("uverbs_sys_path=%s", uverbs_sys_path); if (ibv_read_sysfs_file(uverbs_sys_path, "device/name", value, sizeof value) < 0) return NULL; - if (strcmp("lhca", str_strip(value)) != 0) + if (strcmp("lhca", str_strip(value))) return NULL; if 
(ibv_read_sysfs_file(uverbs_sys_path, "device/num_ports", @@ -206,9 +210,8 @@ static struct ibv_device *ehca_driver_in sscanf(value, "%i", &num_ports); - if (num_ports < 1) { + if (num_ports < 1) return NULL; - } /* here we've got our dev */ my_dev = malloc(sizeof *my_dev); @@ -220,11 +223,19 @@ static struct ibv_device *ehca_driver_in my_dev->ibv_dev.ops = ehcau_dev_ops; - EDEB_EX(7, "ehcau_device=%p", my_dev); + ehca_gen_dbg("ehcau_device=%p", my_dev); return &my_dev->ibv_dev; } +/* + * Export the old libsysfs sysfs_class_device-based driver entry point + * if libsysfs headers are installed. It doesn't hurt to export it, + * even if libibverbs is new enough not to use it; but if libsysfs + * headers are not installed, we can assume that the version of + * libibverbs we are building against is new enough not to use + * openib_driver_init(). + */ #ifndef HAVE_IBV_REGISTER_DRIVER struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { @@ -237,98 +248,17 @@ struct ibv_device *openib_driver_init(st return ehca_driver_init(sysdev->path, abi_ver); } -#endif /* HAVE_IBV_REGISTER_DRIVER */ +#endif -/** @brief module initialization - */ -int libehca_trlevel = 5; -FILE *libehca_fh = NULL; -#define LIBEHCA_DEFAULT_CFGFILE "/usr/local/libehca/etc/libehca.conf" -#define CFG_TOKEN_TRLEVEL "log.trlevel" -#define CFG_TOKEN_FILENAME "log.filename" +int libehca_trace_on = 0; /* default is no debug */ +#define CFG_VAR_TRACE_ON "LIBEHCA_TRACE_ON" void __attribute__ ((constructor)) ehcau_init(void) { - char *cfgfilename = LIBEHCA_DEFAULT_CFGFILE; - FILE *cfg_fh = NULL; - char linebuf[1024]; - char *cur_line = NULL; - char *logfilename = NULL; - - cfg_fh = fopen(cfgfilename, "r"); - if (cfg_fh == NULL) { - libehca_trlevel = -1; - fprintf(stderr, "Could not read config file \"%s\"\n", - cfgfilename); - return; - } - /* parse for log cfg entries per line: - "log.trlevel " - "log.filename " - lines starting with # will be ignored - */ - while ((cur_line=fgets(linebuf, 
sizeof(linebuf)-1, cfg_fh))!=NULL) { - /* strip white spaces, LF. CR */ - cur_line = str_strip(cur_line); - if (*cur_line==0 || *cur_line=='#') { - continue; - } - /* parse for token and value */ - if (strstr(cur_line, CFG_TOKEN_TRLEVEL)==cur_line) { - char *trlevel=cur_line+strlen(CFG_TOKEN_TRLEVEL); - if (*trlevel!=' ' && *trlevel!='\t') { - fprintf(stderr, "Invalid token \"%s\"\n", - cur_line); - continue; - } - trlevel=str_strip(trlevel); - if (*trlevel!=0 && *trlevel>='0' && *trlevel<='9') { - libehca_trlevel=(*trlevel)-'0'; - } else { - fprintf(stderr, "Invalid trace level \"%s\"\n", - cur_line); - } - } else if (strstr(cur_line, CFG_TOKEN_FILENAME)==cur_line) { - char *filename=cur_line+strlen(CFG_TOKEN_FILENAME); - if (*filename!=' ' && *filename!='\t') { - fprintf(stderr, "Invalid token \"%s\"\n", - cur_line); - continue; - } - filename=str_strip(filename); - if (*filename==0) { - continue; - } - logfilename=malloc(strlen(filename)+1); - if (logfilename==NULL) { - fprintf(stderr, "Out of memory error"); - exit(-ENOMEM); - } - strcpy(logfilename, filename); - } else { /* invalid token */ - fprintf(stderr, "Invalid token \"%s\"\n", cur_line); - } - } /* eof while current_line */ - fclose(cfg_fh); - /* open logfile if given and found */ - if (logfilename!=NULL) { - if (strcmp(logfilename, "STDOUT")==0) { - libehca_fh = stdout; - } else if (strcmp(logfilename, "STDERR")==0) { - libehca_fh = stderr; - } else { - libehca_fh = fopen(logfilename, "a"); - if (libehca_fh == NULL) { - fprintf(stderr, "Could not open " - "logfile \"%s\"\n", logfilename); - } - } - free(logfilename); - } - if (libehca_fh==NULL) { - libehca_trlevel = -1; - } else { - fprintf(libehca_fh, "tracelevel is:%i\n", libehca_trlevel); + char *value = getenv(CFG_VAR_TRACE_ON); + if (value) { + value = str_strip(value); + libehca_trace_on = (*value) - '0'; } #ifdef HAVE_IBV_REGISTER_DRIVER diff -Nurp libehca_old/src/ehca_umain.c libehca_new/src/ehca_umain.c --- libehca_old/src/ehca_umain.c 
2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_umain.c 2007-01-26 14:27:43.000000000 +0100 @@ -40,7 +40,6 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: ehca_umain.c,v 1.14 2006/06/07 10:38:00 nguyen Exp $ */ #include @@ -48,7 +47,7 @@ #include "ehca_utools.h" #include "ehca_everbs.h" #include "ipzu_pt_fn.h" - +#include "hipz_hw.h" #include #include @@ -57,49 +56,50 @@ #include #include +#define EHCA_PAGESIZE 4096 + static int cq_assign_qp(struct ehcau_cq *cq, struct ehcau_qp *qp) { unsigned int qp_num = qp->real_qp_num; - unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned int key = qp_num & (QP_HASHTAB_LEN - 1); ehcau_lock(&cq->lockvar); - LIST_INSERT_HEAD(&cq->qp_hashtab[key], - qp, list_entries); + LIST_INSERT_HEAD(&cq->qp_hashtab[key], qp, list_entries); ehcau_unlock(&cq->lockvar); - EDEB(7, "cq_num=%x real_qp_num=%x", cq->cq_number, qp_num); + ehca_gen_dbg("cq_num=%x real_qp_num=%x", cq->cq_number, qp_num); return 0; } static int cq_unassign_qp(struct ehcau_cq *cq, unsigned int real_qp_num) { int ret = -EINVAL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct ehcau_qp *qp = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN - 1); + struct ehcau_qp *qp; ehcau_lock(&cq->lockvar); - for (qp = cq->qp_hashtab[key].lh_first; - qp!=NULL; qp = qp->list_entries.le_next) { + for (qp = cq->qp_hashtab[key].lh_first; qp; + qp = qp->list_entries.le_next) { if (qp->real_qp_num == real_qp_num) { LIST_REMOVE(qp, list_entries); - EDEB(7, "removed qp from cq .cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); + ehca_gen_dbg("removed qp from cq .cq_num=%x " + "real_qp_num=%x", + cq->cq_number, real_qp_num); ret = 0; break; } } ehcau_unlock(&cq->lockvar); - if (ret!=0) { - EDEB_ERR(4, "qp not found cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - } + if (ret) + ehca_gen_err("qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); 
return ret; } struct ehcau_qp* ehca_cq_get_qp(struct ehcau_cq *cq, int real_qp_num) { struct ehcau_qp *ret = NULL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct ehcau_qp *qp = NULL; - for (qp = cq->qp_hashtab[key].lh_first; - qp!=NULL; qp = qp->list_entries.le_next) { + unsigned int key = real_qp_num & (QP_HASHTAB_LEN - 1); + struct ehcau_qp *qp; + for (qp = cq->qp_hashtab[key].lh_first; qp; + qp = qp->list_entries.le_next) { if (qp->real_qp_num == real_qp_num) { ret = qp; break; @@ -111,59 +111,45 @@ struct ehcau_qp* ehca_cq_get_qp(struct e int ehcau_query_device(struct ibv_context *context, struct ibv_device_attr *device_attr) { - int ret = 0; + int ret; struct ibv_query_device cmd; uint64_t raw_fw_ver; - EDEB_EN(7, "context=%p", context); - memset(&cmd, 0, sizeof(cmd)); ret = ibv_cmd_query_device(context, device_attr, &raw_fw_ver, &cmd, sizeof(cmd)); + if (ret) + ehca_gen_err("ibv_cmd_query_device() failed, ret=%x", ret); - if (ret) { - EDEB_ERR(4, "ibv_cmd_query_device() failed, ret=%x", ret); - } - - EDEB_EX(7, "context=%p", context); return ret; } int ehcau_query_port(struct ibv_context *context, uint8_t port, struct ibv_port_attr *attr) { - int ret = 0; + int ret; struct ibv_query_port cmd; - EDEB_EN(7, "context=%p port=%x", context, port); - memset(&cmd, 0, sizeof(cmd)); ret = ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); + if (ret) + ehca_gen_err("ibv_cmd_query_port failed ret=%x context=%p " + "port=%x", ret, context, port); - if (ret) { - EDEB_ERR(4, - "ibv_cmd_query_port failed ret=%x context=%p port=%x", - ret, context, port); - } - - EDEB_EX(7, "ret=%x context=%p port=%x", ret, context, port); return ret; - } struct ibv_pd *ehcau_alloc_pd(struct ibv_context *context) { - struct ibv_pd *pd = NULL; + struct ibv_pd *pd; struct ibv_alloc_pd cmd; struct ibv_alloc_pd_resp resp; - int ret = 0; - - EDEB_EN(7, "context=%p", context); + int ret; pd = malloc(sizeof(*pd)); - if (pd == NULL) { - EDEB_ERR(4, "Out of memory to alloc 
ehcau_pd " - "context=%p", context); + if (!pd) { + ehca_gen_err("Out of memory to alloc ehcau_pd " + "context=%p", context); return NULL; } @@ -173,31 +159,29 @@ struct ibv_pd *ehcau_alloc_pd(struct ibv ret = ibv_cmd_alloc_pd(context, pd, &cmd, sizeof(cmd), &resp, sizeof(resp)); - if (ret != 0) { - EDEB_ERR(4, "ibv_cmd_alloc_pd() failed ret=%x context=%p", - ret, context); + if (ret) { + ehca_err(context->device, "ibv_cmd_alloc_pd() failed ret=%x " + "context=%p", ret, context); free(pd); return NULL; } - EDEB_EX(7, "context=%p ret=ehcau_pd=%p", context, pd); - return (pd); + return pd; } int ehcau_dealloc_pd(struct ibv_pd *pd) { - int ret = 0; + int ret; - EHCA_CHECK_ADR(pd); - EDEB_EN(7, "pd=%p", pd); ret = ibv_cmd_dealloc_pd(pd); if (ret) { - EDEB_ERR(4, "ibv_cmd_dealloc_pd failed ret=%x pd=%p", ret, pd); + ehca_err(pd->context->device, + "ibv_cmd_dealloc_pd failed ret=%x pd=%p", ret, pd); return ret; } free(pd); - EDEB_EX(7, "pd=%p", pd); - return (0); + + return 0; } struct ibv_cq *ehcau_create_cq(struct ibv_context *context, int cqe, @@ -205,14 +189,14 @@ struct ibv_cq *ehcau_create_cq(struct ib { struct ibv_create_cq cmd; struct ehcau_create_cq_resp resp; - struct ehcau_cq *my_cq = NULL; - int ret = 0; + struct ehcau_cq *my_cq; + int ret; int i; - EDEB_EN(7, "context=%p cqe=%x", context, cqe); my_cq = malloc(sizeof(*my_cq)); if (!my_cq) { - EDEB_ERR(4, "Out of memory context=%p cqe=%x", context, cqe); + ehca_err(context->device, "Out of memory context=%p cqe=%x", + context, cqe); return NULL; } @@ -223,83 +207,119 @@ struct ibv_cq *ehcau_create_cq(struct ib &cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); if (ret) { - EDEB_ERR(4, "ibv_cmd_create_cq() failed " + ehca_err(context->device, "ibv_cmd_create_cq() failed " "ret=%x context=%p cqe=%x", ret, context, cqe); goto create_cq_exit0; } - for (i=0; iqp_hashtab[i]); - } /* copy data returned from kernel */ my_cq->cq_number = resp.cq_number; my_cq->token = resp.token; - /* right most cast is required to 
avoid gcc warning in 32 bit mode */ - my_cq->ipz_queue.queue = (u8*)(long)resp.ipz_queue.queue; - my_cq->ipz_queue.current_q_addr = (u8*)(long)resp.ipz_queue.queue; + my_cq->ipz_queue.queue = mmap64(NULL, resp.ipz_queue.queue_length, + PROT_READ | PROT_WRITE, MAP_SHARED, + context->cmd_fd, + ((u64)my_cq->token << 32) | 0x12000000); + if (!my_cq->ipz_queue.queue) { + ehca_err(context->device, "mmap64() failed cq_num=%x", + my_cq->cq_number); + goto create_cq_exit1; + } + my_cq->ipz_queue.current_q_addr = my_cq->ipz_queue.queue; my_cq->ipz_queue.qe_size = resp.ipz_queue.qe_size; my_cq->ipz_queue.act_nr_of_sg = resp.ipz_queue.act_nr_of_sg; my_cq->ipz_queue.queue_length = resp.ipz_queue.queue_length; my_cq->ipz_queue.pagesize = resp.ipz_queue.pagesize; my_cq->ipz_queue.toggle_state = resp.ipz_queue.toggle_state; - my_cq->galpas = resp.galpas; + /* right most cast is required to avoid gcc warning in 32 bit mode */ + my_cq->galpas.kernel.fw_handle = (u64)(unsigned long) + mmap64(NULL, EHCA_PAGESIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, + context->cmd_fd, + ((u64)my_cq->token << 32) | 0x11000000); + if (!my_cq->galpas.kernel.fw_handle) { + ehca_err(context->device, "mmap64() failed cq_num=%x", + my_cq->cq_number); + goto create_cq_exit2; + } /* access queue mem to fill page cache */ - memset(my_cq->ipz_queue.queue, 0, - my_cq->ipz_queue.queue_length); + memset(my_cq->ipz_queue.queue, 0, my_cq->ipz_queue.queue_length); - EDEB_EX(7, "ehcau_cq=%p cqn=%x token=%x " - "ipz_queue.galpa=%p ipz_queue.adr=%p", my_cq, - my_cq->cq_number, my_cq->token, - (u64 *) (unsigned long)my_cq->galpas.kernel.fw_handle, - (u64 *) my_cq->ipz_queue.queue); + ehca_dbg(context->device, "ehcau_cq=%p cqn=%x token=%x " + "ipz_queue.galpa=%p ipz_queue.adr=%p", my_cq, + my_cq->cq_number, my_cq->token, + (u64*)(unsigned long)my_cq->galpas.kernel.fw_handle, + (u64*)my_cq->ipz_queue.queue); return &my_cq->ib_cq; - create_cq_exit0: - EDEB_EX(4, "An error has occured context=%p cqe=%x", - context, cqe); 
+create_cq_exit2: + ret = munmap(my_cq->ipz_queue.queue, my_cq->ipz_queue.queue_length); + if (ret) + ehca_err(context->device, "munmap() failed rc=%x cq_num=%x queue=%p", + ret, my_cq->cq_number, my_cq->ipz_queue.queue); + +create_cq_exit1: + ret = ibv_cmd_destroy_cq(&my_cq->ib_cq); + if (ret) + ehca_err(context->device, "ibv_cmd_destroy_cq() failed " + "ret=%x ehcau_cq=%p cq_num=%x", + ret, my_cq, my_cq->cq_number); + +create_cq_exit0: + ehca_err(context->device, "An error has occured context=%p cqe=%x", + context, cqe); free(my_cq); return NULL; } int ehcau_destroy_cq(struct ibv_cq *cq) { - struct ehcau_cq *my_cq = NULL; - int cq_num = 0; - int ret = 0; - EHCA_CHECK_ADR(cq); + struct ehcau_cq *my_cq; + int cq_num; + int ret; + my_cq = container_of(cq, struct ehcau_cq, ib_cq); cq_num = my_cq->cq_number; - EDEB_EN(7, "ehcau_cq=%p cq_num=%x", my_cq, cq_num); + ret = munmap(my_cq->ipz_queue.queue, my_cq->ipz_queue.queue_length); + if (ret) { + ehca_err(cq->context->device, "munmap() failed rc=%x cq_num=%x " + "queue=%p", ret, cq_num, my_cq->ipz_queue.queue); + return ret; + } + ret = munmap((void*)(unsigned long)my_cq->galpas.kernel.fw_handle, EHCA_PAGESIZE); + if (ret) { + ehca_err(cq->context->device, "munmap() failed rc=%x cq_num=%x " + "fw_handle=%p", ret, cq_num, + (void*)(unsigned long)my_cq->galpas.kernel.fw_handle); + return ret; + } ret = ibv_cmd_destroy_cq(cq); if (ret) { - EDEB_ERR(4, "ibv_cmd_destroy_cq() failed ret=%x " - "ehcau_cq=%p cq_num=%x", ret, my_cq, cq_num); + ehca_err(cq->context->device, "ibv_cmd_destroy_cq() failed " + "ret=%x ehcau_cq=%p cq_num=%x", ret, my_cq, cq_num); return ret; } free(my_cq); - EDEB_EX(7, "ehcau_cq=%p cq_num=%x", my_cq, cq_num); + ehca_gen_dbg("ehcau_cq=%p cq_num=%x", my_cq, cq_num); return 0; } struct ibv_qp *ehcau_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) { - int ret = 0; - struct ehcau_qp *my_qp = NULL; + int ret; + struct ehcau_qp *my_qp; struct ibv_create_qp cmd; struct ehcau_create_qp_resp resp; 
- struct ibv_context *context = NULL; - int ret2 = 0; + struct ibv_context *context; - EHCA_CHECK_ADR_P(pd); - EHCA_CHECK_ADR_P(pd->context); context = pd->context; - EDEB_EN(7, "pd=%p attr=%p", pd, attr); - my_qp = malloc(sizeof(*my_qp)); if (!my_qp) { - EDEB_ERR(4, "Out of memory to alloc qp pd=%p", pd); + ehca_err(pd->context->device, "Out of memory to alloc qp pd=%p", + pd); return NULL; } @@ -311,9 +331,9 @@ struct ibv_qp *ehcau_create_qp(struct ib &cmd, sizeof(cmd), &resp.ibv_resp, sizeof resp); - if (ret != 0) { - EDEB_ERR(4, "ibv_cmd_create_qp() failed ret=%x pd=%p", - ret, pd); + if (ret) { + ehca_err(pd->context->device, "ibv_cmd_create_qp() failed " + "ret=%x pd=%p", ret, pd); goto create_qp_exit0; } /* copy data returned from kernel */ @@ -323,22 +343,48 @@ struct ibv_qp *ehcau_create_qp(struct ib my_qp->qkey = resp.qkey; my_qp->real_qp_num = resp.real_qp_num; /* rqueue properties */ - my_qp->ipz_rqueue.queue = (u8*)(long)resp.ipz_rqueue.queue; - my_qp->ipz_rqueue.current_q_addr = (u8*)(long)resp.ipz_rqueue.queue; + my_qp->ipz_rqueue.queue = mmap64(NULL, resp.ipz_rqueue.queue_length, + PROT_READ | PROT_WRITE, MAP_SHARED, + context->cmd_fd, + ((u64)my_qp->token << 32) | 0x22000000); + if (!my_qp->ipz_rqueue.queue) { + ehca_err(pd->context->device, "mmap64() failed qp_num=%x", + my_qp->qp_num); + goto create_qp_exit1; + } + my_qp->ipz_rqueue.current_q_addr = my_qp->ipz_rqueue.queue; my_qp->ipz_rqueue.qe_size = resp.ipz_rqueue.qe_size; my_qp->ipz_rqueue.act_nr_of_sg = resp.ipz_rqueue.act_nr_of_sg; my_qp->ipz_rqueue.queue_length = resp.ipz_rqueue.queue_length; my_qp->ipz_rqueue.pagesize = resp.ipz_rqueue.pagesize; my_qp->ipz_rqueue.toggle_state = resp.ipz_rqueue.toggle_state; /* squeue properties */ - my_qp->ipz_squeue.queue = (u8*)(long)resp.ipz_squeue.queue; - my_qp->ipz_squeue.current_q_addr = (u8*)(long)resp.ipz_squeue.queue; + my_qp->ipz_squeue.queue = mmap64(NULL, resp.ipz_squeue.queue_length, + PROT_READ | PROT_WRITE, MAP_SHARED, + context->cmd_fd, + 
((u64)my_qp->token << 32) | 0x23000000); + if (!my_qp->ipz_squeue.queue) { + ehca_err(pd->context->device, "mmap64() failed qp_num=%x", + my_qp->qp_num); + goto create_qp_exit2; + } + my_qp->ipz_squeue.current_q_addr = my_qp->ipz_squeue.queue; my_qp->ipz_squeue.qe_size = resp.ipz_squeue.qe_size; my_qp->ipz_squeue.act_nr_of_sg = resp.ipz_squeue.act_nr_of_sg; my_qp->ipz_squeue.queue_length = resp.ipz_squeue.queue_length; my_qp->ipz_squeue.pagesize = resp.ipz_squeue.pagesize; my_qp->ipz_squeue.toggle_state = resp.ipz_squeue.toggle_state; - my_qp->galpas = resp.galpas; + /* right most cast is required to avoid gcc warning in 32 bit mode */ + my_qp->galpas.kernel.fw_handle = (u64)(unsigned long) + mmap64(NULL, EHCA_PAGESIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, + context->cmd_fd, + ((u64)my_qp->token << 32) | 0x21000000); + if (!my_qp->galpas.kernel.fw_handle) { + ehca_err(pd->context->device, "mmap64() failed qp_num=%x", + my_qp->qp_num); + goto create_qp_exit3; + } /* access queue mem to fill page cache */ memset(my_qp->ipz_squeue.queue, 0, @@ -346,33 +392,53 @@ struct ibv_qp *ehcau_create_qp(struct ib memset(my_qp->ipz_rqueue.queue, 0, my_qp->ipz_rqueue.queue_length); - if (attr->send_cq!=NULL) { - struct ehcau_cq *cq = container_of(attr->send_cq, struct ehcau_cq, ib_cq); + if (attr->send_cq) { + struct ehcau_cq *cq = container_of(attr->send_cq, + struct ehcau_cq, ib_cq); ret = cq_assign_qp(cq, my_qp); - if (ret !=0) { - EDEB_ERR(4, "Couldn't assign qp to send_cq ret=%x", ret); - goto create_qp_exit1; + if (ret) { + ehca_err(pd->context->device, + "Couldn't assign qp to send_cq ret=%x", ret); + goto create_qp_exit4; } my_qp->send_cq = cq; } - EDEB_EX(7, "ehcau_qp=%p " - "ipz_queue.galpa=%p ipz_rqueue.adr=%p ipz_squeue.adr=%p", - my_qp, - (u64 *) (unsigned long)my_qp->galpas.kernel.fw_handle, - (u64 *) my_qp->ipz_rqueue.queue, - (u64 *) my_qp->ipz_squeue.queue); + ehca_dbg(pd->context->device, "ehcau_qp=%p " + "ipz_queue.galpa=%p ipz_rqueue.adr=%p ipz_squeue.adr=%p", 
+ my_qp, + (u64*)(unsigned long)my_qp->galpas.kernel.fw_handle, + (u64*)my_qp->ipz_rqueue.queue, + (u64*)my_qp->ipz_squeue.queue); return &my_qp->ib_qp; - create_qp_exit1: - ret2 = ibv_cmd_destroy_qp(&my_qp->ib_qp); - if (ret2) { - EDEB_ERR(4, "ibv_cmd_destroy_qp() failed ret=%x " - "qp=%p qp_num=%x", ret2, my_qp, my_qp->qp_num); - } - create_qp_exit0: - EDEB_EX(4, "An error has occured pd=%p", pd); +create_qp_exit4: + ret = munmap((void*)(unsigned long)my_qp->galpas.kernel.fw_handle, EHCA_PAGESIZE); + if (ret) + ehca_err(pd->context->device, "munmap() failed rc=%x qp_num=%x " + "fw_handle=%p", ret, my_qp->qp_num, + (void*)(unsigned long)my_qp->galpas.kernel.fw_handle); + +create_qp_exit3: + ret = munmap(my_qp->ipz_squeue.queue, my_qp->ipz_squeue.queue_length); + if (ret) + ehca_err(pd->context->device, "munmap() failed rc=%x qp_num=%x " + "squeue=%p", ret, my_qp->qp_num, my_qp->ipz_squeue.queue); + +create_qp_exit2: + ret = munmap(my_qp->ipz_rqueue.queue, my_qp->ipz_rqueue.queue_length); + if (ret) + ehca_err(pd->context->device, "munmap() failed rc=%x qp_num=%x " + "rqueue=%p", ret, my_qp->qp_num, my_qp->ipz_rqueue.queue); + +create_qp_exit1: + ret = ibv_cmd_destroy_qp(&my_qp->ib_qp); + if (ret) + ehca_err(pd->context->device, "ibv_cmd_destroy_qp() failed " + "ret=%x qp=%p qp_num=%x", ret, my_qp, my_qp->qp_num); + +create_qp_exit0: free(my_qp); return NULL; } @@ -380,23 +446,17 @@ struct ibv_qp *ehcau_create_qp(struct ib int ehcau_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, enum ibv_qp_attr_mask attr_mask) { - int ret = 0; + int ret; struct ibv_modify_qp cmd; - struct ehcau_qp *my_qp = NULL; + struct ehcau_qp *my_qp; struct ehca_wqe *wqe = NULL; int sq_locked = 0; - EHCA_CHECK_ADR(qp); - EHCA_CHECK_ADR(attr); - if (attr_mask == 0) { /* nothing to modify */ - return ret; - } + if (!attr_mask) /* nothing to modify */ + return 0; my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EDEB_EN(7, "qp=%p qp_num=%x attr=%p attr_mask=%x", - qp, my_qp->qp_num, 
attr, attr_mask); - - if ((attr_mask & IBV_QP_STATE)!=0 && attr->qp_state==IBV_QPS_RTS) { + if ((attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RTS) { unsigned int qp_type = -1; qp_type = my_qp->qp_type; if (IBV_QPT_UD == qp_type) { @@ -407,16 +467,17 @@ int ehcau_modify_qp(struct ibv_qp *qp, s wqe=(struct ehca_wqe*) my_qp->ipz_squeue.current_q_addr; wqe->optype = wqe->wqef = 0xff; - EDEB(7, "qp_num=%x next_free_wqe=%p", - my_qp->qp_num, wqe); + ehca_dbg(qp->context->device, + "qp_num=%x next_free_wqe=%p", + my_qp->qp_num, wqe); } } ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); - if (ret) { - EDEB_ERR(4, "ibv_cmd_modify_qp() failed ret=%x " - "qp=%p qp_num=%x", ret, qp, my_qp->qp_num); - } else if (attr_mask & IBV_QP_STATE) { + if (ret) + ehca_err(qp->context->device, "ibv_cmd_modify_qp() failed " + "ret=%x qp=%p qp_num=%x", ret, qp, my_qp->qp_num); + else if (attr_mask & IBV_QP_STATE) { switch (attr->qp_state) { case IBV_QPS_RESET: /* reset s/r queue pointers */ @@ -424,77 +485,85 @@ int ehcau_modify_qp(struct ibv_qp *qp, s ipzu_qeit_reset(&my_qp->ipz_squeue); break; case IBV_QPS_RTS: - if (sq_locked && wqe!=NULL) { + if (sq_locked && wqe) my_qp->sqerr_purgeflag = ~wqe->wqef; - } break; default: /* nothing to do */ break; } } - if (sq_locked) { /* unlock send queue */ + if (sq_locked) /* unlock send queue */ ehcau_unlock(&my_qp->lockvar_s); - } - if (attr_mask & IBV_QP_QKEY) { + if (attr_mask & IBV_QP_QKEY) my_qp->qkey = attr->qkey; - } - EDEB_EX(7, "ret=%x qp=%p qp_num=%x", ret, qp, my_qp->qp_num); return ret; } int ehcau_destroy_qp(struct ibv_qp *qp) { - int ret = 0; - struct ehcau_qp *my_qp = NULL; - u32 qp_num = 0; + int ret; + struct ehcau_qp *my_qp; + u32 qp_num; - EHCA_CHECK_ADR(qp); my_qp = container_of(qp, struct ehcau_qp, ib_qp); qp_num = my_qp->qp_num; - EDEB_EN(7, "qp=%p qp_num=%x", qp, qp_num); - - if (my_qp->send_cq!=NULL) { + if (my_qp->send_cq) { ret = cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); - if (ret !=0) { - 
EDEB_ERR(4, "Couldn't unassign qp from send_cq " + if (ret) { + ehca_err(qp->context->device, + "Couldn't unassign qp from send_cq " "ret=%x real_qp_num=%x cq_num=%x", ret, my_qp->real_qp_num, my_qp->send_cq->cq_number); - goto destroy_qp_exit0; + return ret; } } - ret = ibv_cmd_destroy_qp(qp); + ret = munmap(my_qp->ipz_rqueue.queue, my_qp->ipz_rqueue.queue_length); if (ret) { - EDEB_ERR(4, "ibv_cmd_destroy_qp() failed ret=%x " + ehca_err(qp->context->device, "munmap() failed rc=%x qp_num=%x " + "rqueue=%p", ret, qp_num, my_qp->ipz_rqueue.queue); + return ret; + } + ret = munmap(my_qp->ipz_squeue.queue, my_qp->ipz_squeue.queue_length); + if (ret) { + ehca_err(qp->context->device, "munmap() failed rc=%x qp_num=%x " + "squeue=%p", ret, qp_num, my_qp->ipz_squeue.queue); + return ret; + } + ret = munmap((void*)(unsigned long)my_qp->galpas.kernel.fw_handle, EHCA_PAGESIZE); + if (ret) { + ehca_err(qp->context->device, "munmap() failed rc=%x qp_num=%x " + "fw_handle=%p", ret, qp_num, + (void*)(unsigned long)my_qp->galpas.kernel.fw_handle); + return ret; + } + ret = ibv_cmd_destroy_qp(qp); + if (ret) + ehca_err(qp->context->device, + "ibv_cmd_destroy_qp() failed ret=%x " "qp=%p qp_num=%x", ret, qp, qp_num); - } else { + else { + ehca_gen_dbg("ret=%x qp=%p qp_num=%x", ret, qp, qp_num); free(my_qp); } - destroy_qp_exit0: - EDEB_EX(7, "ret=%x qp=%p qp_num=%x", ret, qp, qp_num); return ret; } struct ibv_ah *ehcau_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) { - struct ehcau_av *my_av = NULL; - - EHCA_CHECK_ADR_P(pd); - EHCA_CHECK_ADR_P(attr); - - EDEB_EN(7, "pd=%p attr=%p", pd, attr); + struct ehcau_av *my_av; my_av = malloc(sizeof *my_av); if (!my_av) { - EDEB_ERR(4, "no address handle"); - return (NULL); + ehca_err(pd->context->device, "no address handle"); + return NULL; } memset(my_av, 0, sizeof(*my_av)); @@ -518,15 +587,15 @@ struct ibv_ah *ehcau_create_ah(struct ib my_av->av.grh.word_0 = be64_to_cpu(my_av->av.grh.word_0); /* set sgid in grh.word_1 */ - if 
(attr->is_global != 0) { + if (attr->is_global) { struct ibv_context *context = pd->context; struct ibv_port_attr port_attr; union ibv_gid gid; - int rc = 0; + int rc; memset(&port_attr, 0, sizeof(port_attr)); rc = ibv_query_port(context, attr->port_num, &port_attr); - if (rc != 0) { /* port number invalid */ - EDEB_ERR(4, "ibv_query_port() failed " + if (rc) { /* port number invalid */ + ehca_err(pd->context->device, "ibv_query_port() failed " "rc=%x context=%p port_num=%x", rc, context, attr->port_num); free(my_av); @@ -536,8 +605,8 @@ struct ibv_ah *ehcau_create_ah(struct ib rc = ibv_query_gid(context, attr->port_num, attr->grh.sgid_index, &gid); - if (rc != 0) { - EDEB_ERR(4, "ibv_query_gid() failed " + if (rc) { + ehca_err(pd->context->device, "ibv_query_gid() failed " "rc=%x context=%p port_num=%x " "sgid_index=%x", rc, context, attr->port_num, @@ -548,129 +617,87 @@ struct ibv_ah *ehcau_create_ah(struct ib memcpy(&my_av->av.grh.word_1, &gid, sizeof(gid)); } - /* see also ehca_av.c - * For the time beeing we use a hard coded PMTU of 2048 Bytes. 
- */ - my_av->av.pmtu = 4; /* TODO */ + /* see also ehca_av.c */ + my_av->av.pmtu = EHCA_MAX_MTU; /* dgid comes in grh.word_3 */ memcpy(&my_av->av.grh.word_3, &attr->grh.dgid, sizeof(attr->grh.dgid)); - EDEB_EX(7, "pd=%p attr=%p my_av=%p", pd, attr, my_av); - - return (&my_av->ib_ah); + return &my_av->ib_ah; } int ehcau_destroy_ah(struct ibv_ah *ah) { - EHCA_CHECK_ADR(ah); - - EDEB_EN(7, "ah=%p", ah); + ehca_dbg(ah->context->device, "ah=%p", ah); free(ah); - EDEB_EX(7, "ah=%p", ah); return 0; } int ehcau_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) { - int ret = 0; - struct ehcau_qp *my_qp = NULL; - u32 qp_num = 0; + int ret; + struct ehcau_qp *my_qp; + u32 qp_num; - EHCA_CHECK_ADR(qp); - EHCA_CHECK_ADR(gid); my_qp = container_of(qp, struct ehcau_qp, ib_qp); - - EHCA_CHECK_QP(my_qp); - qp_num = my_qp->qp_num; - EDEB_EN(7, "qp=%p qp_num=%x", qp, qp_num); - ret = ibv_cmd_attach_mcast(qp, gid, lid); - - if (ret) { - - EDEB_ERR(4, "ehcau_qp=%p qp_num=%x " + if (ret) + ehca_err(qp->context->device, "ehcau_qp=%p qp_num=%x " "ibv_cmd_attach_mcast() failed " "ret=%x", my_qp, my_qp->qp_num, ret); - } - - EDEB_EX(7, "qp=%p qp_num=%x ret=%x", qp, qp_num, ret); return ret; } int ehcau_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) { - int ret = 0; - struct ehcau_qp *my_qp = NULL; - u32 qp_num = 0; + int ret; + struct ehcau_qp *my_qp; + u32 qp_num; - EHCA_CHECK_ADR(qp); - EHCA_CHECK_ADR(gid); my_qp = container_of(qp, struct ehcau_qp, ib_qp); - - EHCA_CHECK_QP(my_qp); - qp_num = my_qp->qp_num; - EDEB_EN(7, "qp=%p qp_num=%x", qp, qp_num); - ret = ibv_cmd_detach_mcast(qp, gid, lid); - - if (ret) { - - EDEB_ERR(4, "ehcau_qp=%p qp_num=%x " + if (ret) + ehca_err(qp->context->device, "ehcau_qp=%p qp_num=%x " "ibv_cmd_detach_mcast() failed " "ret=%x", my_qp, my_qp->qp_num, ret); - } - - EDEB_EX(7, "qp=%p qp_num=%x ret=%x", qp, qp_num, ret); return ret; } int ehcau_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *qp_attr, - enum ibv_qp_attr_mask 
attr_mask, struct ibv_qp_init_attr* init_attr) + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr* init_attr) { - int ret = 0; - struct ehcau_qp *my_qp = NULL; + int ret; + struct ehcau_qp *my_qp; struct ibv_query_qp cmd; - EHCA_CHECK_ADR(qp); - EHCA_CHECK_ADR(qp_attr); - my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EDEB_EN(7, "qp=%p qp_num=%x qp_attr=%p", qp, my_qp->qp_num, qp_attr); - ret = ibv_cmd_query_qp(qp, qp_attr, attr_mask, init_attr, &cmd, sizeof(cmd)); - if (ret) { - EDEB_ERR(4, "ehcau_qp=%p qp_num=%x " + if (ret) + ehca_err(qp->context->device, "ehcau_qp=%p qp_num=%x " "ibv_cmd_query_() failed " "ret=%x", my_qp, my_qp->qp_num, ret); - } - EDEB_EX(7, "ret=%x qp=%p qp_num=%x", ret, qp, my_qp->qp_num); return ret; } int ehcau_query_qp_da(struct ibv_qp *qp, struct ehcau_qp_attr_da *qp_attr) { - int ret = 0; - struct ehcau_qp *my_qp = NULL; + struct ehcau_qp *my_qp; - EHCA_CHECK_ADR(qp); - EHCA_CHECK_ADR(qp_attr); my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EDEB_EN(7, "qp=%p qp_num=%x qp_attr=%p", qp, my_qp->qp_num, qp_attr); - qp_attr->send_wqe_size = my_qp->ipz_squeue.qe_size; qp_attr->send_queue_length = my_qp->ipz_squeue.queue_length; qp_attr->send_queue_ptr = my_qp->ipz_squeue.queue; @@ -678,17 +705,17 @@ int ehcau_query_qp_da(struct ibv_qp *qp, qp_attr->recv_queue_length = my_qp->ipz_rqueue.queue_length; qp_attr->recv_queue_ptr = my_qp->ipz_rqueue.queue; - EDEB_EX(7, "ret=%x qp=%p qp_num=%x " - "send_wqe_size=%x send_queue_size=%lx send_queue_ptr=%p " - "recv_wqe_size=%x recv_queue_size=%lx recv_queue_ptr=%p", - ret, qp, my_qp->qp_num, - qp_attr->send_wqe_size, - (unsigned long)qp_attr->send_queue_length, - qp_attr->send_queue_ptr, - qp_attr->recv_wqe_size, - (unsigned long)qp_attr->recv_queue_length, - qp_attr->recv_queue_ptr); - return ret; + ehca_dbg(qp->context->device, "qp=%p qp_num=%x " + "send_wqe_size=%x send_queue_size=%lx send_queue_ptr=%p " + "recv_wqe_size=%x recv_queue_size=%lx recv_queue_ptr=%p", + qp, 
my_qp->qp_num, + qp_attr->send_wqe_size, + (unsigned long)qp_attr->send_queue_length, + qp_attr->send_queue_ptr, + qp_attr->recv_wqe_size, + (unsigned long)qp_attr->recv_queue_length, + qp_attr->recv_queue_ptr); + return 0; } /* eof ehca_umain.c */ From ossrosch at linux.vnet.ibm.com Fri Jan 26 08:47:54 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Fri, 26 Jan 2007 17:47:54 +0100 Subject: [openib-general] [Patch ofed1.2 2/3]libehca: cleanup and adjust mmap Message-ID: <200701261747.55159.ossrosch@linux.vnet.ibm.com> Signed-off-by: Stefan Roscher --- diff -Nurp libehca_old/src/ehca_utools.h libehca_new/src/ehca_utools.h --- libehca_old/src/ehca_utools.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_utools.h 2007-01-26 14:27:43.000000000 +0100 @@ -1,53 +1,53 @@ -/* +/* * IBM eServer eHCA Infiniband device driver for Linux on POWER - * + * * Userspace functions * * Authors: Khadija Souissi * Christoph Raisch * * Copyright (c) 2005 IBM Corporation - * + * * All rights reserved. * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. * * OpenIB BSD License * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
* - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation * and/or other materials - * provided with the distribution. + * provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: ehca_utools.h,v 1.2 2006/02/21 17:15:47 nguyen Exp $ */ - + #ifndef __EHCA_UTOOL_H__ #define __EHCA_UTOOL_H__ #include +#include #include -#include +#include #define u64 __u64 #define u32 __u32 #define u16 __u16 @@ -55,41 +55,38 @@ #define unlikely(x) __builtin_expect(!!(x), 0) -/* defines for EDEBs */ -extern int libehca_trlevel; -extern FILE *libehca_fh; +/* defines for EHCA traces */ +extern int libehca_trace_on; /* checks if debug is on for the given level - * caller's module must have this decl: extern int libehca_trlevel; + * caller's module must have this decl: extern int libehca_trace_on; */ -#ifdef EDEB_NO_TRACE -#define IS_EDEB_ON(level) (1==0) -#define EDEB_GENERIC(level,idstring,format,args...) \ -while (1==0) { \ - \ - fprintf(libehca_fh,"%s " idstring "%p "format "\n", __func__, ##args); \ -} -#else /* EDEB_NO_TRACE */ -#define IS_EDEB_ON(level) (level<=libehca_trlevel) +#define ehca_dbg(ibv_dev, format, arg...) \ + do { \ + if (unlikely(libehca_trace_on)) \ + fprintf(stderr, "PID%04x %s EHCA_DBG:%s " format "\n", \ + getpid(), (ibv_dev)->name, __func__, ##arg); \ + } while (0) + +#define ehca_err(ibv_dev, format, arg...) \ + fprintf(stderr, "PID%04x %s EHCA_ERR:%s " format "\n", \ + getpid(), (ibv_dev)->name, __func__, ##arg) + +/* use this one only if no ibv_dev available */ +#define ehca_gen_dbg(format, arg...) \ + do { \ + if (unlikely(libehca_trace_on)) \ + fprintf(stderr, "PID%04x EHCA_DBG:%s " format "\n", \ + getpid(), __func__, ##arg); \ + } while (0) + +#define ehca_gen_err(format, arg...) \ + fprintf(stderr, "PID%04x EHCA_ERR:%s " format "\n", \ + getpid(), __func__, ##arg) -#define EDEB_GENERIC(level,idstring,format,args...) \ -do { \ - if (unlikely(level<=libehca_trlevel))\ - fprintf(libehca_fh,"%s " idstring " "format "\n", __func__, ##args); \ -} while (1==0) -#endif /* EDEB_NO_TRACE */ - -#define EDEB(level,format,args...) \ - EDEB_GENERIC(level,"",format,##args) -#define EDEB_EN(level,format,args...) 
\ - EDEB_GENERIC(level,">>>",format,##args) -#define EDEB_EX(level,format,args...) \ - EDEB_GENERIC(level,"<<<",format,##args) -#define EDEB_ERR(level,format,args...) \ - EDEB_GENERIC(level,"HCAD_ERROR ",format,##args) /** - * EDEB macro to dump a memory block, whose length is n*8 bytes. + * EHCA macro to dump a memory block, whose length is n*8 bytes. * Each line has the following layout: * adr=X ofs=Y <8 bytes hex> <8 bytes hex> */ @@ -98,92 +95,56 @@ do { \ #else #define FORMAT_2U64 "%016lx %016lx" #endif -#define EDEB_DMP(level,adr,len,format,args...) \ - do { \ +#define ehca_dmp_dbg(adr, len, format, args...) \ +do { \ + if (unlikely(libehca_trace_on)) { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char*)(adr); \ + for (x = 0; x < l; x += 16) { \ + ehca_gen_dbg(format " adr=%p ofs=%04x " FORMAT_2U64, \ + ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } \ +} while (0) + +#define ehca_dmp_err(adr, len, format, args...) \ + do { \ unsigned int x; \ unsigned int l = (unsigned int)(len); \ unsigned char *deb = (unsigned char*)(adr); \ for (x = 0; x < l; x += 16) { \ - EDEB(level, format " adr=%p ofs=%04x " FORMAT_2U64, \ + ehca_gen_err(format " adr=%p ofs=%04x " FORMAT_2U64, \ ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ deb += 16; \ } \ - } while (0) + } while (0) -/* define a bitmask, little endian version */ -#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) /* define a bitmask, the ibm way... 
*/ -#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) +#define EHCA_BMASK_IBM(from, to) (((63-to)<<16)+((to)-(from)+1)) /* internal function, don't use */ #define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) /* internal function, don't use */ #define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) -/** return value shifted and masked by mask\n - variable|=HCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable\n - variable&=~HCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask +/** return value shifted and masked by mask + variable|=HCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + variable&=~HCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask in variable */ -#define EHCA_BMASK_SET(mask,value) \ +#define EHCA_BMASK_SET(mask, value) \ ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<MEMBER) +#endif #define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -#define PARANOIA_MODE -#ifdef PARANOIA_MODE -#define EHCA_CHECK_ADR_P(adr) \ - if (adr==0) {\ - EDEB_ERR(4, "adr=%p check failed line %i", adr,__LINE__); \ - return ERR_PTR(-EFAULT); } - -#define EHCA_CHECK_ADR(adr) \ - if (adr==0) {\ - EDEB_ERR(4, "adr=%p check failed line %i", adr,__LINE__); \ - return -EFAULT; } - -#define EHCA_CHECK_CQ(cq) \ - if (cq==0) {\ - EDEB_ERR(4, "cq=%p check failed", cq); \ - return -EFAULT; } - -#define EHCA_CHECK_CQ_P(cq) \ - if (cq==0) {\ - EDEB_ERR(4, "cq=%p check failed", cq); \ - return ERR_PTR(-EFAULT); } - -#define EHCA_CHECK_QP(qp) \ - if (qp==0) {\ - EDEB_ERR(4, "qp=%p check failed", qp); \ - return -EFAULT; } - -#define EHCA_CHECK_QP_P(qp) \ - if (qp==0) {\ - EDEB_ERR(4, "qp=%p check failed", qp); \ - return ERR_PTR(-EFAULT); } - -#else -#define EHCA_CHECK_ADR_P(adr) - -#define EHCA_CHECK_ADR(adr) + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member));}) -#define 
EHCA_CHECK_CQ(cq) - -#define EHCA_CHECK_CQ_P(cq) - -#define EHCA_CHECK_QP(qp) - -#define EHCA_CHECK_QP_P(qp) -#endif #define be64_to_cpu(x) (x) -#define mftb() ({ unsigned long rval; \ - asm volatile("mftb %0" : "=r" (rval)); rval; }) - -#define asm_sync_mem() __asm__ __volatile__ ("sync" : : : "memory") - #endif /* __EHCA_UTOOL_H__ */ diff -Nurp libehca_old/src/hcp_phyp.c libehca_new/src/hcp_phyp.c --- libehca_old/src/hcp_phyp.c 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/hcp_phyp.c 2007-01-26 14:27:43.000000000 +0100 @@ -1,74 +1,60 @@ -/* +/* * IBM eServer eHCA Infiniband device driver for Linux on POWER - * + * * load store abstraction for ehca register access * - * Authors: Christoph Raisch + * Authors: Christoph Raisch * * Copyright (c) 2005 IBM Corporation - * + * * All rights reserved. * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. * * OpenIB BSD License * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation * and/or other materials - * provided with the distribution. 
+ * provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * - * $Id: hcp_phyp.c,v 1.1 2006/02/22 12:26:55 nguyen Exp $ */ - -#define DEB_PREFIX "PHYP" -#ifdef __KERNEL__ -#include "ehca_kernel.h" -#include "hipz_hw.h" -/* #include "hipz_structs.h" / * TODO: still necessary */ -#include "ehca_classes_pSeries.h" -#else /* !__KERNEL__ */ #include "ehca_utools.h" #include "ehca_galpa.h" -#endif - -#ifndef EHCA_USERDRIVER /* TODO: is this correct */ #ifdef __PPC64__ u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) { u64 addr = galpa.fw_handle + offset; - EDEB_EN(7, "addr=%lx offset=%x ", addr, offset); + ehca_gen_dbg("addr=%lx offset=%x ", addr, offset); u64 out = *(u64 *) addr; - EDEB_EX(7, "addr=%lx value=%lx", addr, out); + ehca_gen_dbg("addr=%lx value=%lx", addr, out); return out; }; void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) { u64 addr = galpa.fw_handle + offset; - EDEB(7, "addr=%lx offset=%x value=%lx", addr, - offset, value); + ehca_gen_dbg("addr=%lx offset=%x value=%lx", addr, offset, value); *(u64 *) addr = value; #ifdef EHCA_USE_HCALL /* hipz_galpa_load(galpa, offset);*/ @@ -76,9 +62,9 @@ void hipz_galpa_store(struct h_galpa gal }; #else /* ___PPC64__ the only way to do 8 byte store in 32 bit mode ?*/ -inline static void load64(u64 * from, u64 *to) +inline static void load64(u64 *from, u64 *to) { - unsigned long temp=0; + unsigned long temp = 0; asm("ld %2,0(%0)\n\t" "std %2,0(%1)":/*no output*/:"r"(from),"r"(to),"r" (temp):"r0"); } @@ -86,34 +72,34 @@ inline static void load64(u64 * from, u6 u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) { void * addr = ((void*)(unsigned long)galpa.fw_handle) + offset; - EDEB_EN(7, "addr=%p offset=%x ", addr, offset); + ehca_gen_dbg("addr=%p offset=%x ", addr, offset); u32 temp[4]; - u64 *outadr=(u64*)((((u32)&temp)+7)&~7);/* 8 byte align the stack adress*/ + /* 8 byte align the stack adress*/ + u64 *outadr=(u64*)((((u32)&temp) + 7) & ~7); load64(addr, outadr); - EDEB_EX(7, "addr=%p value=%Lx", addr, *outadr); + ehca_gen_dbg("addr=%p value=%Lx", 
addr, *outadr); return *outadr; }; -inline static void store64(void * adr,unsigned int datal, unsigned int datah) +inline static void store64(void *adr, unsigned int datal, unsigned int datah) { unsigned long temp=0; asm("sldi %3,%1,32\n\t" "or 0,%3,%0\n\t" - "std 0,0(%2)\n" :/* no output */: "r" (datal), "r" (datah), "r" (adr), "r" (temp):"r0"); + "std 0,0(%2)\n" :/* no output */: "r" (datal), "r" (datah), + "r" (adr), "r" (temp):"r0"); } void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) { void * addr = ((void*)(unsigned long)galpa.fw_handle) + offset; - EDEB(7, "addr=%p offset=%x value=%Lx", addr, - offset, value); - store64(addr,value&0xffffffffULL,value >>32ULL); + ehca_gen_dbg("addr=%p offset=%x value=%Lx", addr, offset, value); + store64(addr, value&0xffffffffULL, value >>32ULL); #ifdef EHCA_USE_HCALL - /* hipz_galpa_load(galpa, offset); */ - /* synchronize explicitly */ + /* hipz_galpa_load(galpa, offset); + * synchronize explicitly + */ #endif }; #endif /* ___PPC64__*/ - -#endif /* EHCA_USERDRIVER */ diff -Nurp libehca_old/src/hipz_fns_core.h libehca_new/src/hipz_fns_core.h --- libehca_old/src/hipz_fns_core.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/hipz_fns_core.h 2007-01-26 14:27:43.000000000 +0100 @@ -38,8 +38,6 @@ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * - * $Id: hipz_fns_core.h,v 1.2 2006/03/26 21:27:27 nguyen Exp $ */ #ifndef __HIPZ_FNS_CORE_H__ @@ -48,76 +46,71 @@ #include "ehca_galpa.h" #include "hipz_hw.h" -#define hipz_galpa_store_cq(gal,offset,value)\ - hipz_galpa_store(gal,CQTEMM_OFFSET(offset),value) -#define hipz_galpa_load_cq(gal,offset)\ - hipz_galpa_load(gal,CQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qp(gal,offset,value)\ - hipz_galpa_store(gal,QPTEMM_OFFSET(offset),value) -#define hipz_galpa_load_qp(gal,offset)\ - hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal, offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) inline static void hipz_update_SQA(struct ehcau_qp *qp, u16 nr_wqes) { struct h_galpa gal; - EDEB_EN(7, "qp=%p", qp); gal = qp->galpas.kernel; /* ringing doorbell :-) */ hipz_galpa_store_qp(gal, QPx_SQA, EHCA_BMASK_SET(QPx_SQAdder, nr_wqes)); - EDEB_EX(7, "qp=%p QPx_SQA = %i", qp, nr_wqes); + ehca_gen_dbg("qp=%p QPx_SQA = %i", qp, nr_wqes); } inline static void hipz_update_RQA(struct ehcau_qp *qp, u16 nr_wqes) { struct h_galpa gal; - EDEB_EN(7, "qp=%p", qp); gal = qp->galpas.kernel; /* ringing doorbell :-) */ hipz_galpa_store_qp(gal, QPx_RQA, EHCA_BMASK_SET(QPx_RQAdder, nr_wqes)); - EDEB_EX(7, "qp=%p QPx_RQA = %i", qp, nr_wqes); + ehca_gen_dbg("qp=%p QPx_RQA = %i", qp, nr_wqes); } inline static void hipz_update_FECA(struct ehcau_cq *cq, u32 nr_cqes) { struct h_galpa gal; - EDEB_EN(7, "cq=%p", cq); gal = cq->galpas.kernel; hipz_galpa_store_cq(gal, CQx_FECA, EHCA_BMASK_SET(CQx_FECAdder, nr_cqes)); - EDEB_EX(7, "cq=%p CQx_FECA = %i", cq, nr_cqes); + ehca_gen_dbg("cq=%p CQx_FECA = %i", cq, nr_cqes); } inline static void hipz_set_CQx_N0(struct ehcau_cq *cq, 
u32 value) { struct h_galpa gal; - u64 CQx_N0_reg = 0; + u64 CQx_N0_reg; - EDEB_EN(7, "cq=%p event on solicited completion -- write CQx_N0", - cq); + ehca_gen_dbg("cq=%p event on solicited completion -- write CQx_N0", cq); gal = cq->galpas.kernel; hipz_galpa_store_cq(gal, CQx_N0, EHCA_BMASK_SET(CQx_N0_generate_solicited_comp_event, value)); CQx_N0_reg = hipz_galpa_load_cq(gal, CQx_N0); - EDEB_EX(7, "cq=%p loaded CQx_N0=%lx", cq,(unsigned long)CQx_N0_reg); + ehca_gen_dbg("cq=%p loaded CQx_N0=%lx", cq, (unsigned long)CQx_N0_reg); } inline static void hipz_set_CQx_N1(struct ehcau_cq *cq, u32 value) { struct h_galpa gal; - u64 CQx_N1_reg = 0; + u64 CQx_N1_reg; - EDEB_EN(7, "cq=%p event on completion -- write CQx_N1", - cq); + ehca_gen_dbg("cq=%p event on completion -- write CQx_N1", cq); gal = cq->galpas.kernel; hipz_galpa_store_cq(gal, CQx_N1, EHCA_BMASK_SET(CQx_N1_generate_comp_event, value)); CQx_N1_reg = hipz_galpa_load_cq(gal, CQx_N1); - EDEB_EX(7, "cq=%p loaded CQx_N1=%lx", cq,(unsigned long)CQx_N1_reg); + ehca_gen_dbg("cq=%p loaded CQx_N1=%lx", cq, (unsigned long)CQx_N1_reg); } #endif /* __HIPZ_FNC_CORE_H__ */ diff -Nurp libehca_old/src/hipz_hw.h libehca_new/src/hipz_hw.h --- libehca_old/src/hipz_hw.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/hipz_hw.h 2007-01-26 14:27:43.000000000 +0100 @@ -38,8 +38,6 @@ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * - * $Id: hipz_hw.h,v 1.1 2006/02/22 12:26:55 nguyen Exp $ */ #ifndef __HIPZ_HW_H__ @@ -52,191 +50,189 @@ #include "ehca_utools.h" #endif -/** @brief Queue Pair Table Memory - */ +#define EHCA_MAX_MTU 4 + +/* Queue Pair Table Memory */ struct hipz_QPTEMM { u64 QPx_HCR; -#define QPx_HCR_PKEY_Mode EHCA_BMASK_IBM(1,2) -#define QPx_HCR_Special_QP_Mode EHCA_BMASK_IBM(6,7) +#define QPx_HCR_PKEY_Mode EHCA_BMASK_IBM(1, 2) +#define QPx_HCR_Special_QP_Mode EHCA_BMASK_IBM(6, 7) u64 QPx_C; -#define QPx_C_Enabled EHCA_BMASK_IBM(0,0) -#define QPx_C_Disabled EHCA_BMASK_IBM(1,1) -#define QPx_C_Req_State EHCA_BMASK_IBM(16,23) -#define QPx_C_Res_State EHCA_BMASK_IBM(25,31) -#define QPx_C_disable_ETE_check EHCA_BMASK_IBM(7,7) +#define QPx_C_Enabled EHCA_BMASK_IBM(0, 0) +#define QPx_C_Disabled EHCA_BMASK_IBM(1, 1) +#define QPx_C_Req_State EHCA_BMASK_IBM(16, 23) +#define QPx_C_Res_State EHCA_BMASK_IBM(25, 31) +#define QPx_C_disable_ETE_check EHCA_BMASK_IBM(7, 7) u64 QPx_HERR; u64 QPx_AER; -/* 0x20*/ + /* 0x20*/ u64 QPx_SQA; -#define QPx_SQAdder EHCA_BMASK_IBM(48,63) +#define QPx_SQAdder EHCA_BMASK_IBM(48, 63) u64 QPx_SQC; u64 QPx_RQA; -#define QPx_RQAdder EHCA_BMASK_IBM(48,63) +#define QPx_RQAdder EHCA_BMASK_IBM(48, 63) u64 QPx_RQC; -/* 0x40*/ + /* 0x40*/ u64 QPx_ST; u64 QPx_PMSTATE; -#define QPx_PMSTATE_BITS EHCA_BMASK_IBM(30,31) +#define QPx_PMSTATE_BITS EHCA_BMASK_IBM(30, 31) u64 QPx_PMFA; u64 QPx_PKEY; -#define QPx_PKEY_value EHCA_BMASK_IBM(48,63) -/* 0x60*/ +#define QPx_PKEY_value EHCA_BMASK_IBM(48, 63) + /* 0x60*/ u64 QPx_PKEYA; -#define QPx_PKEYA_index0 EHCA_BMASK_IBM(0,15) -#define QPx_PKEYA_index1 EHCA_BMASK_IBM(16,31) -#define QPx_PKEYA_index2 EHCA_BMASK_IBM(32,47) -#define QPx_PKEYA_index3 EHCA_BMASK_IBM(48,63) +#define QPx_PKEYA_index0 EHCA_BMASK_IBM(0, 15) +#define QPx_PKEYA_index1 EHCA_BMASK_IBM(16, 31) +#define QPx_PKEYA_index2 EHCA_BMASK_IBM(32, 47) +#define QPx_PKEYA_index3 EHCA_BMASK_IBM(48, 63) u64 QPx_PKEYB; -#define QPx_PKEYB_index4 EHCA_BMASK_IBM(0,15) -#define 
QPx_PKEYB_index5 EHCA_BMASK_IBM(16,31) -#define QPx_PKEYB_index6 EHCA_BMASK_IBM(32,47) -#define QPx_PKEYB_index7 EHCA_BMASK_IBM(48,63) +#define QPx_PKEYB_index4 EHCA_BMASK_IBM(0, 15) +#define QPx_PKEYB_index5 EHCA_BMASK_IBM(16, 31) +#define QPx_PKEYB_index6 EHCA_BMASK_IBM(32, 47) +#define QPx_PKEYB_index7 EHCA_BMASK_IBM(48, 63) u64 QPx_PKEYC; -#define QPx_PKEYC_index8 EHCA_BMASK_IBM(0,15) -#define QPx_PKEYC_index9 EHCA_BMASK_IBM(16,31) -#define QPx_PKEYC_index10 EHCA_BMASK_IBM(32,47) -#define QPx_PKEYC_index11 EHCA_BMASK_IBM(48,63) +#define QPx_PKEYC_index8 EHCA_BMASK_IBM(0, 15) +#define QPx_PKEYC_index9 EHCA_BMASK_IBM(16, 31) +#define QPx_PKEYC_index10 EHCA_BMASK_IBM(32, 47) +#define QPx_PKEYC_index11 EHCA_BMASK_IBM(48, 63) u64 QPx_PKEYD; -#define QPx_PKEYD_index12 EHCA_BMASK_IBM(0,15) -#define QPx_PKEYD_index13 EHCA_BMASK_IBM(16,31) -#define QPx_PKEYD_index14 EHCA_BMASK_IBM(32,47) -#define QPx_PKEYD_index15 EHCA_BMASK_IBM(48,63) -/* 0x80*/ +#define QPx_PKEYD_index12 EHCA_BMASK_IBM(0, 15) +#define QPx_PKEYD_index13 EHCA_BMASK_IBM(16, 31) +#define QPx_PKEYD_index14 EHCA_BMASK_IBM(32, 47) +#define QPx_PKEYD_index15 EHCA_BMASK_IBM(48, 63) + /* 0x80*/ u64 QPx_QKEY; -#define QPx_QKEY_value EHCA_BMASK_IBM(32,63) +#define QPx_QKEY_value EHCA_BMASK_IBM(32, 63) u64 QPx_DQP; -#define QPx_DQP_number EHCA_BMASK_IBM(40,63) +#define QPx_DQP_number EHCA_BMASK_IBM(40, 63) u64 QPx_DLIDP; -#define QPx_DLID_PRIMARY EHCA_BMASK_IBM(48,63) -#define QPx_DLIDP_GRH EHCA_BMASK_IBM(31,31) +#define QPx_DLID_PRIMARY EHCA_BMASK_IBM(48, 63) +#define QPx_DLIDP_GRH EHCA_BMASK_IBM(31, 31) u64 QPx_PORTP; -#define QPx_PORT_Primary EHCA_BMASK_IBM(57,63) -/* 0xa0*/ +#define QPx_PORT_Primary EHCA_BMASK_IBM(57, 63) + /* 0xa0*/ u64 QPx_SLIDP; -#define QPx_SLIDP_p_path EHCA_BMASK_IBM(48,63) -#define QPx_SLIDP_lmc EHCA_BMASK_IBM(37,39) +#define QPx_SLIDP_p_path EHCA_BMASK_IBM(48, 63) +#define QPx_SLIDP_lmc EHCA_BMASK_IBM(37, 39) u64 QPx_SLIDPP; -#define QPx_SLID_PRIM_PATH EHCA_BMASK_IBM(57,63) +#define 
QPx_SLID_PRIM_PATH EHCA_BMASK_IBM(57, 63) u64 QPx_DLIDA; -#define QPx_DLIDA_GRH EHCA_BMASK_IBM(31,31) +#define QPx_DLIDA_GRH EHCA_BMASK_IBM(31, 31) u64 QPx_PORTA; -#define QPx_PORT_Alternate EHCA_BMASK_IBM(57,63) -/* 0xc0*/ +#define QPx_PORT_Alternate EHCA_BMASK_IBM(57, 63) + /* 0xc0*/ u64 QPx_SLIDA; u64 QPx_SLIDPA; u64 QPx_SLVL; -#define QPx_SLVL_BITS EHCA_BMASK_IBM(56,59) -#define QPx_SLVL_VL EHCA_BMASK_IBM(60,63) +#define QPx_SLVL_BITS EHCA_BMASK_IBM(56, 59) +#define QPx_SLVL_VL EHCA_BMASK_IBM(60, 63) u64 QPx_IPD; -#define QPx_IPD_max_static_rate EHCA_BMASK_IBM(56,63) -/* 0xe0*/ +#define QPx_IPD_max_static_rate EHCA_BMASK_IBM(56, 63) + /* 0xe0*/ u64 QPx_MTU; -#define QPx_MTU_size EHCA_BMASK_IBM(56,63) +#define QPx_MTU_size EHCA_BMASK_IBM(56, 63) u64 QPx_LATO; -#define QPx_LATO_BITS EHCA_BMASK_IBM(59,63) +#define QPx_LATO_BITS EHCA_BMASK_IBM(59, 63) u64 QPx_RLIMIT; -#define QPx_RETRY_COUNT EHCA_BMASK_IBM(61,63) +#define QPx_RETRY_COUNT EHCA_BMASK_IBM(61, 63) u64 QPx_RNRLIMIT; -#define QPx_RNR_RETRY_COUNT EHCA_BMASK_IBM(61,63) -/* 0x100*/ +#define QPx_RNR_RETRY_COUNT EHCA_BMASK_IBM(61, 63) + /* 0x100*/ u64 QPx_T; u64 QPx_SQHP; u64 QPx_SQPTP; u64 QPx_NSPSN; -#define QPx_NSPSN_value EHCA_BMASK_IBM(40,63) -/* 0x120*/ +#define QPx_NSPSN_value EHCA_BMASK_IBM(40, 63) + /* 0x120*/ u64 QPx_NSPSNHWM; -#define QPx_NSPSNHWM_value EHCA_BMASK_IBM(40,63) +#define QPx_NSPSNHWM_value EHCA_BMASK_IBM(40, 63) u64 reserved1; u64 QPx_SDSI; u64 QPx_SDSBC; -/* 0x140*/ + /* 0x140*/ u64 QPx_SQWSIZE; -#define QPx_SQWSIZE_value EHCA_BMASK_IBM(61,63) +#define QPx_SQWSIZE_value EHCA_BMASK_IBM(61, 63) u64 QPx_SQWTS; u64 QPx_LSN; u64 QPx_NSSN; -/* 0x160 */ + /* 0x160 */ u64 QPx_MOR; -#define QPx_MOR_value EHCA_BMASK_IBM(48,63) +#define QPx_MOR_value EHCA_BMASK_IBM(48, 63) u64 QPx_COR; u64 QPx_SQSIZE; -#define QPx_SQSIZE_value EHCA_BMASK_IBM(60,63) +#define QPx_SQSIZE_value EHCA_BMASK_IBM(60, 63) u64 QPx_ERC; -/* 0x180*/ + /* 0x180*/ u64 QPx_RNRRC; -#define QPx_RNRRESP_value 
EHCA_BMASK_IBM(59,63) +#define QPx_RNRRESP_value EHCA_BMASK_IBM(59, 63) u64 QPx_ERNRWT; u64 QPx_RNRRESP; -#define QPx_RNRRESP_WTR EHCA_BMASK_IBM(59,63) +#define QPx_RNRRESP_WTR EHCA_BMASK_IBM(59, 63) u64 QPx_LMSNA; -/* 0x1a0 */ + /* 0x1a0 */ u64 QPx_SQHPC; u64 QPx_SQCPTP; u64 QPx_SIGT; u64 QPx_WQECNT; -/* 0x1c0*/ - + /* 0x1c0*/ u64 QPx_RQHP; u64 QPx_RQPTP; u64 QPx_RQSIZE; -#define QPx_RQSIZE_value EHCA_BMASK_IBM(60,63) +#define QPx_RQSIZE_value EHCA_BMASK_IBM(60, 63) u64 QPx_NRR; -#define QPx_NRR_value EHCA_BMASK_IBM(61,63) -/* 0x1e0*/ +#define QPx_NRR_value EHCA_BMASK_IBM(61, 63) + /* 0x1e0*/ u64 QPx_RDMAC; -#define QPx_RDMAC_value EHCA_BMASK_IBM(61,63) +#define QPx_RDMAC_value EHCA_BMASK_IBM(61, 63) u64 QPx_NRPSN; -#define QPx_NRPSN_value EHCA_BMASK_IBM(40,63) +#define QPx_NRPSN_value EHCA_BMASK_IBM(40, 63) u64 QPx_LAPSN; -#define QPx_LAPSN_value EHCA_BMASK_IBM(40,63) +#define QPx_LAPSN_value EHCA_BMASK_IBM(40, 63) u64 QPx_LCR; -/* 0x200*/ + /* 0x200*/ u64 QPx_RWC; u64 QPx_RWVA; u64 QPx_RDSI; u64 QPx_RDSBC; -/* 0x220*/ + /* 0x220*/ u64 QPx_RQWSIZE; -#define QPx_RQWSIZE_value EHCA_BMASK_IBM(61,63) +#define QPx_RQWSIZE_value EHCA_BMASK_IBM(61, 63) u64 QPx_CRMSN; u64 QPx_RDD; -#define QPx_RDD_VALUE EHCA_BMASK_IBM(32,63) +#define QPx_RDD_VALUE EHCA_BMASK_IBM(32, 63) u64 QPx_LARPSN; -#define QPx_LARPSN_value EHCA_BMASK_IBM(40,63) -/* 0x240*/ +#define QPx_LARPSN_value EHCA_BMASK_IBM(40, 63) + /* 0x240*/ u64 QPx_PD; u64 QPx_SCQN; u64 QPx_RCQN; u64 QPx_AEQN; -/* 0x260*/ + /* 0x260*/ u64 QPx_AAELOG; u64 QPx_RAM; u64 QPx_RDMAQE0; u64 QPx_RDMAQE1; -/* 0x280*/ + /* 0x280*/ u64 QPx_RDMAQE2; u64 QPx_RDMAQE3; u64 QPx_NRPSNHWM; -#define QPx_NRPSNHWM_value EHCA_BMASK_IBM(40,63) -/* 0x298*/ +#define QPx_NRPSNHWM_value EHCA_BMASK_IBM(40, 63) + /* 0x298*/ u64 reserved[(0x400 - 0x298) / 8]; -/* 0x400 extended data */ + /* 0x400 extended data */ u64 reserved_ext[(0x500 - 0x400) / 8]; -/* 0x500 */ + /* 0x500 */ u64 reserved2[(0x1000 - 0x500) / 8]; -/* 0x1000 */ + /* 0x1000 */ }; 
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_QPTEMM,x) +#define QPTEMM_OFFSET(x) offsetof(struct hipz_QPTEMM, x) -/** @brief MRMWPT Entry Memory Map - */ +/* MRMWPT Entry Memory Map */ struct hipz_MRMWMM { /* 0x00 */ u64 MRx_HCR; -#define MRx_HCR_LPARID_VALID EHCA_BMASK_IBM(0,0) - +#define MRx_HCR_LPARID_VALID EHCA_BMASK_IBM(0, 0) u64 MRx_C; u64 MRx_HERR; u64 MRx_AER; @@ -249,26 +245,24 @@ struct hipz_MRMWMM { u64 reserved4[(0x200 - 0x40) / 8]; /* 0x200 */ u64 MRx_CTL[64]; - }; -#define MRMWMM_OFFSET(x) offsetof(struct hipz_MRMWMM,x) +#define MRMWMM_OFFSET(x) offsetof(struct hipz_MRMWMM, x) -/** @brief QPEDMM - */ +/* QPEDMM */ struct hipz_QPEDMM { /* 0x00 */ u64 reserved0[(0x400) / 8]; /* 0x400 */ u64 QPEDx_PHH; -#define QPEDx_PHH_TClass EHCA_BMASK_IBM(4,11) -#define QPEDx_PHH_HopLimit EHCA_BMASK_IBM(56,63) -#define QPEDx_PHH_FlowLevel EHCA_BMASK_IBM(12,31) +#define QPEDx_PHH_TClass EHCA_BMASK_IBM(4, 11) +#define QPEDx_PHH_HopLimit EHCA_BMASK_IBM(56, 63) +#define QPEDx_PHH_FlowLevel EHCA_BMASK_IBM(12, 31) u64 QPEDx_PPSGP; -#define QPEDx_PPSGP_PPPidx EHCA_BMASK_IBM(0,63) +#define QPEDx_PPSGP_PPPidx EHCA_BMASK_IBM(0, 63) /* 0x410 */ u64 QPEDx_PPSGU; -#define QPEDx_PPSGU_PPPSGID EHCA_BMASK_IBM(0,63) +#define QPEDx_PPSGU_PPPSGID EHCA_BMASK_IBM(0, 63) u64 QPEDx_PPDGP; /* 0x420 */ u64 QPEDx_PPDGU; @@ -310,73 +304,71 @@ struct hipz_QPEDMM { u64 QPEDx_RRVA3; }; -#define QPEDMM_OFFSET(x) offsetof(struct hipz_QPEDMM,x) +#define QPEDMM_OFFSET(x) offsetof(struct hipz_QPEDMM, x) -/** @brief CQ Table Entry Memory Map - */ +/* CQ Table Entry Memory Map */ struct hipz_CQTEMM { u64 CQx_HCR; -#define CQx_HCR_LPARID_valid EHCA_BMASK_IBM(0,0) +#define CQx_HCR_LPARID_valid EHCA_BMASK_IBM(0, 0) u64 CQx_C; -#define CQx_C_Enable EHCA_BMASK_IBM(0,0) -#define CQx_C_Disable_Complete EHCA_BMASK_IBM(1,1) -#define CQx_C_Error_Reset EHCA_BMASK_IBM(23,23) +#define CQx_C_Enable EHCA_BMASK_IBM(0, 0) +#define CQx_C_Disable_Complete EHCA_BMASK_IBM(1, 1) +#define CQx_C_Error_Reset 
EHCA_BMASK_IBM(23, 23) u64 CQx_HERR; u64 CQx_AER; -/* 0x20 */ + /* 0x20 */ u64 CQx_PTP; u64 CQx_TP; -#define CQx_FEC_CQE_cnt EHCA_BMASK_IBM(32,63) +#define CQx_FEC_CQE_cnt EHCA_BMASK_IBM(32, 63) u64 CQx_FEC; u64 CQx_FECA; -#define CQx_FECAdder EHCA_BMASK_IBM(32,63) -/* 0x40 */ +#define CQx_FECAdder EHCA_BMASK_IBM(32, 63) + /* 0x40 */ u64 CQx_EP; -#define CQx_EP_Event_Pending EHCA_BMASK_IBM(0,0) -#define CQx_EQ_number EHCA_BMASK_IBM(0,15) -#define CQx_EQ_CQtoken EHCA_BMASK_IBM(32,63) +#define CQx_EP_Event_Pending EHCA_BMASK_IBM(0, 0) +#define CQx_EQ_number EHCA_BMASK_IBM(0, 15) +#define CQx_EQ_CQtoken EHCA_BMASK_IBM(32, 63) u64 CQx_EQ; -/* 0x50 */ + /* 0x50 */ u64 reserved1; u64 CQx_N0; -#define CQx_N0_generate_solicited_comp_event EHCA_BMASK_IBM(0,0) -/* 0x60 */ +#define CQx_N0_generate_solicited_comp_event EHCA_BMASK_IBM(0, 0) + /* 0x60 */ u64 CQx_N1; -#define CQx_N1_generate_comp_event EHCA_BMASK_IBM(0,0) +#define CQx_N1_generate_comp_event EHCA_BMASK_IBM(0, 0) u64 reserved2[(0x1000 - 0x60) / 8]; -/* 0x1000 */ + /* 0x1000 */ }; -#define CQTEMM_OFFSET(x) offsetof(struct hipz_CQTEMM,x) +#define CQTEMM_OFFSET(x) offsetof(struct hipz_CQTEMM, x) -/** @brief EQ Table Entry Memory Map - */ +/* EQ Table Entry Memory Map */ struct hipz_EQTEMM { u64 EQx_HCR; -#define EQx_HCR_LPARID_valid EHCA_BMASK_IBM(0,0) -#define EQx_HCR_ENABLE_PSB EHCA_BMASK_IBM(8,8) +#define EQx_HCR_LPARID_valid EHCA_BMASK_IBM(0, 0) +#define EQx_HCR_ENABLE_PSB EHCA_BMASK_IBM(8, 8) u64 EQx_C; -#define EQx_C_Enable EHCA_BMASK_IBM(0,0) -#define EQx_C_Error_Reset EHCA_BMASK_IBM(23,23) -#define EQx_C_Comp_Event EHCA_BMASK_IBM(17,17) +#define EQx_C_Enable EHCA_BMASK_IBM(0, 0) +#define EQx_C_Error_Reset EHCA_BMASK_IBM(23, 23) +#define EQx_C_Comp_Event EHCA_BMASK_IBM(17, 17) u64 EQx_HERR; u64 EQx_AER; -/* 0x20 */ + /* 0x20 */ u64 EQx_PTP; u64 EQx_TP; u64 EQx_SSBA; u64 EQx_PSBA; -/* 0x40 */ + /* 0x40 */ u64 EQx_CEC; u64 EQx_MEQL; u64 EQx_XISBI; u64 EQx_XISC; -/* 0x60 */ + /* 0x60 */ u64 EQx_IT; }; -#define 
EQTEMM_OFFSET(x) offsetof(struct hipz_EQTEMM,x) +#define EQTEMM_OFFSET(x) offsetof(struct hipz_EQTEMM, x) #endif diff -Nurp libehca_old/src/ipzu_pt_fn.h libehca_new/src/ipzu_pt_fn.h --- libehca_old/src/ipzu_pt_fn.h 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ipzu_pt_fn.h 2007-01-26 14:27:43.000000000 +0100 @@ -39,8 +39,6 @@ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: ipzu_pt_fn.h,v 1.1 2006/03/26 22:34:11 nguyen Exp $ */ #ifndef __IPZU_PT_FN_H__ @@ -60,33 +58,32 @@ struct ipzu_queue { u32 dummy3; /* 64 bit alignment*/ }; -/** return current Queue Entry - @returns address of current Queue Entry - */ +/* returns address of current Queue Entry */ static inline void *ipzu_qeit_get(struct ipzu_queue *queue) { - return (queue->current_q_addr); + return queue->current_q_addr; } -/** return current Queue Page , increment Queue Page iterator from - page to page in struct ipzu_queue, last increment will return 0! and - NOT wrap - @returns address of current Queue Page - @warning don't use in parallel with ipzu_qeit_get_inc() +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipzu_queue, last increment will return 0! 
and + * NOT wrap + * warning: don't use in parallel with ipzu_qeit_get_inc() */ void *ipzu_qpageit_get_inc(struct ipzu_queue *queue); -/** return current Queue Entry, increment Queue Entry iterator by one - step in struct ipzu_queue, will wrap in ringbuffer - @returns address of current Queue Entry BEFORE increment - @warning don't use in parallel with ipzu_qpageit_get_inc() - @warning unpredictable results may occur if steps>act_nr_of_queue_entries +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipzu_queue, will wrap in ringbuffer + * (returns address of current Queue Entry BEFORE increment) + * warning: don't use in parallel with ipzu_qpageit_get_inc() + * warning: unpredictable results may occur if steps>act_nr_of_queue_entries */ static inline void *ipzu_qeit_get_inc(struct ipzu_queue *queue) { - void *retvalue = 0; + void *retvalue; u8 *last_entry_in_q = queue->queue + queue->queue_length - - queue->qe_size; + - queue->qe_size; retvalue = queue->current_q_addr; queue->current_q_addr += queue->qe_size; @@ -96,38 +93,37 @@ static inline void *ipzu_qeit_get_inc(st queue->toggle_state = (~queue->toggle_state) & 1; } - EDEB(7, "queue=%p retvalue=%p new current_q_addr=%p qe_size=%x", - queue, retvalue, queue->current_q_addr, queue->qe_size); - - return (retvalue); + ehca_gen_dbg("queue=%p retvalue=%p new current_q_addr=%p qe_size=%x", + queue, retvalue, queue->current_q_addr, queue->qe_size); + return retvalue; } -/** return current Queue Entry, increment Queue Entry iterator by one - step in struct ipzu_queue, will wrap in ringbuffer - @returns address of current Queue Entry BEFORE increment - @returns 0 and does not increment, if wrong valid state - @warning don't use in parallel with ipzu_qpageit_get_inc() - @warning unpredictable results may occur if steps>act_nr_of_queue_entries +/* return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipzu_queue, will wrap in ringbuffer + * (returns 
address of current Queue Entry BEFORE increment) + * (returns 0 and does not increment, if wrong valid state) + * warning: don't use in parallel with ipzu_qpageit_get_inc() + * warning: unpredictable results may occur if steps>act_nr_of_queue_entries */ inline static void *ipzu_qeit_get_inc_valid(struct ipzu_queue *queue) { void *retvalue = ipzu_qeit_get(queue); u32 qe = ((struct ehca_cqe *)retvalue)->cqe_flags; - if ((qe >> 7) == (queue->toggle_state & 1)) { - /* this is a good one */ - ipzu_qeit_get_inc(queue); - } else - retvalue = 0; - return (retvalue); + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + /* this is a good one */ + ipzu_qeit_get_inc(queue); + return retvalue; } -/** returns and resets Queue Entry iterator - @returns address (kv) of first Queue Entry +/* + * returns and resets Queue Entry iterator + * (returns address (kv) of first Queue Entry) */ static inline void *ipzu_qeit_reset(struct ipzu_queue *queue) { queue->current_q_addr = queue->queue; - return (queue->queue); + return queue->queue; } #endif /* __IPZU_PT_FN_H__ */ From ossrosch at linux.vnet.ibm.com Fri Jan 26 08:48:32 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Fri, 26 Jan 2007 17:48:32 +0100 Subject: [openib-general] [Patch ofed1.2 3/3]libehca: cleanup and adjust mmap Message-ID: <200701261748.32603.ossrosch@linux.vnet.ibm.com> Signed-off-by: Stefan Roscher --- diff -Nurp libehca_old/src/ehca_ureqs.c libehca_new/src/ehca_ureqs.c --- libehca_old/src/ehca_ureqs.c 2007-01-26 14:27:03.000000000 +0100 +++ libehca_new/src/ehca_ureqs.c 2007-01-26 14:27:43.000000000 +0100 @@ -38,25 +38,20 @@ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * - * $Id: ehca_ureqs.c,v 1.5 2006/03/26 22:26:54 nguyen Exp $ */ -#define DEB_PREFIX "reqs" - -#include "ehca_uclasses.h" +#include +#include +#include #include +#include "ehca_uclasses.h" #include "ehca_utools.h" #include "hipz_fns_core.h" #include "ehca_everbs.h" #include "ehca_asm.h" #include "ipzu_pt_fn.h" -#include -#include -#include - static inline int write_rwqe(struct ipzu_queue *ipz_rqueue, struct ehca_wqe *wqe_p, struct ibv_recv_wr *recv_wr) @@ -64,34 +59,35 @@ static inline int write_rwqe(struct ipzu u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { - EDEB_ERR(4, "Invalid number of WQE SGE. " - "num_sqe=%x max_nr_of_sg=%x", - recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); - return (-EINVAL); /* invalid SG list length */ + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ } clear_cacheline(wqe_p); - clear_cacheline((u8 *) wqe_p + 32); - clear_cacheline((u8 *) wqe_p + 64); + clear_cacheline((u8*)wqe_p + 32); + clear_cacheline((u8*)wqe_p + 64); wqe_p->work_request_id = be64_to_cpu(recv_wr->wr_id); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = - be64_to_cpu(recv_wr->sg_list[cnt_ds].addr); + be64_to_cpu(recv_wr->sg_list[cnt_ds].addr); wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = - ntohl(recv_wr->sg_list[cnt_ds].lkey); + ntohl(recv_wr->sg_list[cnt_ds].lkey); wqe_p->u.all_rcv.sg_list[cnt_ds].length = - ntohl(recv_wr->sg_list[cnt_ds].length); + ntohl(recv_wr->sg_list[cnt_ds].length); } - if (IS_EDEB_ON(7)) { - EDEB(7, "RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); - EDEB_DMP(7, wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + if (unlikely(libehca_trace_on)) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); + ehca_dmp_dbg(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); 
} - return (0); + return 0; } static inline int write_swqe(struct ehcau_qp *qp, @@ -100,18 +96,18 @@ static inline int write_swqe(struct ehca { u32 idx; u64 dma_length; - struct ehcau_av *my_av = NULL; + struct ehcau_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; clear_cacheline(wqe_p); - clear_cacheline((u8 *) wqe_p + 32); + clear_cacheline((u8 *)wqe_p + 32); if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { - EDEB_ERR(4, "Invalid number of WQE SGE. " - "num_sqe=%x max_nr_of_sg=%x", - send_wr->num_sge, qp->ipz_rqueue.act_nr_of_sg); - return (-EINVAL); /* invalid SG list length */ + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_rqueue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ } wqe_p->work_request_id = be64_to_cpu(send_wr->wr_id); @@ -129,16 +125,16 @@ static inline int write_swqe(struct ehca wqe_p->optype = WQE_OPTYPE_RDMAREAD; break; default: - EDEB_ERR(4, "Invalid opcode=%x", send_wr->opcode); - return (-EINVAL); /* invalid opcode */ + ehca_gen_err("Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ } wqe_p->wqef = (send_wr->opcode) & 0xF0; wqe_p->wr_flag = 0; - if (send_wr->send_flags & IBV_SEND_SIGNALED) { + if (send_wr->send_flags & IBV_SEND_SIGNALED) wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; - } + if (send_wr->opcode == IBV_WR_SEND_WITH_IMM || send_wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) { @@ -152,71 +148,70 @@ static inline int write_swqe(struct ehca switch (qp->qp_type) { case IBV_QPT_UD: /* IB 1.2 spec C10-15 compliance */ - if (send_wr->wr.ud.remote_qkey & 0x80000000) { + if (send_wr->wr.ud.remote_qkey & 0x80000000) remote_qkey = qp->qkey; - } wqe_p->destination_qp_number = - ntohl(send_wr->wr.ud.remote_qpn << 8); + ntohl(send_wr->wr.ud.remote_qpn << 8); wqe_p->local_ee_context_qkey = ntohl(remote_qkey); - if (send_wr->wr.ud.ah==NULL) { - EDEB_ERR(4, "wr.ud.ah is NULL. 
qp=%p", qp); - return (-EINVAL); + if (!send_wr->wr.ud.ah) { + ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; } my_av = container_of(send_wr->wr.ud.ah, struct ehcau_av, ib_ah); wqe_p->u.ud_av.ud_av = my_av->av; /* omitted check of IBV_SEND_INLINE - since HW does not support it */ + * since HW does not support it + */ for (idx = 0; idx < send_wr->num_sge; idx++) { wqe_p->u.ud_av.sg_list[idx].vaddr = - be64_to_cpu(send_wr->sg_list[idx].addr); + be64_to_cpu(send_wr->sg_list[idx].addr); wqe_p->u.ud_av.sg_list[idx].lkey = - ntohl(send_wr->sg_list[idx].lkey); + ntohl(send_wr->sg_list[idx].lkey); wqe_p->u.ud_av.sg_list[idx].length = - ntohl(send_wr->sg_list[idx].length); + ntohl(send_wr->sg_list[idx].length); } /* eof for idx */ break; case IBV_QPT_UC: - if (send_wr->send_flags & IBV_SEND_FENCE) { + if (send_wr->send_flags & IBV_SEND_FENCE) wqe_p->wr_flag |= WQE_WRFLAG_FENCE; - } - /* no break is intential here */ + /* no break is intentional here */ case IBV_QPT_RC: /*@@TODO atomic???*/ wqe_p->u.nud.remote_virtual_adress = - be64_to_cpu(send_wr->wr.rdma.remote_addr); + be64_to_cpu(send_wr->wr.rdma.remote_addr); wqe_p->u.nud.rkey = ntohl(send_wr->wr.rdma.rkey); /* omitted checking of IBV_SEND_INLINE - since HW does not support it */ + * since HW does not support it + */ dma_length = 0; for (idx = 0; idx < send_wr->num_sge; idx++) { wqe_p->u.nud.sg_list[idx].vaddr = - be64_to_cpu(send_wr->sg_list[idx].addr); + be64_to_cpu(send_wr->sg_list[idx].addr); wqe_p->u.nud.sg_list[idx].lkey = - ntohl(send_wr->sg_list[idx].lkey); + ntohl(send_wr->sg_list[idx].lkey); wqe_p->u.nud.sg_list[idx].length = - ntohl(send_wr->sg_list[idx].length); + ntohl(send_wr->sg_list[idx].length); dma_length += send_wr->sg_list[idx].length; } /* eof idx */ wqe_p->u.nud.atomic_1st_op_dma_len = be64_to_cpu(dma_length); - break; - default: - EDEB_ERR(4, "Invalid qptype=%x", qp->qp_type); - return (-EINVAL); + ehca_gen_err("Invalid qptype=%x", qp->qp_type); + return -EINVAL; } - if 
(IS_EDEB_ON(7)) { - EDEB(7, "SEND WQE written into queue qp=%p ", qp); - EDEB_DMP(7, wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + if (unlikely(libehca_trace_on)) { + ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); + ehca_dmp_dbg(wqe_p, 16 * (6 + wqe_p->nr_of_data_seg), + "send wqe"); } - return (0); + return 0; } -/** +/* * map_ib_wc_status - convert cqe_status to ib_wc_status */ static inline void map_ib_wc_status(u32 cqe_status, @@ -251,7 +246,8 @@ static inline void map_ib_wc_status(u32 switch ((cqe_status & 0x0000F800) >> 11) { case 0x0: /* PSN Sequence Error! - couldn't find a matching VAPI status! */ + * couldn't find a matching VAPI status! + */ *wc_status = IBV_WC_GENERAL_ERR; break; case 0x1: @@ -297,44 +293,49 @@ static inline void map_ib_wc_status(u32 *wc_status = IBV_WC_FATAL_ERR; } - } else { + } else *wc_status = IBV_WC_SUCCESS; - } } int ehcau_post_send(struct ibv_qp *qp, struct ibv_send_wr *send_wr, struct ibv_send_wr **bad_send_wr) { - struct ehcau_qp *my_qp = NULL; - struct ibv_send_wr *cur_send_wr = NULL; - struct ehca_wqe *wqe_p = NULL; + struct ehcau_qp *my_qp; + struct ibv_send_wr *cur_send_wr; + struct ehca_wqe *wqe_p; int wqe_cnt = 0; int retcode = 0; - EHCA_CHECK_ADR(qp); + if (!qp) { + ehca_gen_err("qp=%p check failed line %i", qp, __LINE__); + return -EFAULT; + } my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EHCA_CHECK_QP(my_qp); - EHCA_CHECK_ADR(send_wr); - EDEB_EN(7, "ehcau_qp=%p qp_num=%x send_wr=%p bad_send_wr=%p", - my_qp, qp->qp_num, send_wr, bad_send_wr); + if (!send_wr) { + ehca_gen_err("send_wr=%p check failed line %i", + send_wr, __LINE__); + return -EFAULT; + } + ehca_dbg(qp->context->device, "ehcau_qp=%p qp_num=%x send_wr=%p " + "bad_send_wr=%p", my_qp, qp->qp_num, send_wr, bad_send_wr); /* LOCK the QUEUE */ ehcau_lock(&my_qp->lockvar_s); /* loop processes list of send reqs */ - for (cur_send_wr = send_wr; cur_send_wr != NULL; + for (cur_send_wr = send_wr; cur_send_wr; cur_send_wr = cur_send_wr->next) { 
void *start_addr = my_qp->ipz_squeue.current_q_addr; /* get pointer next to free WQE */ wqe_p = ipzu_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(wqe_p == NULL)) { + if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_send_wr != NULL) { + if (bad_send_wr) *bad_send_wr = cur_send_wr; - } - if (wqe_cnt==0) { + if (!wqe_cnt) { retcode = -ENOMEM; - EDEB_ERR(4, "Too many posted WQEs qp_num=%x", + ehca_err(qp->context->device, + "Too many posted WQEs qp_num=%x", qp->qp_num); } goto post_send_exit0; @@ -342,20 +343,22 @@ int ehcau_post_send(struct ibv_qp *qp, /* write a SEND WQE into the QUEUE */ retcode = write_swqe(my_qp, wqe_p, cur_send_wr); /* if something failed, reset the - free entry pointer to the start value */ - if (unlikely(retcode != 0)) { + * free entry pointer to the start value + */ + if (unlikely(retcode)) { my_qp->ipz_squeue.current_q_addr = start_addr; *bad_send_wr = cur_send_wr; - if (wqe_cnt==0) { + if (!wqe_cnt) { retcode = -EINVAL; - EDEB_ERR(4, "Could not write WQE qp_num=%x", + ehca_err(qp->context->device, + "Could not write WQE qp_num=%x", qp->qp_num); } goto post_send_exit0; } wqe_cnt++; - EDEB(7, "ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); } /* eof for cur_send_wr */ post_send_exit0: @@ -363,8 +366,8 @@ int ehcau_post_send(struct ibv_qp *qp, ehcau_unlock(&my_qp->lockvar_s); asm_sync_mem(); /* serialize GAL register access */ hipz_update_SQA(my_qp, wqe_cnt); - EDEB_EX(7, "ehca_qp=%p qp_num=%x ret=%x wqe_cnt=%d", - my_qp, qp->qp_num, retcode, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp=%p qp_num=%x ret=%x wqe_cnt=%d", + my_qp, qp->qp_num, retcode, wqe_cnt); return retcode; } @@ -372,36 +375,43 @@ int ehcau_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *recv_wr, struct ibv_recv_wr **bad_recv_wr) { - struct ehcau_qp *my_qp = NULL; - struct ibv_recv_wr *cur_recv_wr = NULL; - struct 
ehca_wqe *wqe_p = NULL; + struct ehcau_qp *my_qp; + struct ibv_recv_wr *cur_recv_wr; + struct ehca_wqe *wqe_p; int wqe_cnt = 0; int retcode = 0; - EHCA_CHECK_ADR(qp); + if (!qp) { + ehca_gen_err("qp=%p check failed line %i", qp, __LINE__); + return -EFAULT; + } my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EHCA_CHECK_QP(my_qp); - EHCA_CHECK_ADR(recv_wr); - EDEB_EN(7, "ehca_qp=%p qp_num=%x recv_wr=%p bad_recv_wr=%p", - my_qp, qp->qp_num, recv_wr, bad_recv_wr); + if (!recv_wr) { + ehca_gen_err("recv_wr=%p check failed line %i", + recv_wr, __LINE__); + return -EFAULT; + } + ehca_dbg(qp->context->device, + "ehca_qp=%p qp_num=%x recv_wr=%p bad_recv_wr=%p", + my_qp, qp->qp_num, recv_wr, bad_recv_wr); /* LOCK the QUEUE */ ehcau_lock(&my_qp->lockvar_r); /* loop processes list of send reqs */ - for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; + for (cur_recv_wr = recv_wr; cur_recv_wr; cur_recv_wr = cur_recv_wr->next) { void *start_addr = my_qp->ipz_rqueue.current_q_addr; /* get pointer next to free WQE */ wqe_p = ipzu_qeit_get_inc(&my_qp->ipz_rqueue); - if (unlikely(wqe_p == NULL)) { + if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_recv_wr != NULL) { + if (bad_recv_wr) *bad_recv_wr = cur_recv_wr; - } - if (wqe_cnt==0) { + if (!wqe_cnt) { retcode = -ENOMEM; - EDEB_ERR(4, "Too many posted WQEs qp_num=%x", + ehca_err(qp->context->device, + "Too many posted WQEs qp_num=%x", qp->qp_num); } goto post_recv_exit0; @@ -409,20 +419,22 @@ int ehcau_post_recv(struct ibv_qp *qp, /* write a RECV WQE into the QUEUE */ retcode = write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); /* if something failed, reset the - free entry pointer to the start value */ - if (unlikely(retcode != 0)) { + * free entry pointer to the start value + */ + if (unlikely(retcode)) { my_qp->ipz_rqueue.current_q_addr = start_addr; *bad_recv_wr = cur_recv_wr; - if (wqe_cnt==0) { + if (!wqe_cnt) { retcode = -EINVAL; - EDEB_ERR(4, "Could not write WQE qp_num=%x", + 
ehca_err(qp->context->device, + "Could not write WQE qp_num=%x", qp->qp_num); } goto post_recv_exit0; } wqe_cnt++; - EDEB(7, "ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); } /* eof for cur_recv_wr */ post_recv_exit0: @@ -430,98 +442,109 @@ int ehcau_post_recv(struct ibv_qp *qp, ehcau_unlock(&my_qp->lockvar_r); asm_sync_mem(); /* serialize GAL register access */ hipz_update_RQA(my_qp, wqe_cnt); - EDEB_EX(7, "ehca_qp=%p qp_num=%x ret=%x wqe_cnt=%d", - my_qp, qp->qp_num, retcode, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp=%p qp_num=%x ret=%x wqe_cnt=%d", + my_qp, qp->qp_num, retcode, wqe_cnt); return retcode; } -/** +/* * Table converts ehca wc opcode to ib * Since we use zero to indicate invalid opcode, the actual ib opcode must * be decremented!!! */ static const u8 ib_wc_opcode[255] = { - [0x00] = 1, /* for daqp optype is always zero */ - [0x01] = IBV_WC_RECV+1, - [0x02] = IBV_WC_RECV_RDMA_WITH_IMM+1, - [0x04] = IBV_WC_BIND_MW+1, - [0x08] = IBV_WC_FETCH_ADD+1, - [0x10] = IBV_WC_COMP_SWAP+1, - [0x20] = IBV_WC_RDMA_WRITE+1, - [0x40] = IBV_WC_RDMA_READ+1, - [0x80] = IBV_WC_SEND+1 + [0x00] = 1, /* for daqp optype is always zero */ + [0x01] = IBV_WC_RECV + 1, + [0x02] = IBV_WC_RECV_RDMA_WITH_IMM + 1, + [0x04] = IBV_WC_BIND_MW + 1, + [0x08] = IBV_WC_FETCH_ADD + 1, + [0x10] = IBV_WC_COMP_SWAP + 1, + [0x20] = IBV_WC_RDMA_WRITE + 1, + [0x40] = IBV_WC_RDMA_READ + 1, + [0x80] = IBV_WC_SEND + 1 }; -/** @brief internal function to poll one entry of cq - */ +/* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ibv_cq *cq, struct ibv_wc *wc) { int retcode = 0; struct ehcau_cq *my_cq = container_of(cq, struct ehcau_cq, ib_cq); - struct ehca_cqe *cqe = NULL; + struct ehca_cqe *cqe; int cqe_count = 0; - EDEB_EN(7, "ehca_cq=%p cq_num=%x wc=%p", my_cq, my_cq->cq_number, wc); + ehca_dbg(cq->context->device, "ehca_cq=%p 
cq_num=%x wc=%p", + my_cq, my_cq->cq_number, wc); - poll_cq_one_read_cqe: +poll_cq_one_read_cqe: cqe = (struct ehca_cqe *)ipzu_qeit_get_inc_valid(&my_cq->ipz_queue); - if (cqe == NULL) { + if (!cqe) { retcode = -EAGAIN; - EDEB(7, "Completion queue is empty ehca_cq=%p cq_num=%x " - "retcode=%x", my_cq, my_cq->cq_number, retcode); + ehca_dbg(cq->context->device, + "Completion queue is empty ehca_cq=%p cq_num=%x " + "retcode=%x", my_cq, my_cq->cq_number, retcode); goto poll_cq_one_exit0; } + + /* prevents loads being reordered across this point */ + lwsync(); + cqe_count++; if (unlikely(cqe->status & 0x10)) { /* purge bit set */ - struct ehcau_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); - int purgeflag = 0; - if (qp==NULL) { /* should not happen */ - EDEB_ERR(4, "cq_num=%x qp_num=%x " + struct ehcau_qp *qp = ehca_cq_get_qp(my_cq, + cqe->local_qp_number); + int purgeflag; + if (!qp) { /* should not happen */ + ehca_err(cq->context->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", my_cq->cq_number, cqe->local_qp_number); - EDEB_DMP(4, cqe, 64, "cq_num=%x qp_num=%x", - my_cq->cq_number, cqe->local_qp_number); + ehca_dmp_err(cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); /* ignore this purged cqe */ goto poll_cq_one_read_cqe; } ehcau_lock(&qp->lockvar_s); purgeflag = qp->sqerr_purgeflag; ehcau_unlock(&qp->lockvar_s); - if (purgeflag!=0) { - EDEB(6, "Got CQE with purged bit qp_num=%x src_qp=%x", - cqe->local_qp_number, cqe->remote_qp_number); - EDEB_DMP(6, cqe, 64, "qp_num=%x src_qp=%x", + if (purgeflag) { + ehca_dbg(cq->context->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); + ehca_dmp_dbg(cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, + cqe->remote_qp_number); /* ignore this to avoid double cqes of bad wqe - that caused sqe and turn off purge flag */ + * that caused sqe and turn off purge flag + */ qp->sqerr_purgeflag = 0; goto poll_cq_one_read_cqe; } } 
/* tracing cqe */ - if (IS_EDEB_ON(7)) { - EDEB(7, "Received COMPLETION ehca_cq=%p cq_num=%x -----", - my_cq, my_cq->cq_number); - EDEB_DMP(7, cqe, 64, "ehca_cq=%p cq_num=%x", + if (unlikely(libehca_trace_on)) { + ehca_dbg(cq->context->device, + "Received COMPLETION ehca_cq=%p cq_num=%x -----", + my_cq, my_cq->cq_number); + ehca_dmp_dbg(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + ehca_dbg(cq->context->device, + "ehca_cq=%p cq_num=%x -------------------------", my_cq, my_cq->cq_number); - EDEB(7, "ehca_cq=%p cq_num=%x -------------------------", - my_cq, my_cq->cq_number); } /* we got a completion! */ wc->wr_id = cqe->work_request_id; /* eval ib_wc_opcode */ - wc->opcode = ib_wc_opcode[cqe->optype]-1; + wc->opcode = ib_wc_opcode[cqe->optype] - 1; if (unlikely(wc->opcode == -1)) { /* no error code required, but do trace out */ - EDEB_ERR(4, "Invalid cqe->OPType=%x cqe->status=%x " - "ehca_cq=%p cq_num=%x", + ehca_err(cq->context->device, "Invalid cqe->OPType=%x " + "cqe->status=%x ehca_cq=%p cq_num=%x", cqe->optype, cqe->status, my_cq, my_cq->cq_number); /* dump cqe for other infos */ - EDEB_DMP(4, cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); + ehca_dmp_err(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); /* update also queue adder to throw away this entry!!! 
*/ goto poll_cq_one_exit0; } @@ -530,9 +553,8 @@ static inline int ehca_poll_cq_one(struc if (unlikely(cqe->status & 0x80000000)) { /* complete with errors */ map_ib_wc_status(cqe->status, &wc->status); wc->vendor_err = wc->status; - } else { + } else wc->status = IBV_WC_SUCCESS; - } wc->qp_num = cqe->local_qp_number; wc->byte_len = cqe->nr_bytes_transferred; @@ -544,191 +566,224 @@ static inline int ehca_poll_cq_one(struc wc->imm_data = cqe->immediate_data; wc->sl = cqe->service_level; - poll_cq_one_exit0: - if (cqe_count>0) { +poll_cq_one_exit0: + if (cqe_count > 0) hipz_update_FECA(my_cq, cqe_count); - } - EDEB_EX(7, "retcode=%x ehca_cq=%p cq_number=%x wc=%p " - "status=%x opcode=%x qp_num=%x byte_len=%x", - retcode, my_cq, my_cq->cq_number, wc, wc->status, - wc->opcode, wc->qp_num, wc->byte_len); - return (retcode); + ehca_dbg(cq->context->device, "retcode=%x ehca_cq=%p cq_number=%x " + "wc=%p status=%x opcode=%x qp_num=%x byte_len=%x", + retcode, my_cq, my_cq->cq_number, wc, wc->status, + wc->opcode, wc->qp_num, wc->byte_len); + return retcode; } int ehcau_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc) { - EHCA_CHECK_CQ(cq); - EHCA_CHECK_ADR(wc); + if (!cq) { + ehca_gen_err("cq=%p check failed line %i", cq, __LINE__); + return -EFAULT; + } + if (!wc) { + ehca_gen_err("wc=%p check failed line %i", wc, __LINE__); + return -EFAULT; + } int retcode = 0; struct ehcau_cq *my_cq = container_of(cq, struct ehcau_cq, ib_cq); - EHCA_CHECK_CQ(my_cq); if (num_entries < 1) { - EDEB_ERR(4, "ehcau_cq=%p, invalid num_entries=%d", - my_cq, num_entries); + ehca_err(cq->context->device, "ehcau_cq=%p, " + "invalid num_entries=%d", my_cq, num_entries); return -EINVAL; } - EDEB_EN(7, "ehcau_cq=%p num_entries=%d wc=%p", my_cq, num_entries, wc); + ehca_dbg(cq->context->device, "ehcau_cq=%p num_entries=%d wc=%p", + my_cq, num_entries, wc); int nr = 0; struct ibv_wc *current_wc = wc; ehcau_lock(&my_cq->lockvar); for (nr = 0; nr < num_entries; nr++) { retcode = 
ehca_poll_cq_one(cq, current_wc); - if (0 != retcode) { + if (retcode) break; - } current_wc++; } /* eof for nr */ ehcau_unlock(&my_cq->lockvar); - if (-EAGAIN == retcode || 0 == retcode) { + if (retcode == -EAGAIN || !retcode) retcode = nr; - } - EDEB_EX(7, "retcode=%x cq_number=%x wc=%p nr_entries=%d", - retcode, my_cq->cq_number, wc, nr); - return (retcode); + ehca_dbg(cq->context->device, "retcode=%x cq_number=%x wc=%p " + "nr_entries=%d", retcode, my_cq->cq_number, wc, nr); + return retcode; } int ehcau_req_notify_cq(struct ibv_cq *cq, int solicited) { - int retcode = 0; - struct ehcau_cq *my_cq = NULL; + struct ehcau_cq *my_cq; - EHCA_CHECK_CQ(cq); + if (!cq) { + ehca_gen_err("cq=%p check failed line %i", cq, __LINE__); + return -EFAULT; + } my_cq = container_of(cq, struct ehcau_cq, ib_cq); - EHCA_CHECK_CQ(my_cq); - EDEB_EN(7, "ehcau_cq=%p solicited=%x", my_cq, solicited); + ehca_dbg(cq->context->device, "ehcau_cq=%p solicited=%x", + my_cq, solicited); - if (solicited != 0) { /* IB_CQ_SOLICITED */ + if (solicited) /* IB_CQ_SOLICITED */ hipz_set_CQx_N0(my_cq, 1); - } else { /* IB_CQ_NEXT_COMP */ + else /* IB_CQ_NEXT_COMP */ hipz_set_CQx_N1(my_cq, 1); - } - EDEB_EX(7, "ehcau_cq=%p retcode=%x", cq, retcode); - - return (retcode); + ehca_dbg(cq->context->device, "ehcau_cq=%p", cq); + return 0; } int ehcau_send_wr_trigger(struct ibv_qp *qp, int wqe_count) { - struct ehcau_qp *my_qp = NULL; + struct ehcau_qp *my_qp; - EHCA_CHECK_ADR(qp); + if (!qp) { + ehca_gen_err("qp=%p check failed line %i", qp, __LINE__); + return -EFAULT; + } my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EHCA_CHECK_QP(my_qp); - EDEB_EN(7, "ehca_qp=%p wqe_count=%x", my_qp, wqe_count); + ehca_dbg(qp->context->device, "ehca_qp=%p wqe_count=%x", + my_qp, wqe_count); asm_sync_mem(); /* serialize GAL register access */ hipz_update_SQA(my_qp, wqe_count); - EDEB_EX(7, "ehca_qp=%p wqe_count=%x", my_qp, wqe_count); + ehca_dbg(qp->context->device, "ehca_qp=%p wqe_count=%x", + my_qp, wqe_count); 
return 0; } int ehcau_recv_wr_trigger(struct ibv_qp *qp, int wqe_count) { - struct ehcau_qp *my_qp = NULL; + struct ehcau_qp *my_qp; - EHCA_CHECK_ADR(qp); + if (!qp) { + ehca_gen_err("qp=%p check failed line %i", qp, __LINE__); + return -EFAULT; + } my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EHCA_CHECK_QP(my_qp); - EDEB_EN(7, "ehca_qp=%p wqe_count=%x", my_qp, wqe_count); + ehca_dbg(qp->context->device, "ehca_qp=%p wqe_count=%x", + my_qp, wqe_count); asm_sync_mem(); /* serialize GAL register access */ hipz_update_RQA(my_qp, wqe_count); - EDEB_EX(7, "ehca_qp=%p wqe_count=%x", my_qp, wqe_count); + ehca_dbg(qp->context->device, "ehca_qp=%p wqe_count=%x", + my_qp, wqe_count); return 0; } int ehcau_write_swqe(void *wqe, struct ibv_qp *qp, struct ibv_send_wr *send_wr, struct ibv_send_wr **bad_wr) { - struct ehcau_qp *my_qp = NULL; - int retcode = 0; - struct ibv_send_wr *cur_send_wr = NULL; + struct ehcau_qp *my_qp; + int retcode; + struct ibv_send_wr *cur_send_wr; int wqe_cnt = 0; - struct ehca_wqe *wqe_p = NULL; + struct ehca_wqe *wqe_p; - EHCA_CHECK_ADR(qp); + if (!qp) { + ehca_gen_err("qp=%p check failed line %i", qp, __LINE__); + return -EFAULT; + } my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EHCA_CHECK_QP(my_qp); - EHCA_CHECK_QP(wqe); - EHCA_CHECK_ADR(send_wr); + if (!wqe) { + ehca_gen_err("wqe=%p check failed line %i", wqe, __LINE__); + return -EFAULT; + } + if (!send_wr) { + ehca_gen_err("send_wr=%p check failed line %i", + send_wr, __LINE__); + return -EFAULT; + } - EDEB_EN(7, "ehcau_qp=%p wqe=%p send_wr=%p bad_wr=%p", - my_qp, wqe, send_wr, bad_wr); + ehca_dbg(qp->context->device, "ehcau_qp=%p wqe=%p send_wr=%p bad_wr=%p", + my_qp, wqe, send_wr, bad_wr); /* LOCK the QUEUE */ ehcau_lock(&my_qp->lockvar_s); /* loop processes list of send reqs */ wqe_p = (struct ehca_wqe*)wqe; - for (cur_send_wr = send_wr; cur_send_wr != NULL; + for (cur_send_wr = send_wr; cur_send_wr; cur_send_wr = cur_send_wr->next) { /* write a SEND WQE into the QUEUE */ retcode 
= write_swqe(my_qp, wqe_p, cur_send_wr); /* if something failed, leave loop */ - if (unlikely(retcode != 0)) { + if (unlikely(retcode)) { *bad_wr = cur_send_wr; break; } wqe_cnt++; wqe_p++; - EDEB(7, "ehca_qp %p wqe_cnt %d", my_qp, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp %p wqe_cnt %d", + my_qp, wqe_cnt); } /* eof for cur_send_wr */ retcode = wqe_cnt; /* UNLOCK the QUEUE */ ehcau_unlock(&my_qp->lockvar_s); - EDEB_EX(7, "ehca_qp=%p ret=%x wqe_cnt=%d", my_qp, retcode, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp=%p ret=%x wqe_cnt=%d", + my_qp, retcode, wqe_cnt); return retcode; } int ehcau_write_rwqe(void *wqe, struct ibv_qp *qp, struct ibv_recv_wr *recv_wr, struct ibv_recv_wr **bad_wr) { - struct ehcau_qp *my_qp = NULL; - int retcode = 0; - struct ibv_recv_wr *cur_recv_wr = NULL; + struct ehcau_qp *my_qp; + int retcode; + struct ibv_recv_wr *cur_recv_wr; int wqe_cnt = 0; - struct ehca_wqe *wqe_p = NULL; + struct ehca_wqe *wqe_p; - EHCA_CHECK_ADR(qp); + if (!qp) { + ehca_gen_err("qp=%p check failed line %i", qp, __LINE__); + return -EFAULT; + } my_qp = container_of(qp, struct ehcau_qp, ib_qp); - EHCA_CHECK_QP(my_qp); - EHCA_CHECK_ADR(wqe); - EHCA_CHECK_ADR(recv_wr); + if (!wqe) { + ehca_gen_err("wqe=%p check failed line %i", wqe, __LINE__); + return -EFAULT; + } + if (!recv_wr) { + ehca_gen_err("recv_wr=%p check failed line %i", + recv_wr, __LINE__); + return -EFAULT; + } - EDEB_EN(7, "ehca_qp=%p wqe=%p recv_wr=%p bad_wr=%p", - my_qp, wqe, recv_wr, bad_wr); + ehca_dbg(qp->context->device, "ehca_qp=%p wqe=%p recv_wr=%p bad_wr=%p", + my_qp, wqe, recv_wr, bad_wr); /* LOCK the QUEUE */ ehcau_lock(&my_qp->lockvar_r); /* loop processes list of send reqs */ wqe_p = (struct ehca_wqe*)wqe; - for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; + for (cur_recv_wr = recv_wr; cur_recv_wr; cur_recv_wr = cur_recv_wr->next) { /* write a RECV WQE into the QUEUE */ retcode = write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); /* if something failed, leave loop */ - if 
(unlikely(retcode != 0)) { + if (unlikely(retcode)) { *bad_wr = cur_recv_wr; break; } wqe_cnt++; wqe_p++; - EDEB(7, "ehca_qp %p wqe_cnt %d", my_qp, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp %p wqe_cnt %d", + my_qp, wqe_cnt); } /* eof for cur_recv_wr */ retcode = wqe_cnt; /* UNLOCK the QUEUE */ ehcau_unlock(&my_qp->lockvar_r); - EDEB_EX(7, "ehca_qp=%p ret=%x wqe_cnt=%d", my_qp, retcode, wqe_cnt); + ehca_dbg(qp->context->device, "ehca_qp=%p ret=%x wqe_cnt=%d", + my_qp, retcode, wqe_cnt); return retcode; } From robert.j.woodruff at intel.com Fri Jan 26 09:12:46 2007 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Fri, 26 Jan 2007 09:12:46 -0800 Subject: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: <20070126133651.GM10812@mellanox.co.il> Message-ID: Michael wrote, >I still don't see Ishai listed against SRP. >Is there a problem? Bottom of slide 4. SRP Tools/daemon userspace/srptools Ishai Rabinovitz Ishai at mellanox.co.il Does he also do the SRP kernel driver, rather than Roland? From caitlinb at broadcom.com Fri Jan 26 09:15:55 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Fri, 26 Jan 2007 09:15:55 -0800 Subject: [openib-general] [Bug 325] New: RDMA_CM and address translation broken on sles9sp3 In-Reply-To: Message-ID: <54AD0F12E08D1541B826BE97C98F99F1FBC7F9@NT-SJCA-0751.brcm.ad.broadcom.com> openib-general-bounces at openib.org wrote: > https://bugs.openfabrics.org/show_bug.cgi?id=325 > > Summary: RDMA_CM and address translation broken on sles9sp3 > Product: OpenFabrics Linux > Version: 1.2 > Platform: X86-64 > OS/Version: SLES 9 > Status: NEW > Severity: critical > Priority: P2 > Component: RDMA CM > AssignedTo: bugzilla at openib.org > ReportedBy: swise at opengridcomputing.com > > > rdma_translate_ip() and friends use > ip_dev_find(local_ip_addr) to obtain a net_device pointer. > Then the device type is used to determine if the rdma > address is iwarp or infiniband. 
> > On sles9sp3, ip_dev_find(local_ip_addr) is returning the > loopback device. This causes rdma_copy_addr() to fail. I suspect that this is the most obvious case of a more general problem. Specifically there is a higher priority route to the destination IP address that is not RDMA capable. Essentially the selected route needs to be considered "down" for RDMA traffic, so the less preferred route can be taken. The more specific problem could be addressed by making the loopback device support OFA verbs, but since nobody sells loopback devices there might not be a rush of volunteers. The other issue is that there may always be remote IP addresses that are reachable for non-RDMA traffic but not for RDMA traffic. All it takes is two network interfaces that connect to two networks that have no routes between them where one of them is not RDMA capable. Machines that have Ethernet ports dedicated to an administrative network are one obvious example. Ultimately that step suggests that the test should not be "if the rdma address is iwarp or infiniband" but "iwarp, infiniband or not RDMA accessible". 
From swise at opengridcomputing.com Fri Jan 26 09:24:28 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 26 Jan 2007 11:24:28 -0600 Subject: [openib-general] [Bug 325] New: RDMA_CM and address translation broken on sles9sp3 In-Reply-To: <54AD0F12E08D1541B826BE97C98F99F1FBC7F9@NT-SJCA-0751.brcm.ad.broadcom.com> References: <54AD0F12E08D1541B826BE97C98F99F1FBC7F9@NT-SJCA-0751.brcm.ad.broadcom.com> Message-ID: <1169832268.2996.28.camel@stevo-desktop> On Fri, 2007-01-26 at 09:15 -0800, Caitlin Bestler wrote: > openib-general-bounces at openib.org wrote: > > https://bugs.openfabrics.org/show_bug.cgi?id=325 > > > > Summary: RDMA_CM and address translation broken on sles9sp3 > > Product: OpenFabrics Linux > > Version: 1.2 > > Platform: X86-64 > > OS/Version: SLES 9 > > Status: NEW > > Severity: critical > > Priority: P2 > > Component: RDMA CM > > AssignedTo: bugzilla at openib.org > > ReportedBy: swise at opengridcomputing.com > > > > > > rdma_translate_ip() and friends use > > ip_dev_find(local_ip_addr) to obtain a net_device pointer. > > Then the device type is used to determine if the rdma > > address is iwarp or infiniband. > > > > On sles9sp3, ip_dev_find(local_ip_addr) is returning the > > loopback device. This causes rdma_copy_addr() to fail. > > I suspect that this is the most obvious case of a more > general problem. Specifically there is a higher priority > route to the destination IP address that is not RDMA > capable. > We're not doing a routing lookup here. We're just trying to find the local netdev that has a particular ip address bound to it. > Essentially the selected route needs to be considered > "down" for RDMA traffic, so the less preferred route > can be taken. > > The more specific problem could be addressed by making > the loopback device support OFA verbs, but since nobody > sells loopback devices there might not be a rush of > volunteers. > ip_dev_find() shouldn't really be using the routing table IMO. 
It is supposed to find the netdev device that has a given ip address bound to it. > The other issue is that there may always be remote IP > addresses that are reachable for non-RDMA traffic but > not for RDMA traffic. All it takes is two network interfaces > that connect to two networks that have no routes between > them where one of them is not RDMA capable. Machines that > have Ethernet ports dedicated to an administrative network > are one obvious example. > > Ultimately that step suggests that the test should not > be "if the rdma address is iwarp or infiniband" but "iwarp, > infiniband or not RDMA accessible". From halr at voltaire.com Fri Jan 26 09:42:06 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 26 Jan 2007 12:42:06 -0500 Subject: [openib-general] [PATCH MINOR] opensm: minor usage strings simplification In-Reply-To: <20070124211125.GD28335@sashak.voltaire.com> References: <20070124211125.GD28335@sashak.voltaire.com> Message-ID: <1169833243.8091.8856.camel@hal.voltaire.com> On Wed, 2007-01-24 at 16:11, Sasha Khapyorsky wrote: > Minor usage string simplification - this helps to avoid warning with some > version of vim c code analyzer. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From jgunthorpe at obsidianresearch.com Fri Jan 26 10:08:40 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Fri, 26 Jan 2007 11:08:40 -0700 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets In-Reply-To: References: <20070126000319.GA12386@obsidianresearch.com> Message-ID: <20070126180840.GD12386@obsidianresearch.com> On Thu, Jan 25, 2007 at 09:15:31PM -0800, Roland Dreier wrote: > > + if ((ret = ib_init_ah_from_path(priv->ca,priv->port,pathrec,&av))) > > kernel style is spaces after commas, like Oops, I'll fix these style things and send a new patch. > > + ipoib_dbg(priv, "PathRec init_ah failed %d for GID " > > printing the error message with ipoib_dbg() is a good idea? 
It means > the failure will be invisible unless someone explicitly enables > debugging. I don't know. The other two users in the tree ignore the return code right now.. > > + // FIXME: Should this be in ib_init_ah_from_path? > > + av.static_rate = pathrec->rate; > > Yes, I think this should be in ib_init_ah_from_path() ... I don't see > any reason why it shouldn't. I suspect the reason is historical, OK. > BTW do SRP and iSER already work through routers? I haven't tried them, my current goal is to get RDMA CM working over routers and then SDP. Right now the RDMA CM does not work. It looks like it is not setting the hop limit properly but I haven't found out where yet .. Thanks, Jason From halr at voltaire.com Fri Jan 26 10:22:32 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 26 Jan 2007 13:22:32 -0500 Subject: [openib-general] [PATCH] opensm: cleanup unused osm_req_ctrl In-Reply-To: <20070124221937.GF28335@sashak.voltaire.com> References: <20070124221937.GF28335@sashak.voltaire.com> Message-ID: <1169834939.8091.10326.camel@hal.voltaire.com> On Wed, 2007-01-24 at 17:19, Sasha Khapyorsky wrote: > This cleanups unused osm_req_ctrl stuff and corresponded objects. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. 
-- Hal From halr at voltaire.com Fri Jan 26 11:13:15 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 26 Jan 2007 14:13:15 -0500 Subject: [openib-general] [PATCH] OpenSM: Move osm_mtl_bind.h from include/opensm to include/vendor Message-ID: <1169838730.8091.13458.camel@hal.voltaire.com> OpenSM: Move osm_mtl_bind.h from include/opensm to include/vendor where it belongs as it is vendor specific Signed-off-by: Hal Rosenstock osm/include/Makefile.am | 4 ++-- osm/include/{opensm => vendor}/osm_mtl_bind.h | 0 osm/libvendor/osm_vendor_mtl.c | 3 +-- osm/libvendor/osm_vendor_mtl_transaction_mgr.c | 2 +- osm/opensm/Makefile.am | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) rename osm/include/{opensm/osm_mtl_bind.h => vendor/osm_mtl_bind.h} (100%) diff --git a/osm/include/Makefile.am b/osm/include/Makefile.am index 5a186ff..5efc11a 100644 --- a/osm/include/Makefile.am +++ b/osm/include/Makefile.am @@ -32,7 +32,6 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_console.h \ $(srcdir)/opensm/osm_req.h \ $(srcdir)/opensm/osm_mcm_info.h \ - $(srcdir)/opensm/osm_mtl_bind.h \ $(srcdir)/opensm/osm_sa_pkey_record.h \ $(srcdir)/opensm/osm_inform.h \ $(srcdir)/opensm/osm_path.h \ @@ -153,6 +152,7 @@ EXTRA_DIST = \ $(srcdir)/vendor/osm_vendor_mlx_txn.h \ $(srcdir)/vendor/osm_vendor_al.h \ $(srcdir)/vendor/osm_vendor_mtl.h \ - $(srcdir)/vendor/osm_vendor_sa_api.h + $(srcdir)/vendor/osm_vendor_sa_api.h \ + $(srcdir)/vendor/osm_mtl_bind.h dist-hook: diff --git a/osm/libvendor/osm_vendor_mtl.c b/osm/libvendor/osm_vendor_mtl.c index 408213e..51561e7 100644 --- a/osm/libvendor/osm_vendor_mtl.c +++ b/osm/libvendor/osm_vendor_mtl.c @@ -53,8 +53,7 @@ #include #include #include - -#include +#include /* Since a race can accure on requests. 
Meaning - a response is received before diff --git a/osm/libvendor/osm_vendor_mtl_transaction_mgr.c b/osm/libvendor/osm_vendor_mtl_transaction_mgr.c index 08f7833..8f2eb3a 100644 --- a/osm/libvendor/osm_vendor_mtl_transaction_mgr.c +++ b/osm/libvendor/osm_vendor_mtl_transaction_mgr.c @@ -50,7 +50,7 @@ #include #ifdef OSM_VENDOR_INTF_MTL -#include +#include #endif /* this is the callback function of the timer */ diff --git a/osm/include/opensm/osm_mtl_bind.h b/osm/include/opensm/osm_mtl_bind.h deleted file mode 100644 index e1bc747..0000000 --- a/osm/include/opensm/osm_mtl_bind.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef _OSM_BIND_H_ -#define _OSM_BIND_H_ - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -/****s* OpenSM: Vendor/osm_vendor_mgt_bind -* NAME -* osm_vendor_mgt_bind_t -* -* DESCRIPTION -* Tracks the handles returned by IB_MGT to the SMI and GSI -* Nulled on init of the vendor obj. Populated on first bind. -* -* SYNOPSIS -*/ -typedef struct _osm_vendor_mgt_bind -{ - boolean_t smi_init, gsi_init; - IB_MGT_mad_hndl_t smi_mads_hdl; - IB_MGT_mad_hndl_t gsi_mads_hdl; - struct _osm_mtl_bind_info *smi_p_bind; -} -osm_vendor_mgt_bind_t; - -/* -* FIELDS -* smi_mads_hdl -* Handle returned by IB_MGT_get_handle to the IB_MGT_SMI -* -* gsi_mads_hdl -* Handle returned by IB_MGT_get_handle to the IB_MGT_GSI -* -* SEE ALSO -*********/ - -/****s* OpenSM: Vendor osm_mtl_bind_info_t -* NAME -* osm_mtl_bind_info_t -* -* DESCRIPTION -* Handle to the result of binding a class callbacks to IB_MGT. -* -* SYNOPSIS -*/ -typedef struct _osm_mtl_bind_info -{ - IB_MGT_mad_hndl_t mad_hndl; - osm_vendor_t *p_vend; - void *client_context; - VAPI_hca_hndl_t hca_hndl; - VAPI_hca_id_t hca_id; - uint8_t port_num; - osm_vend_mad_recv_callback_t rcv_callback; - osm_vend_mad_send_err_callback_t send_err_callback; - osm_mad_pool_t *p_osm_pool; -} -osm_mtl_bind_info_t; - -/* -* FIELDS -* mad_hndl -* the handle returned from the registration in IB_MGT -* -* p_vend -* Pointer to the vendor object. -* -* client_context -* User's context passed during osm_bind -* -* hca_id -* HCA Id we bind to. 
-* -* port_num -* Port number (within the HCA) of the bound port. -* -* rcv_callback -* OSM Callback function to be called on receive of MAD. -* -* send_err_callback -* OSM Callback to be called on send error. -* -* p_osm_pool -* Points to the MAD pool used by OSM -* -* -* SEE ALSO -*********/ -ib_api_status_t -osm_mtl_send_mad( - IN osm_mtl_bind_info_t *p_bind, - IN osm_madw_t * const p_madw); - -END_C_DECLS - -#endif // _OSM_BIND_H_ diff --git a/osm/include/vendor/osm_mtl_bind.h b/osm/include/vendor/osm_mtl_bind.h new file mode 100644 index 0000000..e1bc747 --- /dev/null +++ b/osm/include/vendor/osm_mtl_bind.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_BIND_H_ +#define _OSM_BIND_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +/****s* OpenSM: Vendor/osm_vendor_mgt_bind +* NAME +* osm_vendor_mgt_bind_t +* +* DESCRIPTION +* Tracks the handles returned by IB_MGT to the SMI and GSI +* Nulled on init of the vendor obj. Populated on first bind. +* +* SYNOPSIS +*/ +typedef struct _osm_vendor_mgt_bind +{ + boolean_t smi_init, gsi_init; + IB_MGT_mad_hndl_t smi_mads_hdl; + IB_MGT_mad_hndl_t gsi_mads_hdl; + struct _osm_mtl_bind_info *smi_p_bind; +} +osm_vendor_mgt_bind_t; + +/* +* FIELDS +* smi_mads_hdl +* Handle returned by IB_MGT_get_handle to the IB_MGT_SMI +* +* gsi_mads_hdl +* Handle returned by IB_MGT_get_handle to the IB_MGT_GSI +* +* SEE ALSO +*********/ + +/****s* OpenSM: Vendor osm_mtl_bind_info_t +* NAME +* osm_mtl_bind_info_t +* +* DESCRIPTION +* Handle to the result of binding a class callbacks to IB_MGT. +* +* SYNOPSIS +*/ +typedef struct _osm_mtl_bind_info +{ + IB_MGT_mad_hndl_t mad_hndl; + osm_vendor_t *p_vend; + void *client_context; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_id_t hca_id; + uint8_t port_num; + osm_vend_mad_recv_callback_t rcv_callback; + osm_vend_mad_send_err_callback_t send_err_callback; + osm_mad_pool_t *p_osm_pool; +} +osm_mtl_bind_info_t; + +/* +* FIELDS +* mad_hndl +* the handle returned from the registration in IB_MGT +* +* p_vend +* Pointer to the vendor object. +* +* client_context +* User's context passed during osm_bind +* +* hca_id +* HCA Id we bind to. 
+* +* port_num +* Port number (within the HCA) of the bound port. +* +* rcv_callback +* OSM Callback function to be called on receive of MAD. +* +* send_err_callback +* OSM Callback to be called on send error. +* +* p_osm_pool +* Points to the MAD pool used by OSM +* +* +* SEE ALSO +*********/ +ib_api_status_t +osm_mtl_send_mad( + IN osm_mtl_bind_info_t *p_bind, + IN osm_madw_t * const p_madw); + +END_C_DECLS + +#endif // _OSM_BIND_H_ From Ashish.Batwara at lsi.com Fri Jan 26 12:49:16 2007 From: Ashish.Batwara at lsi.com (Batwara, Ashish) Date: Fri, 26 Jan 2007 13:49:16 -0700 Subject: [openib-general] Fast Memory Registrations (FMR) Message-ID: <01B9E81EECACE94DBBD0A556E768FB8A012AC7B8@NAMAIL2.ad.lsil.com> Hi, I am trying to use the FMR approach with Mellanox HCA. Thinking if somebody has some sample code to start with. Though there are verbs APIs as part of OFED, but if someone can share the code to use those APIs and some references around in this area. Thanks Ashish From rdreier at cisco.com Fri Jan 26 13:01:10 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 26 Jan 2007 13:01:10 -0800 Subject: [openib-general] Fast Memory Registrations (FMR) In-Reply-To: <01B9E81EECACE94DBBD0A556E768FB8A012AC7B8@NAMAIL2.ad.lsil.com> (Ashish Batwara's message of "Fri, 26 Jan 2007 13:49:16 -0700") References: <01B9E81EECACE94DBBD0A556E768FB8A012AC7B8@NAMAIL2.ad.lsil.com> Message-ID: > I am trying to use the FMR approach with Mellanox HCA. Thinking if > somebody has some sample code to start with. Though there are verbs APIs > as part of OFED, but if someone can share the code to use those APIs and > some references around in this area. drivers/infiniband/ulp/{iser,srp} use the FMR pool API. - R. 
From bugzilla-daemon at lists.openfabrics.org Fri Jan 26 13:08:51 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Fri, 26 Jan 2007 13:08:51 -0800 (PST) Subject: [openib-general] [Bug 326] New: match_token doesn't consume/userstand trailing \n when parsing integer options Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=326 Summary: match_token doesn't consume/userstand trailing \n when parsing integer options Product: OpenFabrics Linux Version: 1.1 Platform: All OS/Version: All Status: NEW Severity: normal Priority: P1 Component: SRP AssignedTo: bugzilla at openib.org ReportedBy: chas at cmf.nrl.navy.mil if i use the following to login to an srp target: echo id_ext=200600A0B80BDE31,ioc_guid=0002c90200400138,dgid=fe800000000000000002c90200400139,pkey=ffff,service_id=200600a0b80bde31,max_sect=1024 > /sys/class/infiniband_srp/srp-mthca0-1/add_target i get the following error in dmesg: ib_srp: unknown parameter or missing value 'max_sect=1024 ' in target creation request a workaround is to use echo -n or put max_sect=1024 at the beginning. don't know what the right fix is. trim trailing \r\n before parsing? -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From swise at opengridcomputing.com Fri Jan 26 13:11:07 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 26 Jan 2007 15:11:07 -0600 Subject: [openib-general] [PATCH 00/12] ofed_1_2 - Neighbour update support In-Reply-To: <1169827215.2996.14.camel@stevo-desktop> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> <1169827215.2996.14.camel@stevo-desktop> Message-ID: <1169845867.2996.48.camel@stevo-desktop> Michael/Vlad: I've pushed these up to my git tree. Can you merge them in?
git://staging.openfabrics.com/~swise/ofed_1_2.git cxgb3 Here is the short log of the commits: aedc0b3c1681fb550ec4b8d1021caa2ce3dcbfd7 iw_cxgb3: allow doorbell mappings with VM_READ set. 5ea83b9e3ec6f9c74040944adb83e4faf6613fe1 Backport Chelsio to rhel5 (2.6.18_FC6). ff38246f6f07ff25609eaa304a707748904bf2bf Backport sles9sp3: Simulate neigh update events by snooping ARP packets b88d46d10ce15f8ee725454f4998af6497cc13e1 Backport rhel4u4: Simulate neigh update events by snooping ARP packets ab3a817b10da2df2e3d5bf08018be3d0212dc5bd Backport 2.6.11: Simulate neigh update events by snooping ARP packets e545001a94c180c32b8b15d4ca4351506bd50fc2 Backport 2.6.12: Simulate neigh update events by snooping ARP packets 8ddafe035c1a997c7625ae1bd42767deed148cb7 Backport 2.6.13: Simulate neigh update events by snooping ARP packets ef260b8242d90edcabdc3153b829eda65d451672 Backport 2.6.14: Simulate neigh update events by snooping ARP packets 84c78965a7c6a2a831fb2a49c6936321e2566904 Backport ubuntu606: Simulate neigh update events by snooping ARP packets eb09f52a33471613fc29f898dfad8d9a57238d3e Backport 2.6.15: Simulate neigh update events by snooping ARP packets ddc3ec432bd1898005ab52241d125dd4a71436aa Backport sles10: Simulate neigh update events by snooping ARP packets b4af429744ff06545b2941fc5ef1ab4d6f0c0e77 Backport 2.6.16: Simulate neigh update events by snooping ARP packets fe1a597f3aa409465d5b1b577a3b28c4a002f143 Backport 2.6.17: Simulate neighbour update events by snooping ARP packets 9b3bfe5696aa417d38ce903eb345a03d65743dd2 Handle Ethernet neighbour updates during route resolution. 
From rdreier at cisco.com Fri Jan 26 13:11:56 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 26 Jan 2007 13:11:56 -0800 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets In-Reply-To: <20070126180840.GD12386@obsidianresearch.com> (Jason Gunthorpe's message of "Fri, 26 Jan 2007 11:08:40 -0700") References: <20070126000319.GA12386@obsidianresearch.com> <20070126180840.GD12386@obsidianresearch.com> Message-ID: > I don't know. The other two users in the tree ignore the return > code right now.. The only reason it could fail is if it fails to map a local GID to a GID index. Which should never happen under normal circumstance. But it does look like the two other uses are somewhat buggy since they might end up using bogus address info. Maybe the best fix is to have ib_init_ah_from_path() itself print a warning if the GID index can't be found, just set the gid_index to 0 in that case, and change ib_init_ah_from_path() to return void? What do you think of doing something like this for 2.6.21: diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e45afba..a70c380 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -471,8 +471,8 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) } EXPORT_SYMBOL(ib_sa_cancel_query); -int ib_init_ah_from_path(struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) +void ib_init_ah_from_path(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) { int ret; u16 gid_index; @@ -485,19 +485,30 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, if (rec->hop_limit > 1) { ah_attr->ah_flags = IB_AH_GRH; - ah_attr->grh.dgid = rec->dgid; ret = ib_find_cached_gid(device, &rec->sgid, &port_num, &gid_index); if (ret) - return ret; - - ah_attr->grh.sgid_index = gid_index; + printk(KERN_INFO "%s: GID index not found for " + 
"device %s, GID " + "%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:" + "%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x", + __func__, device->name, + rec->sgid.raw[ 0], rec->sgid.raw[ 1], + rec->sgid.raw[ 2], rec->sgid.raw[ 3], + rec->sgid.raw[ 4], rec->sgid.raw[ 5], + rec->sgid.raw[ 6], rec->sgid.raw[ 7], + rec->sgid.raw[ 8], rec->sgid.raw[ 9], + rec->sgid.raw[10], rec->sgid.raw[11], + rec->sgid.raw[12], rec->sgid.raw[13], + rec->sgid.raw[14], rec->sgid.raw[15]); + + ah_attr->grh.sgid_index = !ret ? gid_index : 0; + ah_attr->grh.dgid = rec->dgid; ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; } - return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 97715b0..7572e98 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -401,8 +401,8 @@ ib_sa_mcmember_rec_delete(struct ib_sa_client *client, * ib_init_ah_from_path - Initialize address handle attributes based on an SA * path record. 
*/ -int ib_init_ah_from_path(struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, - struct ib_ah_attr *ah_attr); +void ib_init_ah_from_path(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, + struct ib_ah_attr *ah_attr); #endif /* IB_SA_H */ From bugzilla-daemon at lists.openfabrics.org Fri Jan 26 13:14:52 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Fri, 26 Jan 2007 13:14:52 -0800 (PST) Subject: [openib-general] [Bug 327] New: want to set the scsi target id during srp login Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=327 Summary: want to set the scsi target id during srp login Product: OpenFabrics Linux Version: 1.1 Platform: All OS/Version: Other Status: NEW Severity: enhancement Priority: P1 Component: SRP AssignedTo: bugzilla at openib.org ReportedBy: chas at cmf.nrl.navy.mil we have a need to set the scsi id for each login (in order to track it later). diff -u drivers/infiniband/ulp/srp/ib_srp.c.orig drivers/infiniband/ulp/srp/ib_srp.c --- drivers/infiniband/ulp/srp/ib_srp.c.orig 2006-12-21 14:15:33.728164124 -0500 +++ drivers/infiniband/ulp/srp/ib_srp.c 2007-01-26 16:11:10.130470635 -0500 @@ -1544,6 +1544,7 @@ SRP_OPT_MAX_SECT = 1 << 5, SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, SRP_OPT_IO_CLASS = 1 << 7, + SRP_OPT_TARGET_ID = 1 << 8, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -1560,6 +1561,7 @@ { SRP_OPT_MAX_SECT, "max_sect=%d" }, { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, { SRP_OPT_IO_CLASS, "io_class=%x" }, + { SRP_OPT_TARGET_ID, "target_id=%d" }, { SRP_OPT_ERR, NULL } }; @@ -1659,6 +1661,14 @@ target->io_class = token; break; + case SRP_OPT_TARGET_ID: + if (match_int(args, &token)) { + printk(KERN_WARNING PFX "bad target_id parameter '%s'\n", p); + goto out; + } + target->scsi_id = token; + break; + default: printk(KERN_WARNING PFX "unknown parameter or missing value " "'%s' in target creation request\n", p); -- Configure 
bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From halr at voltaire.com Fri Jan 26 13:42:53 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 26 Jan 2007 16:42:53 -0500 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B8E089.5000804@dev.mellanox.co.il> References: <45B768E2.9070604@dev.mellanox.co.il> <1169734241.13608.75454.camel@hal.voltaire.com> <45B8E089.5000804@dev.mellanox.co.il> Message-ID: <1169847771.4555.7676.camel@hal.voltaire.com> Hi again Yevgeny, On Thu, 2007-01-25 at 11:53, Yevgeny Kliteynik wrote: > Hi Hal. > > Hal Rosenstock wrote: > > Hi Yevgeny, > > > > On Wed, 2007-01-24 at 09:10, Yevgeny Kliteynik wrote: > >> Hi Hal, Sasha. > >> > >> Here's a description of the QoS policy file, and an > >> example of such file (with more comments inside). > > > > This makes the start of a good document on this. If you add this to > > osm/doc, I will incorporate it into the opensm man page. > > OK, I'll do that. > > >> QoS Policy file > >> -- > >> > >> The QoS policy file is divided into 4 sub sections: > >> > >> * Node Group: a set of HCAs, Routers or Switches that share the same settings. > >> A node groups might be a partition defined by the partition manager policy in > >> terms of GUIDs. > > > > Are these Node or Port Groups ? It looks like port groups from the > > below. > > Good point - it should be "Port Groups". > > >> Future implementations might provide support for NodeDescription > >> based definition of node groups. > >> > >> * Fabric Setup: > >> Defines how the SL2VL and VLArb tables should be setup. This policy definition > >> assumes the computation of target behavior should be performed outside of > >> OpenSM. > >> > >> * QoS-Levels Definition: > >> This section defines the possible sets of parameters for QoS that a client might > >> be mapped to. 
Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits > >> (in case LMC > 0 is used for QoS) and TClass. > > > > How does this relate to/interact with partition configuration ? Also, > > what about preexisting QoS ? > > As I understand from the osm man or from the partition-config.txt, > partitions definition is intended to be used for IPoIB only. > [quote] > sl= - specifies SL for this IPoIB MC group > (default is 0) > [/quote] > > I think that QoS policy may only "tighten" the constraints and enforce > lower-than-requested values, both in case of partition and in case of > preexisting QoS settings. I'm not following you on this specific point. A specific SL is chosen by partition config so how can it be "tightened" ? Does it mean it might be changed to a different SL (in which case this QoS config superceeds the partition config for SL setting) ? Have you tried this to be sure ? Are multicast groups handled as part QoS definition in the XML syntax ? If not, might this be a future addition ? If it is, how are they specified ? The other half of the original question was how a QoS request is handled if the original QoS support is enabled rather than this new QoS support in terms of the SA PR and MPR code. > >> * Matching Rules: > >> A list of rules that match an incoming PathRecord request to a QoS-Level. The > >> rules are processed in order such as the first match is applied. Each rule is > >> built out of set of match expressions which should all match for the rule to > >> apply. The matching expressions are defined for the following fields > >> - SRC and DST to lists of node groups > >> - Service-ID to a list of Service-ID or Service-ID ranges > >> - TClass to a list of TClass values or ranges > >> > >> QoS policy file example > >> -- > >> > >> > >> > >> > >> > >> > >> > >> Storage > >> our SRP storage targets > > > > Is the use clause more than commentary ? How is it "used" ? 
> > The 'use' clause is just a description of the port group that > can be used for logging. Other than for logging, it is just a > commentary. > > >> 0x1000000000000001 > >> 0x1000000000000002 > >> > >> > >> > >> Virtual Servers > >> node desc and IB port # > >> vs1/HCA-1/P1 > >> vs3/HCA-1/P1 > >> vs3/HCA-2/P1 > > > > How are port-names used ? > > The syntax of the port name is as follows: > "hostname/CA-num/Pnum" What's it's purpose ? Is it used somewhere else in the syntax ? > >> > >> > >> > >> Partition 1 > >> default settings > >> Part1 > >> > >> > > > > Is this CA rather than HCA ? (What about TCAs ?) > > Sure, it should be 'CA'. Will this be changed ? If so, when ? > >> > >> Routers > >> all routers > >> ROUTER > >> > >> > >> > >> > >> > > ^^ > > Actually, it is SL > > assuming the device supports SL2VL mapping as indicate by > > IsSLMappingSupported in the PortInfo:CapabilityMask. > > Will the syntax handle single data VL devices which only implement SL > > filtering ? > > Yes, it should. > > > Will the QoS manager support this (SL2VL without VLArb > > settings) or are these required together ? > > Yes, it should support sl2vl w/o vlarb settings as well. > > >> > >> > >> > >> Part1 > >> * > >> * > >> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > >> > >> > >> > >> Storage > >> > >> Storage2 > >> > >> Storage3 > > > > I don't quite follow across-from/to. > > Right, the comments there are garbage. Here the explanation: > SL2VL table describes VL as function of from-port, to-port, and SL. > > group_name: > It defines sl2vl table where 'to-port's belong to group_name > group_name: > Same as above, only that this time 'from-port's belong to group_name > group_name: > sl2vl tables both for 'to-port's 'from-port's that belong to group_name I'm still not following what is going on here and how this is used. 
> >> * > >> 1 > >> 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 > >> > >> > >> > >> > >> > >> > >> > >> Storage > >> > >> 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 > >> 8:255,9:127,10:63,11:31,12:15,13:7,14:3 > >> 10 > > > > What happens if the shape of VLArb indicated here does not match the > > device ? > > The part that sets up the QoS in SM (I'm not writing this part right now), What is the plan for this ? > should issue error message in case VLArb definition doesn't match the device > properties. Aside from the error message, is there any additional error handling for this ? > >> > >> > >> > >> > >> > >> > >> > >> 1 > > > > What does sn mean ? What is it used for ? > > 'sn' is an id of this qos level definition. > It is referenced later in by QoS match rules as 'qos-level-sn' What is 'sn' short for ? > >> for the lowest priority comm > >> 16 > >> > >> > >> > >> 2 > >> low latency best bandwidth > >> 0 > >> 7 > > > > What is class ? I saw TClass mentioned earlier. Is this TClass or > > something else ? > > Instead of "TClass" there should be "QoS Class". > The value is the PathRecord.qos_class value that should be > returned in the path record query response when a certain > is applied to the returned path. So these names need to change to be more consistent ? > >> > >> > > > > If specified, do MTU limit and rate limit add extra limits to be imposed > > on what is selected (and realizable) ? > > Yes > > > Strictly speaking, couldn't packet lifetime limit also be added to this > > syntax here ? I presume it was left out as being not "interesting" as > > yet. Is that correct ? > > I can add packet lifetime limit - it's not a big deal > > > Also, how are path bits used ? > > For now I don't do anything with them - we'll discuss this issue in the future. How are they envisioned to be used ? Why are they in the syntax now ? Seems inconsistent with PLL. Should there be a warning if they are specified now since they are not used ? 
> >> > >> 3 > >> just an example > >> 0 > >> 32 > >> 1 > >> 1 > >> > >> > >> > >> > >> > >> > >> > >> 1 > >> low latency by class 7-9 or 11 > >> 7-9,11 > >> 1 > >> > >> > >> > >> 2 > >> Storage targets connection> > >> Storage > >> 22,4719 > > > > What is service ? What does 22.4719 mean ? > > The syntax is service_id1,service_id1,..., so in the > example above these are actually two service ids. So you can create arbitrary lists of service IDs. What about ranges ? Does the syntax support that ? > As for the exact meaning of this, I'm not sure - I need to think about it... Let me know. I'd really like to understand the syntax. > >> 3 > > > > What are match-levels used for ? > > Actually, they are not used - they shouldn't appear here. > Somehow it was copy-pasted here from one of the older versions > of the policy file. So can this be updated for what is current ? Thanks. -- Hal > -- Yevgeny > > > -- Hal > > > >> > >> > >> > >> > >> > >> > >> > >> -- Yevgeny > >> > >> Yevgeny Kliteynik wrote: > >>> Hi Sasha, > >>> > >>> Sasha Khapyorsky wrote: > >>>> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: > >>>>> Hi Sasha. > >>>>> > >>>>> Sasha Khapyorsky wrote: > >>>>>> Hi Yevgeny, > >>>>>> > >>>>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >>>>>>> Hi Hal > >>>>>>> > >>>>>>> The following series of six patches implements QoS policy file parser: > >>>>>>> > >>>>>>> 1. QoS parser Lex file > >>>>>>> 2. QoS parser Lex-generated c file > >>>>>>> 3. QoS parser grammar (Yacc) file > >>>>>>> 4. QoS parser Yacc-generated grammar c and h file > >>>>>>> 5. QoS parser header file that defines parse tree data structures > >>>>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files > >>>>>> Is there any description of proposed format and functionality? > >>>>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few > >>>>> minor modifications. 
You can find the RFC here: > >>>>> http://openib.org/pipermail/openib-general/2006-May/022336.html > >>>> This was RFC and couple of issues were discussed then. Now you are about > >>>> implementation phase and exact format description would be desired. For > >>>> example what "few minor modifications" are? > >>> I'll prepare an example file with explanations. > >>> > >>> -- Yevgeny > >>> > >>>>>> Also what about using human readable formats? > >>>>> To me the xml-like format in the RFC looks pretty readable. > >>>>> It has very limited number of keywords (tags), so it's easy > >>>>> to follow and/or to modify. > >>>> It is your opinion, not everybody will agree with it (AFAIR this was > >>>> discussed too during RFC). > >>>> > >>>> I would not be care, but I don't know any example of really successful > >>>> XML using for configuration purposes (especially where advanced graphical > >>>> config editors/viewers were not used). Do you know? > >>>> > >>>> Sasha > >>>> > >>> _______________________________________________ > >>> openib-general mailing list > >>> openib-general at openib.org > >>> http://openib.org/mailman/listinfo/openib-general > >>> > >>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >>> > > From sean.hefty at intel.com Fri Jan 26 13:45:02 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 26 Jan 2007 13:45:02 -0800 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets In-Reply-To: Message-ID: <000001c74193$3c0afee0$ff0da8c0@amr.corp.intel.com> >Maybe the best fix is to have ib_init_ah_from_path() itself print a >warning if the GID index can't be found, just set the gid_index to 0 >in that case, and change ib_init_ah_from_path() to return void? > >What do you think of doing something like this for 2.6.21: Changing this to a void seems fine to me. Hal, do you see any issues with this? Can the problem occur if we have an out of date path record? 
And even if that can occur, can the problem be pushed off until the user calls ib_create_ah? - Sean From halr at voltaire.com Fri Jan 26 13:57:05 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 26 Jan 2007 16:57:05 -0500 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets In-Reply-To: <000001c74193$3c0afee0$ff0da8c0@amr.corp.intel.com> References: <000001c74193$3c0afee0$ff0da8c0@amr.corp.intel.com> Message-ID: <1169848625.4555.8553.camel@hal.voltaire.com> On Fri, 2007-01-26 at 16:45, Sean Hefty wrote: > >Maybe the best fix is to have ib_init_ah_from_path() itself print a > >warning if the GID index can't be found, just set the gid_index to 0 > >in that case, and change ib_init_ah_from_path() to return void? > > > >What do you think of doing something like this for 2.6.21: > > Changing this to a void seems fine to me. Hal, do you see any issues with this? > Can the problem occur if we have an out of date path record? There's some weird edge cases perhaps like subnet merge where the subnet prefix might change and cause this sort of failure. It seems the question is whether in these cases, is it better not to respond or to respond with GID index 0 ? I'm not sure. > And even if that can occur, can the problem be pushed off until the user calls ib_create_ah? Not sure what you mean by this. -- Hal > > - Sean From betsy at pathscale.com Fri Jan 26 14:14:52 2007 From: betsy at pathscale.com (Betsy Zeller) Date: Fri, 26 Jan 2007 14:14:52 -0800 Subject: [openib-general] modules compilation status for OFED 1.2 In-Reply-To: <45B77F8C.9060209@mellanox.co.il> References: <45B77F8C.9060209@mellanox.co.il> Message-ID: <1169849692.882.70.camel@sarium.pathscale.com> Bryan is working on recreating the backport patches for the ipath driver. It appears that all of the InfiniPath backport patches were removed from the OFED source tree late last year. 
By early next week, we'll have a better sense of whether any of these patches will need to come in after Jan 31. - Betsy On Wed, 2007-01-24 at 17:47 +0200, Tziporet Koren wrote: > Hi All, > We are approaching code freeze and I want to make sure that all kernel > modules indeed will compile on the supported OSes of OFED 1.2: > * Redhat EL4 up5 (currently tested on up4) > * Redhat EL5 - if will be available > * SLES9 SP3 > * SLES10 SP1 > * kernel.org: 2.6.19.x and 2.6.20.x > The status is that all modules (except ehca) pass compilation on > kernel 2.6.19. > > The following modules have issues with support for some distros: > * vnic (Ram) - SLES9 > * ipath driver (Bryan) : SLES9, Redhat EL4 up4, SLES10 SP1 > * ehca driver (Nam) - SLES9, Redhat EL4 up4, SLES10 SP1, 2.6.19 > Owners of these modules: Please take an action to fix as soon as > possible or reply if you don't want your module to be supported on > some of the distros > > Thanks, > Tziporet > > > -- Betsy Zeller Director of Software Engineering QLogic Corporation System Interconnect Group (formerly PathScale, Inc) 2071 Stierlin Court, Suite 200 Mountain View, CA, 94043 1-650-934-8088 From Ashish.Batwara at lsi.com Fri Jan 26 14:12:49 2007 From: Ashish.Batwara at lsi.com (Batwara, Ashish) Date: Fri, 26 Jan 2007 15:12:49 -0700 Subject: [openib-general] Fast Memory Registrations (FMR) Message-ID: <01B9E81EECACE94DBBD0A556E768FB8A012AC804@NAMAIL2.ad.lsil.com> Thanks for your reply. Another question is that how to use the FMR pool allocated? Thanks Ashish -----Original Message----- From: Roland Dreier [mailto:rdreier at cisco.com] Sent: Friday, January 26, 2007 3:01 PM To: Batwara, Ashish Cc: openib-general at openib.org Subject: Re: [openib-general] Fast Memory Registrations (FMR) > I am trying to use the FMR approach with Mellanox HCA. Thinking if > somebody has some sample code to start with. 
Though there are verbs APIs > as part of OFED, but if someone can share the code to use those APIs and > some references around in this area. drivers/infiniband/ulp/{iser,srp} use the FMR pool API. - R. From sashak at voltaire.com Fri Jan 26 14:37:39 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 27 Jan 2007 00:37:39 +0200 Subject: [openib-general] [PATCH 0/6] osm: QoS policy parser In-Reply-To: <45B8E089.5000804@dev.mellanox.co.il> References: <45B768E2.9070604@dev.mellanox.co.il> <1169734241.13608.75454.camel@hal.voltaire.com> <45B8E089.5000804@dev.mellanox.co.il> Message-ID: <20070126223739.GI8333@sashak.voltaire.com> On 18:53 Thu 25 Jan , Yevgeny Kliteynik wrote: > Hi Hal. > > Hal Rosenstock wrote: > > Hi Yevgeny, > > > > On Wed, 2007-01-24 at 09:10, Yevgeny Kliteynik wrote: > >> Hi Hal, Sasha. > >> > >> Here's a description of the QoS policy file, and an > >> example of such file (with more comments inside). > > > > This makes the start of a good document on this. If you add this to > > osm/doc, I will incorporate it into the opensm man page. > > OK, I'll do that. > > >> QoS Policy file > >> --------------- > >> > >> The QoS policy file is divided into 4 sub sections: > >> > >> * Node Group: a set of HCAs, Routers or Switches that share the same settings. > >> A node groups might be a partition defined by the partition manager policy in > >> terms of GUIDs. > > > > Are these Node or Port Groups ? It looks like port groups from the > > below. > > Good point - it should be "Port Groups". > > >> Future implementations might provide support for NodeDescription > >> based definition of node groups. > >> > >> * Fabric Setup: > >> Defines how the SL2VL and VLArb tables should be setup. This policy definition > >> assumes the computation of target behavior should be performed outside of > >> OpenSM. > >> > >> * QoS-Levels Definition: > >> This section defines the possible sets of parameters for QoS that a client might > >> be mapped to. 
Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits > >> (in case LMC > 0 is used for QoS) and TClass. > > > > How does this relate to/interact with partition configuration ? Also, > > what about preexisting QoS ? > > As I understand from the osm man or from the partition-config.txt, > partitions definition is intended to be used for IPoIB only. > [quote] > sl= - specifies SL for this IPoIB MC group > (default is 0) > [/quote] This description is incorrect, sl=val in partition config is defined per partition regardless to IPoIB settings. > > I think that QoS policy may only "tighten" the constraints and enforce > lower-than-requested values, both in case of partition and in case of > preexisting QoS settings. > > >> * Matching Rules: > >> A list of rules that match an incoming PathRecord request to a QoS-Level. The > >> rules are processed in order such as the first match is applied. Each rule is > >> built out of set of match expressions which should all match for the rule to > >> apply. The matching expressions are defined for the following fields > >> - SRC and DST to lists of node groups > >> - Service-ID to a list of Service-ID or Service-ID ranges > >> - TClass to a list of TClass values or ranges > >> > >> QoS policy file example > >> ----------------------- > >> > >> > >> > >> > >> > >> > >> > >> Storage > >> our SRP storage targets > > > > Is the use clause more than commentary ? How is it "used" ? > > The 'use' clause is just a description of the port group that > can be used for logging. Other than for logging, it is just a > commentary. > > >> 0x1000000000000001 > >> 0x1000000000000002 > >> > >> > >> > >> Virtual Servers > >> node desc and IB port # > >> vs1/HCA-1/P1 > >> vs3/HCA-1/P1 > >> vs3/HCA-2/P1 > > > > How are port-names used ? > > The syntax of the port name is as follows: > "hostname/CA-num/Pnum" And how "hostname" is resolved? Also looking in the parser patches I see that it is not implemented yet. 
> > >> > >> > >> > >> Partition 1 > >> default settings > >> Part1 > >> > >> > > > > Is this CA rather than HCA ? (What about TCAs ?) > > Sure, it should be 'CA'. > > >> > >> Routers > >> all routers > >> ROUTER > >> > >> > >> > >> > >> > > ^^ > > Actually, it is SL > > assuming the device supports SL2VL mapping as indicate by > > IsSLMappingSupported in the PortInfo:CapabilityMask. > > Will the syntax handle single data VL devices which only implement SL > > filtering ? > > Yes, it should. > > > Will the QoS manager support this (SL2VL without VLArb > > settings) or are these required together ? > > Yes, it should support sl2vl w/o vlarb settings as well. > > >> > >> > >> > >> Part1 > >> * > >> * > >> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > >> > >> > >> > >> Storage > >> > >> Storage2 > >> > >> Storage3 > > > > I don't quite follow across-from/to. > > Right, the comments there are garbage. Here the explanation: > SL2VL table describes VL as function of from-port, to-port, and SL. > > group_name: > It defines sl2vl table where 'to-port's belong to group_name > group_name: > Same as above, only that this time 'from-port's belong to group_name > group_name: > sl2vl tables both for 'to-port's 'from-port's that belong to group_name > > >> * > >> 1 > >> 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 > >> > >> > >> > >> > >> > >> > >> > >> Storage > >> > >> 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 > >> 8:255,9:127,10:63,11:31,12:15,13:7,14:3 > >> 10 > > > > What happens if the shape of VLArb indicated here does not match the > > device ? > > The part that sets up the QoS in SM (I'm not writing this part right now), > should issue error message in case VLArb definition doesn't match the device > properties. Then assuming we can have devices with different capabilities this will require to split port groups according capabilities and have separate QoS configuration for each group. 
For me it looks like not needed overhead which could be avoided if we will have default "mapping" rule (like it is done now or similar). Sasha > > >> > >> > >> > >> > >> > >> > >> > >> 1 > > > > What does sn mean ? What is it used for ? > > 'sn' is an id of this qos level definition. > It is referenced later in by QoS match rules as 'qos-level-sn' > > >> for the lowest priority comm > >> 16 > >> > >> > >> > >> 2 > >> low latency best bandwidth > >> 0 > >> 7 > > > > What is class ? I saw TClass mentioned earlier. Is this TClass or > > something else ? > > Instead of "TClass" there should be "QoS Class". > The value is the PathRecord.qos_class value that should be > returned in the path record query response when a certain > is applied to the returned path. > > >> > >> > > > > If specified, do MTU limit and rate limit add extra limits to be imposed > > on what is selected (and realizable) ? > > Yes > > > Strictly speaking, couldn't packet lifetime limit also be added to this > > syntax here ? I presume it was left out as being not "interesting" as > > yet. Is that correct ? > > I can add packet lifetime limit - it's not a big deal > > > Also, how are path bits used ? > > For now I don't do anything with them - we'll discuss this issue in the future. > > >> > >> 3 > >> just an example > >> 0 > >> 32 > >> 1 > >> 1 > >> > >> > >> > >> > >> > >> > >> > >> 1 > >> low latency by class 7-9 or 11 > >> 7-9,11 > >> 1 > >> > >> > >> > >> 2 > >> Storage targets connection> > >> Storage > >> 22,4719 > > > > What is service ? What does 22.4719 mean ? > > The syntax is service_id1,service_id1,..., so in the > example above these are actually two service ids. > As for the exact meaning of this, I'm not sure - I need to think about it... > > >> 3 > > > > What are match-levels used for ? > > Actually, they are not used - they shouldn't appear here. > Somehow it was copy-pasted here from one of the older versions > of the policy file. 
> > -- Yevgeny > > > -- Hal > > > >> > >> > >> > >> > >> > >> > >> > >> -- Yevgeny > >> > >> Yevgeny Kliteynik wrote: > >>> Hi Sasha, > >>> > >>> Sasha Khapyorsky wrote: > >>>> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: > >>>>> Hi Sasha. > >>>>> > >>>>> Sasha Khapyorsky wrote: > >>>>>> Hi Yevgeny, > >>>>>> > >>>>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >>>>>>> Hi Hal > >>>>>>> > >>>>>>> The following series of six patches implements QoS policy file parser: > >>>>>>> > >>>>>>> 1. QoS parser Lex file > >>>>>>> 2. QoS parser Lex-generated c file > >>>>>>> 3. QoS parser grammar (Yacc) file > >>>>>>> 4. QoS parser Yacc-generated grammar c and h file > >>>>>>> 5. QoS parser header file that defines parse tree data structures > >>>>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files > >>>>>> Is there any description of proposed format and functionality? > >>>>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few > >>>>> minor modifications. You can find the RFC here: > >>>>> http://openib.org/pipermail/openib-general/2006-May/022336.html > >>>> This was RFC and couple of issues were discussed then. Now you are about > >>>> implementation phase and exact format description would be desired. For > >>>> example what "few minor modifications" are? > >>> I'll prepare an example file with explanations. > >>> > >>> -- Yevgeny > >>> > >>>>>> Also what about using human readable formats? > >>>>> To me the xml-like format in the RFC looks pretty readable. > >>>>> It has very limited number of keywords (tags), so it's easy > >>>>> to follow and/or to modify. > >>>> It is your opinion, not everybody will agree with it (AFAIR this was > >>>> discussed too during RFC). > >>>> > >>>> I would not be care, but I don't know any example of really successful > >>>> XML using for configuration purposes (especially where advanced graphical > >>>> config editors/viewers were not used). Do you know? 
> >>>> > >>>> Sasha > >>>> > >>> _______________________________________________ > >>> openib-general mailing list > >>> openib-general at openib.org > >>> http://openib.org/mailman/listinfo/openib-general > >>> > >>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >>> > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Fri Jan 26 14:39:45 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 26 Jan 2007 14:39:45 -0800 Subject: [openib-general] Fast Memory Registrations (FMR) In-Reply-To: <01B9E81EECACE94DBBD0A556E768FB8A012AC804@NAMAIL2.ad.lsil.com> (Ashish Batwara's message of "Fri, 26 Jan 2007 15:12:49 -0700") References: <01B9E81EECACE94DBBD0A556E768FB8A012AC804@NAMAIL2.ad.lsil.com> Message-ID: > Thanks for your reply. Another question is that how to use the FMR pool > allocated? Sorry, I can't parse your question. Try looking at the SRP driver -- it is quite small and the FMR use is an even smaller part, so that should be a good example to follow. - R. From mshefty at ichips.intel.com Fri Jan 26 14:45:58 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 26 Jan 2007 14:45:58 -0800 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets In-Reply-To: <1169848625.4555.8553.camel@hal.voltaire.com> References: <000001c74193$3c0afee0$ff0da8c0@amr.corp.intel.com> <1169848625.4555.8553.camel@hal.voltaire.com> Message-ID: <45BA84A6.4080809@ichips.intel.com> >> And even if that can occur, can the problem be pushed off until the user >> calls ib_create_ah? > > Not sure what you mean by this. ib_init_ah_from_path() is only used to initialize the ah_attr before calling ib_create_ah(). 
We have to trap for failure from ib_create_ah(), so if ib_init_ah_from_path simply aborts, would ib_create_ah() simply fail later? - Sean From halr at voltaire.com Fri Jan 26 14:52:21 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 26 Jan 2007 17:52:21 -0500 Subject: [openib-general] [RFC] Performance Manager Message-ID: <1169851941.4555.11868.camel@hal.voltaire.com> Hi, Below is a proposal for an OpenFabrics/OpenIB performance manager (PerfManager). It includes a phased implementation plan. Comments welcome. Thanks in advance. -- Hal Performance Manager This document will describe an architecture and a phased plan for an OpenFabrics OpenIB performance manager. Currently, there is no open source performance manager, only a perfquery diagnostic tool which some have scripted into a "poor man's" performance manager. The primary responsibilities of the performance manager are to: 1. Monitor subnet topology 2. Based on subnet topology, monitor performance and error counters. Also, possible counters related to congestion. 3. Perform data reduction (various calculations (rates, histograms, etc.)) on counters obtained 4. Log performance data and indicate "interesting" related events Performance Manager Components 1. Determine subnet topology Performance manager can determine the subnet topology by subscribing for GID in and out of service events. Upon receipt of a GID in service event, use GID to query SA for corresponding LID by using SubnAdmGet NodeRecord with PortGUID specified. It would utilize the LID and NumPorts returned and add this to the monitoring list. Note that the monitoring list can be extended to be distributed with the manager "balancing" the assignments of new GIDs to the set of known monitors. For GID out of service events, the GID is removed from the monitoring list. 2. Monitoring Counters to be monitored include performance counters (data octets and packets both receive and transmit) and error counters. 
These are all in the mandatory PortCounters attribute. Future support will include the optional 64 bit counters, PortExtendedCounters (as this is only known to be supported on one IB device currently). One congestion counter (PortXmitWait) will also be monitored (on switch ports) initially. Polling rather than samples will be used as the monitoring technique. The polling rate is configurable from 1-65535 seconds (default TBD). Note that with 32 bit counters, on 4x SDR links, byte counts can max out in 16 seconds and on 4x DDR links in 8 seconds. The polling rate needs to deal with this if accurate byte and packet rates are desired. Since IB counters are sticky, the counters need to be reset when they get "close" to max'ing out. This will result in some inaccuracy. When counters are reset, the time of the reset will be tracked in the monitor and will be queryable. Note that when the 64 bit counters are supported more generally, the polling rate can be reduced. The performance manager will support parallel queries. The level of parallelism is configurable with a default of 64 queries outstanding at one time. Configuration and dynamic adjustment of any performance manager "knobs" will be supported. Also, there will be a console interface to obtain performance data. It will be able to reset counters, report on specific nodes or node types of interest (CAs only, switches only, all, ...). The specifics are TBD. 3. Data Reduction For errors, rate rather than raw value will be calculated. Error event is only indicated when rate exceeds a threshold. For packet and byte counters, small changes will be aggregated and only significant changes are updated. Aggregated histograms (per node, all nodes (this is TBD)) for each counter will be provided. Actual counters will also be written to files. NodeGUID will be used to identify node. File formats are TBD. One format to be supported might be CSV. 4.
Logging "Interesting" events determined by the performance manager will be logged as well as the performance data itself. There are some interesting scalability issues here especially for the distributed model. Events will be based on rates which are configured as thresholds. There will be configurable thresholds for the error counters with reasonable defaults. Correlation of PerfManager and SM events is interesting but not a mandatory requirement. Performance Manager Scalability Clearly as the polling rate goes up, the number of nodes which can be monitored from a single performance management node decreases. There is some evidence that a single dedicated management node may not be able to monitor the largest clusters at a rapid rate. There are numerous PerfManager models which can be supported: 1. Integrated as thread(s) with OpenSM (run only when SM is master) 2. Standby SM 3. Standalone PerfManager (not running with master or standby SM) 4. Distributed PerfManager (most scalable approach) The simplest model is to run the PerfManager with the master SM. This has the least scalability but is the simplest model. Note that in this model the topology can be obtained without the GID in and out of service events but these events are needed for any of the other models to be supported. The next model is to run the PerfManager with a standby SM. Standbys are not doing much currently (polling the master) so there is much idle CPU. The downside of this approach is that if the standby takes over as master, the PerfManager would need to be moved (or it becomes model 1). A totally separate standalone PerfManager would allow for a deployment model which eliminates the downside of model 2 (standby SM). It could still be built in a similar manner to model 2 with unneeded functions (SM and SA) not included. The most scalable model is a distributed PerfManager.
One approach to distribution is a hierarchical model where there is a PerfManager at the top level with a number of PerfMonitors which are responsible for some portion of the subnet. The separation of PerfManager from OpenSM brings up the following additional issues: 1. What communication is needed between OpenSM and the PerfManager ? 2. Integration of interesting events with OpenSM log (Does performance manager assume OpenSM ? Does it need to work with vendor SMs ?) Hierarchical distribution brings up some additional issues: 1. How is the hierarchy determined ? 2. How do the PerfManager and PerfMonitors find each other ? 3. How is the subnet divided amongst the PerfMonitors ? 4. Communication amongst the PerfManager and the PerfMonitors In terms of inter manager communication, there seem to be several choices: 1. Use vendor specific MADs (which can be RMPP'd) and build on top of this 2. Use IPoIB which is much more powerful as sockets can then be utilized. The only downside of IPoIB is that it requires multicast to be functioning. It seems reasonable to require IPoIB across the management nodes. This can either be a separate IPoIB subnet or a shared one with other endnodes on the subnet. (If this communication is built on top of sockets, it can be any IP subnet amongst the manager nodes). The first implementation phase will address models 1-3. Model 3 is optional as it is similar to models 1 and 2 and may not be needed. Model 4 will be addressed in a subsequent implementation phase (and a future version of this document). Model 4 can be built on the basis of models 1 and 2 where some SM, not necessarily master, is the PerfManager and the rest are PerfMonitors.
Performance Manager Redundancy TBD (future version of this document) Congestion Management TBD (future version of this document) QoS Management TBD (future version of this document) From rdreier at cisco.com Fri Jan 26 14:57:39 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 26 Jan 2007 14:57:39 -0800 Subject: [openib-general] [PATCH] IPOIB: Use a GRH when appropriate for unicast packets In-Reply-To: <45BA84A6.4080809@ichips.intel.com> (Sean Hefty's message of "Fri, 26 Jan 2007 14:45:58 -0800") References: <000001c74193$3c0afee0$ff0da8c0@amr.corp.intel.com> <1169848625.4555.8553.camel@hal.voltaire.com> <45BA84A6.4080809@ichips.intel.com> Message-ID: > ib_init_ah_from_path() is only used to initialize the ah_attr before > calling ib_create_ah(). We have to trap for failure from > ib_create_ah(), so if ib_init_ah_from_path simply aborts, would > ib_create_ah() simply fail later? Right now if ib_init_ah_from_path() fails then ib_create_ah() will create an AH with bogus global route info (since it just uses whatever uninitialized junk is there after ib_init_ah_from_path() returns). - R. From sean.hefty at intel.com Fri Jan 26 15:15:11 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 26 Jan 2007 15:15:11 -0800 Subject: [openib-general] [RFC] Performance Manager In-Reply-To: <1169851941.4555.11868.camel@hal.voltaire.com> Message-ID: <000601c7419f$d4470c60$ff0da8c0@amr.corp.intel.com> >There are numerous PerfManager models which can be supported: >1. Integrated as thread(s) with OpenSM (run only when SM is master) >2. Standby SM >3. Standalone PerfManager (not running with master or standby SM) >4. Distributed PerfManager (most scalable approach) IMO, we will eventually need distributed managers, so I would go with the last approach. But, along those lines, if we had a distributed SM, would you still want to separate the performance manager from the SM? It seems more flexible, but with additional load on the fabric. 
>In terms of inter manager communication, there seem to be several >choices: >1. Use vendor specific MADs (which can be RMPP'd) and build on top of >this >2. Use IPoIB which is much more powerful as sockets can then be utilized. You could also use RC QP communication up/down the hierarchy. - Sean From sean.hefty at intel.com Fri Jan 26 16:00:00 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 26 Jan 2007 16:00:00 -0800 Subject: [openib-general] [RFC] [PATCH 0/2] for 2.6.21/OFED1.2: add IB multicast support Message-ID: <000701c741a6$16dc4760$ff0da8c0@amr.corp.intel.com> The IB SA tracks multicast join/leave requests on a per port basis. In order to support multiple users of the same multicast group from the same port, we need to perform local reference counting on each of the nodes. Add an ib_multicast module to perform reference counting of multicast join/leave requests, and modify ib_ipoib to use the new module. Also extend the rdma_cm to support multicast communication. A new port space is added to the rdma_cm that allows joining the multicast groups used by ipoib, or multicast groups can be joined using the existing RDMA UDP port space. The newly added RDMA_PS_IPOIB port space also allows for unicast operations. All changes are available through my rdma-dev.git tree (multicast branch); although, that tree contains a collection of development patches. Equivalent changes to the userspace librdmacm are also available in my librdmacm.git tree. Assuming that there are no issues after a code review, I would like OFED 1.2 to rebase the multicast patches based on the latest changes. (The changes are also available in my multicast-sa_cache branch.) It is also my intent to create a 1.2 branch in my librdmacm.git tree as a starting point for a release of the librdmacm. (There are 1.0 and 1.1 branches there already, which are left from the initial svn conversion to git.) 
Signed-off-by: Sean Hefty From sean.hefty at intel.com Fri Jan 26 16:06:30 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 26 Jan 2007 16:06:30 -0800 Subject: [openib-general] [PATCH 1/2] for 2.6.21: ib_sa/ib_ipoib: add IB multicast support In-Reply-To: <000701c741a6$16dc4760$ff0da8c0@amr.corp.intel.com> Message-ID: <000801c741a6$ff2a3040$ff0da8c0@amr.corp.intel.com> The IB SA tracks multicast join/leave requests on a per port basis. In order to support multiple users of the same multicast group from the same port, we need to perform local reference counting on each of the nodes. Add an ib_multicast module to perform reference counting of multicast join/leave requests, and modify ib_ipoib to use the new module. Signed-off-by: Sean Hefty --- Except for the previous bug fix to prevent kernel crashes, I don't believe that this patch has changed. And depending on what happens with ib_init_ah_from_path, we will likely want to have ib_init_ah_from_mc match. diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 50fb1cd..189e5d4 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ ib_mad-y := mad.o smi.o agent.o mad_rmpp.o -ib_sa-y := sa_query.o +ib_sa-y := sa_query.o multicast.o ib_cm-y := cm.o diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c new file mode 100644 index 0000000..fde977e --- /dev/null +++ b/drivers/infiniband/core/multicast.c @@ -0,0 +1,848 @@ +/* + * Copyright (c) 2006 Intel Corporation.  All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "sa.h" + +static void mcast_add_one(struct ib_device *device); +static void mcast_remove_one(struct ib_device *device); + +static struct ib_client mcast_client = { + .name = "ib_multicast", + .add = mcast_add_one, + .remove = mcast_remove_one +}; + +static struct ib_sa_client sa_client; +static struct ib_event_handler event_handler; +static struct workqueue_struct *mcast_wq; +static union ib_gid mgid0; + +struct mcast_device; + +struct mcast_port { + struct mcast_device *dev; + spinlock_t lock; + struct rb_root table; + atomic_t refcount; + struct completion comp; + u8 port_num; +}; + +struct mcast_device { + struct ib_device *device; + int start_port; + int end_port; + struct mcast_port port[0]; +}; + +enum mcast_state { + MCAST_IDLE, + MCAST_JOINING, + MCAST_MEMBER, + MCAST_BUSY, + MCAST_ERROR +}; + +struct mcast_member; + +struct mcast_group { + struct ib_sa_mcmember_rec rec; + struct rb_node node; + struct mcast_port *port; + spinlock_t lock; + struct work_struct work; + struct list_head pending_list; + struct list_head active_list; + struct mcast_member *last_join; + int members[3]; + atomic_t refcount; + enum mcast_state state; + struct ib_sa_query *query; + int query_id; +}; + +struct mcast_member { + struct ib_sa_multicast multicast; + struct ib_sa_client *client; + struct mcast_group *group; + struct list_head list; + enum mcast_state state; + atomic_t refcount; + struct completion comp; +}; + +static void join_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context); +static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context); + +static struct mcast_group *mcast_find(struct mcast_port *port, + union ib_gid *mgid) +{ + struct rb_node *node = port->table.rb_node; + struct mcast_group *group; + int ret; + + while (node) { + group = rb_entry(node, struct mcast_group, node); + ret = memcmp(mgid->raw, group->rec.mgid.raw, 
sizeof *mgid); + if (!ret) + return group; + + if (ret < 0) + node = node->rb_left; + else + node = node->rb_right; + } + return NULL; +} + +static struct mcast_group *mcast_insert(struct mcast_port *port, + struct mcast_group *group, + int allow_duplicates) +{ + struct rb_node **link = &port->table.rb_node; + struct rb_node *parent = NULL; + struct mcast_group *cur_group; + int ret; + + while (*link) { + parent = *link; + cur_group = rb_entry(parent, struct mcast_group, node); + + ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, + sizeof group->rec.mgid); + if (ret < 0) + link = &(*link)->rb_left; + else if (ret > 0) + link = &(*link)->rb_right; + else if (allow_duplicates) + link = &(*link)->rb_left; + else + return cur_group; + } + rb_link_node(&group->node, parent, link); + rb_insert_color(&group->node, &port->table); + return NULL; +} + +static void deref_port(struct mcast_port *port) +{ + if (atomic_dec_and_test(&port->refcount)) + complete(&port->comp); +} + +static void release_group(struct mcast_group *group) +{ + struct mcast_port *port = group->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + if (atomic_dec_and_test(&group->refcount)) { + rb_erase(&group->node, &port->table); + spin_unlock_irqrestore(&port->lock, flags); + kfree(group); + deref_port(port); + } else + spin_unlock_irqrestore(&port->lock, flags); +} + +static void deref_member(struct mcast_member *member) +{ + if (atomic_dec_and_test(&member->refcount)) + complete(&member->comp); +} + +static void queue_join(struct mcast_member *member) +{ + struct mcast_group *group = member->group; + unsigned long flags; + + spin_lock_irqsave(&group->lock, flags); + list_add(&member->list, &group->pending_list); + if (group->state == MCAST_IDLE) { + group->state = MCAST_BUSY; + atomic_inc(&group->refcount); + queue_work(mcast_wq, &group->work); + } + spin_unlock_irqrestore(&group->lock, flags); +} + +/* + * A multicast group has three types of members: full member, non 
member, and + * send only member. We need to keep track of the number of members of each + * type based on their join state. Adjust the number of members the belong to + * the specified join states. + */ +static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) +{ + int i; + + for (i = 0; i < 3; i++, join_state >>= 1) + if (join_state & 0x1) + group->members[i] += inc; +} + +/* + * If a multicast group has zero members left for a particular join state, but + * the group is still a member with the SA, we need to leave that join state. + * Determine which join states we still belong to, but that do not have any + * active members. + */ +static u8 get_leave_state(struct mcast_group *group) +{ + u8 leave_state = 0; + int i; + + for (i = 0; i < 3; i++) + if (!group->members[i]) + leave_state |= (0x1 << i); + + return leave_state & group->rec.join_state; +} + +static int check_selector(ib_sa_comp_mask comp_mask, + ib_sa_comp_mask selector_mask, + ib_sa_comp_mask value_mask, + u8 selector, u8 src_value, u8 dst_value) +{ + int err; + + if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) + return 0; + + switch (selector) { + case IB_SA_GT: + err = (src_value <= dst_value); + break; + case IB_SA_LT: + err = (src_value >= dst_value); + break; + case IB_SA_EQ: + err = (src_value != dst_value); + break; + default: + err = 0; + break; + } + + return err; +} + +static int cmp_rec(struct ib_sa_mcmember_rec *src, + struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask) +{ + /* MGID must already match */ + + if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID && + memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid)) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) + return -EINVAL; + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, + IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector, + src->mtu, dst->mtu)) + return 
-EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && + src->traffic_class != dst->traffic_class) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) + return -EINVAL; + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, + IB_SA_MCMEMBER_REC_RATE, dst->rate_selector, + src->rate, dst->rate)) + return -EINVAL; + if (check_selector(comp_mask, + IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, + IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, + dst->packet_life_time_selector, + src->packet_life_time, dst->packet_life_time)) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && + src->flow_label != dst->flow_label) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && + src->hop_limit != dst->hop_limit) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope) + return -EINVAL; + + /* join_state checked separately, proxy_join ignored */ + + return 0; +} + +static int send_join(struct mcast_group *group, struct mcast_member *member) +{ + struct mcast_port *port = group->port; + int ret; + + group->last_join = member; + ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, + port->port_num, IB_MGMT_METHOD_SET, + &member->multicast.rec, + member->multicast.comp_mask, + 3000, GFP_KERNEL, join_handler, group, + &group->query); + if (ret >= 0) { + group->query_id = ret; + ret = 0; + } + return ret; +} + +static int send_leave(struct mcast_group *group, u8 leave_state) +{ + struct mcast_port *port = group->port; + struct ib_sa_mcmember_rec rec; + int ret; + + rec = group->rec; + rec.join_state = leave_state; + + ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, + port->port_num, IB_SA_METHOD_DELETE, &rec, + IB_SA_MCMEMBER_REC_MGID | + IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE, + 3000, GFP_KERNEL, leave_handler, + group, &group->query); + if (ret >= 0) { + 
group->query_id = ret; + ret = 0; + } + return ret; +} + +static void join_group(struct mcast_group *group, struct mcast_member *member, + u8 join_state) +{ + member->state = MCAST_MEMBER; + adjust_membership(group, join_state, 1); + group->rec.join_state |= join_state; + member->multicast.rec = group->rec; + member->multicast.rec.join_state = join_state; + list_del(&member->list); + list_add(&member->list, &group->active_list); +} + +static int fail_join(struct mcast_group *group, struct mcast_member *member, + int status) +{ + spin_lock_irq(&group->lock); + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + return member->multicast.callback(status, &member->multicast); +} + +static void process_group_error(struct mcast_group *group) +{ + struct mcast_member *member; + int ret; + + spin_lock_irq(&group->lock); + while (!list_empty(&group->active_list)) { + member = list_entry(group->active_list.next, + struct mcast_member, list); + atomic_inc(&member->refcount); + list_del_init(&member->list); + adjust_membership(group, member->multicast.rec.join_state, -1); + member->state = MCAST_ERROR; + spin_unlock_irq(&group->lock); + + ret = member->multicast.callback(-ENETRESET, + &member->multicast); + deref_member(member); + if (ret) + ib_sa_free_multicast(&member->multicast); + spin_lock_irq(&group->lock); + } + + group->rec.join_state = 0; + group->state = MCAST_BUSY; + spin_unlock_irq(&group->lock); +} + +static void mcast_work_handler(struct work_struct *work) +{ + struct mcast_group *group; + struct mcast_member *member; + struct ib_sa_multicast *multicast; + int status, ret; + u8 join_state; + + group = container_of(work, typeof(*group), work); +retest: + spin_lock_irq(&group->lock); + while (!list_empty(&group->pending_list) || + (group->state == MCAST_ERROR)) { + + if (group->state == MCAST_ERROR) { + spin_unlock_irq(&group->lock); + process_group_error(group); + goto retest; + } + + member = list_entry(group->pending_list.next, + struct 
mcast_member, list); + multicast = &member->multicast; + join_state = multicast->rec.join_state; + atomic_inc(&member->refcount); + + if (join_state == (group->rec.join_state & join_state)) { + status = cmp_rec(&group->rec, &multicast->rec, + multicast->comp_mask); + if (!status) + join_group(group, member, join_state); + else + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + ret = multicast->callback(status, multicast); + } else { + spin_unlock_irq(&group->lock); + status = send_join(group, member); + if (!status) { + deref_member(member); + return; + } + ret = fail_join(group, member, status); + } + + deref_member(member); + if (ret) + ib_sa_free_multicast(&member->multicast); + spin_lock_irq(&group->lock); + } + + join_state = get_leave_state(group); + if (join_state) { + group->rec.join_state &= ~join_state; + spin_unlock_irq(&group->lock); + if (send_leave(group, join_state)) + goto retest; + } else { + group->state = MCAST_IDLE; + spin_unlock_irq(&group->lock); + release_group(group); + } +} + +/* + * Fail a join request if it is still active - at the head of the pending queue. 
+ */ +static void process_join_error(struct mcast_group *group, int status) +{ + struct mcast_member *member; + int ret; + + spin_lock_irq(&group->lock); + member = list_entry(group->pending_list.next, + struct mcast_member, list); + if (group->last_join == member) { + atomic_inc(&member->refcount); + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + ret = member->multicast.callback(status, &member->multicast); + deref_member(member); + if (ret) + ib_sa_free_multicast(&member->multicast); + } else + spin_unlock_irq(&group->lock); +} + +static void join_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context) +{ + struct mcast_group *group = context; + + if (status) + process_join_error(group, status); + else { + spin_lock_irq(&group->port->lock); + group->rec = *rec; + if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) { + rb_erase(&group->node, &group->port->table); + mcast_insert(group->port, group, 1); + } + spin_unlock_irq(&group->port->lock); + } + mcast_work_handler(&group->work); +} + +static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context) +{ + struct mcast_group *group = context; + + mcast_work_handler(&group->work); +} + +static struct mcast_group *acquire_group(struct mcast_port *port, + union ib_gid *mgid, gfp_t gfp_mask) +{ + struct mcast_group *group, *cur_group; + unsigned long flags; + int is_mgid0; + + is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); + if (!is_mgid0) { + spin_lock_irqsave(&port->lock, flags); + group = mcast_find(port, mgid); + if (group) + goto found; + spin_unlock_irqrestore(&port->lock, flags); + } + + group = kzalloc(sizeof *group, gfp_mask); + if (!group) + return NULL; + + group->port = port; + group->rec.mgid = *mgid; + INIT_LIST_HEAD(&group->pending_list); + INIT_LIST_HEAD(&group->active_list); + INIT_WORK(&group->work, mcast_work_handler); + spin_lock_init(&group->lock); + + spin_lock_irqsave(&port->lock, flags); + cur_group = mcast_insert(port, group, is_mgid0); + 
if (cur_group) { + kfree(group); + group = cur_group; + } else + atomic_inc(&port->refcount); +found: + atomic_inc(&group->refcount); + spin_unlock_irqrestore(&port->lock, flags); + return group; +} + +/* + * We serialize all join requests to a single group to make our lives much + * easier. Otherwise, two users could try to join the same group + * simultaneously, with different configurations, one could leave while the + * join is in progress, etc., which makes locking around error recovery + * difficult. + */ +struct ib_sa_multicast * +ib_sa_join_multicast(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, gfp_t gfp_mask, + int (*callback)(int status, + struct ib_sa_multicast *multicast), + void *context) +{ + struct mcast_device *dev; + struct mcast_member *member; + struct ib_sa_multicast *multicast; + int ret; + + dev = ib_get_client_data(device, &mcast_client); + if (!dev) + return ERR_PTR(-ENODEV); + + member = kzalloc(sizeof *member, gfp_mask); + if (!member) + return ERR_PTR(-ENOMEM); + + ib_sa_client_get(client); + member->client = client; + member->multicast.rec = *rec; + member->multicast.comp_mask = comp_mask; + member->multicast.callback = callback; + member->multicast.context = context; + init_completion(&member->comp); + atomic_set(&member->refcount, 1); + member->state = MCAST_JOINING; + + member->group = acquire_group(&dev->port[port_num - dev->start_port], + &rec->mgid, gfp_mask); + if (!member->group) { + ret = -ENOMEM; + goto err; + } + + /* + * The user will get the multicast structure in their callback. They + * could then free the multicast structure before we can return from + * this routine. So we save the pointer to return before queuing + * any callback. 
+ */ + multicast = &member->multicast; + queue_join(member); + return multicast; + +err: + ib_sa_client_put(client); + kfree(member); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ib_sa_join_multicast); + +void ib_sa_free_multicast(struct ib_sa_multicast *multicast) +{ + struct mcast_member *member; + struct mcast_group *group; + + member = container_of(multicast, struct mcast_member, multicast); + group = member->group; + + spin_lock_irq(&group->lock); + if (member->state == MCAST_MEMBER) + adjust_membership(group, multicast->rec.join_state, -1); + + list_del_init(&member->list); + + if (group->state == MCAST_IDLE) { + group->state = MCAST_BUSY; + spin_unlock_irq(&group->lock); + /* Continue to hold reference on group until callback */ + queue_work(mcast_wq, &group->work); + } else { + spin_unlock_irq(&group->lock); + release_group(group); + } + + deref_member(member); + wait_for_completion(&member->comp); + ib_sa_client_put(member->client); + kfree(member); +} +EXPORT_SYMBOL(ib_sa_free_multicast); + +int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, + union ib_gid *mgid, struct ib_sa_mcmember_rec *rec) +{ + struct mcast_device *dev; + struct mcast_port *port; + struct mcast_group *group; + unsigned long flags; + int ret = 0; + + dev = ib_get_client_data(device, &mcast_client); + if (!dev) + return -ENODEV; + + port = &dev->port[port_num - dev->start_port]; + if (mgid && memcmp(mgid, &mgid0, sizeof mgid0)) { + spin_lock_irqsave(&port->lock, flags); + group = mcast_find(port, mgid); + if (group) + *rec = group->rec; + else + ret = -EADDRNOTAVAIL; + spin_unlock_irqrestore(&port->lock, flags); + } else { + memset(rec, 0, sizeof *rec); + ib_get_cached_gid(device, port_num, 0, &rec->port_gid); + rec->pkey = 0xFFFF; + get_random_bytes(&rec->qkey, sizeof rec->qkey); + rec->join_state = 1; + } + + return ret; +} +EXPORT_SYMBOL(ib_sa_get_mcmember_rec); + +int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + 
struct ib_ah_attr *ah_attr) +{ + int ret; + u16 gid_index; + u8 p; + + ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index); + if (ret) + return ret; + + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->dlid = be16_to_cpu(rec->mlid); + ah_attr->sl = rec->sl; + ah_attr->port_num = port_num; + ah_attr->static_rate = rec->rate; + + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = rec->mgid; + + ah_attr->grh.sgid_index = (u8) gid_index; + ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); + ah_attr->grh.hop_limit = rec->hop_limit; + ah_attr->grh.traffic_class = rec->traffic_class; + + return 0; +} +EXPORT_SYMBOL(ib_init_ah_from_mcmember); + +static void mcast_groups_lost(struct mcast_port *port) +{ + struct mcast_group *group; + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + for (node = rb_first(&port->table); node; node = rb_next(node)) { + group = rb_entry(node, struct mcast_group, node); + spin_lock(&group->lock); + if (group->state == MCAST_IDLE) { + atomic_inc(&group->refcount); + queue_work(mcast_wq, &group->work); + } + group->state = MCAST_ERROR; + spin_unlock(&group->lock); + } + spin_unlock_irqrestore(&port->lock, flags); +} + +static void mcast_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct mcast_device *dev; + + dev = ib_get_client_data(event->device, &mcast_client); + if (!dev) + return; + + switch (event->event) { + case IB_EVENT_PORT_ERR: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + mcast_groups_lost(&dev->port[event->element.port_num - + dev->start_port]); + break; + default: + break; + } +} + +static void mcast_add_one(struct ib_device *device) +{ + struct mcast_device *dev; + struct mcast_port *port; + int i; + + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, + GFP_KERNEL); + if (!dev) + return; + + if 
(device->node_type == RDMA_NODE_IB_SWITCH) + dev->start_port = dev->end_port = 0; + else { + dev->start_port = 1; + dev->end_port = device->phys_port_cnt; + } + + for (i = 0; i <= dev->end_port - dev->start_port; i++) { + port = &dev->port[i]; + port->dev = dev; + port->port_num = dev->start_port + i; + spin_lock_init(&port->lock); + port->table = RB_ROOT; + init_completion(&port->comp); + atomic_set(&port->refcount, 1); + } + + dev->device = device; + ib_set_client_data(device, &mcast_client, dev); + + INIT_IB_EVENT_HANDLER(&event_handler, device, mcast_event_handler); + ib_register_event_handler(&event_handler); +} + +static void mcast_remove_one(struct ib_device *device) +{ + struct mcast_device *dev; + struct mcast_port *port; + int i; + + dev = ib_get_client_data(device, &mcast_client); + if (!dev) + return; + + ib_unregister_event_handler(&event_handler); + flush_workqueue(mcast_wq); + + for (i = 0; i <= dev->end_port - dev->start_port; i++) { + port = &dev->port[i]; + deref_port(port); + wait_for_completion(&port->comp); + } + + kfree(dev); +} + +int mcast_init(void) +{ + int ret; + + mcast_wq = create_singlethread_workqueue("ib_mcast_wq"); + if (!mcast_wq) + return -ENOMEM; + + ib_sa_register_client(&sa_client); + + ret = ib_register_client(&mcast_client); + if (ret) + goto err; + return 0; + +err: + ib_sa_unregister_client(&sa_client); + destroy_workqueue(mcast_wq); + return ret; +} + +void mcast_cleanup(void) +{ + ib_unregister_client(&mcast_client); + ib_sa_unregister_client(&sa_client); + destroy_workqueue(mcast_wq); +} diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h new file mode 100644 index 0000000..24c93fd --- /dev/null +++ b/drivers/infiniband/core/sa.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc.  All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef SA_H +#define SA_H + +#include + +static inline void ib_sa_client_get(struct ib_sa_client *client) +{ + atomic_inc(&client->users); +} + +static inline void ib_sa_client_put(struct ib_sa_client *client) +{ + if (atomic_dec_and_test(&client->users)) + complete(&client->comp); +} + +int ib_sa_mcmember_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); + +int mcast_init(void); +void mcast_cleanup(void); + +#endif /* SA_H */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e45afba..d7d4a53 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -47,8 +47,8 @@ #include #include -#include #include +#include "sa.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); @@ -425,17 +425,6 @@ void ib_sa_register_client(struct ib_sa_client *client) } EXPORT_SYMBOL(ib_sa_register_client); -static inline void ib_sa_client_get(struct ib_sa_client *client) -{ - atomic_inc(&client->users); -} - -static inline void ib_sa_client_put(struct ib_sa_client *client) -{ - if (atomic_dec_and_test(&client->users)) - complete(&client->comp); -} - void ib_sa_unregister_client(struct ib_sa_client *client) { ib_sa_client_put(client); @@ -901,7 +890,6 @@ err1: kfree(query); return ret; } -EXPORT_SYMBOL(ib_sa_mcmember_rec_query); static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) @@ -1053,14 +1041,27 @@ static int __init ib_sa_init(void) get_random_bytes(&tid, sizeof tid); ret = ib_register_client(&sa_client); - if (ret) + if (ret) { printk(KERN_ERR "Couldn't register ib_sa client\n"); + goto err1; + } + + ret = mcast_init(); + if (ret) { + 
printk(KERN_ERR "Couldn't initialize multicast handling\n"); + goto err2; + } + return 0; +err2: + ib_unregister_client(&sa_client); +err1: return ret; } static void __exit ib_sa_cleanup(void) { + mcast_cleanup(); ib_unregister_client(&sa_client); idr_destroy(&query_idr); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b04b72c..0a3d52a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex); /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ struct ipoib_mcast { struct ib_sa_mcmember_rec mcmember; + struct ib_sa_multicast *mc; struct ipoib_ah *ah; struct rb_node rb_node; struct list_head list; - struct completion done; - - int query_id; - struct ib_sa_query *query; unsigned long created; unsigned long backoff; @@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, return 0; } -static void +static int ipoib_mcast_sendonly_join_complete(int status, - struct ib_sa_mcmember_rec *mcmember, - void *mcast_ptr) + struct ib_sa_multicast *multicast) { - struct ipoib_mcast *mcast = mcast_ptr; + struct ipoib_mcast *mcast = multicast->context; struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); + /* We trap for port events ourselves. 
*/ + if (status == -ENETRESET) + return 0; + if (!status) - ipoib_mcast_join_finish(mcast, mcmember); - else { + status = ipoib_mcast_join_finish(mcast, &multicast->rec); + + if (status) { if (mcast->logcount++ < 20) ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " IPOIB_GID_FMT ", status %d\n", @@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int status, spin_unlock_irq(&priv->tx_lock); /* Clear the busy flag so we try again */ - clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - mcast->query = NULL; + status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, + &mcast->flags); } - - complete(&mcast->done); + return status; } static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) @@ -359,35 +359,33 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) rec.port_gid = priv->local_gid; rec.pkey = cpu_to_be16(priv->pkey); - init_completion(&mcast->done); - - ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec, - IB_SA_MCMEMBER_REC_MGID | - IB_SA_MCMEMBER_REC_PORT_GID | - IB_SA_MCMEMBER_REC_PKEY | - IB_SA_MCMEMBER_REC_JOIN_STATE, - 1000, GFP_ATOMIC, - ipoib_mcast_sendonly_join_complete, - mcast, &mcast->query); - if (ret < 0) { - ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", + mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, + priv->port, &rec, + IB_SA_MCMEMBER_REC_MGID | + IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_PKEY | + IB_SA_MCMEMBER_REC_JOIN_STATE, + GFP_ATOMIC, + ipoib_mcast_sendonly_join_complete, + mcast); + if (IS_ERR(mcast->mc)) { + ret = PTR_ERR(mcast->mc); + clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", ret); } else { ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT ", starting join\n", IPOIB_GID_ARG(mcast->mcmember.mgid)); - - mcast->query_id = ret; } return ret; } -static void ipoib_mcast_join_complete(int status, - struct ib_sa_mcmember_rec *mcmember, - void *mcast_ptr) +static 
int ipoib_mcast_join_complete(int status, + struct ib_sa_multicast *multicast) { - struct ipoib_mcast *mcast = mcast_ptr; + struct ipoib_mcast *mcast = multicast->context; struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -395,24 +393,25 @@ static void ipoib_mcast_join_complete(int status, " (status %d)\n", IPOIB_GID_ARG(mcast->mcmember.mgid), status); - if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { + /* We trap for port events ourselves. */ + if (status == -ENETRESET) + return 0; + + if (!status) + status = ipoib_mcast_join_finish(mcast, &multicast->rec); + + if (!status) { mcast->backoff = 1; mutex_lock(&mcast_mutex); if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); - complete(&mcast->done); - return; - } - - if (status == -EINTR) { - complete(&mcast->done); - return; + return 0; } - if (status && mcast->logcount++ < 20) { - if (status == -ETIMEDOUT || status == -EINTR) { + if (mcast->logcount++ < 20) { + if (status == -ETIMEDOUT) { ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT ", status %d\n", IPOIB_GID_ARG(mcast->mcmember.mgid), @@ -429,24 +428,18 @@ static void ipoib_mcast_join_complete(int status, if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; - mutex_lock(&mcast_mutex); + /* Clear the busy flag so we try again */ + status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + mutex_lock(&mcast_mutex); spin_lock_irq(&priv->lock); - mcast->query = NULL; - - if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { - if (status == -ETIMEDOUT) - queue_delayed_work(ipoib_workqueue, &priv->mcast_task, - 0); - else - queue_delayed_work(ipoib_workqueue, &priv->mcast_task, - mcast->backoff * HZ); - } else - complete(&mcast->done); + if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) + queue_delayed_work(ipoib_workqueue, &priv->mcast_task, + mcast->backoff * HZ); 
spin_unlock_irq(&priv->lock); mutex_unlock(&mcast_mutex); - return; + return status; } static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, @@ -495,15 +488,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, rec.hop_limit = priv->broadcast->mcmember.hop_limit; } - init_completion(&mcast->done); - - ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, - &rec, comp_mask, mcast->backoff * 1000, - GFP_ATOMIC, ipoib_mcast_join_complete, - mcast, &mcast->query); - - if (ret < 0) { - ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret); + set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, + &rec, comp_mask, GFP_KERNEL, + ipoib_mcast_join_complete, mcast); + if (IS_ERR(mcast->mc)) { + clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + ret = PTR_ERR(mcast->mc); + ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); mcast->backoff *= 2; if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) @@ -515,8 +507,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, &priv->mcast_task, mcast->backoff * HZ); mutex_unlock(&mcast_mutex); - } else - mcast->query_id = ret; + } } void ipoib_mcast_join_task(struct work_struct *work) @@ -541,7 +532,7 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->local_rate = attr.active_speed * ib_width_enum_to_int(attr.active_width); } else - ipoib_warn(priv, "ib_query_port failed\n"); + ipoib_warn(priv, "ib_query_port failed\n"); } if (!priv->broadcast) { @@ -568,7 +559,8 @@ void ipoib_mcast_join_task(struct work_struct *work) } if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { - ipoib_mcast_join(dev, priv->broadcast, 0); + if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) + ipoib_mcast_join(dev, priv->broadcast, 0); return; } @@ -623,26 +615,9 @@ int ipoib_mcast_start_thread(struct net_device *dev) return 0; } 
-static void wait_for_mcast_join(struct ipoib_dev_priv *priv, - struct ipoib_mcast *mcast) -{ - spin_lock_irq(&priv->lock); - if (mcast && mcast->query) { - ib_sa_cancel_query(mcast->query_id, mcast->query); - mcast->query = NULL; - spin_unlock_irq(&priv->lock); - ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); - wait_for_completion(&mcast->done); - } - else - spin_unlock_irq(&priv->lock); -} - int ipoib_mcast_stop_thread(struct net_device *dev, int flush) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_mcast *mcast; ipoib_dbg_mcast(priv, "stopping multicast thread\n"); @@ -658,52 +633,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) if (flush) flush_workqueue(ipoib_workqueue); - wait_for_mcast_join(priv, priv->broadcast); - - list_for_each_entry(mcast, &priv->multicast_list, list) - wait_for_mcast_join(priv, mcast); - return 0; } static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sa_mcmember_rec rec = { - .join_state = 1 - }; int ret = 0; - if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) - return 0; - - ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); - - rec.mgid = mcast->mcmember.mgid; - rec.port_gid = priv->local_gid; - rec.pkey = cpu_to_be16(priv->pkey); + if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { + ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(mcast->mcmember.mgid)); - /* Remove ourselves from the multicast group */ - ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); - if (ret) - ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); + /* Remove ourselves from the multicast group */ + ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), + &mcast->mcmember.mgid); + if (ret) + ipoib_warn(priv, 
"ipoib_mcast_detach failed (result = %d)\n", ret); + } - /* - * Just make one shot at leaving and don't wait for a reply; - * if we fail, too bad. - */ - ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec, - IB_SA_MCMEMBER_REC_MGID | - IB_SA_MCMEMBER_REC_PORT_GID | - IB_SA_MCMEMBER_REC_PKEY | - IB_SA_MCMEMBER_REC_JOIN_STATE, - 0, GFP_ATOMIC, NULL, - mcast, &mcast->query); - if (ret < 0) - ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " - "for leave (result = %d)\n", ret); + if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) + ib_sa_free_multicast(mcast->mc); return 0; } @@ -756,7 +706,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) dev_kfree_skb_any(skb); } - if (mcast->query) + if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) ipoib_dbg_mcast(priv, "no address vector, " "but multicast join already started\n"); else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) @@ -914,7 +864,6 @@ void ipoib_mcast_restart_task(struct work_struct *work) /* We have to cancel outside of the spinlock */ list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { - wait_for_mcast_join(priv, mcast); ipoib_mcast_leave(mcast->dev, mcast); ipoib_mcast_free(mcast); } diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index c094e50..c36750f 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -110,6 +110,12 @@ static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) dev_addr->broadcast[9] = (unsigned char) pkey; } +static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); +} + static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 97715b0..3b957e5 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -285,18 +285,6 @@ int ib_sa_path_rec_get(struct ib_sa_client 
*client, void *context, struct ib_sa_query **query); -int ib_sa_mcmember_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - u8 method, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_mcmember_rec *resp, - void *context), - void *context, - struct ib_sa_query **query); - int ib_sa_service_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, @@ -309,93 +297,87 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, void *context, struct ib_sa_query **sa_query); +struct ib_sa_multicast { + struct ib_sa_mcmember_rec rec; + ib_sa_comp_mask comp_mask; + int (*callback)(int status, + struct ib_sa_multicast *multicast); + void *context; +}; + /** - * ib_sa_mcmember_rec_set - Start an MCMember set query - * @client:SA client - * @device:device to send query on - * @port_num: port number to send query on - * @rec:MCMember Record to send in query - * @comp_mask:component mask to send in query - * @timeout_ms:time to wait for response - * @gfp_mask:GFP mask to use for internal allocations - * @callback:function called when query completes, times out or is - * canceled - * @context:opaque user context passed to callback - * @sa_query:query context, used to cancel query + * ib_sa_join_multicast - Initiates a join request to the specified multicast + * group. + * @client: SA client + * @device: Device associated with the multicast group. + * @port_num: Port on the specified device to associate with the multicast + * group. + * @rec: SA multicast member record specifying group attributes. + * @comp_mask: Component mask indicating which group attributes of %rec are + * valid. + * @gfp_mask: GFP mask for memory allocations. + * @callback: User callback invoked once the join operation completes. + * @context: User specified context stored with the ib_sa_multicast structure. 
* - * Send an MCMember Set query to the SA (eg to join a multicast - * group). The callback function will be called when the query - * completes (or fails); status is 0 for a successful response, -EINTR - * if the query is canceled, -ETIMEDOUT is the query timed out, or - * -EIO if an error occurred sending the query. The resp parameter of - * the callback is only valid if status is 0. + * This call initiates a multicast join request with the SA for the specified + * multicast group. If the join operation is started successfully, it returns + * an ib_sa_multicast structure that is used to track the multicast operation. + * Users must free this structure by calling ib_sa_free_multicast, even if the + * join operation later fails. (The callback status is non-zero.) * - * If the return value of ib_sa_mcmember_rec_set() is negative, it is - * an error code. Otherwise it is a query ID that can be used to - * cancel the query. + * If the join operation fails, status will be non-zero, with the following + * failures possible: + * -ETIMEDOUT: The request timed out. + * -EIO: An error occurred sending the query. + * -EINVAL: The MCMemberRecord values differed from the existing group's. + * -ENETRESET: Indicates that a fatal error has occurred on the multicast + * group, and the user must rejoin the group to continue using it. 
*/ -static inline int -ib_sa_mcmember_rec_set(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_mcmember_rec *resp, - void *context), - void *context, - struct ib_sa_query **query) -{ - return ib_sa_mcmember_rec_query(client, device, port_num, - IB_MGMT_METHOD_SET, - rec, comp_mask, - timeout_ms, gfp_mask, callback, - context, query); -} +struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, gfp_t gfp_mask, + int (*callback)(int status, + struct ib_sa_multicast + *multicast), + void *context); /** - * ib_sa_mcmember_rec_delete - Start an MCMember delete query - * @client:SA client - * @device:device to send query on - * @port_num: port number to send query on - * @rec:MCMember Record to send in query - * @comp_mask:component mask to send in query - * @timeout_ms:time to wait for response - * @gfp_mask:GFP mask to use for internal allocations - * @callback:function called when query completes, times out or is - * canceled - * @context:opaque user context passed to callback - * @sa_query:query context, used to cancel query + * ib_sa_free_multicast - Frees the multicast tracking structure, and releases + * any reference on the multicast group. + * @multicast: Multicast tracking structure allocated by ib_sa_join_multicast. * - * Send an MCMember Delete query to the SA (eg to leave a multicast - * group). The callback function will be called when the query - * completes (or fails); status is 0 for a successful response, -EINTR - * if the query is canceled, -ETIMEDOUT is the query timed out, or - * -EIO if an error occurred sending the query. The resp parameter of - * the callback is only valid if status is 0. + * This call blocks until the multicast identifier is destroyed. 
It may + * not be called from within the multicast callback; however, returning a non- + * zero value from the callback will result in destroying the multicast + * tracking structure. + */ +void ib_sa_free_multicast(struct ib_sa_multicast *multicast); + +/** + * ib_sa_get_mcmember_rec - Looks up a multicast member record by its MGID and + * returns it if found. + * @device: Device associated with the multicast group. + * @port_num: Port on the specified device to associate with the multicast + * group. + * @mgid: optional MGID of multicast group. + * @rec: Location to copy SA multicast member record. * - * If the return value of ib_sa_mcmember_rec_delete() is negative, it - * is an error code. Otherwise it is a query ID that can be used to - * cancel the query. + * If an MGID is specified, returns an existing multicast member record if + * one is found for the local port. If no MGID is specified, or the specified + * MGID is 0, returns a multicast member record filled in with default values + * that may be used to create a new multicast group. */ -static inline int -ib_sa_mcmember_rec_delete(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_mcmember_rec *resp, - void *context), - void *context, - struct ib_sa_query **query) -{ - return ib_sa_mcmember_rec_query(client, device, port_num, - IB_SA_METHOD_DELETE, - rec, comp_mask, - timeout_ms, gfp_mask, callback, - context, query); -} +int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, + union ib_gid *mgid, struct ib_sa_mcmember_rec *rec); + +/** + * ib_init_ah_from_mcmember - Initialize address handle attributes based on + * an SA multicast member record. 
+ */ +int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + struct ib_ah_attr *ah_attr); /** * ib_init_ah_from_path - Initialize address handle attributes based on an SA From sean.hefty at intel.com Fri Jan 26 16:19:16 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 26 Jan 2007 16:19:16 -0800 Subject: [openib-general] [RFC] [PATCH 2/2] for 2.6.21/OFED1.2 rdma_cm: add multicast support In-Reply-To: <000701c741a6$16dc4760$ff0da8c0@amr.corp.intel.com> Message-ID: <000901c741a8$c8080b30$ff0da8c0@amr.corp.intel.com> Extend the rdma_cm to support multicast communication. Multicast support is added to the existing RDMA_PS_UDP port space, as well as to new port space, RDMA_PS_IPOIB. The latter port space allows joining the multicast groups used by ipoib, which enable offloading ipoib traffic to a separate QP. The port space determines the signature used in the MGID when joining the group. The newly added RDMA_PS_IPOIB port space also allows for unicast operations. Supporting RDMA_PS_IPOIB requires changing how UD QPs are initialized, since we can no longer assume that the qkey is constant. This requires saving the qkey to use when attaching to a device, so that it is available when creating the QP. The qkey information is exported to the user through the existing rdma_init_qp_attr() routine. Multicast support is exported to userspace through the rdma_ucm. Signed-off-by: Sean Hefty --- Changes from previous patches include adding the RDMA_PS_IPOIB port space. 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9e0ab04..827df2a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq; static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); +static DEFINE_IDR(ipoib_ps); struct cma_device { struct list_head list; @@ -115,6 +116,7 @@ struct rdma_id_private { struct list_head list; struct list_head listen_list; struct cma_device *cma_dev; + struct list_head mc_list; enum cma_state state; spinlock_t lock; @@ -133,10 +135,23 @@ struct rdma_id_private { } cm_id; u32 seq_num; + u32 qkey; u32 qp_num; u8 srq; }; +struct cma_multicast { + struct rdma_id_private *id_priv; + union { + struct ib_sa_multicast *ib; + } multicast; + struct list_head list; + void *context; + struct sockaddr addr; + u8 pad[sizeof(struct sockaddr_in6) - + sizeof(struct sockaddr)]; +}; + struct cma_work { struct work_struct work; struct rdma_id_private *id; @@ -242,6 +257,11 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); } +static inline int cma_is_ud_ps(enum rdma_port_space ps) +{ + return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB); +} + static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { @@ -264,19 +284,41 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv) id_priv->cma_dev = NULL; } +static int cma_set_qkey(struct ib_device *device, u8 port_num, + enum rdma_port_space ps, + struct rdma_dev_addr *dev_addr, u32 *qkey) +{ + struct ib_sa_mcmember_rec rec; + int ret = 0; + + switch (ps) { + case RDMA_PS_UDP: + *qkey = RDMA_UDP_QKEY; + break; + case RDMA_PS_IPOIB: + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec); + *qkey = be32_to_cpu(rec.qkey); + break; + default: + break; + } + return ret; +} + static int cma_acquire_dev(struct 
rdma_id_private *id_priv) { - enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; union ib_gid gid; int ret = -ENODEV; - switch (rdma_node_get_transport(dev_type)) { + switch (rdma_node_get_transport(dev_addr->dev_type)) { case RDMA_TRANSPORT_IB: - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + ib_addr_get_sgid(dev_addr, &gid); break; case RDMA_TRANSPORT_IWARP: - iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + iw_addr_get_sgid(dev_addr, &gid); break; default: return -ENODEV; @@ -286,7 +328,12 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); if (!ret) { - cma_attach_to_dev(id_priv, cma_dev); + ret = cma_set_qkey(cma_dev->device, + id_priv->id.port_num, + id_priv->id.ps, dev_addr, + &id_priv->qkey); + if (!ret) + cma_attach_to_dev(id_priv, cma_dev); break; } } @@ -324,40 +371,50 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, init_waitqueue_head(&id_priv->wait_remove); atomic_set(&id_priv->dev_remove, 0); INIT_LIST_HEAD(&id_priv->listen_list); + INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); return &id_priv->id; } EXPORT_SYMBOL(rdma_create_id); -static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; - struct rdma_dev_addr *dev_addr; - int ret; + int qp_attr_mask, ret; - dev_addr = &id_priv->id.route.addr.dev_addr; - ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, - ib_addr_get_pkey(dev_addr), - &qp_attr.pkey_index); + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; - qp_attr.qp_state = IB_QPS_INIT; - qp_attr.qp_access_flags = 0; - 
qp_attr.port_num = id_priv->id.port_num; - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | IB_QP_PORT); + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTS; + qp_attr.sq_psn = 0; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + + return ret; } -static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; - qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); } int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, @@ -375,18 +432,10 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (IS_ERR(qp)) return PTR_ERR(qp); - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - ret = cma_init_ib_qp(id_priv, qp); - break; - case RDMA_TRANSPORT_IWARP: - ret = cma_init_iw_qp(id_priv, qp); - break; - default: - ret = -ENOSYS; - break; - } - + if (cma_is_ud_ps(id_priv->id.ps)) + ret = cma_init_ud_qp(id_priv, qp); + else + ret = cma_init_conn_qp(id_priv, qp); if (ret) goto err; @@ -459,23 +508,55 @@ static int cma_modify_qp_err(struct rdma_cm_id *id) return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); } +static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, + struct ib_qp_attr *qp_attr, int *qp_attr_mask) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int ret; + + ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, + ib_addr_get_pkey(dev_addr), + &qp_attr->pkey_index); + 
if (ret) + return ret; + + qp_attr->port_num = id_priv->id.port_num; + *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; + + if (cma_is_ud_ps(id_priv->id.ps)) { + qp_attr->qkey = id_priv->qkey; + *qp_attr_mask |= IB_QP_QKEY; + } else { + qp_attr->qp_access_flags = 0; + *qp_attr_mask |= IB_QP_ACCESS_FLAGS; + } + return 0; +} + int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_id_private *id_priv; - int ret; + int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: - ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, - qp_attr_mask); + if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps)) + ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); + else + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, + qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; break; case RDMA_TRANSPORT_IWARP: - ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, - qp_attr_mask); + if (!id_priv->cm_id.iw) { + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + } else + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, + qp_attr_mask); break; default: ret = -ENOSYS; @@ -697,6 +778,19 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } +static void cma_leave_mc_groups(struct rdma_id_private *id_priv) +{ + struct cma_multicast *mc; + + while (!list_empty(&id_priv->mc_list)) { + mc = container_of(id_priv->mc_list.next, + struct cma_multicast, list); + list_del(&mc->list); + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + } +} + void rdma_destroy_id(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; @@ -721,6 +815,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) default: break; } + cma_leave_mc_groups(id_priv); mutex_lock(&lock); cma_detach_from_dev(id_priv); } @@ -971,7 
+1066,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - if (listen_id->id.ps == RDMA_PS_UDP) { + if (cma_is_ud_ps(listen_id->id.ps)) { conn_id = cma_new_udp_id(&listen_id->id, ib_event); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = @@ -1805,6 +1900,9 @@ static int cma_get_port(struct rdma_id_private *id_priv) case RDMA_PS_UDP: ps = &udp_ps; break; + case RDMA_PS_IPOIB: + ps = &ipoib_ps; + break; default: return -EPROTONOSUPPORT; } @@ -1919,7 +2017,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = ib_event->param.sidr_rep_rcvd.status; break; } - if (rep->qkey != RDMA_UD_QKEY) { + if (id_priv->qkey != rep->qkey) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; break; @@ -2118,7 +2216,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->ps == RDMA_PS_UDP) + if (cma_is_ud_ps(id->ps)) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); @@ -2214,7 +2312,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, rep.status = status; if (status == IB_SIDR_SUCCESS) { rep.qp_num = id_priv->qp_num; - rep.qkey = RDMA_UD_QKEY; + rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; @@ -2238,7 +2336,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->ps == RDMA_PS_UDP) + if (cma_is_ud_ps(id->ps)) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->private_data, conn_param->private_data_len); @@ -2299,7 +2397,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, 
switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->ps == RDMA_PS_UDP) + if (cma_is_ud_ps(id->ps)) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, private_data, private_data_len); else @@ -2350,6 +2448,161 @@ out: } EXPORT_SYMBOL(rdma_disconnect); +static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc = multicast->context; + struct rdma_cm_event event; + int ret; + + id_priv = mc->id_priv; + atomic_inc(&id_priv->dev_remove); + if (!cma_comp(id_priv, CMA_ADDR_BOUND) && + !cma_comp(id_priv, CMA_ADDR_RESOLVED)) + goto out; + + if (!status && id_priv->id.qp) + status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, + multicast->rec.mlid); + + memset(&event, 0, sizeof event); + event.status = status; + event.param.ud.private_data = mc->context; + if (!status) { + event.event = RDMA_CM_EVENT_MULTICAST_JOIN; + ib_init_ah_from_mcmember(id_priv->id.device, + id_priv->id.port_num, &multicast->rec, + &event.param.ud.ah_attr); + event.param.ud.qp_num = 0xFFFFFF; + event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); + } else + event.event = RDMA_CM_EVENT_MULTICAST_ERROR; + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return 0; + } +out: + cma_release_remove(id_priv); + return 0; +} + +static int cma_join_ib_multicast(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct ib_sa_mcmember_rec rec; + unsigned char mc_map[MAX_ADDR_LEN]; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct sockaddr_in *sin = (struct sockaddr_in *) &mc->addr; + ib_sa_comp_mask comp_mask; + int ret; + + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, + &rec.mgid, &rec); + if (ret) + return ret; + + ip_ib_mc_map(sin->sin_addr.s_addr, 
mc_map); + if (id_priv->id.ps == RDMA_PS_UDP) { + mc_map[7] = 0x01; /* Use RDMA CM signature */ + rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + } + mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; + mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); + + rec.mgid = *(union ib_gid *) (mc_map + 4); + ib_addr_get_sgid(dev_addr, &rec.port_gid); + rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); + rec.join_state = 1; + + comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | + IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | + IB_SA_MCMEMBER_REC_FLOW_LABEL | + IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; + + mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, + id_priv->id.port_num, &rec, + comp_mask, GFP_KERNEL, + cma_ib_mc_handler, mc); + if (IS_ERR(mc->multicast.ib)) + return PTR_ERR(mc->multicast.ib); + + return 0; +} + +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, + void *context) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_ADDR_BOUND) && + !cma_comp(id_priv, CMA_ADDR_RESOLVED)) + return -EINVAL; + + mc = kmalloc(sizeof *mc, GFP_KERNEL); + if (!mc) + return -ENOMEM; + + memcpy(&mc->addr, addr, ip_addr_size(addr)); + mc->context = context; + mc->id_priv = id_priv; + + spin_lock(&id_priv->lock); + list_add(&mc->list, &id_priv->mc_list); + spin_unlock(&id_priv->lock); + + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_join_ib_multicast(id_priv, mc); + break; + default: + ret = -ENOSYS; + break; + } + + if (ret) { + spin_lock_irq(&id_priv->lock); + list_del(&mc->list); + spin_unlock_irq(&id_priv->lock); + kfree(mc); + } + return ret; +} +EXPORT_SYMBOL(rdma_join_multicast); + +void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv; + struct 
cma_multicast *mc; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irq(&id_priv->lock); + list_for_each_entry(mc, &id_priv->mc_list, list) { + if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { + list_del(&mc->list); + spin_unlock_irq(&id_priv->lock); + + if (id->qp) + ib_detach_mcast(id->qp, + &mc->multicast.ib->rec.mgid, + mc->multicast.ib->rec.mlid); + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + return; + } + } + spin_unlock_irq(&id_priv->lock); +} +EXPORT_SYMBOL(rdma_leave_multicast); + static void cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; @@ -2476,6 +2729,7 @@ static void cma_cleanup(void) idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); idr_destroy(&udp_ps); + idr_destroy(&ipoib_ps); } module_init(cma_init); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index e2e8d32..f8d117a 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -70,10 +70,24 @@ struct ucma_context { u64 uid; struct list_head list; + struct list_head mc_list; +}; + +struct ucma_multicast { + struct ucma_context *ctx; + int id; + int events_reported; + + u64 uid; + struct list_head list; + struct sockaddr addr; + u8 pad[sizeof(struct sockaddr_in6) - + sizeof(struct sockaddr)]; }; struct ucma_event { struct ucma_context *ctx; + struct ucma_multicast *mc; struct list_head list; struct rdma_cm_id *cm_id; struct rdma_ucm_event_resp resp; @@ -81,6 +95,7 @@ struct ucma_event { static DEFINE_MUTEX(mut); static DEFINE_IDR(ctx_idr); +static DEFINE_IDR(multicast_idr); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) @@ -124,6 +139,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); + INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; do { @@ -147,6 +163,37 @@ error: return NULL; } +static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) +{ + struct 
ucma_multicast *mc; + int ret; + + mc = kzalloc(sizeof(*mc), GFP_KERNEL); + if (!mc) + return NULL; + + do { + ret = idr_pre_get(&multicast_idr, GFP_KERNEL); + if (!ret) + goto error; + + mutex_lock(&mut); + ret = idr_get_new(&multicast_idr, mc, &mc->id); + mutex_unlock(&mut); + } while (ret == -EAGAIN); + + if (ret) + goto error; + + mc->ctx = ctx; + list_add_tail(&mc->list, &ctx->mc_list); + return mc; + +error: + kfree(mc); + return NULL; +} + static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, struct rdma_conn_param *src) { @@ -180,8 +227,19 @@ static void ucma_set_event_context(struct ucma_context *ctx, struct ucma_event *uevent) { uevent->ctx = ctx; - uevent->resp.uid = ctx->uid; - uevent->resp.id = ctx->id; + switch (event->event) { + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + uevent->mc = (struct ucma_multicast *) + event->param.ud.private_data; + uevent->resp.uid = uevent->mc->uid; + uevent->resp.id = uevent->mc->id; + break; + default: + uevent->resp.uid = ctx->uid; + uevent->resp.id = ctx->id; + break; + } } static int ucma_event_handler(struct rdma_cm_id *cm_id, @@ -199,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; - if (cm_id->ps == RDMA_PS_UDP) + if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, @@ -290,6 +348,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, list_del(&uevent->list); uevent->ctx->events_reported++; + if (uevent->mc) + uevent->mc->events_reported++; kfree(uevent); done: mutex_unlock(&file->mut); @@ -342,6 +402,19 @@ err1: return ret; } +static void ucma_cleanup_multicast(struct ucma_context *ctx) +{ + struct ucma_multicast *mc, *tmp; + + mutex_lock(&mut); + list_for_each_entry_safe(mc, tmp, 
&ctx->mc_list, list) { + list_del(&mc->list); + idr_remove(&multicast_idr, mc->id); + kfree(mc); + } + mutex_unlock(&mut); +} + static void ucma_cleanup_events(struct ucma_context *ctx) { struct ucma_event *uevent, *tmp; @@ -360,6 +433,19 @@ static void ucma_cleanup_events(struct ucma_context *ctx) } } +static void ucma_cleanup_mc_events(struct ucma_multicast *mc) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { + if (uevent->mc != mc) + continue; + + list_del(&uevent->list); + kfree(uevent); + } +} + static int ucma_free_ctx(struct ucma_context *ctx) { int events_reported; @@ -367,6 +453,8 @@ static int ucma_free_ctx(struct ucma_context *ctx) /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); + ucma_cleanup_multicast(ctx); + /* Cleanup events not yet reported to the user. */ mutex_lock(&ctx->file->mut); ucma_cleanup_events(ctx); @@ -731,6 +819,114 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, return ret; } +static ssize_t ucma_join_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_join_mcast cmd; + struct rdma_ucm_create_id_resp resp; + struct ucma_context *ctx; + struct ucma_multicast *mc; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + mutex_lock(&file->mut); + mc = ucma_alloc_multicast(ctx); + if (IS_ERR(mc)) { + ret = PTR_ERR(mc); + goto err1; + } + + mc->uid = cmd.uid; + memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); + ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc); + if (ret) + goto err2; + + resp.id = mc->id; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err3; + } + + mutex_unlock(&file->mut); + ucma_put_ctx(ctx); + return 0; 
+ +err3: + rdma_leave_multicast(ctx->cm_id, &mc->addr); + ucma_cleanup_mc_events(mc); +err2: + mutex_lock(&mut); + idr_remove(&multicast_idr, mc->id); + mutex_unlock(&mut); + list_del(&mc->list); + kfree(mc); +err1: + mutex_unlock(&file->mut); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_leave_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_destroy_id cmd; + struct rdma_ucm_destroy_id_resp resp; + struct ucma_multicast *mc; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&mut); + mc = idr_find(&multicast_idr, cmd.id); + if (!mc) + mc = ERR_PTR(-ENOENT); + else if (mc->ctx->file != file) + mc = ERR_PTR(-EINVAL); + else { + idr_remove(&multicast_idr, mc->id); + atomic_inc(&mc->ctx->ref); + } + mutex_unlock(&mut); + + if (IS_ERR(mc)) { + ret = PTR_ERR(mc); + goto out; + } + + rdma_leave_multicast(mc->ctx->cm_id, &mc->addr); + mutex_lock(&mc->ctx->file->mut); + ucma_cleanup_mc_events(mc); + list_del(&mc->list); + mutex_unlock(&mc->ctx->file->mut); + + ucma_put_ctx(mc->ctx); + resp.events_reported = mc->events_reported; + kfree(mc); + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; +out: + return ret; +} + static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { @@ -750,6 +946,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_GET_OPTION] = NULL, [RDMA_USER_CM_CMD_SET_OPTION] = NULL, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, + [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 36cd8a8..2d6a770 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ 
-52,10 +52,13 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_ESTABLISHED, RDMA_CM_EVENT_DISCONNECTED, RDMA_CM_EVENT_DEVICE_REMOVAL, + RDMA_CM_EVENT_MULTICAST_JOIN, + RDMA_CM_EVENT_MULTICAST_ERROR }; enum rdma_port_space { RDMA_PS_SDP = 0x0001, + RDMA_PS_IPOIB= 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, RDMA_PS_SCTP = 0x0183 @@ -294,5 +297,21 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, */ int rdma_disconnect(struct rdma_cm_id *id); -#endif /* RDMA_CM_H */ +/** + * rdma_join_multicast - Join the multicast group specified by the given + * address. + * @id: Communication identifier associated with the request. + * @addr: Multicast address identifying the group to join. + * @context: User-defined context associated with the join request, returned + * to the user through the private_data pointer in multicast events. + */ +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, + void *context); +/** + * rdma_leave_multicast - Leave the multicast group specified by the given + * address. + */ +void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); + +#endif /* RDMA_CM_H */ diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h index 9b176df..950424b 100644 --- a/include/rdma/rdma_cm_ib.h +++ b/include/rdma/rdma_cm_ib.h @@ -44,7 +44,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths); -/* Global qkey for UD QPs and multicast groups. */ -#define RDMA_UD_QKEY 0x01234567 +/* Global qkey for UDP QPs and multicast groups. 
*/ +#define RDMA_UDP_QKEY 0x01234567 #endif /* RDMA_CM_IB_H */ diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index 9572ab8..f632b0c 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -38,7 +38,7 @@ #include #include -#define RDMA_USER_CM_ABI_VERSION 3 +#define RDMA_USER_CM_ABI_VERSION 4 #define RDMA_MAX_PRIVATE_DATA 256 @@ -58,7 +58,9 @@ enum { RDMA_USER_CM_CMD_GET_EVENT, RDMA_USER_CM_CMD_GET_OPTION, RDMA_USER_CM_CMD_SET_OPTION, - RDMA_USER_CM_CMD_NOTIFY + RDMA_USER_CM_CMD_NOTIFY, + RDMA_USER_CM_CMD_JOIN_MCAST, + RDMA_USER_CM_CMD_LEAVE_MCAST }; /* @@ -188,6 +190,13 @@ struct rdma_ucm_notify { __u32 event; }; +struct rdma_ucm_join_mcast { + __u64 response; /* rdma_ucm_create_id_resp */ + __u64 uid; + struct sockaddr_in6 addr; + __u32 id; +}; + struct rdma_ucm_get_event { __u64 response; }; From afriedle at open-mpi.org Fri Jan 26 19:47:00 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Fri, 26 Jan 2007 19:47:00 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <15ddcffd0701241028i3f087571i648c2c3c282bf0cd@mail.gmail.com> References: <45B4DA10.3080400@indiana.edu> <000401c73f42$3b83e700$16d0180a@amr.corp.intel.com> <15ddcffd0701241028i3f087571i648c2c3c282bf0cd@mail.gmail.com> Message-ID: <45BACB34.5030202@open-mpi.org> > Once this routing is in place, the only thing they need is to enhance > the MPI job starter/etc to allocate to each job (say) two unique > multicast --IP-- addresses on the relevant subnet and provide these IP > addresses to each rank. Now the rank can use the RDMA CM without any > hack. I don't think this is as easy as you've made it sound. I see two approaches to preventing address collision -- both require voluntary participation. First is a centralized authority approach (this has been used for IP multicast-based protocols). This means running some sort of daemon in a location all peers can communicate with. 
I'm not really keen on the idea of requiring a separate daemon just to support multicast in Open MPI. Second is peer-to-peer based approaches. These are doable, but difficult due to numerous race conditions. It's also highly desirable to minimize the time cost of joining a multicast group; this is especially difficult with peer-to-peer solutions. Also, I'd rather not assume a single MPI job requires a constant (small) number of multicast groups/addresses. The obvious correlation is to use one multicast group per MPI communicator. Most applications will use only a few, though some may use hundreds, and may even vary the number in use as the app executes. I've also been considering approaches utilizing many groups per communicator, so again we could be looking at hundreds of multicast groups per MPI job. As I've said, implementing solutions at the MPI level is doable but difficult. I knew from earlier discussions that IB is able to allocate new, unused multicast addresses and was hoping to expose that functionality and avoid the multicast address allocation problem. However I hadn't thought of the fact that other networks supported by the RDMA CM might not have similar functionality.. so this might not be appropriate there. But maybe it is worth considering how hard it is for those other networks to provide the functionality? Andrew From sean.hefty at intel.com Fri Jan 26 17:01:10 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 26 Jan 2007 17:01:10 -0800 Subject: [openib-general] RDMA CM multicast In-Reply-To: <45BACB34.5030202@open-mpi.org> Message-ID: <000a01c741ae$a200e0f0$ff0da8c0@amr.corp.intel.com> >I don't think this is as easy as you've made it sound. I see two >approaches to preventing address collision -- both require voluntary >participation. First is a centralized authority approach (this has been >used for IP multicast-based protocols). This means running some sort of >daemon in a location all peers can communicate with. 
I'm not really >keen on the idea of requiring a separate daemon just to support >multicast in Open MPI. Second is peer-to-peer based approaches. These >are doable, but difficult due to numerous race conditions. It's also >highly desirable to minimize the time cost of joining a multicast >group; this is especially difficult with peer-to-peer solutions. > >As I've said, implementing solutions at the MPI level is doable but >difficult. I knew from earlier discussions that IB is able to allocate >new, unused multicast addresses and was hoping to expose that functionality >and avoid the multicast address allocation problem. However I hadn't >thought of the fact that other networks supported by the RDMA CM might >not have similar functionality.. so this might not be appropriate there. The criterion that I would use when deciding this is how much one technology hijacks the rdma_cm. If a feature can be considered transport neutral, but is only actually implemented by a specific transport, then I'm inclined to include it. I don't think that it's too much of a stretch to consider this feature somewhat transport neutral, as long as we can come up with a fairly clean implementation, which I think we can. That said, even transport specific features, like support for SDP and the IPOIB port space, can make sense to add into the rdma_cm because of the commonality of the code. > But maybe it is worth considering how hard it is for those other >networks to provide the functionality? My guess is that other hardware would need to do one of the two options that you listed. Obviously IB chose the centralized authority approach. - Sean From hnguyen at linux.vnet.ibm.com Fri Jan 26 16:59:20 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Sat, 27 Jan 2007 01:59:20 +0100 Subject: [openib-general] [PATCH ofed-1.2 0/6] ehca (kernel space) patches for ofed-1.2 Message-ID: <200701270159.21073.hnguyen@linux.vnet.ibm.com> Hello Vladimir! 
Here is a patch set for ehca device driver with the following changes: * Fix yield with spinlock held (this has been queued for 2.6.20) * Fix mismatched unlock in irq handler (this has been queued for 2.6.20) * Remove mmap (this has been queued for 2.6.21) * Remove obsolete prototypes (this has been queued for 2.6.21) * Backport for 2.6.16, 2.6.16sles10, 2.6.18, 2.6.18_FC6 (RHEL5) With those patches we are able to support the following platforms: * 2.6.20, 2.6.19, 2.6.18, RHEL5, 2.6.16, SLES10 Thanks Nam PS1: The first two patches are actually not required since ofed-1.2 is based on 2.6.20. Since I did not see them in daily build yesterday I'm sending here for completeness. PS2: For backport on 2.6.16 resp. SLES10 I saw that there is a hvcall.h under backport/2.6.16/include/linux. However that one is not sufficient for ehca and include/linux is the wrong place. Hence, I'm patching a new one under include/asm. If I'm right, please remove include/linux/hvcall.h! From hnguyen at linux.vnet.ibm.com Fri Jan 26 16:59:46 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Sat, 27 Jan 2007 01:59:46 +0100 Subject: [openib-general] [PATCH ofed-1.2 1/6] ehca: fix yield with spinlock held and mismatched unlock Message-ID: <200701270159.47105.hnguyen@linux.vnet.ibm.com> ehca_cq.c: fix yield with spinlock held ehca_irq.c: fix mismatched unlock Signed-off-by: Hoang-Nam Nguyen --- ehca_cq.c | 5 ++++- ehca_irq.c | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_cq.c ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_cq.c --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-27 00:28:38.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-27 00:35:09.000000000 +0100 @@ -344,8 +344,11 @@ int ehca_destroy_cq(struct ib_cq *cq) unsigned long flags; spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_callbacks) + while (my_cq->nr_callbacks) { + 
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); yield(); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + } idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_irq.c ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_irq.c --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-27 00:28:38.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-27 00:39:12.000000000 +0100 @@ -440,7 +440,8 @@ void ehca_tasklet_eq(unsigned long data) cq = idr_find(&ehca_cq_idr, token); if (cq == NULL) { - spin_unlock(&ehca_cq_idr_lock); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); break; } From hnguyen at linux.vnet.ibm.com Fri Jan 26 17:00:41 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Sat, 27 Jan 2007 02:00:41 +0100 Subject: [openib-general] [PATCH ofed-1.2 2/6] ehca: remove do_mmap and obsolete prototypes Message-ID: <200701270200.42026.hnguyen@linux.vnet.ibm.com> Remove do_mmap and obsolete prototypes Signed-off-by: Hoang-Nam Nguyen --- ehca_classes.h | 29 +--- ehca_cq.c | 65 ++------- ehca_iverbs.h | 10 - ehca_main.c | 8 - ehca_qp.c | 78 ++--------- ehca_uverbs.c | 395 +++++++++++++++++++++++---------------------------------- 6 files changed, 206 insertions(+), 379 deletions(-) diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_classes.h ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_classes.h --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_classes.h 2007-01-27 00:28:38.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_classes.h 2007-01-27 00:44:28.000000000 +0100 @@ -119,13 +119,14 @@ struct ehca_qp { struct ipz_qp_handle ipz_qp_handle; struct ehca_pfqp pf; struct ib_qp_init_attr init_attr; - u64 uspace_squeue; - u64 uspace_rqueue; - u64 uspace_fwh; struct ehca_cq *send_cq; struct ehca_cq *recv_cq; unsigned int sqerr_purgeflag; struct hlist_node list_entries; + /* mmap counter for resources mapped into 
user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; }; /* must be power of 2 */ @@ -142,13 +143,14 @@ struct ehca_cq { struct ipz_cq_handle ipz_cq_handle; struct ehca_pfcq pf; spinlock_t cb_lock; - u64 uspace_queue; - u64 uspace_fwh; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; u32 nr_callbacks; spinlock_t task_lock; u32 ownpid; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; }; enum ehca_mr_flag { @@ -248,20 +250,6 @@ struct ehca_ucontext { struct ib_ucontext ib_ucontext; }; -struct ehca_module *ehca_module_new(void); - -int ehca_module_delete(struct ehca_module *me); - -int ehca_eq_ctor(struct ehca_eq *eq); - -int ehca_eq_dtor(struct ehca_eq *eq); - -struct ehca_shca *ehca_shca_new(void); - -int ehca_shca_delete(struct ehca_shca *me); - -struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor); - int ehca_init_pd_cache(void); void ehca_cleanup_pd_cache(void); int ehca_init_cq_cache(void); @@ -283,7 +271,6 @@ extern int ehca_port_act_time; extern int ehca_use_hp_mr; struct ipzu_queue_resp { - u64 queue; /* points to first queue entry */ u32 qe_size; /* queue entry size */ u32 act_nr_of_sg; u32 queue_length; /* queue length allocated in bytes */ @@ -296,7 +283,6 @@ struct ehca_create_cq_resp { u32 cq_number; u32 token; struct ipzu_queue_resp ipz_queue; - struct h_galpas galpas; }; struct ehca_create_qp_resp { @@ -309,7 +295,6 @@ struct ehca_create_qp_resp { u32 dummy; /* padding for 8 byte alignment */ struct ipzu_queue_resp ipz_squeue; struct ipzu_queue_resp ipz_rqueue; - struct h_galpas galpas; }; struct ehca_alloc_cq_parms { diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_cq.c ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_cq.c --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-27 00:37:54.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-27 00:44:28.000000000 +0100 @@ -267,7 +267,6 @@ struct ib_cq 
*ehca_create_cq(struct ib_d if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; - struct vm_area_struct *vma; memset(&resp, 0, sizeof(resp)); resp.cq_number = my_cq->cq_number; resp.token = my_cq->token; @@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_d resp.ipz_queue.queue_length = ipz_queue->queue_length; resp.ipz_queue.pagesize = ipz_queue->pagesize; resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000, - ipz_queue->queue_length, - (void**)&resp.ipz_queue.queue, - &vma); - if (ret) { - ehca_err(device, "Could not mmap queue pages"); - cq = ERR_PTR(ret); - goto create_cq_exit4; - } - my_cq->uspace_queue = resp.ipz_queue.queue; - resp.galpas = my_cq->galpas; - ret = ehca_mmap_register(my_cq->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(device, "Could not mmap fw_handle"); - cq = ERR_PTR(ret); - goto create_cq_exit5; - } - my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); - goto create_cq_exit6; + goto create_cq_exit4; } } return cq; -create_cq_exit6: - ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - -create_cq_exit5: - ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length); - create_cq_exit4: ipz_queue_dtor(&my_cq->ipz_queue); @@ -333,7 +306,6 @@ create_cq_exit1: int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; - int ret; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int cq_num = my_cq->cq_number; struct ib_device *device = cq->device; @@ -343,6 +315,20 @@ int ehca_destroy_cq(struct ib_cq *cq) u32 cur_pid = current->tgid; unsigned long flags; + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + if (my_cq->ownpid != 
cur_pid) { + ehca_err(device, "Invalid caller pid=%x ownpid=%x " + "cq_num=%x", + cur_pid, my_cq->ownpid, my_cq->cq_number); + return -EINVAL; + } + } + spin_lock_irqsave(&ehca_cq_idr_lock, flags); while (my_cq->nr_callbacks) { spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); @@ -353,25 +339,6 @@ int ehca_destroy_cq(struct ib_cq *cq) idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { - ehca_err(device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_cq->ownpid); - return -EINVAL; - } - - /* un-mmap if vma alloc */ - if (my_cq->uspace_queue ) { - ret = ehca_munmap(my_cq->uspace_queue, - my_cq->ipz_queue.queue_length); - if (ret) - ehca_err(device, "Could not munmap queue ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(device, "Could not munmap fwh ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - } - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ @@ -400,7 +367,7 @@ int ehca_resize_cq(struct ib_cq *cq, int struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); u32 cur_pid = current->tgid; - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + if (cq->uobject && my_cq->ownpid != cur_pid) { ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x", cur_pid, my_cq->ownpid); return -EINVAL; diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_iverbs.h ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_iverbs.h --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_iverbs.h 2007-01-27 00:28:38.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_iverbs.h 2007-01-27 00:44:28.000000000 +0100 @@ -171,19 +171,11 @@ int ehca_mmap(struct ib_ucontext *contex void ehca_poll_eqs(unsigned long data); -int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped, - struct vm_area_struct **vma); - -int 
ehca_mmap_register(u64 physical,void **mapped, - struct vm_area_struct **vma); - -int ehca_munmap(unsigned long addr, size_t len); - #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) +#define ehca_alloc_fw_ctrlblock(flags) ((void*) get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_main.c ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_main.c --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_main.c 2007-01-27 00:28:38.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_main.c 2007-01-27 00:44:28.000000000 +0100 @@ -52,7 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0019"); +MODULE_VERSION("SVNEHCA_0020"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -288,7 +288,7 @@ int ehca_init_device(struct ehca_shca *s strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 5; + shca->ib_device.uverbs_abi_ver = 6; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -352,7 +352,7 @@ int ehca_init_device(struct ehca_shca *s shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; shca->ib_device.attach_mcast = ehca_attach_mcast; shca->ib_device.detach_mcast = ehca_detach_mcast; - /* shca->ib_device.process_mad = ehca_process_mad; */ + /* shca->ib_device.process_mad = ehca_process_mad; */ shca->ib_device.mmap = ehca_mmap; return ret; @@ -790,7 +790,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0019)\n"); + "(Rel.: SVNEHCA_0020)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); 
spin_lock_init(&ehca_qp_idr_lock); diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_qp.c ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_qp.c --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_qp.c 2007-01-27 00:28:38.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_qp.c 2007-01-27 00:44:28.000000000 +0100 @@ -637,7 +637,6 @@ struct ib_qp *ehca_create_qp(struct ib_p struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; struct ehca_create_qp_resp resp; - struct vm_area_struct * vma; memset(&resp, 0, sizeof(resp)); resp.qp_num = my_qp->real_qp_num; @@ -651,59 +650,21 @@ struct ib_qp *ehca_create_qp(struct ib_p resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000, - ipz_rqueue->queue_length, - (void**)&resp.ipz_rqueue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap rqueue pages"); - goto create_qp_exit3; - } - my_qp->uspace_rqueue = resp.ipz_rqueue.queue; /* squeue properties */ resp.ipz_squeue.qe_size = ipz_squeue->qe_size; resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; resp.ipz_squeue.queue_length = ipz_squeue->queue_length; resp.ipz_squeue.pagesize = ipz_squeue->pagesize; resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000, - ipz_squeue->queue_length, - (void**)&resp.ipz_squeue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap squeue pages"); - goto create_qp_exit4; - } - my_qp->uspace_squeue = resp.ipz_squeue.queue; - /* fw_handle */ - resp.galpas = my_qp->galpas; - ret = ehca_mmap_register(my_qp->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap fw_handle"); - goto create_qp_exit5; - } - my_qp->uspace_fwh = 
(u64)resp.galpas.kernel.fw_handle; - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit6; + goto create_qp_exit3; } } return &my_qp->ib_qp; -create_qp_exit6: - ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - -create_qp_exit5: - ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length); - -create_qp_exit4: - ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length); - create_qp_exit3: ipz_queue_dtor(&my_qp->ipz_rqueue); ipz_queue_dtor(&my_qp->ipz_squeue); @@ -931,7 +892,7 @@ static int internal_modify_qp(struct ib_ my_qp->qp_type == IB_QPT_SMI) && statetrans == IB_QPST_SQE2RTS) { /* mark next free wqe if kernel */ - if (my_qp->uspace_squeue == 0) { + if (!ibqp->uobject) { struct ehca_wqe *wqe; /* lock send queue */ spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); @@ -1417,11 +1378,18 @@ int ehca_destroy_qp(struct ib_qp *ibqp) enum ib_qp_type qp_type; unsigned long flags; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; + if (ibqp->uobject) { + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(ibqp->device, "Resources still referenced in " + "user space qp_num=%x", ibqp->qp_num); + return -EINVAL; + } + if (my_pd->ownpid != cur_pid) { + ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } } if (my_qp->send_cq) { @@ -1439,24 +1407,6 @@ int ehca_destroy_qp(struct ib_qp *ibqp) idr_remove(&ehca_qp_idr, my_qp->token); spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); - /* un-mmap if vma alloc */ - if (my_qp->uspace_rqueue) { - ret = ehca_munmap(my_qp->uspace_rqueue, - my_qp->ipz_rqueue.queue_length); - if (ret) - ehca_err(ibqp->device, "Could not munmap rqueue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_squeue, - 
my_qp->ipz_squeue.queue_length); - if (ret) - ehca_err(ibqp->device, "Could not munmap squeue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x", - qp_num); - } - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " diff -Nurp ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_uverbs.c ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_uverbs.c --- ofed_1_2_jan26/drivers/infiniband/hw/ehca/ehca_uverbs.c 2007-01-27 00:28:38.000000000 +0100 +++ ofed_1_2_nam/drivers/infiniband/hw/ehca/ehca_uverbs.c 2007-01-27 00:44:28.000000000 +0100 @@ -68,105 +68,183 @@ int ehca_dealloc_ucontext(struct ib_ucon return 0; } -struct page *ehca_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static void ehca_mm_open(struct vm_area_struct *vma) { - struct page *mypage = NULL; - u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; - u32 idr_handle = fileoffset >> 32; - u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... 
*/ - u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ - u32 cur_pid = current->tgid; - unsigned long flags; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ehca_pd *pd; - u64 offset; - void *vaddr; + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} - switch (q_type) { - case 1: /* CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq = idr_find(&ehca_cq_idr, idr_handle); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); +static void ehca_mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} - /* make sure this mmap really belongs to the authorized user */ - if (!cq) { - ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; +static struct vm_operations_struct vm_ops = { + .open = ehca_mm_open, + .close = ehca_mm_close, +}; + +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) +{ + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical); + /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, + vsize, vma->vm_page_prot); + if (unlikely(ret)) { + 
ehca_gen_err("remap_pfn_range() failed ret=%x", ret); + return -ENOMEM; + } + + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, + u32 *mm_count) +{ + int ret; + u64 start, ofs; + struct page *page; + + vma->vm_flags |= VM_RESERVED; + start = vma->vm_start; + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); + page = virt_to_page(virt_addr); + ret = vm_insert_page(vma, start, page); + if (unlikely(ret)) { + ehca_gen_err("vm_insert_page() failed rc=%x", ret); + return ret; } + start += PAGE_SIZE; + } + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; - if (cq->ownpid != cur_pid) { + return 0; +} + +static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); + ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); + if (unlikely(ret)) { ehca_err(cq->ib_cq.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, cq->ownpid); - return NOPAGE_SIGBUS; + "ehca_mmap_fw() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } + break; - if (rsrc_type == 2) { - ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&cq->ipz_queue, offset); - ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); + case 2: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); + ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_queue() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } break; - case 2: /* QP */ - spin_lock_irqsave(&ehca_qp_idr_lock, flags); - qp = idr_find(&ehca_qp_idr, 
idr_handle); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + default: + ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", + rsrc_type, cq->cq_number); + return -EINVAL; + } - /* make sure this mmap really belongs to the authorized user */ - if (!qp) { - ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; + return 0; +} + +static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); + ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return -ENOMEM; } + break; - pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); - if (pd->ownpid != cur_pid) { + case 2: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + if (unlikely(ret)) { ehca_err(qp->ib_qp.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, pd->ownpid); - return NOPAGE_SIGBUS; + "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } + break; - if (rsrc_type == 2) { /* rqueue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); - } else if (rsrc_type == 3) { /* squeue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); + case 3: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, 
&qp->ipz_squeue, &qp->mm_count_squeue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } break; default: - ehca_gen_err("bad queue type %x", q_type); - return NOPAGE_SIGBUS; - } - - if (!mypage) { - ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; + ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", + rsrc_type, qp->ib_qp.qp_num); + return -EINVAL; } - get_page(mypage); - return mypage; + return 0; } -static struct vm_operations_struct ehcau_vm_ops = { - .nopage = ehca_nopage, -}; - int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; @@ -175,7 +253,6 @@ int ehca_mmap(struct ib_ucontext *contex u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ u32 cur_pid = current->tgid; u32 ret; - u64 vsize, physical; unsigned long flags; struct ehca_cq *cq; struct ehca_qp *qp; @@ -201,44 +278,12 @@ int ehca_mmap(struct ib_ucontext *contex if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq); - vma->vm_flags |= VM_RESERVED; - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(cq->ib_cq.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = cq->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; - - ehca_dbg(cq->ib_cq.device, - "vsize=%lx physical=%lx", vsize, physical); - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (ret) { - ehca_err(cq->ib_cq.device, - "remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - break; - - case 2: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq); - vma->vm_flags |= 
VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_cq(vma, cq, rsrc_type); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_cq() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } break; @@ -262,50 +307,12 @@ int ehca_mmap(struct ib_ucontext *contex if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp); - vma->vm_flags |= VM_RESERVED; - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(qp->ib_qp.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = qp->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; - - ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx", - vsize, physical); - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (ret) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - break; - - case 2: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp); - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - case 3: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp); - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_qp(vma, qp, rsrc_type); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_qp() failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } break; @@ -316,77 +323,3 @@ int ehca_mmap(struct ib_ucontext *contex return 0; } - -int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped, - struct 
vm_area_struct **vma) -{ - down_write(&current->mm->mmap_sem); - *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - foffset); - up_write(&current->mm->mmap_sem); - if (!(*mapped)) { - ehca_gen_err("couldn't mmap foffset=%lx length=%lx", - foffset, length); - return -EINVAL; - } - - *vma = find_vma(current->mm, (u64)*mapped); - if (!(*vma)) { - down_write(&current->mm->mmap_sem); - do_munmap(current->mm, 0, length); - up_write(&current->mm->mmap_sem); - ehca_gen_err("couldn't find vma queue=%p", *mapped); - return -EINVAL; - } - (*vma)->vm_flags |= VM_RESERVED; - (*vma)->vm_ops = &ehcau_vm_ops; - - return 0; -} - -int ehca_mmap_register(u64 physical, void **mapped, - struct vm_area_struct **vma) -{ - int ret; - unsigned long vsize; - /* ehca hw supports only 4k page */ - ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma); - if (ret) { - ehca_gen_err("could'nt mmap physical=%lx", physical); - return ret; - } - - (*vma)->vm_flags |= VM_RESERVED; - vsize = (*vma)->vm_end - (*vma)->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", - (*vma)->vm_end - (*vma)->vm_start); - return -EINVAL; - } - - (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot); - (*vma)->vm_flags |= VM_IO | VM_RESERVED; - - ret = remap_pfn_range((*vma), (*vma)->vm_start, - physical >> PAGE_SHIFT, vsize, - (*vma)->vm_page_prot); - if (ret) { - ehca_gen_err("remap_pfn_range() failed ret=%x", ret); - return -ENOMEM; - } - - return 0; - -} - -int ehca_munmap(unsigned long addr, size_t len) { - int ret = 0; - struct mm_struct *mm = current->mm; - if (mm) { - down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); - up_write(&mm->mmap_sem); - } - return ret; -} From hnguyen at linux.vnet.ibm.com Fri Jan 26 17:03:25 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Sat, 27 Jan 2007 02:03:25 +0100 Subject: [openib-general] [PATCH ofed-1.2 3/6] ehca: backport 2.6.16 Message-ID: <200701270203.26088.hnguyen@linux.vnet.ibm.com> backport hvcall
for 2.6.16 Signed-off-by: Hoang-Nam Nguyen --- hvcall.h | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 142 insertions(+) diff -Nurp ofed_1_2_jan26/kernel_addons/backport/2.6.16/include/asm/hvcall.h ofed_1_2_nam/kernel_addons/backport/2.6.16/include/asm/hvcall.h --- ofed_1_2_jan26/kernel_addons/backport/2.6.16/include/asm/hvcall.h 1970-01-01 01:00:00.000000000 +0100 +++ ofed_1_2_nam/kernel_addons/backport/2.6.16/include/asm/hvcall.h 2007-01-27 00:29:44.000000000 +0100 @@ -0,0 +1,142 @@ +#ifndef ASM_HVCALL_BACKPORT_2616_H +#define ASM_HVCALL_BACKPORT_2616_H + +#include_next + +#ifdef __KERNEL__ + +#define H_SUCCESS H_Success +#define H_BUSY H_Busy +#define H_CONSTRAINED H_Constrained +#define H_PAGE_REGISTERED 15 + +#define H_PARAMETER H_Parameter +#define H_NO_MEM H_NoMem +#define H_RESOURCE H_Resource +#define H_HARDWARE H_Hardware +#define H_ADAPTER_PARM -17 +#define H_RH_PARM -18 +#define H_RT_PARM -22 +#define H_MLENGTH_PARM -27 +#define H_MEM_PARM -28 +#define H_MEM_ACCESS_PARM -29 +#define H_ALIAS_EXIST -39 +#define H_TABLE_FULL -41 +#define H_NOT_ENOUGH_RESOURCES -44 +#define H_R_STATE -45 + +#define H_CB_ALIGNMENT 4096 + +#define H_RESET_EVENTS 0x15C +#define H_ALLOC_RESOURCE 0x160 +#define H_FREE_RESOURCE 0x164 +#define H_MODIFY_QP 0x168 +#define H_QUERY_QP 0x16C +#define H_REREGISTER_PMR 0x170 +#define H_REGISTER_SMR 0x174 +#define H_QUERY_MR 0x178 +#define H_QUERY_MW 0x17C +#define H_QUERY_HCA 0x180 +#define H_QUERY_PORT 0x184 +#define H_MODIFY_PORT 0x188 +#define H_DEFINE_AQP1 0x18C +#define H_DEFINE_AQP0 0x194 +#define H_RESIZE_MR 0x198 +#define H_ATTACH_MCQP 0x19C +#define H_DETACH_MCQP 0x1A0 +#define H_REGISTER_RPAGES 0x1AC +#define H_DISABLE_AND_GETC 0x1B0 +#define H_ERROR_DATA 0x1B4 +#define H_QUERY_INT_STATE 0x1E4 + +#define H_LONG_BUSY_ORDER_1_MSEC H_LongBusyOrder1msec +#define H_LONG_BUSY_ORDER_10_MSEC H_LongBusyOrder10msec +#define H_LONG_BUSY_ORDER_100_MSEC H_LongBusyOrder100msec +#define 
H_LONG_BUSY_ORDER_1_SEC H_LongBusyOrder1sec +#define H_LONG_BUSY_ORDER_10_SEC H_LongBusyOrder10sec +#define H_LONG_BUSY_ORDER_100_SEC H_LongBusyOrder100sec +#define H_IS_LONG_BUSY(x) ((x >= H_LongBusyStartRange) && (x <= H_LongBusyEndRange)) + + +#ifndef __ASSEMBLY__ +#include + +#define PLPAR_HCALL9_BUFSIZE 9 +inline static long plpar_hcall9(unsigned long opcode, + unsigned long *retbuf, + unsigned long arg1, /* backport hvcall for sles10 Signed-off-by: Hoang-Nam Nguyen --- hvcall.h | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 142 insertions(+) diff -Nurp ofed_1_2_jan26/kernel_addons/backport/2.6.16_sles10/include/asm/hvcall.h ofed_1_2_nam/kernel_addons/backport/2.6.16_sles10/include/asm/hvcall.h --- ofed_1_2_jan26/kernel_addons/backport/2.6.16_sles10/include/asm/hvcall.h 1970-01-01 01:00:00.000000000 +0100 +++ ofed_1_2_nam/kernel_addons/backport/2.6.16_sles10/include/asm/hvcall.h 2007-01-27 00:29:44.000000000 +0100 @@ -0,0 +1,142 @@ +#ifndef ASM_HVCALL_BACKPORT_2616_H +#define ASM_HVCALL_BACKPORT_2616_H + +#include_next + +#ifdef __KERNEL__ + +#define H_SUCCESS H_Success +#define H_BUSY H_Busy +#define H_CONSTRAINED H_Constrained +#define H_PAGE_REGISTERED 15 + +#define H_PARAMETER H_Parameter +#define H_NO_MEM H_NoMem +#define H_RESOURCE H_Resource +#define H_HARDWARE H_Hardware +#define H_ADAPTER_PARM -17 +#define H_RH_PARM -18 +#define H_RT_PARM -22 +#define H_MLENGTH_PARM -27 +#define H_MEM_PARM -28 +#define H_MEM_ACCESS_PARM -29 +#define H_ALIAS_EXIST -39 +#define H_TABLE_FULL -41 +#define H_NOT_ENOUGH_RESOURCES -44 +#define H_R_STATE -45 + +#define H_CB_ALIGNMENT 4096 + +#define H_RESET_EVENTS 0x15C +#define H_ALLOC_RESOURCE 0x160 +#define H_FREE_RESOURCE 0x164 +#define H_MODIFY_QP 0x168 +#define H_QUERY_QP 0x16C +#define H_REREGISTER_PMR 0x170 +#define H_REGISTER_SMR 0x174 +#define H_QUERY_MR 0x178 +#define H_QUERY_MW 0x17C +#define H_QUERY_HCA 0x180 +#define H_QUERY_PORT 0x184 +#define H_MODIFY_PORT 0x188 +#define 
H_DEFINE_AQP1 0x18C +#define H_DEFINE_AQP0 0x194 +#define H_RESIZE_MR 0x198 +#define H_ATTACH_MCQP 0x19C +#define H_DETACH_MCQP 0x1A0 +#define H_REGISTER_RPAGES 0x1AC +#define H_DISABLE_AND_GETC 0x1B0 +#define H_ERROR_DATA 0x1B4 +#define H_QUERY_INT_STATE 0x1E4 + +#define H_LONG_BUSY_ORDER_1_MSEC H_LongBusyOrder1msec +#define H_LONG_BUSY_ORDER_10_MSEC H_LongBusyOrder10msec +#define H_LONG_BUSY_ORDER_100_MSEC H_LongBusyOrder100msec +#define H_LONG_BUSY_ORDER_1_SEC H_LongBusyOrder1sec +#define H_LONG_BUSY_ORDER_10_SEC H_LongBusyOrder10sec +#define H_LONG_BUSY_ORDER_100_SEC H_LongBusyOrder100sec +#define H_IS_LONG_BUSY(x) ((x >= H_LongBusyStartRange) && (x <= H_LongBusyEndRange)) + + +#ifndef __ASSEMBLY__ +#include + +#define PLPAR_HCALL9_BUFSIZE 9 +inline static long plpar_hcall9(unsigned long opcode, + unsigned long *retbuf, + unsigned long arg1, /* backport hvcall for 2.6.18 Signed-off-by: Hoang-Nam Nguyen --- hvcall.h | 32 ++++++++++++++++++++++++++++++++ 1 files changed, 32 insertions(+) diff -Nurp ofed_1_2_jan26/kernel_addons/backport/2.6.18/include/asm/hvcall.h ofed_1_2_nam/kernel_addons/backport/2.6.18/include/asm/hvcall.h --- ofed_1_2_jan26/kernel_addons/backport/2.6.18/include/asm/hvcall.h 1970-01-01 01:00:00.000000000 +0100 +++ ofed_1_2_nam/kernel_addons/backport/2.6.18/include/asm/hvcall.h 2007-01-27 00:29:44.000000000 +0100 @@ -0,0 +1,32 @@ +#ifndef ASM_HVCALL_BACKPORT_2618_H +#define ASM_HVCALL_BACKPORT_2618_H + +#include_next + +#ifdef __KERNEL__ + +#define PLPAR_HCALL9_BUFSIZE 9 + +inline static long plpar_hcall9(unsigned long opcode, + unsigned long *retbuf, + unsigned long arg1, /* backport hvcall for RHEL5 resp. 
2.6.18_FC5 Signed-off-by: Hoang-Nam Nguyen --- hvcall.h | 32 ++++++++++++++++++++++++++++++++ 1 files changed, 32 insertions(+) diff -Nurp ofed_1_2_jan26/kernel_addons/backport/2.6.18_FC6/include/asm/hvcall.h ofed_1_2_nam/kernel_addons/backport/2.6.18_FC6/include/asm/hvcall.h --- ofed_1_2_jan26/kernel_addons/backport/2.6.18_FC6/include/asm/hvcall.h 1970-01-01 01:00:00.000000000 +0100 +++ ofed_1_2_nam/kernel_addons/backport/2.6.18_FC6/include/asm/hvcall.h 2007-01-27 00:29:44.000000000 +0100 @@ -0,0 +1,32 @@ +#ifndef ASM_HVCALL_BACKPORT_2618_H +#define ASM_HVCALL_BACKPORT_2618_H + +#include_next + +#ifdef __KERNEL__ + +#define PLPAR_HCALL9_BUFSIZE 9 + +inline static long plpar_hcall9(unsigned long opcode, + unsigned long *retbuf, + unsigned long arg1, /* References: <6C2C79E72C305246B504CBA17B5500C9A0DC2C@mtlexch01.mtl.com> Message-ID: <20070125072549.GE11092@mellanox.co.il> > Quoting Hoang-Nam Nguyen : > Subject: Re: [openfabrics-ewg] modules compilation status for OFED 1.2 > > > We stay with same build process but the backport patches give a solution > > for such cases. > > Michael Tsirkin can help you how we solved such problems with other > > kernel code we needed. > > I need to be more specific here: ibmebus requires two symbols in > arch/ppc64/kernel/dma.c to be exported, which means one really needs > to rebuild and install the patched kernel. As far as I understood > from Michael, when we looked at ofed-1.1, that approach is not > supported by ofed build process. Yes, I don't have a solution for that on ppc. For other modules we simply worked around the problem copying bits from newer kernels but this might not work for you. -- MST From online.banking at regions.com Fri Jan 26 20:56:32 2007 From: online.banking at regions.com (Regions Bank) Date: Sat, 27 Jan 2007 06:56:32 +0200 Subject: [openib-general] You have 1 new secure message ! Message-ID: An HTML attachment was scrubbed... 
URL: From eitan at sw053.yok.mtl.com Fri Jan 26 21:21:56 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sat, 27 Jan 2007 07:21:56 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-27:normal completion Message-ID: <200701270521.l0R5Luvk031664@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Fri_Jan_26_13:02:35_2007 4153c3 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From ramachandra.kuchimanchi at qlogic.com Fri Jan 26 23:45:03 2007 From: ramachandra.kuchimanchi at qlogic.com (Ramachandra Kuchimanchi) Date: Sat, 27 Jan 2007 01:45:03 -0600 Subject: [openib-general] modules compilation status for OFED 1.2 References: <45B77F8C.9060209@mellanox.co.il> Message-ID: Tziporet, We investigated supporting the VNIC driver on SLES 9 SP3 and it looks like the backport patch for this may not be completed by the Jan 31st code freeze. Thus for now, VNIC driver will not be supported on SLES 9 SP3. 
Regards, Ram ________________________________ From: Tziporet Koren [mailto:tziporet at mellanox.co.il] Sent: Wed 1/24/2007 9:17 PM To: EWG; Hoang-Nam Nguyen; Bryan O'Sullivan Cc: OPENIB; Betsy Zeller; Ramachandra Kuchimanchi Subject: modules compilation status for OFED 1.2 Hi All, We are approaching code freeze and I want to make sure that all kernel modules indeed will compile on the supported OSes of OFED 1.2: * Redhat EL4 up5 (currently tested on up4) * Redhat EL5 - if it will be available * SLES9 SP3 * SLES10 SP1 * kernel.org: 2.6.19.x and 2.6.20.x The status is that all modules (except ehca) pass compilation on kernel 2.6.19. The following modules have issues with support for some distros: * vnic (Ram) - SLES9 * ipath driver (Bryan) : SLES9, Redhat EL4 up4, SLES10 SP1 * ehca driver (Nam) - SLES9, Redhat EL4 up4, SLES10 SP1, 2.6.19 Owners of these modules: Please take action to fix these as soon as possible or reply if you don't want your module to be supported on some of the distros. Thanks, Tziporet -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From vlad at lists.openfabrics.org Sat Jan 27 02:21:13 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Sat, 27 Jan 2007 02:21:13 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070127-0200 daily build status Message-ID: <20070127102114.092D0E603D0@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.15 Passed on powerpc with linux-2.6.17 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.16 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Failed: From swise at 
opengridcomputing.com Sat Jan 27 08:00:39 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Sat, 27 Jan 2007 10:00:39 -0600 Subject: [openib-general] [RFC] [PATCH 2/2] for 2.6.21/OFED1.2 rdma_cm: add multicast support References: <000901c741a8$c8080b30$ff0da8c0@amr.corp.intel.com> Message-ID: <007d01c7422c$4a740460$020010ac@haggard> Sean, were you able to try this with an iWARP device to check for regressions? ----- Original Message ----- From: "Sean Hefty" To: "Hefty, Sean" ; ; "'Roland Dreier'" Cc: "'Or Gerlitz'" Sent: Friday, January 26, 2007 6:19 PM Subject: [openib-general] [RFC] [PATCH 2/2] for 2.6.21/OFED1.2 rdma_cm: add multicast support > Extend the rdma_cm to support multicast communication. Multicast > support is added to the existing RDMA_PS_UDP port space, as well as > to new port space, RDMA_PS_IPOIB. The latter port space allows > joining the multicast groups used by ipoib, which enable offloading > ipoib traffic to a separate QP. The port space determines the > signature > used in the MGID when joining the group. The newly added > RDMA_PS_IPOIB port space also allows for unicast operations. > > Supporting RDMA_PS_IPOIB requires changing how UD QPs are initialized, > since we can no longer assume that the qkey is constant. This > requires > saving the qkey to use when attaching to a device, so that it is > available when creating the QP. The qkey information is exported to > the user through the existing rdma_init_qp_attr() routine. > > Multicast support is exported to userspace through the rdma_ucm. > > Signed-off-by: Sean Hefty > --- > Changes from previous patches include adding the RDMA_PS_IPOIB port > space. 
> > diff --git a/drivers/infiniband/core/cma.c > b/drivers/infiniband/core/cma.c > index 9e0ab04..827df2a 100644 > --- a/drivers/infiniband/core/cma.c > +++ b/drivers/infiniband/core/cma.c > @@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq; > static DEFINE_IDR(sdp_ps); > static DEFINE_IDR(tcp_ps); > static DEFINE_IDR(udp_ps); > +static DEFINE_IDR(ipoib_ps); > > struct cma_device { > struct list_head list; > @@ -115,6 +116,7 @@ struct rdma_id_private { > struct list_head list; > struct list_head listen_list; > struct cma_device *cma_dev; > + struct list_head mc_list; > > enum cma_state state; > spinlock_t lock; > @@ -133,10 +135,23 @@ struct rdma_id_private { > } cm_id; > > u32 seq_num; > + u32 qkey; > u32 qp_num; > u8 srq; > }; > > +struct cma_multicast { > + struct rdma_id_private *id_priv; > + union { > + struct ib_sa_multicast *ib; > + } multicast; > + struct list_head list; > + void *context; > + struct sockaddr addr; > + u8 pad[sizeof(struct sockaddr_in6) - > + sizeof(struct sockaddr)]; > +}; > + > struct cma_work { > struct work_struct work; > struct rdma_id_private *id; > @@ -242,6 +257,11 @@ static inline void sdp_set_ip_ver(struct sdp_hh > *hh, u8 ip_ver) > hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); > } > > +static inline int cma_is_ud_ps(enum rdma_port_space ps) > +{ > + return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB); > +} > + > static void cma_attach_to_dev(struct rdma_id_private *id_priv, > struct cma_device *cma_dev) > { > @@ -264,19 +284,41 @@ static void cma_detach_from_dev(struct > rdma_id_private *id_priv) > id_priv->cma_dev = NULL; > } > > +static int cma_set_qkey(struct ib_device *device, u8 port_num, > + enum rdma_port_space ps, > + struct rdma_dev_addr *dev_addr, u32 *qkey) > +{ > + struct ib_sa_mcmember_rec rec; > + int ret = 0; > + > + switch (ps) { > + case RDMA_PS_UDP: > + *qkey = RDMA_UDP_QKEY; > + break; > + case RDMA_PS_IPOIB: > + ib_addr_get_mgid(dev_addr, &rec.mgid); > + ret = ib_sa_get_mcmember_rec(device, 
port_num, &rec.mgid, &rec); > + *qkey = be32_to_cpu(rec.qkey); > + break; > + default: > + break; > + } > + return ret; > +} > + > static int cma_acquire_dev(struct rdma_id_private *id_priv) > { > - enum rdma_node_type dev_type = > id_priv->id.route.addr.dev_addr.dev_type; > + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; > struct cma_device *cma_dev; > union ib_gid gid; > int ret = -ENODEV; > > - switch (rdma_node_get_transport(dev_type)) { > + switch (rdma_node_get_transport(dev_addr->dev_type)) { > case RDMA_TRANSPORT_IB: > - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); > + ib_addr_get_sgid(dev_addr, &gid); > break; > case RDMA_TRANSPORT_IWARP: > - iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); > + iw_addr_get_sgid(dev_addr, &gid); > break; > default: > return -ENODEV; > @@ -286,7 +328,12 @@ static int cma_acquire_dev(struct rdma_id_private > *id_priv) > ret = ib_find_cached_gid(cma_dev->device, &gid, > &id_priv->id.port_num, NULL); > if (!ret) { > - cma_attach_to_dev(id_priv, cma_dev); > + ret = cma_set_qkey(cma_dev->device, > + id_priv->id.port_num, > + id_priv->id.ps, dev_addr, > + &id_priv->qkey); > + if (!ret) > + cma_attach_to_dev(id_priv, cma_dev); > break; > } > } > @@ -324,40 +371,50 @@ struct rdma_cm_id > *rdma_create_id(rdma_cm_event_handler event_handler, > init_waitqueue_head(&id_priv->wait_remove); > atomic_set(&id_priv->dev_remove, 0); > INIT_LIST_HEAD(&id_priv->listen_list); > + INIT_LIST_HEAD(&id_priv->mc_list); > get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); > > return &id_priv->id; > } > EXPORT_SYMBOL(rdma_create_id); > > -static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct > ib_qp *qp) > +static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct > ib_qp *qp) > { > struct ib_qp_attr qp_attr; > - struct rdma_dev_addr *dev_addr; > - int ret; > + int qp_attr_mask, ret; > > - dev_addr = &id_priv->id.route.addr.dev_addr; > - ret = 
ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, > - ib_addr_get_pkey(dev_addr), > - &qp_attr.pkey_index); > + qp_attr.qp_state = IB_QPS_INIT; > + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); > if (ret) > return ret; > > - qp_attr.qp_state = IB_QPS_INIT; > - qp_attr.qp_access_flags = 0; > - qp_attr.port_num = id_priv->id.port_num; > - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | > - IB_QP_PKEY_INDEX | IB_QP_PORT); > + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); > + if (ret) > + return ret; > + > + qp_attr.qp_state = IB_QPS_RTR; > + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); > + if (ret) > + return ret; > + > + qp_attr.qp_state = IB_QPS_RTS; > + qp_attr.sq_psn = 0; > + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); > + > + return ret; > } > > -static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct > ib_qp *qp) > +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct > ib_qp *qp) > { > struct ib_qp_attr qp_attr; > + int qp_attr_mask, ret; > > qp_attr.qp_state = IB_QPS_INIT; > - qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; > + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); > + if (ret) > + return ret; > > - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); > + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); > } > > int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, > @@ -375,18 +432,10 @@ int rdma_create_qp(struct rdma_cm_id *id, struct > ib_pd *pd, > if (IS_ERR(qp)) > return PTR_ERR(qp); > > - switch (rdma_node_get_transport(id->device->node_type)) { > - case RDMA_TRANSPORT_IB: > - ret = cma_init_ib_qp(id_priv, qp); > - break; > - case RDMA_TRANSPORT_IWARP: > - ret = cma_init_iw_qp(id_priv, qp); > - break; > - default: > - ret = -ENOSYS; > - break; > - } > - > + if (cma_is_ud_ps(id_priv->id.ps)) > + ret = cma_init_ud_qp(id_priv, qp); > + else > + ret = cma_init_conn_qp(id_priv, qp); > if (ret) > goto err; > > @@ 
-459,23 +508,55 @@ static int cma_modify_qp_err(struct rdma_cm_id > *id) > return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); > } > > +static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, > + struct ib_qp_attr *qp_attr, int *qp_attr_mask) > +{ > + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; > + int ret; > + > + ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, > + ib_addr_get_pkey(dev_addr), > + &qp_attr->pkey_index); > + if (ret) > + return ret; > + > + qp_attr->port_num = id_priv->id.port_num; > + *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; > + > + if (cma_is_ud_ps(id_priv->id.ps)) { > + qp_attr->qkey = id_priv->qkey; > + *qp_attr_mask |= IB_QP_QKEY; > + } else { > + qp_attr->qp_access_flags = 0; > + *qp_attr_mask |= IB_QP_ACCESS_FLAGS; > + } > + return 0; > +} > + > int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr > *qp_attr, > int *qp_attr_mask) > { > struct rdma_id_private *id_priv; > - int ret; > + int ret = 0; > > id_priv = container_of(id, struct rdma_id_private, id); > switch (rdma_node_get_transport(id_priv->id.device->node_type)) { > case RDMA_TRANSPORT_IB: > - ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, > - qp_attr_mask); > + if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps)) > + ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); > + else > + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, > + qp_attr_mask); > if (qp_attr->qp_state == IB_QPS_RTR) > qp_attr->rq_psn = id_priv->seq_num; > break; > case RDMA_TRANSPORT_IWARP: > - ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, > - qp_attr_mask); > + if (!id_priv->cm_id.iw) { > + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; > + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; > + } else > + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, > + qp_attr_mask); > break; > default: > ret = -ENOSYS; > @@ -697,6 +778,19 @@ static void cma_release_port(struct > rdma_id_private *id_priv) > 
mutex_unlock(&lock); > } > > +static void cma_leave_mc_groups(struct rdma_id_private *id_priv) > +{ > + struct cma_multicast *mc; > + > + while (!list_empty(&id_priv->mc_list)) { > + mc = container_of(id_priv->mc_list.next, > + struct cma_multicast, list); > + list_del(&mc->list); > + ib_sa_free_multicast(mc->multicast.ib); > + kfree(mc); > + } > +} > + > void rdma_destroy_id(struct rdma_cm_id *id) > { > struct rdma_id_private *id_priv; > @@ -721,6 +815,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) > default: > break; > } > + cma_leave_mc_groups(id_priv); > mutex_lock(&lock); > cma_detach_from_dev(id_priv); > } > @@ -971,7 +1066,7 @@ static int cma_req_handler(struct ib_cm_id > *cm_id, struct ib_cm_event > *ib_event) > memset(&event, 0, sizeof event); > offset = cma_user_data_offset(listen_id->id.ps); > event.event = RDMA_CM_EVENT_CONNECT_REQUEST; > - if (listen_id->id.ps == RDMA_PS_UDP) { > + if (cma_is_ud_ps(listen_id->id.ps)) { > conn_id = cma_new_udp_id(&listen_id->id, ib_event); > event.param.ud.private_data = ib_event->private_data + offset; > event.param.ud.private_data_len = > @@ -1805,6 +1900,9 @@ static int cma_get_port(struct rdma_id_private > *id_priv) > case RDMA_PS_UDP: > ps = &udp_ps; > break; > + case RDMA_PS_IPOIB: > + ps = &ipoib_ps; > + break; > default: > return -EPROTONOSUPPORT; > } > @@ -1919,7 +2017,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id > *cm_id, > event.status = ib_event->param.sidr_rep_rcvd.status; > break; > } > - if (rep->qkey != RDMA_UD_QKEY) { > + if (id_priv->qkey != rep->qkey) { > event.event = RDMA_CM_EVENT_UNREACHABLE; > event.status = -EINVAL; > break; > @@ -2118,7 +2216,7 @@ int rdma_connect(struct rdma_cm_id *id, struct > rdma_conn_param *conn_param) > > switch (rdma_node_get_transport(id->device->node_type)) { > case RDMA_TRANSPORT_IB: > - if (id->ps == RDMA_PS_UDP) > + if (cma_is_ud_ps(id->ps)) > ret = cma_resolve_ib_udp(id_priv, conn_param); > else > ret = cma_connect_ib(id_priv, conn_param); > @@ -2214,7 
+2312,7 @@ static int cma_send_sidr_rep(struct > rdma_id_private *id_priv, > rep.status = status; > if (status == IB_SIDR_SUCCESS) { > rep.qp_num = id_priv->qp_num; > - rep.qkey = RDMA_UD_QKEY; > + rep.qkey = id_priv->qkey; > } > rep.private_data = private_data; > rep.private_data_len = private_data_len; > @@ -2238,7 +2336,7 @@ int rdma_accept(struct rdma_cm_id *id, struct > rdma_conn_param *conn_param) > > switch (rdma_node_get_transport(id->device->node_type)) { > case RDMA_TRANSPORT_IB: > - if (id->ps == RDMA_PS_UDP) > + if (cma_is_ud_ps(id->ps)) > ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, > conn_param->private_data, > conn_param->private_data_len); > @@ -2299,7 +2397,7 @@ int rdma_reject(struct rdma_cm_id *id, const > void *private_data, > > switch (rdma_node_get_transport(id->device->node_type)) { > case RDMA_TRANSPORT_IB: > - if (id->ps == RDMA_PS_UDP) > + if (cma_is_ud_ps(id->ps)) > ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, > private_data, private_data_len); > else > @@ -2350,6 +2448,161 @@ out: > } > EXPORT_SYMBOL(rdma_disconnect); > > +static int cma_ib_mc_handler(int status, struct ib_sa_multicast > *multicast) > +{ > + struct rdma_id_private *id_priv; > + struct cma_multicast *mc = multicast->context; > + struct rdma_cm_event event; > + int ret; > + > + id_priv = mc->id_priv; > + atomic_inc(&id_priv->dev_remove); > + if (!cma_comp(id_priv, CMA_ADDR_BOUND) && > + !cma_comp(id_priv, CMA_ADDR_RESOLVED)) > + goto out; > + > + if (!status && id_priv->id.qp) > + status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, > + multicast->rec.mlid); > + > + memset(&event, 0, sizeof event); > + event.status = status; > + event.param.ud.private_data = mc->context; > + if (!status) { > + event.event = RDMA_CM_EVENT_MULTICAST_JOIN; > + ib_init_ah_from_mcmember(id_priv->id.device, > + id_priv->id.port_num, &multicast->rec, > + &event.param.ud.ah_attr); > + event.param.ud.qp_num = 0xFFFFFF; > + event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); > 
+ } else > + event.event = RDMA_CM_EVENT_MULTICAST_ERROR; > + > + ret = id_priv->id.event_handler(&id_priv->id, &event); > + if (ret) { > + cma_exch(id_priv, CMA_DESTROYING); > + cma_release_remove(id_priv); > + rdma_destroy_id(&id_priv->id); > + return 0; > + } > +out: > + cma_release_remove(id_priv); > + return 0; > +} > + > +static int cma_join_ib_multicast(struct rdma_id_private *id_priv, > + struct cma_multicast *mc) > +{ > + struct ib_sa_mcmember_rec rec; > + unsigned char mc_map[MAX_ADDR_LEN]; > + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; > + struct sockaddr_in *sin = (struct sockaddr_in *) &mc->addr; > + ib_sa_comp_mask comp_mask; > + int ret; > + > + ib_addr_get_mgid(dev_addr, &rec.mgid); > + ret = ib_sa_get_mcmember_rec(id_priv->id.device, > id_priv->id.port_num, > + &rec.mgid, &rec); > + if (ret) > + return ret; > + > + ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); > + if (id_priv->id.ps == RDMA_PS_UDP) { > + mc_map[7] = 0x01; /* Use RDMA CM signature */ > + rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > + } > + mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; > + mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); > + > + rec.mgid = *(union ib_gid *) (mc_map + 4); > + ib_addr_get_sgid(dev_addr, &rec.port_gid); > + rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); > + rec.join_state = 1; > + > + comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | > + IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | > + IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | > + IB_SA_MCMEMBER_REC_FLOW_LABEL | > + IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; > + > + mc->multicast.ib = ib_sa_join_multicast(&sa_client, > id_priv->id.device, > + id_priv->id.port_num, &rec, > + comp_mask, GFP_KERNEL, > + cma_ib_mc_handler, mc); > + if (IS_ERR(mc->multicast.ib)) > + return PTR_ERR(mc->multicast.ib); > + > + return 0; > +} > + > +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, > + void *context) > +{ > + struct rdma_id_private 
*id_priv; > + struct cma_multicast *mc; > + int ret; > + > + id_priv = container_of(id, struct rdma_id_private, id); > + if (!cma_comp(id_priv, CMA_ADDR_BOUND) && > + !cma_comp(id_priv, CMA_ADDR_RESOLVED)) > + return -EINVAL; > + > + mc = kmalloc(sizeof *mc, GFP_KERNEL); > + if (!mc) > + return -ENOMEM; > + > + memcpy(&mc->addr, addr, ip_addr_size(addr)); > + mc->context = context; > + mc->id_priv = id_priv; > + > + spin_lock(&id_priv->lock); > + list_add(&mc->list, &id_priv->mc_list); > + spin_unlock(&id_priv->lock); > + > + switch (rdma_node_get_transport(id->device->node_type)) { > + case RDMA_TRANSPORT_IB: > + ret = cma_join_ib_multicast(id_priv, mc); > + break; > + default: > + ret = -ENOSYS; > + break; > + } > + > + if (ret) { > + spin_lock_irq(&id_priv->lock); > + list_del(&mc->list); > + spin_unlock_irq(&id_priv->lock); > + kfree(mc); > + } > + return ret; > +} > +EXPORT_SYMBOL(rdma_join_multicast); > + > +void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr > *addr) > +{ > + struct rdma_id_private *id_priv; > + struct cma_multicast *mc; > + > + id_priv = container_of(id, struct rdma_id_private, id); > + spin_lock_irq(&id_priv->lock); > + list_for_each_entry(mc, &id_priv->mc_list, list) { > + if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { > + list_del(&mc->list); > + spin_unlock_irq(&id_priv->lock); > + > + if (id->qp) > + ib_detach_mcast(id->qp, > + &mc->multicast.ib->rec.mgid, > + mc->multicast.ib->rec.mlid); > + ib_sa_free_multicast(mc->multicast.ib); > + kfree(mc); > + return; > + } > + } > + spin_unlock_irq(&id_priv->lock); > +} > +EXPORT_SYMBOL(rdma_leave_multicast); > + > static void cma_add_one(struct ib_device *device) > { > struct cma_device *cma_dev; > @@ -2476,6 +2729,7 @@ static void cma_cleanup(void) > idr_destroy(&sdp_ps); > idr_destroy(&tcp_ps); > idr_destroy(&udp_ps); > + idr_destroy(&ipoib_ps); > } > > module_init(cma_init); > diff --git a/drivers/infiniband/core/ucma.c > b/drivers/infiniband/core/ucma.c > index 
e2e8d32..f8d117a 100644 > --- a/drivers/infiniband/core/ucma.c > +++ b/drivers/infiniband/core/ucma.c > @@ -70,10 +70,24 @@ struct ucma_context { > u64 uid; > > struct list_head list; > + struct list_head mc_list; > +}; > + > +struct ucma_multicast { > + struct ucma_context *ctx; > + int id; > + int events_reported; > + > + u64 uid; > + struct list_head list; > + struct sockaddr addr; > + u8 pad[sizeof(struct sockaddr_in6) - > + sizeof(struct sockaddr)]; > }; > > struct ucma_event { > struct ucma_context *ctx; > + struct ucma_multicast *mc; > struct list_head list; > struct rdma_cm_id *cm_id; > struct rdma_ucm_event_resp resp; > @@ -81,6 +95,7 @@ struct ucma_event { > > static DEFINE_MUTEX(mut); > static DEFINE_IDR(ctx_idr); > +static DEFINE_IDR(multicast_idr); > > static inline struct ucma_context *_ucma_find_context(int id, > struct ucma_file *file) > @@ -124,6 +139,7 @@ static struct ucma_context *ucma_alloc_ctx(struct > ucma_file *file) > > atomic_set(&ctx->ref, 1); > init_completion(&ctx->comp); > + INIT_LIST_HEAD(&ctx->mc_list); > ctx->file = file; > > do { > @@ -147,6 +163,37 @@ error: > return NULL; > } > > +static struct ucma_multicast* ucma_alloc_multicast(struct > ucma_context *ctx) > +{ > + struct ucma_multicast *mc; > + int ret; > + > + mc = kzalloc(sizeof(*mc), GFP_KERNEL); > + if (!mc) > + return NULL; > + > + do { > + ret = idr_pre_get(&multicast_idr, GFP_KERNEL); > + if (!ret) > + goto error; > + > + mutex_lock(&mut); > + ret = idr_get_new(&multicast_idr, mc, &mc->id); > + mutex_unlock(&mut); > + } while (ret == -EAGAIN); > + > + if (ret) > + goto error; > + > + mc->ctx = ctx; > + list_add_tail(&mc->list, &ctx->mc_list); > + return mc; > + > +error: > + kfree(mc); > + return NULL; > +} > + > static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, > struct rdma_conn_param *src) > { > @@ -180,8 +227,19 @@ static void ucma_set_event_context(struct > ucma_context *ctx, > struct ucma_event *uevent) > { > uevent->ctx = ctx; > - uevent->resp.uid 
= ctx->uid; > - uevent->resp.id = ctx->id; > + switch (event->event) { > + case RDMA_CM_EVENT_MULTICAST_JOIN: > + case RDMA_CM_EVENT_MULTICAST_ERROR: > + uevent->mc = (struct ucma_multicast *) > + event->param.ud.private_data; > + uevent->resp.uid = uevent->mc->uid; > + uevent->resp.id = uevent->mc->id; > + break; > + default: > + uevent->resp.uid = ctx->uid; > + uevent->resp.id = ctx->id; > + break; > + } > } > > static int ucma_event_handler(struct rdma_cm_id *cm_id, > @@ -199,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id > *cm_id, > ucma_set_event_context(ctx, event, uevent); > uevent->resp.event = event->event; > uevent->resp.status = event->status; > - if (cm_id->ps == RDMA_PS_UDP) > + if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) > ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); > else > ucma_copy_conn_event(&uevent->resp.param.conn, > @@ -290,6 +348,8 @@ static ssize_t ucma_get_event(struct ucma_file > *file, const char __user *inbuf, > > list_del(&uevent->list); > uevent->ctx->events_reported++; > + if (uevent->mc) > + uevent->mc->events_reported++; > kfree(uevent); > done: > mutex_unlock(&file->mut); > @@ -342,6 +402,19 @@ err1: > return ret; > } > > +static void ucma_cleanup_multicast(struct ucma_context *ctx) > +{ > + struct ucma_multicast *mc, *tmp; > + > + mutex_lock(&mut); > + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { > + list_del(&mc->list); > + idr_remove(&multicast_idr, mc->id); > + kfree(mc); > + } > + mutex_unlock(&mut); > +} > + > static void ucma_cleanup_events(struct ucma_context *ctx) > { > struct ucma_event *uevent, *tmp; > @@ -360,6 +433,19 @@ static void ucma_cleanup_events(struct > ucma_context *ctx) > } > } > > +static void ucma_cleanup_mc_events(struct ucma_multicast *mc) > +{ > + struct ucma_event *uevent, *tmp; > + > + list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, > list) { > + if (uevent->mc != mc) > + continue; > + > + list_del(&uevent->list); > + 
kfree(uevent); > + } > +} > + > static int ucma_free_ctx(struct ucma_context *ctx) > { > int events_reported; > @@ -367,6 +453,8 @@ static int ucma_free_ctx(struct ucma_context *ctx) > /* No new events will be generated after destroying the id. */ > rdma_destroy_id(ctx->cm_id); > > + ucma_cleanup_multicast(ctx); > + > /* Cleanup events not yet reported to the user. */ > mutex_lock(&ctx->file->mut); > ucma_cleanup_events(ctx); > @@ -731,6 +819,114 @@ static ssize_t ucma_notify(struct ucma_file > *file, const char __user *inbuf, > return ret; > } > > +static ssize_t ucma_join_multicast(struct ucma_file *file, > + const char __user *inbuf, > + int in_len, int out_len) > +{ > + struct rdma_ucm_join_mcast cmd; > + struct rdma_ucm_create_id_resp resp; > + struct ucma_context *ctx; > + struct ucma_multicast *mc; > + int ret; > + > + if (out_len < sizeof(resp)) > + return -ENOSPC; > + > + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) > + return -EFAULT; > + > + ctx = ucma_get_ctx(file, cmd.id); > + if (IS_ERR(ctx)) > + return PTR_ERR(ctx); > + > + mutex_lock(&file->mut); > + mc = ucma_alloc_multicast(ctx); > + if (IS_ERR(mc)) { > + ret = PTR_ERR(mc); > + goto err1; > + } > + > + mc->uid = cmd.uid; > + memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); > + ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc); > + if (ret) > + goto err2; > + > + resp.id = mc->id; > + if (copy_to_user((void __user *)(unsigned long)cmd.response, > + &resp, sizeof(resp))) { > + ret = -EFAULT; > + goto err3; > + } > + > + mutex_unlock(&file->mut); > + ucma_put_ctx(ctx); > + return 0; > + > +err3: > + rdma_leave_multicast(ctx->cm_id, &mc->addr); > + ucma_cleanup_mc_events(mc); > +err2: > + mutex_lock(&mut); > + idr_remove(&multicast_idr, mc->id); > + mutex_unlock(&mut); > + list_del(&mc->list); > + kfree(mc); > +err1: > + mutex_unlock(&file->mut); > + ucma_put_ctx(ctx); > + return ret; > +} > + > +static ssize_t ucma_leave_multicast(struct ucma_file *file, > + const char __user *inbuf, > + int 
in_len, int out_len) > +{ > + struct rdma_ucm_destroy_id cmd; > + struct rdma_ucm_destroy_id_resp resp; > + struct ucma_multicast *mc; > + int ret = 0; > + > + if (out_len < sizeof(resp)) > + return -ENOSPC; > + > + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) > + return -EFAULT; > + > + mutex_lock(&mut); > + mc = idr_find(&multicast_idr, cmd.id); > + if (!mc) > + mc = ERR_PTR(-ENOENT); > + else if (mc->ctx->file != file) > + mc = ERR_PTR(-EINVAL); > + else { > + idr_remove(&multicast_idr, mc->id); > + atomic_inc(&mc->ctx->ref); > + } > + mutex_unlock(&mut); > + > + if (IS_ERR(mc)) { > + ret = PTR_ERR(mc); > + goto out; > + } > + > + rdma_leave_multicast(mc->ctx->cm_id, &mc->addr); > + mutex_lock(&mc->ctx->file->mut); > + ucma_cleanup_mc_events(mc); > + list_del(&mc->list); > + mutex_unlock(&mc->ctx->file->mut); > + > + ucma_put_ctx(mc->ctx); > + resp.events_reported = mc->events_reported; > + kfree(mc); > + > + if (copy_to_user((void __user *)(unsigned long)cmd.response, > + &resp, sizeof(resp))) > + ret = -EFAULT; > +out: > + return ret; > +} > + > static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, > const char __user *inbuf, > int in_len, int out_len) = { > @@ -750,6 +946,8 @@ static ssize_t (*ucma_cmd_table[])(struct > ucma_file *file, > [RDMA_USER_CM_CMD_GET_OPTION] = NULL, > [RDMA_USER_CM_CMD_SET_OPTION] = NULL, > [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, > + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, > + [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, > }; > > static ssize_t ucma_write(struct file *filp, const char __user *buf, > diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h > index 36cd8a8..2d6a770 100644 > --- a/include/rdma/rdma_cm.h > +++ b/include/rdma/rdma_cm.h > @@ -52,10 +52,13 @@ enum rdma_cm_event_type { > RDMA_CM_EVENT_ESTABLISHED, > RDMA_CM_EVENT_DISCONNECTED, > RDMA_CM_EVENT_DEVICE_REMOVAL, > + RDMA_CM_EVENT_MULTICAST_JOIN, > + RDMA_CM_EVENT_MULTICAST_ERROR > }; > > enum rdma_port_space { > RDMA_PS_SDP = 
0x0001, > + RDMA_PS_IPOIB= 0x0002, > RDMA_PS_TCP = 0x0106, > RDMA_PS_UDP = 0x0111, > RDMA_PS_SCTP = 0x0183 > @@ -294,5 +297,21 @@ int rdma_reject(struct rdma_cm_id *id, const void > *private_data, > */ > int rdma_disconnect(struct rdma_cm_id *id); > > -#endif /* RDMA_CM_H */ > +/** > + * rdma_join_multicast - Join the multicast group specified by the > given > + * address. > + * @id: Communication identifier associated with the request. > + * @addr: Multicast address identifying the group to join. > + * @context: User-defined context associated with the join request, > returned > + * to the user through the private_data pointer in multicast events. > + */ > +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, > + void *context); > > +/** > + * rdma_leave_multicast - Leave the multicast group specified by the > given > + * address. > + */ > +void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr > *addr); > + > +#endif /* RDMA_CM_H */ > diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h > index 9b176df..950424b 100644 > --- a/include/rdma/rdma_cm_ib.h > +++ b/include/rdma/rdma_cm_ib.h > @@ -44,7 +44,7 @@ > int rdma_set_ib_paths(struct rdma_cm_id *id, > struct ib_sa_path_rec *path_rec, int num_paths); > > -/* Global qkey for UD QPs and multicast groups. */ > -#define RDMA_UD_QKEY 0x01234567 > +/* Global qkey for UDP QPs and multicast groups. 
*/ > +#define RDMA_UDP_QKEY 0x01234567 > > #endif /* RDMA_CM_IB_H */ > diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h > index 9572ab8..f632b0c 100644 > --- a/include/rdma/rdma_user_cm.h > +++ b/include/rdma/rdma_user_cm.h > @@ -38,7 +38,7 @@ > #include > #include > > -#define RDMA_USER_CM_ABI_VERSION 3 > +#define RDMA_USER_CM_ABI_VERSION 4 > > #define RDMA_MAX_PRIVATE_DATA 256 > > @@ -58,7 +58,9 @@ enum { > RDMA_USER_CM_CMD_GET_EVENT, > RDMA_USER_CM_CMD_GET_OPTION, > RDMA_USER_CM_CMD_SET_OPTION, > - RDMA_USER_CM_CMD_NOTIFY > + RDMA_USER_CM_CMD_NOTIFY, > + RDMA_USER_CM_CMD_JOIN_MCAST, > + RDMA_USER_CM_CMD_LEAVE_MCAST > }; > > /* > @@ -188,6 +190,13 @@ struct rdma_ucm_notify { > __u32 event; > }; > > +struct rdma_ucm_join_mcast { > + __u64 response; /* rdma_ucm_create_id_resp */ > + __u64 uid; > + struct sockaddr_in6 addr; > + __u32 id; > +}; > + > struct rdma_ucm_get_event { > __u64 response; > }; > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > From sean.hefty at intel.com Sat Jan 27 08:16:27 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Sat, 27 Jan 2007 08:16:27 -0800 Subject: [openib-general] [RFC] [PATCH 2/2] for 2.6.21/OFED1.2 rdma_cm: add multicast support In-Reply-To: <007d01c7422c$4a740460$020010ac@haggard> Message-ID: <000001c7422e$7f80ba20$a5d8180a@amr.corp.intel.com> >Sean, were you able to try this with an iWARP device to check for >regressions? No - I don't have any iWarp devices available to me. I thought about possible regressions too, since this changes QP initialization, and is why I listed this patch in the series as an RFC. Hopefully by getting this code into OFED 1.2 by feature freeze, we can discover if any regressions occurred. 
- Sean From HNGUYEN at de.ibm.com Sat Jan 27 08:44:22 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Sat, 27 Jan 2007 11:44:22 -0500 Subject: [openib-general] [openfabrics-ewg] modules compilation status for OFED 1.2 In-Reply-To: <20070125072549.GE11092@mellanox.co.il> Message-ID: Hi Michael! > > I need to be more specific here: ibmebus requires two symbols in > > arch/ppc64/kernel/dma.c to be exported, which means one really needs > > to rebuild and install the patched kernel. As far as I understood > > from Michael, when we looked at ofed-1.1, that approach is not > > supported by ofed build process. > Yes, I don't have a solution for that on ppc. > For other modules we simply worked around the problem copying bits from newer > kernels but this might not work for you. No. It cannot, because two symbols need to be exported from the rhel4.5/sles9sp3 kernel. I'm looking for a solution for users/customers that are willing to use ofed-1.2 plus ehca on rhel4.5/sles9sp3 and to patch their kernel themselves. Can we include those backport patches in ofed-1.2 under a dir contrib as we have on svn? That dir can be placed in kernel_addons or the like. I can then provide a howto for ehca in such cases. Thanks Nam From rdreier at cisco.com Sat Jan 27 09:35:52 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 27 Jan 2007 09:35:52 -0800 Subject: [openib-general] [PATCH 2.6.21 4/4] ehca: remove obsolete prototypes In-Reply-To: <200701240014.19439.hnguyen@linux.vnet.ibm.com> (Hoang-Nam Nguyen's message of "Wed, 24 Jan 2007 00:14:18 +0100") References: <200701240014.19439.hnguyen@linux.vnet.ibm.com> Message-ID: Thanks, merged patches 3-4 for 2.6.21. From mst at mellanox.co.il Sat Jan 27 09:49:05 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Sat, 27 Jan 2007 19:49:05 +0200 Subject: [openib-general] [Patch ofed1.2 0/3]libehca: cleanup and adjust mmap In-Reply-To: <200701261746.35189.ossrosch@linux.vnet.ibm.com> References: <200701261746.35189.ossrosch@linux.vnet.ibm.com> Message-ID: <20070127174905.GT10812@mellanox.co.il> > Quoting Stefan Roscher : > Subject: [Patch ofed1.2 0/3]libehca: cleanup and adjust mmap > > Hi, > > This 3 patches changes the libehca coding style to kernel coding and kernel > tracing style.The userspace mmap code needs to be adjusted to the changed > userspace mapping introduced in kernel patch > [PATCH/RFC 2.6.21 0/5] ehca: remove use of do_mmap() from kernel space. Note that ofed 1.2 has not branched yet, so any changes just need to be merged up to maintainer's tree. -- MST From Websupport at service.discovercard.com Sat Jan 27 08:45:02 2007 From: Websupport at service.discovercard.com (Discover Card Support) Date: Sat, 27 Jan 2007 17:45:02 +0100 Subject: [openib-general] Verify Discover Card Account Information Message-ID: <1169916302.164077.qmail@service.discovercard.com> An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Sat Jan 27 11:19:46 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sat, 27 Jan 2007 21:19:46 +0200 Subject: [openib-general] [openfabrics-ewg] Current List of OFA Linux components and maintainers In-Reply-To: References: Message-ID: <20070127191946.GA28934@mellanox.co.il> > Quoting Woodruff, Robert J : > Subject: RE: [openfabrics-ewg] Current List of OFA Linux components and maintainers > > Michael wrote, > >I still don't see Ishai listed against SRP. > >Is there a problem? > > Bottom of slide 4. > SRP Tools/daemon > userspace/srptools Ishai Rabinovitz Ishai at mellanox.co.il > > Does he also do the SRP kernel driver, rather than Roland? Ishai maintains SRP port to older kernels. -- MST From mst at mellanox.co.il Sat Jan 27 11:25:22 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Sat, 27 Jan 2007 21:25:22 +0200 Subject: [openib-general] [openfabrics-ewg] modules compilation status for OFED 1.2 In-Reply-To: References: Message-ID: <20070127192522.GC28934@mellanox.co.il> Quoting Hoang-Nam Nguyen : Subject: Re: [openib-general] [openfabrics-ewg] modules compilation status for OFED 1.2 Hi Michael! > > I need to be more specific here: ibmebus requires two symbols in > > arch/ppc64/kernel/dma.c to be exported, which means one really needs > > to rebuild and install the patched kernel. As far as I understood > > from Michael, when we looked at ofed-1.1, that approach is not > > supported by ofed build process. > Yes, I don't have a solution for that on ppc. > For other modules we simply worked around the problem copying bits from newer > kernels but this might not work for you. > No. It can not, because two symbols need to be exported from > rhel4.5/sles9sp3 kernel. > I'm looking for a solution for users/customers that are willing to use > ofed-1.2 plus ehca on rhel4.5/sles9sp3 and to patch their kernel themselves. Can we > include those backport patches in ofed-1.2 under a dir contrib as we have > on svn? That dir can be placed in kernel_addons or the like. > I can then provide a howto for ehca in such one cases. > Thanks > Nam This is possible. Please put these under your ~/public_html on OFA server for now, we'll find some place to put it when ofed 1.2 feature freeze kicks in and Vlad starts working on packaging. -- MST From mst at mellanox.co.il Sat Jan 27 14:11:34 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 28 Jan 2007 00:11:34 +0200 Subject: [openib-general] [PATCH ofed-1.2 0/6] ehca (kernel space) patches for ofed-1.2 In-Reply-To: <200701270159.21073.hnguyen@linux.vnet.ibm.com> References: <200701270159.21073.hnguyen@linux.vnet.ibm.com> Message-ID: <20070127221134.GF32270@mellanox.co.il> > PS2: For backport on 2.6.16 resp. SLES10 I saw that there is a > hvcall.h under backport/2.6.16/include/linux. 
However that one > is not sufficient for ehca and include/linux is the wrong place. > Hence, I'm patching a new one under include/asm. If I'm right, > please remove include/linux/hvcall.h! I remember this was needed for iser backport for some reason. Does someone remember? -- MST From eitan at sw053.yok.mtl.com Sat Jan 27 21:21:48 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Sun, 28 Jan 2007 07:21:48 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-28:normal completion Message-ID: <200701280521.l0S5Lmn2006886@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Fri_Jan_26_13:55:36_2007 8fb41d ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From tziporet at mellanox.co.il Sun Jan 28 01:33:30 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Sun, 28 Jan 2007 11:33:30 +0200 Subject: [openib-general] modules compilation status for OFED 1.2 Message-ID: <6C2C79E72C305246B504CBA17B5500C9A0DC4C@mtlexch01.mtl.com> OK with me Tziporet ________________________________ From: Ramachandra Kuchimanchi [mailto:ramachandra.kuchimanchi at qlogic.com] 
Sent: Saturday, January 27, 2007 9:45 AM To: Tziporet Koren; EWG; Hoang-Nam Nguyen; Bryan O'Sullivan Cc: OPENIB; Betsy Zeller Subject: RE: modules compilation status for OFED 1.2 Tziporet, We investigated supporting the VNIC driver on SLES 9 SP3 and it looks like the backport patch for this may not be completed by the Jan 31st code freeze. Thus for now, VNIC driver will not be supported on SLES 9 SP3. Regards, Ram ________________________________ From: Tziporet Koren [mailto:tziporet at mellanox.co.il] Sent: Wed 1/24/2007 9:17 PM To: EWG; Hoang-Nam Nguyen; Bryan O'Sullivan Cc: OPENIB; Betsy Zeller; Ramachandra Kuchimanchi Subject: modules compilation status for OFED 1.2 Hi All, We are approaching code freeze and I want to make sure that all kernel modules indeed will compile on the supported OSes of OFED 1.2: * Redhat EL4 up5 (currently tested on up4) * Redhat EL5 - if will be available * SLES9 SP3 * SLES10 SP1 * kernel.org: 2.6.19.x and 2.6.20.x The status is that all modules (except ehca) pass compilation on kernel 2.6.19. The following modules have issues with support for some distros: * vnic (Ram) - SLES9 * ipath driver (Bryan) : SLES9, Redhat EL4 up4, SLES10 SP1 * ehca driver (Nam) - SLES9, Redhat EL4 up4, SLES10 SP1, 2.6.19 Owners of these modules: Please take an action to fix as soon as possible or reply if you don't want your module to be supported on some of the distros Thanks, Tziporet -------------- next part -------------- An HTML attachment was scrubbed... URL: From mst at mellanox.co.il Sun Jan 28 02:13:30 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Sun, 28 Jan 2007 12:13:30 +0200 Subject: [openib-general] patch for -stable Message-ID: <20070128101329.GJ6799@mellanox.co.il> Roland, went over the logs, and I think the following is severe enough to go into -stable: commit bf628dc22a09ed2022abb32c76011ae5f99ad6b0 Author: Roland Dreier Date: Fri Dec 15 14:01:49 2006 -0800 IB/srp: Fix FMR mapping for 32-bit kernels and addresses above 4G What do you think? Can you send it or do you want me to? -- MST From vlad at lists.openfabrics.org Sun Jan 28 02:21:50 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Sun, 28 Jan 2007 02:21:50 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070128-0200 daily build status Message-ID: <20070128102150.7E893E603D1@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on x86_64 with linux-2.6.19 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.18 Passed on powerpc with linux-2.6.17 Passed on ppc64 with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.16 Passed on powerpc with linux-2.6.15 
Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.12 Failed: From monis at voltaire.com Sun Jan 28 03:24:34 2007 From: monis at voltaire.com (Moni Shoua) Date: Sun, 28 Jan 2007 13:24:34 +0200 Subject: [openib-general] Add bonding suuport to OFED In-Reply-To: <1169735584.3731.180.camel@swlab50.yok.mtl.com> References: <45B6412A.6020207@voltaire.com> <45B77F81.7090808@voltaire.com> <1169735584.3731.180.camel@swlab50.yok.mtl.com> Message-ID: <45BC87F2.1040002@voltaire.com> Vladimir Sokolovsky wrote: > Hi Moni, > Please review the following patch to ib-bonding.spec: > > Use %{_prefix} in RPM spec file instead of hard-coded /usr/local/ofed. > > Signed-off-by: Vladimir Sokolovsky > --- > > diff --git a/ib-bonding.spec b/ib-bonding.spec > index db02fe8..77e51e0 100644 > --- a/ib-bonding.spec > +++ b/ib-bonding.spec > @@ -5,6 +5,8 @@ > > %define _build_name_fmt %%{ARCH}/%%{NAME}-%%{VERSION}-%%{RELEASE}-%%{DISTRIBUTION}-%%{ARCH}.rpm > > +%{!?_prefix: %define _prefix /usr/local/ofed} > + > Summary : ib_bonding patch and modules. 
> Name : %{name} > Version : %{version} > @@ -39,11 +41,11 @@ fi > %install > [ "${RPM_BUILD_ROOT}" != "/" -a -d ${RPM_BUILD_ROOT} ] && rm -rf ${RPM_BUILD_ROOT} > mkdir -p ${RPM_BUILD_ROOT}/lib/modules/%{kversion}/kernel/drivers/net/bonding/ > -mkdir -p ${RPM_BUILD_ROOT}/usr/local/ofed/bin > -mkdir -p ${RPM_BUILD_ROOT}/usr/local/ofed/docs > +mkdir -p ${RPM_BUILD_ROOT}%{_prefix}/bin > +mkdir -p ${RPM_BUILD_ROOT}%{_prefix}/docs > install -m 755 linux/drivers/net/bonding/bonding.ko ${RPM_BUILD_ROOT}/lib/modules/%{kversion}/kernel/drivers/net/bonding/ > -install -m 755 bin/bond-init.sh ${RPM_BUILD_ROOT}/usr/local/ofed/bin > -install -m 755 docs/ib-bonding.txt ${RPM_BUILD_ROOT}/usr/local/ofed/docs > +install -m 755 bin/bond-init.sh ${RPM_BUILD_ROOT}%{_prefix}/bin > +install -m 755 docs/ib-bonding.txt ${RPM_BUILD_ROOT}%{_prefix}/docs > > > > @@ -51,7 +53,7 @@ install -m 755 docs/ib-bonding.txt ${RP > if [ ! -z $STACK_PREFIX ] ; then > backup_dir=$STACK_PREFIX/backup > else > - backup_dir=/usr/local/ofed/backup > + backup_dir=%{_prefix}/backup > fi > > > @@ -69,7 +71,7 @@ STACK_PREFIX=$(test -x /etc/infiniband/i > if [ ! -z $STACK_PREFIX ] ; then > backup_dir=$STACK_PREFIX/backup > else > - backup_dir=/usr/local/ofed/backup > + backup_dir=%{_prefix}/backup > fi > cd $backup_dir > found_file=$(find -name bonding.ko) > @@ -81,6 +83,6 @@ fi > > %files > /lib/modules/%{kversion}/kernel/drivers/net/bonding/bonding.ko > -/usr/local/ofed/bin/bond-init.sh > -/usr/local/ofed/docs/ib-bonding.txt > +%{_prefix}/bin/bond-init.sh > +%{_prefix}/docs/ib-bonding.txt > > > Thanks. I applied that. From mst at mellanox.co.il Sun Jan 28 05:17:55 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Sun, 28 Jan 2007 15:17:55 +0200 Subject: [openib-general] oops at device removal Message-ID: <20070128131755.GS6799@mellanox.co.il> We have observed the following crash: Unable to handle kernel paging request at 0000000000100108 RIP: {:ib_core:ib_unregister_event_handler+31} PGD 117034067 PUD 102047067 PMD 0 Oops: 0002 [1] SMP last sysfs file: /devices/pci0000:00/0000:00:06.0/0000:08:00.0/subsystem_device CPU 2 Modules linked in: autofs4 ipv6 raw ib_sa ib_uverbs ib_umad nfs lockd nfs_acl sunrpc ib_mt hca ib_mad ib_core memtrack af_packet button battery ac apparmor aamatch_pcre loop dm_mod ehci_hcd uhci_hcd ide_cd cdrom i8xx_tco usbcore shpchp e1000 pci_hotplug floppy ext3 jbd e dd fan thermal processor sg mptspi mptscsih mptbase scsi_transport_spi piix sd_mod scsi_mo d ide_disk ide_core Pid: 9241, comm: modprobe Tainted: G U 2.6.16.21-0.8-smp #1 RIP: 0010:[] {:ib_core:ib_unregister_event_handler+31} RSP: 0000:ffff810100801e68 EFLAGS: 00010046 RAX: 0000000000200200 RBX: ffffffff883282e0 RCX: ffffffff883282f0 RDX: 0000000000100100 RSI: 0000000000000282 RDI: ffff810119836058 RBP: ffff8101119ce480 R08: ffff8101119ce608 R09: ffff810100801e40 R10: 0000000000000001 R11: ffff81010f493e38 R12: 0000000000000000 R13: ffffffff88324020 R14: ffff810119af9080 R15: 0000000000000292 FS: 00002b7fa92af6d0(0000) GS:ffff81011c06b340(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000100108 CR3: 0000000101a04000 CR4: 00000000000006e0 Process modprobe (pid: 9241, threadinfo ffff810100800000, task ffff81011b1b0080) Stack: ffff8101119ce490 ffffffff8831f036 ffff810119af9070 ffff810119836000 ffff810111ab2de0 ffffffff8823b615 ffff810110005820 ffffffff8033da60 ffffffff88324080 0000000000000080 Call Trace: {:ib_sa:mcast_remove_one+43} {:ib_core:ib_unregister_client+55} {:ib_sa:mcast_cleanup+16} {:ib_sa:ib_sa_cleanup +9} {sys_delete_module+540} {do_munmap+619} {__up_write+33} {system_call+126} Code: 48 89 42 08 48 89 10 48 c7 41 08 00 02 
20 00 48 8b 3b 48 c7 RIP {:ib_core:ib_unregister_event_handler+31} RSP CR2: 0000000000100108 Address ib_unregister_event_handler+31 is here: /tmp/openib_gen2/last_stable/gen2_devel_kernel/drivers/infiniband/core/device.c:450 list_del(): /usr/src/linux-2.6.16.21-0.8/include/linux/list.h:165 __list_del(): /usr/src/linux-2.6.16.21-0.8/include/linux/list.h:153 1cdb: 48 89 42 08 mov %rax,0x8(%rdx) -- MST From mst at mellanox.co.il Sun Jan 28 06:24:09 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 28 Jan 2007 16:24:09 +0200 Subject: [openib-general] oops at device removal In-Reply-To: <20070128131755.GS6799@mellanox.co.il> References: <20070128131755.GS6799@mellanox.co.il> Message-ID: <20070128142409.GA9832@mellanox.co.il> > We have observed the following crash: OK, I think I see a reason for this. I notice the following in code, file multicast.c, function mcast_add_one: ib_set_client_data(device, &mcast_client, dev); INIT_IB_EVENT_HANDLER(&event_handler, device, mcast_event_handler); ib_register_event_handler(&event_handler); So it seems like if I have 2 devices, &event_handler will be registered twice. This will trigger data corruption as same entry will be added to list twice. Or so it seems. Sean, what's the idea here? -- MST From mst at mellanox.co.il Sun Jan 28 06:35:44 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 28 Jan 2007 16:35:44 +0200 Subject: [openib-general] [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). In-Reply-To: <1169824119.2996.7.camel@stevo-desktop> References: <1169824119.2996.7.camel@stevo-desktop> Message-ID: <20070128143544.GC9832@mellanox.co.il> > Quoting Steve Wise : > Subject: Re: [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). > > On Fri, 2007-01-26 at 09:35 +0200, Michael S. Tsirkin wrote: > > > Quoting Steve Wise : > > > Subject: [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). > > > > > > > > > Backport Chelsio to rhel5 (2.6.18_FC6). > > > > BTW, steve, is FC4 supported? 
I don't see a backport ... > > > > > > I haven't done that one. I wasn't planning on it since its not one of > the OFED 1.2 supported distros. It's trivial to add it, but I don't > have the kernel src. Actually, what's the reason to keep these backports around still? Vlad, let's remove. -- MST From mst at mellanox.co.il Sun Jan 28 11:33:03 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Sun, 28 Jan 2007 21:33:03 +0200 Subject: [openib-general] oops at device removal In-Reply-To: <20070128142409.GA9832@mellanox.co.il> References: <20070128131755.GS6799@mellanox.co.il> <20070128142409.GA9832@mellanox.co.il> Message-ID: <20070128193303.GB4738@mellanox.co.il> > Quoting Michael S. Tsirkin : > Subject: Re: oops at device removal > > > We have observed the following crash: > > OK, I think I see a reason for this. > > I notice the following in code, file multicast.c, function mcast_add_one: > > ib_set_client_data(device, &mcast_client, dev); > > INIT_IB_EVENT_HANDLER(&event_handler, device, > mcast_event_handler); > ib_register_event_handler(&event_handler); > > So it seems like if I have 2 devices, &event_handler will be registered twice. > This will trigger data corruption as same entry will be added to list twice. > > Or so it seems. Sean, what's the idea here? It seems something like the following would fix it (untested). ------------------------------------------------ Make new multicast code not crash on platforms with multiple HCAs. Signed-off-by: Michael S. 
Tsirkin --- diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index fde977e..e51a078 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -51,7 +51,6 @@ static struct ib_client mcast_client = { }; static struct ib_sa_client sa_client; -static struct ib_event_handler event_handler; static struct workqueue_struct *mcast_wq; static union ib_gid mgid0; @@ -71,6 +70,7 @@ struct mcast_device { int start_port; int end_port; struct mcast_port port[0]; + struct ib_event_handler event_handler; }; enum mcast_state { @@ -793,8 +793,8 @@ static void mcast_add_one(struct ib_device *device) dev->device = device; ib_set_client_data(device, &mcast_client, dev); - INIT_IB_EVENT_HANDLER(&event_handler, device, mcast_event_handler); - ib_register_event_handler(&event_handler); + INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); + ib_register_event_handler(&dev->event_handler); } static void mcast_remove_one(struct ib_device *device) @@ -807,7 +807,7 @@ static void mcast_remove_one(struct ib_device *device) if (!dev) return; - ib_unregister_event_handler(&event_handler); + ib_unregister_event_handler(&dev->event_handler); flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { -- MST From akepner at sgi.com Sun Jan 28 15:17:14 2007 From: akepner at sgi.com (akepner at sgi.com) Date: Sun, 28 Jan 2007 15:17:14 -0800 (PST) Subject: [openib-general] [RFC/BUG] libibverbs: DMA vs. CQ race In-Reply-To: References: Message-ID: Here's a first cut at a patch. I'd appreciate comments. (The patch is against 1.1-rc7, and doesn't quite apply to 1.2.) The attached patches cause CQ allocation to (unconditionally) be done using dma_alloc_coherent(). The mmap() interface is (ab)used to allow access to user-level CQs. Is this going in the right direction? 
Should the allocations be done conditionally (i.e., should user-level CQs continue to be allocated with a plain old malloc(), or something similar, unless the platform requires otherwise)? This is the first time I've done anything beyond minor first aid to OFED code, so please let me know if I've broken anything, too. -- Arthur -------------- next part -------------- diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_allocator.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_allocator.c --- openib-1.1/drivers/infiniband/hw/mthca/mthca_allocator.c 2006-10-05 06:07:01.000000000 -0700 +++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_allocator.c 2007-01-28 14:16:41.859588954 -0800 @@ -194,7 +194,7 @@ void mthca_array_cleanup(struct mthca_ar */ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, - union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + union mthca_buf *buf, int *is_direct, u32 pdn, int hca_write, struct mthca_mr *mr) { int err = -ENOMEM; @@ -259,9 +259,7 @@ int mthca_buf_alloc(struct mthca_dev *de } } - err = mthca_mr_alloc_phys(dev, pd->pd_num, - dma_list, shift, npages, - 0, size, + err = mthca_mr_alloc_phys(dev, pdn, dma_list, shift, npages, 0, size, MTHCA_MPT_FLAG_LOCAL_READ | (hca_write ? 
MTHCA_MPT_FLAG_LOCAL_WRITE : 0), mr); diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_cq.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_cq.c --- openib-1.1/drivers/infiniband/hw/mthca/mthca_cq.c 2006-10-05 06:07:01.000000000 -0700 +++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_cq.c 2007-01-28 14:05:18.585901589 -0800 @@ -342,7 +342,8 @@ void mthca_cq_resize_copy_cqes(struct mt get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE); } -int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent) +int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, + int nent, u32 pdn) { int ret; int i; @@ -350,7 +351,7 @@ int mthca_alloc_cq_buf(struct mthca_dev ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE, MTHCA_MAX_DIRECT_CQ_SIZE, &buf->queue, &buf->is_direct, - &dev->driver_pd, 1, &buf->mr); + pdn, 1, &buf->mr); if (ret) return ret; @@ -813,11 +814,10 @@ int mthca_init_cq(struct mthca_dev *dev, cq_context = mailbox->buf; - if (cq->is_kernel) { - err = mthca_alloc_cq_buf(dev, &cq->buf, nent); - if (err) - goto err_out_mailbox; - } + err = mthca_alloc_cq_buf(dev, &cq->buf, nent, + ctx ? 
pdn : dev->driver_pd.pd_num); + if (err) + goto err_out_mailbox; spin_lock_init(&cq->lock); cq->refcount = 1; @@ -873,8 +873,7 @@ int mthca_init_cq(struct mthca_dev *dev, return 0; err_out_free_mr: - if (cq->is_kernel) - mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_out_mailbox: mthca_free_mailbox(dev, mailbox); @@ -950,12 +949,10 @@ void mthca_free_cq(struct mthca_dev *dev wait_event(cq->wait, !get_cq_refcount(dev, cq)); - if (cq->is_kernel) { - mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); - if (mthca_is_memfree(dev)) { - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); - } + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); } mthca_table_put(dev, dev->cq_table.table, cq->cqn); diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_dev.h openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_dev.h --- openib-1.1/drivers/infiniband/hw/mthca/mthca_dev.h 2006-10-05 06:07:01.000000000 -0700 +++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_dev.h 2007-01-28 13:58:46.069861105 -0800 @@ -120,6 +120,8 @@ enum { MTHCA_CMD_NUM_DBELL_DWORDS = 8 }; +#define MTHCA_MAGIC_CQ_OFFSET 0xcffe + struct mthca_cmd { struct pci_pool *pool; struct mutex hcr_mutex; @@ -420,7 +422,7 @@ void mthca_array_clear(struct mthca_arra int mthca_array_init(struct mthca_array *array, int nent); void mthca_array_cleanup(struct mthca_array *array, int nent); int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, - union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + union mthca_buf *buf, int *is_direct, u32 pdn, int hca_write, struct mthca_mr *mr); void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, int is_direct, struct mthca_mr *mr); @@ -499,7 +501,8 @@ void 
mthca_cq_event(struct mthca_dev *de void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, struct mthca_srq *srq); void mthca_cq_resize_copy_cqes(struct mthca_cq *cq); -int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent); +int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, + int nent, u32 pdn); void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_provider.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_provider.c --- openib-1.1/drivers/infiniband/hw/mthca/mthca_provider.c 2006-10-05 06:07:01.000000000 -0700 +++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_provider.c 2007-01-28 14:14:26.753475417 -0800 @@ -377,20 +377,74 @@ static int mthca_dealloc_ucontext(struct return 0; } -static int mthca_mmap_uar(struct ib_ucontext *context, +static int mthca_remap_buf(struct vm_area_struct *vma, union mthca_buf *buf, + int is_direct) +{ + unsigned long size = vma->vm_end - vma->vm_start; + unsigned long pfn; + + /* XXX sanity check size */ + if (is_direct) { + pfn = __pa(buf->direct.buf); + pfn >>= PAGE_SHIFT; + if (remap_pfn_range(vma, vma->vm_start, pfn, size, + vma->vm_page_prot)) + return -EAGAIN; + } else { + struct mthca_buf_list *page_list = buf->page_list; + int npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long addr = vma->vm_start; + int i; + + for (i = 0; i < npages; ++i) { + pfn = __pa(page_list[i].buf); + pfn >>= PAGE_SHIFT; + addr += i << PAGE_SHIFT; + + if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, + vma->vm_page_prot)) + return -EAGAIN; + } + } + return 0; +} + +static int mthca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { - if (vma->vm_end - vma->vm_start != PAGE_SIZE) - return -EINVAL; + unsigned long pgoff = vma->vm_pgoff & 0xffff; + int cqn; + struct mthca_dev *dev; + struct mthca_cq *cq; + struct 
mthca_cq_buf *buf; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + switch (pgoff) { + case MTHCA_MAGIC_CQ_OFFSET: + cqn = vma->vm_pgoff >> (32 - PAGE_SHIFT); + dev = to_mdev(context->device); + + spin_lock_irq(&dev->cq_table.lock); + cq = mthca_array_get(&dev->cq_table.cq, cqn & + (dev->limits.num_cqs - 1)); + spin_unlock(&dev->cq_table.lock); + if (!cq) + return -EINVAL; - if (io_remap_pfn_range(vma, vma->vm_start, - to_mucontext(context)->uar.pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; + buf = &cq->buf; - return 0; + return mthca_remap_buf(vma, &buf->queue, buf->is_direct); + default: /* uar */ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + return 0; + } } static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, @@ -696,7 +750,6 @@ static struct ib_cq *mthca_create_cq(str } if (context) { - cq->buf.mr.ibmr.lkey = ucmd.lkey; cq->set_ci_db_index = ucmd.set_db_index; cq->arm_db_index = ucmd.arm_db_index; } @@ -737,7 +790,7 @@ err_unmap_set: } static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq, - int entries) + int entries, u32 pdn) { int ret; @@ -763,7 +816,8 @@ unlock: if (ret) return ret; - ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); + ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries, pdn); + if (ret) { spin_lock_irq(&cq->lock); kfree(cq->resize_buf); @@ -786,9 +840,10 @@ static int mthca_resize_cq(struct ib_cq struct mthca_dev *dev = to_mdev(ibcq->device); struct mthca_cq *cq = to_mcq(ibcq); struct mthca_resize_cq ucmd; - u32 lkey; + struct mthca_cq_buf tbuf; + u32 lkey, pdn; u8 status; - int ret; + int ret, tcqe; if (entries < 1 || entries > dev->limits.max_cqes) return -EINVAL; @@ -801,18 +856,22 @@ static int mthca_resize_cq(struct ib_cq goto out; } - if 
(cq->is_kernel) { - ret = mthca_alloc_resize_buf(dev, cq, entries); - if (ret) - goto out; - lkey = cq->resize_buf->buf.mr.ibmr.lkey; - } else { + if (cq->is_kernel) + pdn = dev->driver_pd.pd_num; + else { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { ret = -EFAULT; goto out; } - lkey = ucmd.lkey; - } + pdn = ucmd.pdn; + } + + ret = mthca_alloc_resize_buf(dev, cq, entries, pdn); + + if (ret) + goto out; + + lkey = cq->resize_buf->buf.mr.ibmr.lkey; ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status); if (status) @@ -830,29 +889,24 @@ static int mthca_resize_cq(struct ib_cq goto out; } - if (cq->is_kernel) { - struct mthca_cq_buf tbuf; - int tcqe; - spin_lock_irq(&cq->lock); - if (cq->resize_buf->state == CQ_RESIZE_READY) { - mthca_cq_resize_copy_cqes(cq); - tbuf = cq->buf; - tcqe = cq->ibcq.cqe; - cq->buf = cq->resize_buf->buf; - cq->ibcq.cqe = cq->resize_buf->cqe; - } else { - tbuf = cq->resize_buf->buf; - tcqe = cq->resize_buf->cqe; - } + spin_lock_irq(&cq->lock); + if (cq->resize_buf->state == CQ_RESIZE_READY) { + mthca_cq_resize_copy_cqes(cq); + tbuf = cq->buf; + tcqe = cq->ibcq.cqe; + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + } else { + tbuf = cq->resize_buf->buf; + tcqe = cq->resize_buf->cqe; + } - kfree(cq->resize_buf); - cq->resize_buf = NULL; - spin_unlock_irq(&cq->lock); + kfree(cq->resize_buf); + cq->resize_buf = NULL; + spin_unlock_irq(&cq->lock); - mthca_free_cq_buf(dev, &tbuf, tcqe); - } else - ibcq->cqe = entries - 1; + mthca_free_cq_buf(dev, &tbuf, tcqe); out: mutex_unlock(&cq->mutex); @@ -1300,7 +1354,7 @@ int mthca_register_device(struct mthca_d dev->ib_dev.query_gid = mthca_query_gid; dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; - dev->ib_dev.mmap = mthca_mmap_uar; + dev->ib_dev.mmap = mthca_mmap; dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; diff -rup 
openib-1.1/drivers/infiniband/hw/mthca/mthca_qp.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_qp.c --- openib-1.1/drivers/infiniband/hw/mthca/mthca_qp.c 2006-10-05 06:07:01.000000000 -0700 +++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_qp.c 2007-01-28 13:58:46.082556115 -0800 @@ -1008,7 +1008,8 @@ static int mthca_alloc_wqe_buf(struct mt goto err_out; err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE, - &qp->queue, &qp->is_direct, pd, 0, &qp->mr); + &qp->queue, &qp->is_direct, pd->pd_num, 0, + &qp->mr); if (err) goto err_out; diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_srq.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_srq.c --- openib-1.1/drivers/infiniband/hw/mthca/mthca_srq.c 2006-10-05 06:07:01.000000000 -0700 +++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_srq.c 2007-01-28 13:58:46.086462272 -0800 @@ -155,7 +155,8 @@ static int mthca_alloc_srq_buf(struct mt err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift, MTHCA_MAX_DIRECT_SRQ_SIZE, - &srq->queue, &srq->is_direct, pd, 1, &srq->mr); + &srq->queue, &srq->is_direct, pd->pd_num, 1, + &srq->mr); if (err) { kfree(srq->wrid); return err; diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_user.h openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_user.h --- openib-1.1/drivers/infiniband/hw/mthca/mthca_user.h 2006-10-05 06:07:01.000000000 -0700 +++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_user.h 2007-01-28 13:58:46.095251125 -0800 @@ -77,7 +77,7 @@ struct mthca_create_cq_resp { struct mthca_resize_cq { __u32 lkey; - __u32 reserved; + __u32 pdn; }; struct mthca_create_srq { -------------- next part -------------- diff -rup openib-1.1/src/userspace/libmthca/src/mthca-abi.h openib-1.1.cq/src/userspace/libmthca/src/mthca-abi.h --- openib-1.1/src/userspace/libmthca/src/mthca-abi.h 2006-10-05 06:07:02.000000000 -0700 +++ openib-1.1.cq/src/userspace/libmthca/src/mthca-abi.h 2007-01-28 13:58:46.056189555 -0800 @@ -69,7 +69,7 @@ struct mthca_create_cq_resp { struct mthca_resize_cq 
{ struct ibv_resize_cq ibv_cmd; __u32 lkey; - __u32 reserved; + __u32 pdn; }; struct mthca_create_srq { diff -rup openib-1.1/src/userspace/libmthca/src/mthca.h openib-1.1.cq/src/userspace/libmthca/src/mthca.h --- openib-1.1/src/userspace/libmthca/src/mthca.h 2006-10-05 06:07:02.000000000 -0700 +++ openib-1.1.cq/src/userspace/libmthca/src/mthca.h 2007-01-28 13:58:46.053259937 -0800 @@ -88,6 +88,8 @@ enum { MTHCA_OPCODE_INVALID = 0xff }; +#define MTHCA_MAGIC_CQ_OFFSET 0xcffe + struct mthca_ah_page; struct mthca_device { diff -rup openib-1.1/src/userspace/libmthca/src/verbs.c openib-1.1.cq/src/userspace/libmthca/src/verbs.c --- openib-1.1/src/userspace/libmthca/src/verbs.c 2006-10-05 06:07:02.000000000 -0700 +++ openib-1.1.cq/src/userspace/libmthca/src/verbs.c 2007-01-28 14:02:07.830680091 -0800 @@ -43,6 +43,7 @@ #include #include #include +#include #include "mthca.h" #include "mthca-abi.h" @@ -172,7 +173,9 @@ struct ibv_cq *mthca_create_cq(struct ib struct mthca_create_cq cmd; struct mthca_create_cq_resp resp; struct mthca_cq *cq; + int page_size, npages; int ret; + off_t offset; /* Sanity check CQ size before proceeding */ if (cqe > 131072) @@ -188,16 +191,6 @@ struct ibv_cq *mthca_create_cq(struct ib goto err; cqe = align_cq_size(cqe); - if (mthca_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe)) - goto err; - - cq->mr = __mthca_reg_mr(to_mctx(context)->pd, cq->buf.buf, - cqe * MTHCA_CQ_ENTRY_SIZE, - 0, IBV_ACCESS_LOCAL_WRITE); - if (!cq->mr) - goto err_buf; - - cq->mr->context = context; if (mthca_is_memfree(context)) { cq->arm_sn = 1; @@ -205,7 +198,7 @@ struct ibv_cq *mthca_create_cq(struct ib MTHCA_DB_TYPE_CQ_SET_CI, &cq->set_ci_db); if (cq->set_ci_db_index < 0) - goto err_unreg; + goto err; cq->arm_db_index = mthca_alloc_db(to_mctx(context)->db_tab, MTHCA_DB_TYPE_CQ_ARM, @@ -219,7 +212,6 @@ struct ibv_cq *mthca_create_cq(struct ib cmd.set_db_index = cq->set_ci_db_index; } - cmd.lkey = cq->mr->lkey; cmd.pdn = to_mpd(to_mctx(context)->pd)->pdn; ret = 
ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector, &cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd, @@ -229,6 +221,22 @@ struct ibv_cq *mthca_create_cq(struct ib cq->cqn = resp.cqn; + page_size = to_mdev(context->device)->page_size; + npages = (cqe * MTHCA_CQ_ENTRY_SIZE + (page_size - 1))/page_size; + + /* offset encodes CQ and cqn; lower PAGE_SHIFT bits MBZ */ + offset = cq->cqn; + offset <<= 32; + offset += MTHCA_MAGIC_CQ_OFFSET * page_size; + + cq->buf.buf = mmap(NULL, npages * page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, context->cmd_fd, offset); + + if (cq->buf.buf == MAP_FAILED) + goto err_cmd_create_cq; + + cq->buf.length = npages * page_size; + if (mthca_is_memfree(context)) { mthca_set_db_qn(cq->set_ci_db, MTHCA_DB_TYPE_CQ_SET_CI, cq->cqn); mthca_set_db_qn(cq->arm_db, MTHCA_DB_TYPE_CQ_ARM, cq->cqn); @@ -236,6 +244,9 @@ struct ibv_cq *mthca_create_cq(struct ib return &cq->ibv_cq; +err_cmd_create_cq: + ibv_cmd_destroy_cq(&cq->ibv_cq); + err_arm_db: if (mthca_is_memfree(context)) mthca_free_db(to_mctx(context)->db_tab, MTHCA_DB_TYPE_CQ_ARM, @@ -246,12 +257,6 @@ err_set_db: mthca_free_db(to_mctx(context)->db_tab, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); -err_unreg: - mthca_dereg_mr(cq->mr); - -err_buf: - mthca_free_buf(&cq->buf); - err: free(cq); @@ -260,12 +265,12 @@ err: int mthca_resize_cq(struct ibv_cq *ibcq, int cqe) { + struct ibv_context *context = ibcq->context; struct mthca_cq *cq = to_mcq(ibcq); struct mthca_resize_cq cmd; - struct ibv_mr *mr; - struct mthca_buf buf; - int old_cqe; - int ret; + int page_size, npages; + off_t offset; + int ret; /* Sanity check CQ size before proceeding */ if (cqe > 131072) @@ -279,38 +284,34 @@ int mthca_resize_cq(struct ibv_cq *ibcq, goto out; } - ret = mthca_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe); - if (ret) - goto out; - - mr = __mthca_reg_mr(to_mctx(ibcq->context)->pd, buf.buf, - cqe * MTHCA_CQ_ENTRY_SIZE, - 0, IBV_ACCESS_LOCAL_WRITE); - if (!mr) { - mthca_free_buf(&buf); - ret = ENOMEM; + 
if (munmap(cq->buf.buf, cq->buf.length) != 0) { + ret = errno; goto out; } - mr->context = ibcq->context; - - old_cqe = ibcq->cqe; + cmd.pdn = to_mpd(to_mctx(context)->pd)->pdn; - cmd.lkey = mr->lkey; ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd); - if (ret) { - mthca_dereg_mr(mr); - mthca_free_buf(&buf); + if (ret) goto out; - } - mthca_cq_resize_copy_cqes(cq, buf.buf, old_cqe); + page_size = to_mdev(context->device)->page_size; + npages = (cqe * MTHCA_CQ_ENTRY_SIZE + (page_size - 1))/page_size; + + /* offset encodes CQ and cqn; lower PAGE_SHIFT bits MBZ */ + offset = cq->cqn; + offset <<= 32; + offset += MTHCA_MAGIC_CQ_OFFSET * page_size; - mthca_dereg_mr(cq->mr); - mthca_free_buf(&cq->buf); + cq->buf.buf = mmap(NULL, npages * page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, context->cmd_fd, offset); + + if (cq->buf.buf == MAP_FAILED) { + ret = errno; + goto out; + } - cq->buf = buf; - cq->mr = mr; + cq->buf.length = npages * page_size; out: pthread_spin_unlock(&cq->lock); @@ -332,8 +333,6 @@ int mthca_destroy_cq(struct ibv_cq *cq) to_mcq(cq)->arm_db_index); } - mthca_dereg_mr(to_mcq(cq)->mr); - mthca_free_buf(&to_mcq(cq)->buf); free(to_mcq(cq)); return 0; From sashak at voltaire.com Sun Jan 28 16:50:03 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 29 Jan 2007 02:50:03 +0200 Subject: [openib-general] [PATCH TRIVIAL] opensm: remove unused p_subn->node_lid_tbl field Message-ID: <20070129005003.GD10171@sashak.voltaire.com> This removes unused node_lid_tbl field in osm_subn_t structure. 
Signed-off-by: Sasha Khapyorsky --- osm/include/opensm/osm_subnet.h | 1 - osm/opensm/osm_subnet.c | 15 --------------- 2 files changed, 0 insertions(+), 16 deletions(-) diff --git a/osm/include/opensm/osm_subnet.h b/osm/include/opensm/osm_subnet.h index c256621..a6ffd45 100644 --- a/osm/include/opensm/osm_subnet.h +++ b/osm/include/opensm/osm_subnet.h @@ -512,7 +512,6 @@ typedef struct _osm_subn cl_list_t light_sweep_physp_list; cl_qlist_t sa_sr_list; cl_qlist_t sa_infr_list; - cl_ptr_vector_t node_lid_tbl; cl_ptr_vector_t port_lid_tbl; ib_net16_t master_sm_base_lid; ib_net16_t sm_base_lid; diff --git a/osm/opensm/osm_subnet.c b/osm/opensm/osm_subnet.c index 221a367..f2e909b 100644 --- a/osm/opensm/osm_subnet.c +++ b/osm/opensm/osm_subnet.c @@ -73,7 +73,6 @@ osm_subn_construct( IN osm_subn_t* const p_subn ) { memset( p_subn, 0, sizeof(*p_subn) ); - cl_ptr_vector_construct( &p_subn->node_lid_tbl ); cl_ptr_vector_construct( &p_subn->port_lid_tbl ); cl_qmap_init( &p_subn->sw_guid_tbl ); cl_qmap_init( &p_subn->node_guid_tbl ); @@ -113,8 +112,6 @@ osm_subn_destroy( osm_node_delete( &p_node ); } - cl_ptr_vector_destroy( &p_subn->node_lid_tbl ); - p_next_port = (osm_port_t*)cl_qmap_head( &p_subn->port_guid_tbl ); while( p_next_port != (osm_port_t*)cl_qmap_end( &p_subn->port_guid_tbl ) ) { @@ -186,23 +183,12 @@ osm_subn_init( p_subn->p_osm = p_osm; - status = cl_ptr_vector_init( &p_subn->node_lid_tbl, - OSM_SUBNET_VECTOR_MIN_SIZE, - OSM_SUBNET_VECTOR_GROW_SIZE ); - if( status != CL_SUCCESS ) - return( status ); - status = cl_ptr_vector_init( &p_subn->port_lid_tbl, OSM_SUBNET_VECTOR_MIN_SIZE, OSM_SUBNET_VECTOR_GROW_SIZE ); if( status != CL_SUCCESS ) return( status ); - status = cl_ptr_vector_set_capacity( &p_subn->node_lid_tbl, - OSM_SUBNET_VECTOR_CAPACITY ); - if( status != CL_SUCCESS ) - return( status ); - status = cl_ptr_vector_set_capacity( &p_subn->port_lid_tbl, OSM_SUBNET_VECTOR_CAPACITY ); if( status != CL_SUCCESS ) @@ -212,7 +198,6 @@ osm_subn_init( LID zero is not 
valid. NULL out this entry for the convenience of other code. */ - cl_ptr_vector_set( &p_subn->node_lid_tbl, 0, NULL ); cl_ptr_vector_set( &p_subn->port_lid_tbl, 0, NULL ); p_subn->opt = *p_opt; -- 1.5.0.rc2.g11a3 From rdreier at cisco.com Sun Jan 28 20:45:12 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 28 Jan 2007 20:45:12 -0800 Subject: [openib-general] patch for -stable In-Reply-To: <20070128101329.GJ6799@mellanox.co.il> (Michael S. Tsirkin's message of "Sun, 28 Jan 2007 12:13:30 +0200") References: <20070128101329.GJ6799@mellanox.co.il> Message-ID: > Roland, went over the logs, and I think the following is severe enough to go into -stable: > commit bf628dc22a09ed2022abb32c76011ae5f99ad6b0 > Author: Roland Dreier > Date: Fri Dec 15 14:01:49 2006 -0800 > > IB/srp: Fix FMR mapping for 32-bit kernels and addresses above 4G I sent this back in December (although I see I didn't CC anyone other than stable@), and looking at the changelog I see it's already in 2.6.19.2. - R. From HNGUYEN at de.ibm.com Sun Jan 28 20:58:51 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Sun, 28 Jan 2007 23:58:51 -0500 Subject: [openib-general] [Patch ofed1.2 0/3]libehca: cleanup and adjust mmap In-Reply-To: <20070127174905.GT10812@mellanox.co.il> Message-ID: > > This 3 patches changes the libehca coding style to kernel coding and kernel > > tracing style.The userspace mmap code needs to be adjusted to the changed > > userspace mapping introduced in kernel patch > > [PATCH/RFC 2.6.21 0/5] ehca: remove use of do_mmap() from kernel space. > Note that ofed 1.2 has not branched yet, so any changes just need > to be merged up to maintainer's tree. applied to ~hnguyen/libehca Regards Nam From banking at regions.com Sun Jan 28 18:17:36 2007 From: banking at regions.com (Regions NET Bank) Date: Sun, 28 Jan 2007 18:17:36 -0800 Subject: [openib-general] You have 1 new secure message ! Message-ID: <20070129021733.E2A7780EC4@towns-a.com> An HTML attachment was scrubbed... 
URL: From eitan at sw053.yok.mtl.com Sun Jan 28 21:20:19 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Mon, 29 Jan 2007 07:20:19 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-29:normal completion Message-ID: <200701290520.l0T5KJ40031785@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Sat_Jan_27_11:46:49_2007 85cb73 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=409 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo 9 LidMgr IS3-128.topo Failures: 1 LidMgr IS3-128.topo From fixclean at 017.net.il Sun Jan 28 21:59:57 2007 From: fixclean at 017.net.il (=?windows-1255?Q?=F9=E9=F8=E5=FA=E9_=F0=E9=F7=E9=E5=EF?=) Date: Mon, 29 Jan 2007 07:59:57 +0200 Subject: [openib-general] =?windows-1255?b?8ezp5+QuLi4g4Pog7uf0+fog8uHl?= =?windows-1255?b?4+Qg6+7w5Oz6IOHr6fjkPw==?= Message-ID: An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: image001.jpg Type: image/jpeg Size: 8719 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: image002.jpg Type: image/jpeg Size: 6982 bytes Desc: not available URL: From vlad at lists.openfabrics.org Mon Jan 29 02:21:40 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Mon, 29 Jan 2007 02:21:40 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070129-0200 daily build status Message-ID: <20070129102141.0863AE607F3@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.16 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.16 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.12 Passed on ia64 with 
linux-2.6.13 Passed on ia64 with linux-2.6.15 Failed: From halr at voltaire.com Mon Jan 29 04:01:29 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 29 Jan 2007 07:01:29 -0500 Subject: [openib-general] [PATCH TRIVIAL] opensm: remove unused p_subn->node_lid_tbl field In-Reply-To: <20070129005003.GD10171@sashak.voltaire.com> References: <20070129005003.GD10171@sashak.voltaire.com> Message-ID: <1170072087.4555.241511.camel@hal.voltaire.com> On Sun, 2007-01-28 at 19:50, Sasha Khapyorsky wrote: > This removes unused node_lid_tbl field in osm_subn_t structure. > > Signed-off-by: Sasha Khapyorsky Thanks. Applied. -- Hal From halr at voltaire.com Mon Jan 29 04:12:39 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 29 Jan 2007 07:12:39 -0500 Subject: [openib-general] [RFC] Performance Manager In-Reply-To: <000601c7419f$d4470c60$ff0da8c0@amr.corp.intel.com> References: <000601c7419f$d4470c60$ff0da8c0@amr.corp.intel.com> Message-ID: <1170072757.4555.242192.camel@hal.voltaire.com> On Fri, 2007-01-26 at 18:15, Sean Hefty wrote: > >There are numerous PerfManager models which can be supported: > >1. Integrated as thread(s) with OpenSM (run only when SM is master) > >2. Standby SM > >3. Standalone PerfManager (not running with master or standby SM) > >4. Distributed PerfManager (most scalable approach) > > IMO, we will eventually need distributed managers, Yes. > so I would go with the last approach. Initially ? It is also an implementation phasing issue as stated. The core support is needed in both so there is very little unneeded work to get to the first phase in terms of a distributed approach. We would certainly grow/evolve towards this after that initial implementation. > But, along those lines, if we had a distributed SM, There has been some early discussion on a distributed SA. Distributing SM is much harder IMO. > would you still want to separate the performance manager from the SM? > It seems more flexible, but with additional load on the fabric. 
Ideally, it would be a deployment choice and the implementation would support both modes. The problem is that we've already seen that the SM node has enough to do at times in a large cluster and PerfManagement in addition with its constant demands is likely not a good addition in terms of this. The additional fabric load is twofold: first, the reports for nodes coming and going, and second, any intermanager communication. I don't think the first is a significant load and I'm not yet sure about the second. In any case, the second load can be constrained to the portion of the subnet where the management nodes are in those cases where this is a concern. > >In terms of inter manager communication, there seem to be several > >choices: > >1. Use vendor specific MADs (which can be RMPP'd) and build on top of > >this > >2. Use IPoIB which is much more powerful as sockets can then be utilized. > > You could also use RC QP communication up/down the hierarchy. Wouldn't that have the same issues as approach 1 (as compared with approach 2) ? -- Hal > - Sean From tziporet at mellanox.co.il Mon Jan 29 04:54:44 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 29 Jan 2007 14:54:44 +0200 Subject: [openib-general] [openfabrics-ewg] modules compilation status for OFED 1.2 In-Reply-To: <1169849692.882.70.camel@sarium.pathscale.com> References: <45B77F8C.9060209@mellanox.co.il> <1169849692.882.70.camel@sarium.pathscale.com> Message-ID: <45BDEE94.8030108@mellanox.co.il> Betsy Zeller wrote: > Bryan is working on recreating the backport patches for the ipath > driver. It appears that all of the InfiniPath backport patches were > removed from the OFED source tree late last year. > > By early next week, we'll have a better sense of whether any of these > patches will need to come in after Jan 31. > > Bryan - any news? 
Tziporet From tziporet at mellanox.co.il Mon Jan 29 05:04:32 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 29 Jan 2007 15:04:32 +0200 Subject: [openib-general] [PATCH] ofed_1_2 Backport Chelsio to rhel5 (2.6.18_FC6). In-Reply-To: <20070128143544.GC9832@mellanox.co.il> References: <1169824119.2996.7.camel@stevo-desktop> <20070128143544.GC9832@mellanox.co.il> Message-ID: <45BDF0E0.60100@mellanox.co.il> Michael S. Tsirkin wrote: >> > > Actually, what's the reason to keep these backports around still? > Vlad, let's remove. > > We will keep only backport for FC6 and remove FC4 Tziporet From cbookshop at leapmail.net Mon Jan 29 05:03:18 2007 From: cbookshop at leapmail.net (Fred) Date: Mon, 29 Jan 2007 08:03:18 -0500 Subject: [openib-general] Government funding available Message-ID: <20070129130242.1429E3B0008@sentry-two.sandia.gov> Press Release The American Grants and Loans Directory is now available. This publication contains more than 1500 financial programs, subsidies, scholarships, grants and loans offered by the US federal government. It also includes over 700 financing programs available by foundations and associations across the United States. Businesses, students, individuals, municipalities, government departments, institutions, foundations and associations will find a wealth of information that will help them with their new ventures or existing projects. What you get: -Description of Grant available -Url to government website -Full mailing address -Phone and fax number The Canadian Subsidy Directory is also available for Canada. 
CD version: $69.95 Printed version: $149.95 To order please call: 819-322-7533 If you do not wish to receive communication from us in the future please write "agl" in the subject line to: rem1 at email.com **ADVERTISEMENT** Canada Books 833 Boise de la Riviere Prevost, Qc Canada J0R 1T0 From Tim.Snider at lsi.com Mon Jan 29 05:49:10 2007 From: Tim.Snider at lsi.com (Snider, Tim) Date: Mon, 29 Jan 2007 06:49:10 -0700 Subject: [openib-general] OFED 1.0 Install problems - Kernel Compile error - RH 2.6.9-42EL Message-ID: <18A61515E49B764AB09447A336E51F56905FA8@NAMAIL2.ad.lsil.com> Trying to install OFED 1.0 on RH EL 2.6.9-42. Recompile of kernel gives redefinition of gfp_t. Can someone point me to a fix? I suspect there's a kernel setting I need to tweak. [root at FedoraCore121 ~]# uname -a Linux FedoraCore121 2.6.9-42.EL_lustre.1.5.95smp #1 SMP Thu Sep 28 06:36:13 MDT 2006 i686 i686 i386 GNU/Linux [root at FedoraCore121 ~]# vim /tmp/OFED.1479.log gcc -Wp,-MD,/var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core /.index.o.d -nostdinc -iwithprefix include -D__KERNEL__ -I/var/tmp/OFED/tmp/openib/openib/include -I/var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include -Iinclude -Iinclude2 -I/usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include -I/var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -fomit-frame-pointer -Wdeclaration-after-statement -pipe -msoft-float -m32 -fno-builtin-sprintf -fno-builtin-log2 -fno-builtin-puts -mpreferred-stack-boundary=2 -fno-unit-at-a-time -march=i686 -mregparm=3 -I/usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/asm-i386/mach-generic -Iinclude/asm-i386/mach-generic -I/usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/asm-i386/mach-default -Iinclude/asm-i386/mach-default -I/var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include -I/var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/ulp/ipoib 
-I/var/tmp/OFED/tmp/openib/openib/drivers/infiniband/debug -D__nocast= -DMODULE -DKBUILD_BASENAME=index -DKBUILD_MODNAME=findex -c -o /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core/.tmp_in dex.o /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core/index.c In file included from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/slab.h:15, from /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include/linu x/slab.h:4, from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/percpu.h:4, from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/sched.h:31, from /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include/linu x/sched.h:4, from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/module.h:10, from /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core/index.c :34: /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/gfp.h:133: error: redefinition of typedef 'gfp_t' /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include/linu x/types.h:7: error: previous declaration of 'gfp_t' was here In file included from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/percpu.h:4, from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/sched.h:31, from /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include/linu x/sched.h:4, from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/module.h:10, from /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core/index.c :34: /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include/linu x/slab.h:8: error: conflicting types for 'kzalloc' /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/slab.h:101: error: previous declaration of 'kzalloc' was here In file included from /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/module.h:10, from /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core/index.c :34: /var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/include/linu 
x/sched.h:8: error: static declaration of 'wait_for_completion_timeout' follows non-static declaration /usr/src/linux-2.6.9-42.EL_lustre.1.5.95/include/linux/completion.h:32: error: previous declaration of 'wait_for_completion_timeout' was here make[5]: *** [/var/tmp/OFED/tmp/openib/openib/src/linux-kernel/infiniband/core/index. o] Error 1 Timothy Snider Storage Architect Strategic Planning, Technology and Architecture LSI Logic Corporation 3718 North Rock Road Wichita, KS 67226 (316) 636-8736 tim.snider at lsi.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From tziporet at mellanox.co.il Mon Jan 29 06:05:05 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 29 Jan 2007 16:05:05 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today Message-ID: <45BDFF11.9080901@mellanox.co.il> Hi, This is the proposal for OFED 1.2 branching and tagging: *Sources developed in OFA:* 1. Each git owner will open a branch with the name ofed_1_2. This branch should be opened on 31-Jan (based on code readiness we will review today). 2. Vlad will open a new /pub/ofed_1_2. 3. All ofed_1_2 branches will be cloned to this directory. (Note: libibverbs and libmthca will be cloned from kernel.org for Roland's trees.) 4. Any change that should be included in the next OFED package will first be checked in to the maintainer ofed_1_2 branch. A mail should be sent to Vlad (and cc the list) to pull this change. 5. A tag will be set before any package is built. Tag name convention: ofed_1_2_<version> where <version> will be the suffix of OFED package (e.g. 1.2-alpha1) 6. OFED package will be built based on this tag. 7. There will be a build script (as in OFED 1.1) to enable each owner to build the package for testing. *MPI packages:* 1. MPI packages are provided as source RPMs 2. Each MPI owner will have an account on the OFA server and will open a directory named ofed_1_2 3. 
The SRPM package will be placed in this directory, with version indication in the filename (e.g. ompi-1.2.1-xxx) 4. There will be a file named latest.txt that will contain the package that should be taken in the OFED package Any other external packages that are supplied as SRPMs (e.g. bonding) and not source will use the same method as above. Tziporet -------------- next part -------------- An HTML attachment was scrubbed... URL: From sashak at voltaire.com Mon Jan 29 06:24:31 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 29 Jan 2007 16:24:31 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <45BDFF11.9080901@mellanox.co.il> References: <45BDFF11.9080901@mellanox.co.il> Message-ID: <20070129142431.GC20659@sashak.voltaire.com> Hi Tziporet, On 16:05 Mon 29 Jan , Tziporet Koren wrote: > > This is the proposal for OFED 1.2 branching and tagging: > > *Sources developed in OFA:* > 1. Each git owner will open a branch with the name ofed_1_2. This branch > should be opened on 31-Jan (based on code readiness we will review today). > 2. Vlad will open a new /pub/ofed_1_2. > 3. All ofed_1_2 branches will be cloned to this directory. (Note: > libibverbs and libmthca will be cloned from kernel.org for Roland's trees.) > 4. Any change that should be included in the next OFED package will be > first check-in to the maintainer ofed_1_2 branch. > A mail should be sent to Vlad (and cc the list) to pull this change. This looks pretty similar to ofed-1.1/SVN release procedure. What about discussed idea of "per package release by maintainers"? Sasha > 5. A tag will be set before any package is build. Tag name convention: > ofed_1_2_ where version will be the suffix of OFED package > (e.g. 1.2-alpha1) > 6. OFED package will be built based on this tag. > 7. There will be a build script (as in OFED 1.1) to enable each owner to > build the package for testing. > > *MPI packages: > *1. MPI packages are provided as source RPMs > 2. 
Each MPI owner will have an account on the OFA server and will open a > directory named ofed_1_2 > 3. The SRPM package will be placed in this directory, with version > indication in the filename (e.g.ompi-1.2.1-xxx) > 4. There will be a file named latest.txt that will contain the package > that should be taken in the OFED package > > Any other external packages that supplied as SRPs (e.g bonding) and not > source will use the same method as above. > > Tziporet > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Mon Jan 29 06:40:49 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 29 Jan 2007 08:40:49 -0600 Subject: [openib-general] [PATCH] ofed_1_2 Copy in the library cq rptr address only for non T3A devices. Message-ID: <1170081649.24133.3.camel@stevo-desktop> This fixes a bug with the rev 0 Chelsio T3 hardware... It needs to be pulled into ofed_1_2. Roland, it will need to be merged in with the T3 rdma driver. I'm maintaining this in my git tree, so I can resend it to you once you finish reviewing/merging the T3 driver. Thanks, Steve. ------- Don't copy in the library cq rptr address for T3A devices. T3A doesn't support kernel bypass, so we must _not_ save off the lib's cq rptr address for these devices. Otherwise the re-arm logic will try and use the library rptr value for T3A re-arm. 
Signed-off-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 28be418..dbb3f71 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -151,7 +151,7 @@ static struct ib_cq *iwch_create_cq(stru if (!chp) return ERR_PTR(-ENOMEM); - if (context) { + if (context && !t3a_device(rhp)) { if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { kfree(chp); return ERR_PTR(-EFAULT); From swise at opengridcomputing.com Mon Jan 29 06:53:18 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 29 Jan 2007 08:53:18 -0600 Subject: [openib-general] iWARP/Chelsio OFED_1_2 status In-Reply-To: <20070129142431.GC20659@sashak.voltaire.com> References: <45BDFF11.9080901@mellanox.co.il> <20070129142431.GC20659@sashak.voltaire.com> Message-ID: <1170082398.24133.13.camel@stevo-desktop> Quick status update on iWARP and Chelsio Support: - Most of the chelsio driver backports integrated into the ofed_1_2. Awaiting rhel5 merge from vlad - compile, load, and successful iwarp rping test on rhel4u4, rhel5 beta 2, and sles 10 - neighbour change notifications needed for rdma_cm users has been implemented and posted for merging into ofed_1_2. Awaiting merge from vlad - sles9sp3 bug is prohibiting rdma_cm functionality. Need a fix for this (bug 325) - ammasso driver/lib: I could add support this in but it will be late. Does the group see value in adding ammasso? Steve. 
From tziporet at mellanox.co.il Mon Jan 29 07:11:36 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 29 Jan 2007 17:11:36 +0200 Subject: [openib-general] [openfabrics-ewg] OFED 1.0 Install problems - Kernel Compile error - RH 2.6.9-42EL In-Reply-To: <18A61515E49B764AB09447A336E51F56905FA8@NAMAIL2.ad.lsil.com> References: <18A61515E49B764AB09447A336E51F56905FA8@NAMAIL2.ad.lsil.com> Message-ID: <45BE0EA8.7050200@mellanox.co.il> Snider, Tim wrote: > Trying to install OFED 1.0 on RH EL 2.6.9-42. Recompile of kernel > gives redefinition of gfp_t. Can someone point me to a fix? I suspect > there's a kernel setting I need to tweak. I think we never tested OFED 1.0 on this kernel. Can you try OFED 1.1? Tziporet From bugzilla-daemon at lists.openfabrics.org Mon Jan 29 07:59:14 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Mon, 29 Jan 2007 07:59:14 -0800 (PST) Subject: [openib-general] [Bug 325] RDMA_CM and address translation broken on sles9sp3 In-Reply-To: Message-ID: <20070129155914.5575EE607F1@openfabrics.org> https://bugs.openfabrics.org/show_bug.cgi?id=325 swise at opengridcomputing.com changed: What |Removed |Added ---------------------------------------------------------------------------- AssignedTo|bugzilla at openib.org |swise at opengridcomputing.com ------- Comment #1 from swise at opengridcomputing.com 2007-01-29 07:59 ------- This is an iwarp-only issue, I think. And I believe I have a solution. -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. You are the assignee for the bug, or are watching the assignee. From mst at mellanox.co.il Mon Jan 29 08:20:15 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Mon, 29 Jan 2007 18:20:15 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <20070129142431.GC20659@sashak.voltaire.com> References: <45BDFF11.9080901@mellanox.co.il> <20070129142431.GC20659@sashak.voltaire.com> Message-ID: <20070129162015.GD20398@mellanox.co.il> > Quoting Sasha Khapyorsky : > Subject: Re: [openib-general] OFED 1.2 release - to be reviewed in the meeting today > > Hi Tziporet, > > On 16:05 Mon 29 Jan , Tziporet Koren wrote: > > > > This is the proposal for OFED 1.2 branching and tagging: > > > > *Sources developed in OFA:* > > 1. Each git owner will open a branch with the name ofed_1_2. This branch > > should be opened on 31-Jan (based on code readiness we will review today). > > 2. Vlad will open a new /pub/ofed_1_2. > > 3. All ofed_1_2 branches will be cloned to this directory. (Note: > > libibverbs and libmthca will be cloned from kernel.org for Roland's trees.) > > 4. Any change that should be included in the next OFED package will be > > first check-in to the maintainer ofed_1_2 branch. > > A mail should be sent to Vlad (and cc the list) to pull this change. > > This looks pretty similar to ofed-1.1/SVN release procedure. What about > discussed idea of "per package release by maintainers"? I guess there's no conflict: maintainers can make ofed_1_2 point to their release. But I agree we need maintainers' buy-in and commitment to schedule that matches OFED release schedule. -- MST From HNGUYEN at de.ibm.com Mon Jan 29 08:34:51 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Mon, 29 Jan 2007 11:34:51 -0500 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <20070129162015.GD20398@mellanox.co.il> Message-ID: Hi, > > This looks pretty similar to ofed-1.1/SVN release procedure. What about > > discussed idea of "per package release by maintainers"? > I guess there's no conflict: maintainers can make ofed_1_2 point to > their release. 
> But I agree we need maintainers' buy-in and commitment to schedule > that matches > OFED release schedule. Can you explain me what "per package release by maintainers" means for me? Thx Nam From vlad at mellanox.co.il Mon Jan 29 08:39:45 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Mon, 29 Jan 2007 18:39:45 +0200 Subject: [openib-general] [PATCH] ofed_1_2 Copy in the library cq rptr address only for non T3A devices. In-Reply-To: <1170081649.24133.3.camel@stevo-desktop> References: <1170081649.24133.3.camel@stevo-desktop> Message-ID: <1170088785.6680.16.camel@vladsk-laptop> On Mon, 2007-01-29 at 08:40 -0600, Steve Wise wrote: > This fixes a bug with the rev 0 Chelsio T3 hardware... > > It needs to be pulled into ofed_1_2. > > Roland, it will need to be merged in with the T3 rdma driver. I'm > maintaining this in my git tree, so I can resend it to you once you > finish reviewing/merging the T3 driver. > > Thanks, > > Steve. > > ------- > > > Don't copy in the library cq rptr address for T3A devices. > > T3A doesn't support kernel bypass, so we must _not_ save off the lib's > cq rptr address for these devices. Otherwise the re-arm logic will try > and use the library rptr value for T3A re-arm. > > Signed-off-by: Steve Wise > --- Applied. -- Vladimir Sokolovsky Mellanox Technologies Ltd. From mst at mellanox.co.il Mon Jan 29 08:46:24 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 29 Jan 2007 18:46:24 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: References: Message-ID: <20070129164624.GH20398@mellanox.co.il> > Quoting Hoang-Nam Nguyen : > Subject: Re: [openib-general] OFED 1.2 release - to be reviewed in the meeting today > > > > This looks pretty similar to ofed-1.1/SVN release procedure. What about > > > discussed idea of "per package release by maintainers"? > > > > I guess there's no conflict: maintainers can make ofed_1_2 point to > > their release. 
But I agree we need maintainers' buy-in and commitment to schedule > > that matches OFED release schedule. > > Can you explain me what "per package release by maintainers" means for me? This really has to do with package versioning, not branching/tagging. Basically this boils down to the agreement that maintainers create a release of their package that matches code in OFED, taking care of library versioning (that is, assigning a version number to this package). Then 1. OFED versions the packages according to what maintainers do. 2. Properly versioned packages are available separately for other distributions. You can look at how libibverbs is versioned to get the idea. -- MST From mshefty at ichips.intel.com Mon Jan 29 09:01:53 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 29 Jan 2007 09:01:53 -0800 Subject: [openib-general] [RFC] Performance Manager In-Reply-To: <1170072757.4555.242192.camel@hal.voltaire.com> References: <000601c7419f$d4470c60$ff0da8c0@amr.corp.intel.com> <1170072757.4555.242192.camel@hal.voltaire.com> Message-ID: <45BE2881.1010603@ichips.intel.com> > Initially ? It is also an implementation phasing issue as stated. The > core support is needed in both so there is very little unneeded work to > get to the first phase in terms of a distributed approach. We would > certainly grow/evolve towards this after that initial implementation. Based on what you're saying, then a phased approach makes sense to me. >>>1. Use vendor specific MADs (which can be RMPP'd) and build on top of >>>this >>>2. Use IPoIB which is much more powerful as sockets can then be utilized. >> >>You could also use RC QP communication up/down the hierarchy. > > > Wouldn't that have the same issues as approach 1 (as compared with > approach 2) ? MAD overhead is significant. You would get less overhead plus RDMA capabilities, which could affect the implementation design.
- Sean From rdreier at cisco.com Mon Jan 29 09:18:11 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 29 Jan 2007 09:18:11 -0800 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. In-Reply-To: <20070119211659.18220.52930.stgit@dell3.ogc.int> (Steve Wise's message of "Fri, 19 Jan 2007 15:16:59 -0600") References: <20070119211659.18220.52930.stgit@dell3.ogc.int> Message-ID: Thanks, applied to master and stable branches. From rdreier at cisco.com Mon Jan 29 09:25:08 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 29 Jan 2007 09:25:08 -0800 Subject: [openib-general] [RFT] [PATCH] Add ABI compatibility for apps linked against libibverbs 1.0 In-Reply-To: (Roland Dreier's message of "Thu, 25 Jan 2007 10:04:46 -0800") References: Message-ID: > Updated patch is below. I would still appreciate test reports with > other apps, but now I think I'm confident enough that I will push this > out on the libibverbs.git master branch soon. OK, I've committed this to the libibverbs master branch and pushed it out. I think I'm getting close to a libibverbs 1.1-rc1 release -- the only items remaining on my todo list are to add stub low-level driver methods for reregister memory region and memory window handling, so that we have a chance at adding those things to later libibverbs 1.1 releases without breaking ABI. - R. From eeb at bartonsoftware.com Mon Jan 29 09:27:29 2007 From: eeb at bartonsoftware.com (Eric Barton) Date: Mon, 29 Jan 2007 17:27:29 GMT Subject: [openib-general] CM callbacks Message-ID: <200701291727.l0THRTtU025058@robert.bartonsoftware.com> Is the following possible? 1. I listen for connection requests. 2. RDMA_CM_EVENT_CONNECT_REQUEST is delivered, I rdma_accept() successfully and return from the callback. 3. RDMA_CM_EVENT_DISCONNECTED is delivered. Am I wrong to assume I can only get RDMA_CM_EVENT_DISCONNECTED after I've seen RDMA_CM_EVENT_ESTABLISHED? I thought I'd get one of the other callbacks (e.g. 
RDMA_CM_EVENT_CONNECT_ERROR) if something went wrong before the ESTABLISHED callback. -- Cheers, Eric From sean.hefty at intel.com Mon Jan 29 09:44:09 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 29 Jan 2007 09:44:09 -0800 Subject: [openib-general] [PATCH] ib_sa/multicast: Fix crash when multiple HCAs are present In-Reply-To: <20070128193303.GB4738@mellanox.co.il> Message-ID: <000001c743cd$14922ff0$8698070a@amr.corp.intel.com> We need to use a per device event handler, rather than a single, global handler that gets reinitialized when a new device is added to the system. Signed-off-by: Sean Hefty --- diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index fde977e..039f1eb 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -51,7 +51,6 @@ static struct ib_client mcast_client = { }; static struct ib_sa_client sa_client; -static struct ib_event_handler event_handler; static struct workqueue_struct *mcast_wq; static union ib_gid mgid0; @@ -68,6 +67,7 @@ struct mcast_port { struct mcast_device { struct ib_device *device; + struct ib_event_handler event_handler; int start_port; int end_port; struct mcast_port port[0]; @@ -742,9 +742,7 @@ static void mcast_event_handler(struct ib_event_handler *handler, { struct mcast_device *dev; - dev = ib_get_client_data(event->device, &mcast_client); - if (!dev) - return; + dev = container_of(handler, struct mcast_device, event_handler); switch (event->event) { case IB_EVENT_PORT_ERR: @@ -793,8 +791,8 @@ static void mcast_add_one(struct ib_device *device) dev->device = device; ib_set_client_data(device, &mcast_client, dev); - INIT_IB_EVENT_HANDLER(&event_handler, device, mcast_event_handler); - ib_register_event_handler(&event_handler); + INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); + ib_register_event_handler(&dev->event_handler); } static void mcast_remove_one(struct ib_device *device) @@ -807,7 +805,7 @@ static 
void mcast_remove_one(struct ib_device *device) if (!dev) return; - ib_unregister_event_handler(&event_handler); + ib_unregister_event_handler(&dev->event_handler); flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { From mst at mellanox.co.il Mon Jan 29 09:51:35 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 29 Jan 2007 19:51:35 +0200 Subject: [openib-general] [PATCH] ib_sa/multicast: Fix crash when multiple HCAs are present In-Reply-To: <000001c743cd$14922ff0$8698070a@amr.corp.intel.com> References: <000001c743cd$14922ff0$8698070a@amr.corp.intel.com> Message-ID: <20070129175135.GA25809@mellanox.co.il> > @@ -742,9 +742,7 @@ static void mcast_event_handler(struct ib_event_handler *handler, > { > struct mcast_device *dev; > > - dev = ib_get_client_data(event->device, &mcast_client); > - if (!dev) > - return; > + dev = container_of(handler, struct mcast_device, event_handler); > > switch (event->event) { > case IB_EVENT_PORT_ERR: I'm seeing crashes with the patch that I posted. This seems identical to my patch except for the container_of trick. Right? Is there a reason why ib_get_client_data won't work? -- MST From mshefty at ichips.intel.com Mon Jan 29 09:53:00 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 29 Jan 2007 09:53:00 -0800 Subject: [openib-general] oops at device removal In-Reply-To: <20070128193303.GB4738@mellanox.co.il> References: <20070128131755.GS6799@mellanox.co.il> <20070128142409.GA9832@mellanox.co.il> <20070128193303.GB4738@mellanox.co.il> Message-ID: <45BE347C.7000706@ichips.intel.com> > @@ -71,6 +70,7 @@ struct mcast_device { > int start_port; > int end_port; > struct mcast_port port[0]; > + struct ib_event_handler event_handler; > }; The mcast_port data is allocated at the end of the structure. event_handler will need to be located up in the structure. 
- Sean From halr at voltaire.com Mon Jan 29 09:57:22 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 29 Jan 2007 12:57:22 -0500 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <45BDFF11.9080901@mellanox.co.il> References: <45BDFF11.9080901@mellanox.co.il> Message-ID: <1170093441.15660.12785.camel@hal.voltaire.com> On Mon, 2007-01-29 at 09:05, Tziporet Koren wrote: > Hi, > > This is the proposal for OFED 1.2 branching and tagging: > > Sources developed in OFA: > 1. Each git owner will open a branch with the name ofed_1_2. This > branch should be opened on 31-Jan (based on code readiness we will > review today). > 2. Vlad will open a new /pub/ofed_1_2. > 3. All ofed_1_2 branches will be cloned to this directory. (Note: > libibverbs and libmthca will be cloned from kernel.org for Roland's > trees.) I'm confused about releasing the libraries as libxxx-.tar.gz. How is this to be handled (aside from what is in the ofed_1.2 branch) ? -- Hal > 4. Any change that should be included in the next OFED package will be > first check-in to the maintainer ofed_1_2 branch. > A mail should be sent to Vlad (and cc the list) to pull this > change. > 5. A tag will be set before any package is build. Tag name convention: > ofed_1_2_ where version will be the suffix of OFED package > (e.g. 1.2-alpha1) > 6. OFED package will be built based on this tag. > 7. There will be a build script (as in OFED 1.1) to enable each owner > to build the package for testing. > > MPI packages: > 1. MPI packages are provided as source RPMs > 2. Each MPI owner will have an account on the OFA server and will open > a directory named ofed_1_2 > 3. The SRPM package will be placed in this directory, with version > indication in the filename (e.g.ompi-1.2.1-xxx) > 4. 
There will be a file named latest.txt that will contain the package > that should be taken in the OFED package > > Any other external packages that supplied as SRPs (e.g bonding) and > not source will use the same method as above. > > Tziporet > > > > ______________________________________________________________________ > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From mst at mellanox.co.il Mon Jan 29 10:04:47 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Mon, 29 Jan 2007 20:04:47 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <1170093441.15660.12785.camel@hal.voltaire.com> References: <45BDFF11.9080901@mellanox.co.il> <1170093441.15660.12785.camel@hal.voltaire.com> Message-ID: <20070129180447.GB25809@mellanox.co.il> > Quoting Hal Rosenstock : > Subject: Re: OFED 1.2 release - to be reviewed in the meeting today > > On Mon, 2007-01-29 at 09:05, Tziporet Koren wrote: > > Hi, > > > > This is the proposal for OFED 1.2 branching and tagging: > > > > Sources developed in OFA: > > 1. Each git owner will open a branch with the name ofed_1_2. This > > branch should be opened on 31-Jan (based on code readiness we will > > review today). > > 2. Vlad will open a new /pub/ofed_1_2. > > 3. All ofed_1_2 branches will be cloned to this directory. (Note: > > libibverbs and libmthca will be cloned from kernel.org for Roland's > > trees.) > > I'm confused about releasing the libraries as libxxx-.tar.gz. > How is this to be handled (aside from what is in the ofed_1.2 branch) ? I think this is not covered by this proposal, need to be discussed separately. 
-- MST From mshefty at ichips.intel.com Mon Jan 29 10:22:13 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 29 Jan 2007 10:22:13 -0800 Subject: [openib-general] CM callbacks In-Reply-To: <200701291727.l0THRTtU025058@robert.bartonsoftware.com> References: <200701291727.l0THRTtU025058@robert.bartonsoftware.com> Message-ID: <45BE3B55.6020701@ichips.intel.com> Eric Barton wrote: > Is the following possible? > > 1. I listen for connection requests. > > 2. RDMA_CM_EVENT_CONNECT_REQUEST is delivered, I rdma_accept() successfully and > return from the callback. > > 3. RDMA_CM_EVENT_DISCONNECTED is delivered. > > Am I wrong to assume I can only get RDMA_CM_EVENT_DISCONNECTED after I've seen > RDMA_CM_EVENT_ESTABLISHED? I thought I'd get one of the other callbacks > (e.g. RDMA_CM_EVENT_CONNECT_ERROR) if something went wrong before the > ESTABLISHED callback. This is possible. To see why, we need to follow the IB CM protocol: client server listen connect send REQ recv REQ -> causes CONNECT_REQUEST accept send REP recv REP send RTU RTU wanders away and gets lost ESTABLISHED disconnect send DREQ recv DREQ - DISCONNECTED event From the viewpoint of the client a connection was established, and data could have been transferred over the connection. 
- Sean From chas at cmf.nrl.navy.mil Mon Jan 29 10:17:22 2007 From: chas at cmf.nrl.navy.mil (chas williams - CONTRACTOR) Date: Mon, 29 Jan 2007 13:17:22 -0500 Subject: [openib-general] ipoib, ipv6 and multicast groups Message-ID: <200701291817.l0TIHMIV003634@cmf.nrl.navy.mil> recently our sm started throwing the following errors: Jan 29 18:10:49 706710 [42003940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken Jan 29 18:10:49 706721 [42003940] -> osm_mcmr_rcv_create_new_mgrp: ERR 1B19: __get_new_mlid failed Jan 29 18:10:51 345113 [42804940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken Jan 29 18:10:51 345132 [42804940] -> osm_mcmr_rcv_create_new_mgrp: ERR 1B19: __get_new_mlid failed Jan 29 18:10:51 514312 [41802940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken Jan 29 18:10:51 514320 [41802940] -> osm_mcmr_rcv_create_new_mgrp: ERR 1B19: __get_new_mlid failed Jan 29 18:10:51 735732 [42804940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken we tracked this down to a problem with ipoib interaction with ipv6. ipv6 joins two multicast groups, instead of just one like ipv4. # netstat -A inet6 -g -n ... IPv6/IPv4 Group Memberships Interface RefCnt Group --------------- ------ --------------------- lo 1 ff02::1 ib0 1 ff02::1:ff00:77a2 ib0 1 ff02::1 # netstat -A inet6 -g -n ... 
IPv6/IPv4 Group Memberships Interface RefCnt Group --------------- ------ --------------------- lo 1 224.0.0.1 ib0 1 224.0.0.1 # cat /sys/kernel/debug/ipoib/ib0_mcg GID: ff12:401b:ffff:0:0:0:0:1 created: 4298482097 queuelen: 0 complete: yes send_only: no GID: ff12:401b:ffff:0:0:0:ffff:ffff created: 4298482097 queuelen: 0 complete: yes send_only: no GID: ff12:601b:ffff:0:0:0:0:1 created: 4298482097 queuelen: 0 complete: yes send_only: no GID: ff12:601b:ffff:0:0:1:ff00:77a2 created: 4298482097 queuelen: 0 complete: yes send_only: no the ff02::1:ff00:77a2 group is specific to the interface (link local), so each of our ib hosts running ipv6 registers its own unique multicast group. since our network is bigger than 32 hosts, it appears that we have exceeded the multicast tables in our local switches and this is making opensm generate the above error. besides not running ipv6, are there any thoughts about this? From ossrosch at linux.vnet.ibm.com Mon Jan 29 10:23:10 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Mon, 29 Jan 2007 19:23:10 +0100 Subject: [openib-general] [PATCH ofed1.2 1/2] libehca: create config directory in autogen.sh Message-ID: <200701291923.10612.ossrosch@linux.vnet.ibm.com> Hi, this patch change autogen.sh, so the config directory is created if it's not exist. Regards Stefan Signed-off-by: Stefan Roscher --- diff -Nurp libehca_old/autogen.sh libehca_new/autogen.sh --- libehca_old/autogen.sh 2007-01-29 17:16:22.000000000 +0100 +++ libehca_new/autogen.sh 2007-01-29 17:17:01.000000000 +0100 @@ -1,5 +1,6 @@ #! 
/bin/sh +mkdir -p config set -x aclocal -I config libtoolize --force --copy From ossrosch at linux.vnet.ibm.com Mon Jan 29 10:23:29 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Mon, 29 Jan 2007 19:23:29 +0100 Subject: [openib-general] [PATCH ofed1.2 2/2] libehca: change path to ehca.driver for make dist Message-ID: <200701291923.29730.ossrosch@linux.vnet.ibm.com> Hi, this patch fixes the path to ehca.driver in Makfile.am. Regards Stefan Signed-off-by: Stefan Roscher --- Files libehca_old/.git/index and libehca_new/.git/index differ diff -Nurp libehca_old/Makefile.am libehca_new/Makefile.am --- libehca_old/Makefile.am 2007-01-29 17:16:22.000000000 +0100 +++ libehca_new/Makefile.am 2007-01-29 17:17:18.000000000 +0100 @@ -70,7 +70,7 @@ EXTRA_DIST = src/ehca_asm.h \ src/ehca_utools.h \ src/hipz_hw.h \ src/libehca.map \ - src/ehca.driver + ehca.driver # dist-hook: libehca.spec # cp libehca.spec $(distdir) From halr at voltaire.com Mon Jan 29 10:32:48 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 29 Jan 2007 13:32:48 -0500 Subject: [openib-general] ipoib, ipv6 and multicast groups In-Reply-To: <200701291817.l0TIHMIV003634@cmf.nrl.navy.mil> References: <200701291817.l0TIHMIV003634@cmf.nrl.navy.mil> Message-ID: <1170095568.15660.14935.camel@hal.voltaire.com> On Mon, 2007-01-29 at 13:17, chas williams - CONTRACTOR wrote: > recently our sm started throwing the following errors: > > Jan 29 18:10:49 706710 [42003940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken > Jan 29 18:10:49 706721 [42003940] -> osm_mcmr_rcv_create_new_mgrp: ERR 1B19: __get_new_mlid failed > Jan 29 18:10:51 345113 [42804940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken > Jan 29 18:10:51 345132 [42804940] -> osm_mcmr_rcv_create_new_mgrp: ERR 1B19: __get_new_mlid failed > Jan 29 18:10:51 514312 [41802940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken > Jan 29 18:10:51 514320 [41802940] -> osm_mcmr_rcv_create_new_mgrp: ERR 1B19: 
__get_new_mlid failed > Jan 29 18:10:51 735732 [42804940] -> __get_new_mlid: ERR 1B23: All available:32 mlids are taken 32 is too low for MLID space support IMO. > we tracked this down to a problem with ipoib interaction > with ipv6. ipv6 joins two multicast groups, instead of > just one like ipv4. > > # netstat -A inet6 -g -n > ... > IPv6/IPv4 Group Memberships > Interface RefCnt Group > --------------- ------ --------------------- > lo 1 ff02::1 > ib0 1 ff02::1:ff00:77a2 > ib0 1 ff02::1 > > > # netstat -A inet6 -g -n > ... > IPv6/IPv4 Group Memberships > Interface RefCnt Group > --------------- ------ --------------------- > lo 1 224.0.0.1 > ib0 1 224.0.0.1 > > > # cat /sys/kernel/debug/ipoib/ib0_mcg > GID: ff12:401b:ffff:0:0:0:0:1 > created: 4298482097 > queuelen: 0 > complete: yes > send_only: no > > GID: ff12:401b:ffff:0:0:0:ffff:ffff > created: 4298482097 > queuelen: 0 > complete: yes > send_only: no > > GID: ff12:601b:ffff:0:0:0:0:1 > created: 4298482097 > queuelen: 0 > complete: yes > send_only: no > > GID: ff12:601b:ffff:0:0:1:ff00:77a2 > created: 4298482097 > queuelen: 0 > complete: yes > send_only: no > > > the ff02::1:ff00:77a2 group is specific to the interface (link local), > so each of our ib hosts running ipv6 registers its own unique multicast > group. since our network is bigger than 32 hosts, it appears that we > have exceeded the multicast tables in our local switches and this is > making opensm generate the above error. > > besides not running ipv6, are there any thoughts about this? This has been discussed on the list before. Last time was a thread on "IPv6 and IPoIB scalability issue" back in late November (11/30) to early December (12/2). There are some options presented. None have been pursued to the best of my knowledge. 
-- Hal > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From HNGUYEN at de.ibm.com Mon Jan 29 11:29:01 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Mon, 29 Jan 2007 14:29:01 -0500 Subject: [openib-general] [openfabrics-ewg] [PATCH ofed-1.2 0/6] ehca (kernel space) patches for ofed-1.2 In-Reply-To: <20070127221134.GF32270@mellanox.co.il> Message-ID: openfabrics-ewg-bounces at openib.org wrote on 27.01.2007 17:11:34: > > PS2: For backport on 2.6.16 resp. SLES10 I saw that there is a > > hvcall.h under backport/2.6.16/include/linux. However that one > > is not sufficient for ehca and include/linux is the wrong place. > > Hence, I'm patching a new one under include/asm. If I'm right, > > please remove include/linux/hvcall.h! > I remember this was needed for iser backport for some reason. > Does someone remember? Can someone from iser group please check this? On pseries hvcall.h is placed under include/asm which is a link to include/asm-ppc resp include/asm-powerpc. Thanks Nam From sean.hefty at intel.com Mon Jan 29 12:06:43 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 29 Jan 2007 12:06:43 -0800 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <45BACB34.5030202@open-mpi.org> Message-ID: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> Modify rdma_join_multicast to allow the user to specify that they want the underlying transport to assign them a unique multicast address. This is done by specifying an IP address of 0, which will translate into an IB MGID of 0. To allow others to join this group, we need a way to determine if additional join requests are for a specific MGID, or require IP to MGID mapping. This is done by comparing the requested join address against SA assigned MGIDs. 
Signed-off-by: Sean Hefty --- diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 827df2a..395cf2f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2490,13 +2490,36 @@ out: return 0; } +static void cma_set_mgid(struct rdma_id_private *id_priv, + struct sockaddr *addr, union ib_gid *mgid) +{ + unsigned char mc_map[MAX_ADDR_LEN]; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct sockaddr_in *sin = (struct sockaddr_in *) addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; + + if (cma_any_addr(addr)) { + memset(mgid, 0, sizeof *mgid); + } else if ((addr->sa_family == AF_INET6) && + ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) == + 0xFF10A01B)) { + /* IPv6 address is an SA assigned MGID. */ + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else { + ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); + if (id_priv->id.ps == RDMA_PS_UDP) + mc_map[7] = 0x01; /* Use RDMA CM signature */ + mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; + mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); + *mgid = *(union ib_gid *) (mc_map + 4); + } +} + static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; - unsigned char mc_map[MAX_ADDR_LEN]; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct sockaddr_in *sin = (struct sockaddr_in *) &mc->addr; ib_sa_comp_mask comp_mask; int ret; @@ -2506,15 +2529,9 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (ret) return ret; - ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); - if (id_priv->id.ps == RDMA_PS_UDP) { - mc_map[7] = 0x01; /* Use RDMA CM signature */ + cma_set_mgid(id_priv, &mc->addr, &rec.mgid); + if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - } - mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; - mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); - - rec.mgid = *(union ib_gid *) 
(mc_map + 4); ib_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; From afriedle at open-mpi.org Mon Jan 29 13:45:40 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Mon, 29 Jan 2007 16:45:40 -0500 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> Message-ID: <45BE6B04.2020602@open-mpi.org> Sean Hefty wrote: > Modify rdma_join_multicast to allow the user to specify that > they want the underlying transport to assign them a unique > multicast address. This is done by specifying an IP address > of 0, which will translate into an IB MGID of 0. > > To allow others to join this group, we need a way to determine > if additional join requests are for a specific MGID, or require > IP to MGID mapping. This is done by comparing the requested > join address against SA assigned MGIDs. Still not understanding this part -- this means that I'm not able to get some sort of portable handle for the group on the process that initially joins the group, and pass it to other processes who would then use that to join the group? Andrew From rdreier at cisco.com Mon Jan 29 13:49:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 29 Jan 2007 13:49:04 -0800 Subject: [openib-general] [RFC/BUG] libibverbs: DMA vs. CQ race In-Reply-To: ( akepner@sgi.com's message of "Sun, 28 Jan 2007 15:17:14 -0800 (PST)") References: Message-ID: Hmm... Well, first the changes to the userspace libmthca need to be such that new libmthca continues to work with old kernels. I'm OK with saying to people, "You upgraded your kernel so you also have to upgrade your userspace library." 
But I'm not OK with saying to people, "To get a fix for that bug, you need to upgrade libmthca, which means you also need to upgrade your kernel," and I also don't want to tell people, "If you reboot into an older kernel then you need to downgrade your userspace library." Also, > + off_t offset; > + /* offset encodes CQ and cqn; lower PAGE_SHIFT bits MBZ */ > + offset = cq->cqn; > + offset <<= 32; > + offset += MTHCA_MAGIC_CQ_OFFSET * page_size; is obviously not going to work on architectures where off_t is 32 bits. Even with that resolved this all seems rather unfortunate to me. I don't like the idea of having the kernel keep all these buffers around and then have the userspace library have to map the right buffer. It leads to awkwardness like the fact that mthca_resize_cq() seems to be totally screwed if ibv_cmd_resize_cq() fails for some reason -- it already munmap'ed the original buffer, and it can't map the new buffer, and so the CQ is dead with no chance to recover. The really strange thing about this is that this Altix coherent/consistent memory really isn't about the memory itself, but about the relationship of that memory with DMA elsewhere -- as I understand the code, doing dma_alloc_coherent() returns normal memory with a special DMA address that tells the system to flush other DMAs before doing DMA to the coherent region. Which isn't really what most people understand coherent memory to be, but it has the magic property of making most drivers work. So I'd really like a better solution, but I don't have one in mind unfortunately. Maybe we can all meditate on this and try to come up with something cleaner -- I really hope there is a better way to handle this. - R. 
From mshefty at ichips.intel.com Mon Jan 29 14:26:02 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 29 Jan 2007 14:26:02 -0800 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <45BE6B04.2020602@open-mpi.org> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> Message-ID: <45BE747A.4060900@ichips.intel.com> >>To allow others to join this group, we need a way to determine >>if additional join requests are for a specific MGID, or require >>IP to MGID mapping. This is done by comparing the requested >>join address against SA assigned MGIDs. > > > Still not understanding this part -- this means that I'm not able to get > some sort of portable handle for the group on the process that initially > joins the group, and pass it to other processes who would then use that > to join the group? I believe that this patch lets you do what you're trying to do. The group handle would be the returned mgid from the initial join that created the group. The mgid would need to be passed to other processes as an IPv6 address, who issue a join request on that group. (The mgid is available from the rdma_cm_event.param.ud.ah_attr.grh.dgid.) Typically, the rdma_cm maps IP addresses to mgids using the ipoib ip mapping algorithm. This patch avoids that mapping if the upper 32-bits of the IP address match a specific pattern. - Sean From pradeep at us.ibm.com Mon Jan 29 14:44:48 2007 From: pradeep at us.ibm.com (Pradeep Satyanarayana) Date: Mon, 29 Jan 2007 14:44:48 -0800 Subject: [openib-general] IPOIB CM with Non SRQ support In-Reply-To: <20070126074128.GJ10812@mellanox.co.il> Message-ID: Hello Michael, Yes, the code seems to get complex with lots of small changes spread all over the receive side. Plus special-casing them with #ifdef makes it look a little messy. It is unlikely I can get this out by Feb 1st.
As I was working through this I noticed a few things and here are my observations: -ipoib_cm_modify_rx_rts() does not actually transition the passive side qp to RTS state and remains in the RTR state. However, the active side qp does transition to RTS. -One artifact of the current send side implementation is that for every message we create a new set of tx qps. So, if one were to use IB for the cluster heartbeat mechanism as an example, then for every heartbeat we end up creating an ipoib_cm_tx structure and initiating a set of CM exchanges. This might consume a lot of resources (even on an "idle" system). Changing this has a potential performance upside. Pradeep pradeep at us.ibm.com "Michael S. Tsirkin" wrote on 01/25/2007 11:41:28 PM: > > Quoting Pradeep Satyanarayana : > > Subject: IPOIB CM with Non SRQ support > > > > > > Michael, > > > > I am working on a prototype based on your IPOIB CM patch to > incorporate support for Non SRQ as well. IPOIB CM was planned to be > in OFED 1.2 if I remember correctly. If I were to submit a patch for > non SRQ support, what would be the cut off date to make it > > into OFED 1.2? > > I think it must be ready for merge by feature freeze on Feb 1st, but at this > stage it really needs to be a small patch. I can't commit to merging it > before I see it. > > I have to warn you that I thought about this problem, and unfortunately > I do not see a way to implement it in a robust fashion without complicating > the code significantly. In this case, you'll just might have to maintain it > as a separate patch until the code lands upstream, and propose as a separate > improvement later. > > -- > MST -------------- next part -------------- An HTML attachment was scrubbed...
URL: From kliteyn at dev.mellanox.co.il Mon Jan 29 15:10:23 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 01:10:23 +0200 Subject: [openib-general] QoS in OSM In-Reply-To: <1169847771.4555.7676.camel@hal.voltaire.com> References: <45B768E2.9070604@dev.mellanox.co.il> <1169734241.13608.75454.camel@hal.voltaire.com> <45B8E089.5000804@dev.mellanox.co.il> <1169847771.4555.7676.camel@hal.voltaire.com> Message-ID: <45BE7EDF.4090708@dev.mellanox.co.il> Hi guys. I've finished the first implementation of QoS-aware PathRecord. The path selection logic itself is implemented in a separate function that is called only when QoS in OpenSM is on. It causes some code duplication, but as we've discussed, the idea is to minimize the changes in the existing logic in OSM. Tonight the regression testing is running on this OSM version to make sure that I didn't screw something up. Since none of the QoS patches has made its way to the trunk yet, the patch series will be pretty long. It will include: - QoS policy file parser (Lex & Yacc files that implement grammar, C & H files that implement parser auxiliary functions) - Additional fields in path_record_t (instead of 'reserved' fields) - Additional command line option for OpenSM to specify the QoS policy file name - QoS-aware selection of PathRecord. I'll issue the patch series with all the details in the morning, and then I'll start working on MultiPath Record. In addition to all the questions that you already have and I haven't answered yet, I'm sure you'll have many questions and remarks regarding these patches. I suggest that we set up a conference call to discuss all these questions - it might save us a lot of time and clear up some issues. How about tomorrow morning? (I mean Hal's morning). The earlier the better. Please let me know what you think about it. Thanks, -- Yevgeny Hal Rosenstock wrote: > Hi again Yevgeny, > > On Thu, 2007-01-25 at 11:53, Yevgeny Kliteynik wrote: >> Hi Hal. 
>> >> Hal Rosenstock wrote: >>> Hi Yevgeny, >>> >>> On Wed, 2007-01-24 at 09:10, Yevgeny Kliteynik wrote: >>>> Hi Hal, Sasha. >>>> >>>> Here's a description of the QoS policy file, and an >>>> example of such file (with more comments inside). >>> This makes the start of a good document on this. If you add this to >>> osm/doc, I will incorporate it into the opensm man page. >> OK, I'll do that. >> >>>> QoS Policy file >>>> -- >>>> >>>> The QoS policy file is divided into 4 sub sections: >>>> >>>> * Node Group: a set of HCAs, Routers or Switches that share the same settings. >>>> A node groups might be a partition defined by the partition manager policy in >>>> terms of GUIDs. >>> Are these Node or Port Groups ? It looks like port groups from the >>> below. >> Good point - it should be "Port Groups". >> >>>> Future implementations might provide support for NodeDescription >>>> based definition of node groups. >>>> >>>> * Fabric Setup: >>>> Defines how the SL2VL and VLArb tables should be setup. This policy definition >>>> assumes the computation of target behavior should be performed outside of >>>> OpenSM. >>>> >>>> * QoS-Levels Definition: >>>> This section defines the possible sets of parameters for QoS that a client might >>>> be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits >>>> (in case LMC > 0 is used for QoS) and TClass. >>> How does this relate to/interact with partition configuration ? Also, >>> what about preexisting QoS ? >> As I understand from the osm man or from the partition-config.txt, >> partitions definition is intended to be used for IPoIB only. >> [quote] >> sl= - specifies SL for this IPoIB MC group >> (default is 0) >> [/quote] >> >> I think that QoS policy may only "tighten" the constraints and enforce >> lower-than-requested values, both in case of partition and in case of >> preexisting QoS settings. > > I'm not following you on this specific point. 
A specific SL is chosen by > partition config so how can it be "tightened" ? Does it mean it might be > changed to a different SL (in which case this QoS config superceeds the > partition config for SL setting) ? Have you tried this to be sure ? > > Are multicast groups handled as part QoS definition in the XML syntax ? > If not, might this be a future addition ? If it is, how are they > specified ? > > The other half of the original question was how a QoS request is handled > if the original QoS support is enabled rather than this new QoS support > in terms of the SA PR and MPR code. > >>>> * Matching Rules: >>>> A list of rules that match an incoming PathRecord request to a QoS-Level. The >>>> rules are processed in order such as the first match is applied. Each rule is >>>> built out of set of match expressions which should all match for the rule to >>>> apply. The matching expressions are defined for the following fields >>>> - SRC and DST to lists of node groups >>>> - Service-ID to a list of Service-ID or Service-ID ranges >>>> - TClass to a list of TClass values or ranges >>>> >>>> QoS policy file example >>>> -- >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> Storage >>>> our SRP storage targets >>> Is the use clause more than commentary ? How is it "used" ? >> The 'use' clause is just a description of the port group that >> can be used for logging. Other than for logging, it is just a >> commentary. >> >>>> 0x1000000000000001 >>>> 0x1000000000000002 >>>> >>>> >>>> >>>> Virtual Servers >>>> node desc and IB port # >>>> vs1/HCA-1/P1 >>>> vs3/HCA-1/P1 >>>> vs3/HCA-2/P1 >>> How are port-names used ? >> The syntax of the port name is as follows: >> "hostname/CA-num/Pnum" > > What's it's purpose ? Is it used somewhere else in the syntax ? > >>>> >>>> >>>> >>>> Partition 1 >>>> default settings >>>> Part1 >>>> >>>> >>> Is this CA rather than HCA ? (What about TCAs ?) >> Sure, it should be 'CA'. > > Will this be changed ? If so, when ? 
> >>>> >>>> Routers >>>> all routers >>>> ROUTER >>>> >>>> >>>> >>>> >>>> >>> ^^ >>> Actually, it is SL >>> assuming the device supports SL2VL mapping as indicate by >>> IsSLMappingSupported in the PortInfo:CapabilityMask. >>> Will the syntax handle single data VL devices which only implement SL >>> filtering ? >> Yes, it should. >> >>> Will the QoS manager support this (SL2VL without VLArb >>> settings) or are these required together ? >> Yes, it should support sl2vl w/o vlarb settings as well. >> >>>> >>>> >>>> >>>> Part1 >>>> * >>>> * >>>> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 >>>> >>>> >>>> >>>> Storage >>>> >>>> Storage2 >>>> >>>> Storage3 >>> I don't quite follow across-from/to. >> Right, the comments there are garbage. Here the explanation: >> SL2VL table describes VL as function of from-port, to-port, and SL. >> >> group_name: >> It defines sl2vl table where 'to-port's belong to group_name >> group_name: >> Same as above, only that this time 'from-port's belong to group_name >> group_name: >> sl2vl tables both for 'to-port's 'from-port's that belong to group_name > > I'm still not following what is going on here and how this is used. > >>>> * >>>> 1 >>>> 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> Storage >>>> >>>> 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 >>>> 8:255,9:127,10:63,11:31,12:15,13:7,14:3 >>>> 10 >>> What happens if the shape of VLArb indicated here does not match the >>> device ? >> The part that sets up the QoS in SM (I'm not writing this part right now), > > What is the plan for this ? > >> should issue error message in case VLArb definition doesn't match the device >> properties. > > Aside from the error message, is there any additional error handling for > this ? > >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> 1 >>> What does sn mean ? What is it used for ? >> 'sn' is an id of this qos level definition. >> It is referenced later in by QoS match rules as 'qos-level-sn' > > What is 'sn' short for ? 
> >>>> for the lowest priority comm >>>> 16 >>>> >>>> >>>> >>>> 2 >>>> low latency best bandwidth >>>> 0 >>>> 7 >>> What is class ? I saw TClass mentioned earlier. Is this TClass or >>> something else ? >> Instead of "TClass" there should be "QoS Class". >> The value is the PathRecord.qos_class value that should be >> returned in the path record query response when a certain >> is applied to the returned path. > > So these names need to change to be more consistent ? > >>>> >>>> >>> If specified, do MTU limit and rate limit add extra limits to be imposed >>> on what is selected (and realizable) ? >> Yes >> >>> Strictly speaking, couldn't packet lifetime limit also be added to this >>> syntax here ? I presume it was left out as being not "interesting" as >>> yet. Is that correct ? >> I can add packet lifetime limit - it's not a big deal >> >>> Also, how are path bits used ? >> For now I don't do anything with them - we'll discuss this issue in the future. > > How are they envisioned to be used ? > > Why are they in the syntax now ? Seems inconsistent with PLL. > > Should there be a warning if they are specified now since they are not > used ? > >>>> >>>> 3 >>>> just an example >>>> 0 >>>> 32 >>>> 1 >>>> 1 >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> 1 >>>> low latency by class 7-9 or 11 >>>> 7-9,11 >>>> 1 >>>> >>>> >>>> >>>> 2 >>>> Storage targets connection> >>>> Storage >>>> 22,4719 >>> What is service ? What does 22.4719 mean ? >> The syntax is service_id1,service_id1,..., so in the >> example above these are actually two service ids. > > So you can create arbitrary lists of service IDs. What about ranges ? > Does the syntax support that ? > >> As for the exact meaning of this, I'm not sure - I need to think about it... > > Let me know. I'd really like to understand the syntax. > >>>> 3 >>> What are match-levels used for ? >> Actually, they are not used - they shouldn't appear here. 
>> Somehow it was copy-pasted here from one of the older versions >> of the policy file. > > So can this be updated for what is current ? > > Thanks. > > -- Hal > >> -- Yevgeny >> >>> -- Hal >>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> -- Yevgeny >>>> >>>> Yevgeny Kliteynik wrote: >>>>> Hi Sasha, >>>>> >>>>> Sasha Khapyorsky wrote: >>>>>> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: >>>>>>> Hi Sasha. >>>>>>> >>>>>>> Sasha Khapyorsky wrote: >>>>>>>> Hi Yevgeny, >>>>>>>> >>>>>>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: >>>>>>>>> Hi Hal >>>>>>>>> >>>>>>>>> The following series of six patches implements QoS policy file parser: >>>>>>>>> >>>>>>>>> 1. QoS parser Lex file >>>>>>>>> 2. QoS parser Lex-generated c file >>>>>>>>> 3. QoS parser grammar (Yacc) file >>>>>>>>> 4. QoS parser Yacc-generated grammar c and h file >>>>>>>>> 5. QoS parser header file that defines parse tree data structures >>>>>>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files >>>>>>>> Is there any description of proposed format and functionality? >>>>>>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few >>>>>>> minor modifications. You can find the RFC here: >>>>>>> http://openib.org/pipermail/openib-general/2006-May/022336.html >>>>>> This was RFC and couple of issues were discussed then. Now you are about >>>>>> implementation phase and exact format description would be desired. For >>>>>> example what "few minor modifications" are? >>>>> I'll prepare an example file with explanations. >>>>> >>>>> -- Yevgeny >>>>> >>>>>>>> Also what about using human readable formats? >>>>>>> To me the xml-like format in the RFC looks pretty readable. >>>>>>> It has very limited number of keywords (tags), so it's easy >>>>>>> to follow and/or to modify. >>>>>> It is your opinion, not everybody will agree with it (AFAIR this was >>>>>> discussed too during RFC). 
>>>>>> >>>>>> I would not care, but I don't know any example of really successful >>>>>> XML use for configuration purposes (especially where advanced graphical >>>>>> config editors/viewers were not used). Do you know? >>>>>> >>>>>> Sasha >>>>>> >>>>> _______________________________________________ >>>>> openib-general mailing list >>>>> openib-general at openib.org >>>>> http://openib.org/mailman/listinfo/openib-general >>>>> >>>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >>>>> > From halr at voltaire.com Mon Jan 29 15:32:41 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 29 Jan 2007 18:32:41 -0500 Subject: [openib-general] [PATCH] OpenSM: Experimental code for LASH routing Message-ID: <1170113556.15660.33797.camel@hal.voltaire.com> OpenSM: Experimental code for LASH routing LASH is an acronym for LAyered SHortest Path Routing. This algorithm was developed by Simula Research Lab. LASH uses VL layers to create deadlock-free paths. LASH routing has experimental status in OpenSM and is currently limited to small topologies. It is low risk as there are almost no changes to the mainline code paths in OpenSM. 
Signed-off-by: Thomas Sødring --- osm/include/opensm/osm_switch.h | 1 + osm/opensm/Makefile.am | 2 +- osm/opensm/osm_opensm.c | 2 + osm/opensm/osm_sa_path_record.c | 12 +- osm/opensm/osm_ucast_lash.c | 1553 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 1568 insertions(+), 2 deletions(-) create mode 100644 osm/opensm/osm_ucast_lash.c diff --git a/osm/include/opensm/osm_switch.h b/osm/include/opensm/osm_switch.h index b2bf0db..1c66028 100644 --- a/osm/include/opensm/osm_switch.h +++ b/osm/include/opensm/osm_switch.h @@ -111,6 +111,7 @@ typedef struct _osm_switch osm_port_profile_t *p_prof; osm_mcast_tbl_t mcast_tbl; uint32_t discovery_count; + void *priv; } osm_switch_t; /* * FIELDS diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am index b1028d8..15af336 100644 --- a/osm/opensm/Makefile.am +++ b/osm/opensm/Makefile.am @@ -54,7 +54,7 @@ opensm_SOURCES = main.c osm_console.c osm_db_files.c \ osm_sweep_fail_ctrl.c osm_sw_info_rcv.c osm_switch.c \ osm_prtn.c osm_prtn_config.c osm_qos.c osm_router.c \ osm_trap_rcv.c osm_ucast_mgr.c osm_ucast_updn.c \ - osm_ucast_file.c osm_ucast_ftree.c \ + osm_ucast_lash.c osm_ucast_file.c osm_ucast_ftree.c \ osm_vl15intf.c osm_vl_arb_rcv.c \ st.c if OSMV_OPENIB diff --git a/osm/opensm/osm_opensm.c b/osm/opensm/osm_opensm.c index 1c17979..337130f 100644 --- a/osm/opensm/osm_opensm.c +++ b/osm/opensm/osm_opensm.c @@ -72,6 +72,7 @@ struct routing_engine_module { extern int osm_ucast_updn_setup(osm_opensm_t *p_osm); extern int osm_ucast_file_setup(osm_opensm_t *p_osm); extern int osm_ucast_ftree_setup(osm_opensm_t *p_osm); +extern int osm_ucast_lash_setup(osm_opensm_t * p_osm); static int osm_ucast_null_setup(osm_opensm_t *p_osm); @@ -80,6 +81,7 @@ const static struct routing_engine_module routing_modules[] = { { "updn", osm_ucast_updn_setup }, { "file", osm_ucast_file_setup }, { "ftree", osm_ucast_ftree_setup }, + { "lash", osm_ucast_lash_setup }, { NULL, NULL } }; diff --git a/osm/opensm/osm_sa_path_record.c 
b/osm/opensm/osm_sa_path_record.c index a0dbb07..71cadda 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -66,6 +66,7 @@ #include #include #include +#include #ifdef ROUTER_EXP #include #include @@ -74,6 +75,10 @@ #define OSM_PR_RCV_POOL_MIN_SIZE 64 #define OSM_PR_RCV_POOL_GROW_SIZE 64 +extern uint8_t osm_get_lash_sl(osm_opensm_t *p_osm, + const osm_port_t *p_src_port, + const osm_port_t *p_dst_port); + typedef struct _osm_pr_item { cl_pool_item_t pool_item; @@ -674,7 +679,12 @@ __osm_pr_rcv_get_path_parms( } } - sl = OSM_DEFAULT_SL; + if (p_rcv->p_subn->opt.routing_engine_name && + strcmp(p_rcv->p_subn->opt.routing_engine_name, "lash") == 0) + // slid and dest_lid are stored in network in lash + sl = osm_get_lash_sl(p_rcv->p_subn->p_osm, p_src_port, p_dest_port); + else + sl = OSM_DEFAULT_SL; if (pkey) { p_prtn = (osm_prtn_t *)cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl, diff --git a/osm/opensm/osm_ucast_lash.c b/osm/opensm/osm_ucast_lash.c new file mode 100644 index 0000000..c716855 --- /dev/null +++ b/osm/opensm/osm_ucast_lash.c @@ -0,0 +1,1553 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2007 Simula Research Laboratory. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: osm_ucast_updn.c 10057 2006-11-03 16:35:23Z halr $ + */ + + +/* + * Abstract: + * Implementation of LASH algorithm Calculation functions + * + * Environment: + * Linux User Mode + * + * $Revision: 1.0 $ + */ + + + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* //////////////////////////// */ +/* Local types */ +/* //////////////////////////// */ + +enum { + MAX_INT = 9999, + NONE = MAX_INT +}; + + +typedef struct _cdg_vertex { + int switch_size; + int num_dependencies; + struct _cdg_vertex ** dependency; + int from; + int to; + int seen; + int temp; + int visiting_number; + struct _cdg_vertex * next; + int num_temp_depend; + int num_using_vertex; + int *num_using_this_depend; +} cdg_vertex_t; + +typedef struct _reachable_dest { + int switch_id; + struct _reachable_dest * next; +} reachable_dest_t; + +typedef struct _q_item { + int sw; + struct _q_item * next; +} q_item_t; + +typedef struct _switch { + osm_switch_t *p_sw; + struct _switch ** dij_channels; + int id; + int used_channels; + int mst_member; + int q_member; + int dist; + int prev; + struct routing_table { + unsigned out_link; + unsigned lane; + } *routing_table; + unsigned 
int num_connections; +#if 0 + struct connections { + unsigned sw; + unsigned port; + } *connections; +#else + int *virtual_physical_port_table; + int *phys_connections; +#endif +} switch_t; + + +typedef struct _lash +{ + osm_opensm_t *p_osm; + int num_switches; + uint8_t vl_min; + int balance_limit; + switch_t ** switches; + int q_count; + q_item_t * q_head; + cdg_vertex_t **** cdg_vertex_matrix; + int *num_mst_in_lane; + int **adj_matrix; + int ***virtual_location; +} lash_t; + + + +static cdg_vertex_t* create_cdg_vertex(unsigned num_switches) +{ + cdg_vertex_t* cdg_vertex = (cdg_vertex_t*) malloc(sizeof(cdg_vertex_t)); + + cdg_vertex->dependency = malloc((num_switches-1)*sizeof(cdg_vertex_t*)); + cdg_vertex->num_using_this_depend =(int*)malloc((num_switches-1)*sizeof(int)); + return cdg_vertex; +} + + +/* +cl_map_t map_switch_guid_to_lash_id; + +0x200001 -> 0; +0x200002 -> 1; +0x200003 -> 2; +0x200004 -> 3; +0x200005 -> 4; +0x200006 -> 5; +*/ + +static int connect_switches(lash_t *p_lash, int sw1, int sw2, int physical_port_1, int physical_port_2) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + unsigned num = p_lash->switches[sw1]->num_connections; + + p_lash->switches[sw1]->phys_connections[num] = sw2; + p_lash->switches[sw1]->virtual_physical_port_table[num] = physical_port_1; + p_lash->switches[sw1]->num_connections++; + + osm_log(p_log, OSM_LOG_DEBUG, + "connect_switches: " + "LASH connect: %d, %d, %d, %d \n", + sw1, sw2, physical_port_1, physical_port_1); + + return p_lash->adj_matrix[sw1][sw2] = 1; +} + + +static uint64_t osm_lash_get_switch_guid(IN const osm_switch_t *p_sw) { + + uint64_t switch_guid = -1; + osm_physp_t* p_physp = osm_node_get_physp_ptr(p_sw->p_node, 0); + + if (p_physp && osm_physp_is_valid (p_physp)) { + switch_guid = osm_physp_get_port_guid(p_physp); + } + + return switch_guid; +} + + +static osm_switch_t *get_osm_switch_from_port(osm_port_t *port) +{ + osm_physp_t *p = osm_port_get_default_phys_ptr(port); + if (p->p_node->sw) + return 
p->p_node->sw; + else if (p->p_remote_physp->p_node->sw) + return p->p_remote_physp->p_node->sw; + return NULL; +} + +static osm_switch_t *get_osm_switch_from_lid(osm_opensm_t *osm, uint16_t lid) +{ + osm_port_t *port = cl_ptr_vector_get(&osm->subn.port_lid_tbl, lid); + if (!port) + return NULL; + return get_osm_switch_from_port(port); +} + +// This is a time consuming way to find a port from a lid +// we will come up with a better way later +static uint8_t find_port_from_lid(IN const ib_net16_t lid_no, + IN const osm_switch_t *p_sw) { + + uint8_t port_count = 0; + uint8_t i=0; + osm_physp_t *p_current_physp, *p_remote_physp = NULL; + + uint8_t egress_port = 255; + + if (p_sw->p_node) { + port_count = osm_node_get_num_physp (p_sw->p_node); + } + + // process management port first + p_current_physp = osm_node_get_physp_ptr(p_sw->p_node, 0); + + ib_port_info_t *port_info = &p_current_physp->port_info; + ib_net16_t port_lid = port_info->base_lid; + if (port_lid == lid_no) { + egress_port = 0; + goto Exit; + } + // process each port on this switch + for (i=1; ip_node, i); + + if (p_current_physp && osm_physp_is_valid (p_current_physp)) { + + p_remote_physp = p_current_physp->p_remote_physp; + + if (p_remote_physp && osm_physp_is_valid ( p_remote_physp )) { + osm_node_t *p_opposite_node = osm_physp_get_node_ptr(p_remote_physp); + + if (osm_node_get_type( p_opposite_node ) == IB_NODE_TYPE_CA) { + ib_port_info_t *port_info = &p_remote_physp->port_info; + ib_net16_t remote_port_lid = port_info->base_lid; + if (remote_port_lid == lid_no) { + egress_port = i; + goto Exit; + } + } + } + } + }// for + + Exit: + return egress_port; +} + + +static int randint ( int high ) +{ + int r; + + if (high == 0) return 0; + r = rand(); + high++; + return (r%high); +} + + +/************************************ + + CYCLE EXISTS + +************************************/ + +static int cycle_exists(cdg_vertex_t * start, cdg_vertex_t * current, + cdg_vertex_t * prev, int visit_num) { + + 
cdg_vertex_t * h; + int i, new_visit_num; + int cycle_found = 0; + + if(current!= NULL && current->visiting_number > 0) { + if(visit_num > current->visiting_number && current->seen == 0) { + h = start; + cycle_found = 1; + } + } else { + if(current == NULL) { + current = start; + assert(prev == NULL); + } + + current->visiting_number = visit_num; + + if(prev != NULL) { + prev->next = current; + assert(prev->to == current->from); + assert(prev->visiting_number > 0); + } + + new_visit_num = visit_num + 1; + + for(i=0; inum_dependencies; i++) { + cycle_found = cycle_exists(start, current->dependency[i], current, + new_visit_num); + if(cycle_found == 1) + i = current->num_dependencies; + } + + current->seen = 1; + if(prev != NULL) + prev->next = NULL; + } + + return cycle_found; + + + +} + + + +/************************************ + + REMOVE SEMIPERMANENTDEPEND FOR SP + +************************************/ + +static void remove_semipermanent_depend_for_sp(lash_t *p_lash, int sw, int dest_switch, int lane) +{ + switch_t **switches = p_lash->switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int i_next_switch, output_link, i, next_link, i_next_next_switch, depend =0; + cdg_vertex_t * v; + int found; + + output_link = switches[sw]->routing_table[dest_switch].out_link; + i_next_switch = switches[sw]->phys_connections[output_link]; + + while(sw != dest_switch){ + v = cdg_vertex_matrix[lane][sw][i_next_switch]; + assert(v != NULL); + + if(v->num_using_vertex == 1) { + + cdg_vertex_matrix[lane][sw][i_next_switch] = NULL; + + free(v); + } else { + v->num_using_vertex--; + if(i_next_switch != dest_switch) { + next_link = switches[i_next_switch]->routing_table[dest_switch].out_link; + i_next_next_switch = switches[i_next_switch]->phys_connections[next_link]; + found = 0; + + for(i=0; inum_dependencies; i++) + if(v->dependency[i] == cdg_vertex_matrix[lane][i_next_switch][i_next_next_switch]) { + found = 1; + depend = i; + } + + assert(found); + + 
if(v->num_using_this_depend[depend] == 1) { + for(i=depend; inum_dependencies-1; i++) { + v->dependency[i] = v->dependency[i+1]; + v->num_using_this_depend[i] = v->num_using_this_depend[i+1]; + } + + v->num_dependencies--; + } else + v->num_using_this_depend[depend]--; + } + } + + sw = i_next_switch; + output_link = switches[sw]->routing_table[dest_switch].out_link; + + if(sw != dest_switch) + i_next_switch = switches[sw]->phys_connections[output_link]; + } +} + + + + +/************************************ + + ENQUEUE + +************************************/ + + +static void enqueue(lash_t *p_lash, int sw, int dist, int prev) +{ + switch_t **switches = p_lash->switches; + q_item_t *q_head; + + assert(switches[sw]->q_member == 0); + switches[sw]->q_member = 1; + switches[sw]->dist = dist; + switches[sw]->prev = prev; + + q_head = (q_item_t*) malloc(sizeof(q_item_t)); + q_head->sw = sw; + q_head->next = p_lash->q_head; + p_lash->q_head = q_head; + p_lash->q_count++; +} + + +/************************************ + + DEQUEUE + +************************************/ + +static void dequeue(lash_t *p_lash, int * sw, int * dist, int * prev) +{ + switch_t **switches = p_lash->switches; + q_item_t * q_h, * q_min = NULL, * q_prev; + int min_dist = MAX_INT; + + q_h = p_lash->q_head; + + while(!(q_h == NULL)) { + if (switches[q_h->sw]->dist < min_dist) { + min_dist = switches[q_h->sw]->dist; + q_min = q_h; + } + + q_h = q_h->next; + } + + if(q_min == p_lash->q_head) + p_lash->q_head = p_lash->q_head->next; + else { + q_prev = p_lash->q_head; + while(!(q_prev->next == q_min)) + q_prev = q_prev->next; + + q_prev->next = q_min->next; + } + p_lash->q_count--; + + *sw = q_min->sw; + *dist = switches[q_min->sw]->dist; + *prev = switches[q_min->sw]->prev; + + assert(switches[q_min->sw]->q_member == 1 && !switches[q_min->sw]->mst_member); + switches[q_min->sw]->q_member = 0; + free(q_min); +} + + +/************************************ + + GET PHYS CONNECTION + 
+************************************/ + +static int get_phys_connection(switch_t **switches, int switch_from, int switch_to) +{ + int i = 0; + + for (i = 0; i < switches[switch_from]->num_connections; i++) + if(switches[switch_from]->phys_connections[i] == switch_to) + return i; + assert(1==1); + return i; +} + + +/************************************ + + SHORTEST PATH + +************************************/ + +static void shortest_path(lash_t *p_lash, int ir, int num_switches) +{ + switch_t **switches = p_lash->switches; + int sw, dist, prev, i, channel; + + p_lash->q_head = NULL; + p_lash->q_count = 0; + + enqueue(p_lash, ir,0,NONE); + + while(p_lash->q_count > 0) { + dequeue(p_lash, &sw, &dist, &prev); + switches[sw]->mst_member = 1; + + if(prev != NONE) { + channel = switches[prev]->used_channels; + switches[prev]->dij_channels[channel] = switches[sw]; + switches[prev]->used_channels++; + } + + for(i=0; iadj_matrix[sw][i]==1) { + if(!switches[i]->mst_member) { + if(switches[i]->q_member) { + if(dist+1 == switches[i]->dist) { + channel = switches[sw]->used_channels; + switches[sw]->dij_channels[channel] = switches[i]; + switches[sw]->used_channels++; + } else if(dist+1 < switches[i]->dist) { + switches[i]->dist = dist+1; + switches[i]->prev = sw; + } + } else { + enqueue(p_lash, i,dist+1,sw); + } + } + } + } + } +} + + + +/************************************ + + GENERATE ROUTING FUNC FOR MST + +************************************/ + +static void generate_routing_func_for_mst(lash_t *p_lash, int sw, reachable_dest_t ** destinations) +{ + int i, next_switch; + switch_t **switches = p_lash->switches; + int num_channels = switches[sw]->used_channels; + reachable_dest_t * dest, * i_dest, * concat_dest = NULL, * prev; + + for(i=0; idij_channels[i]->id; + generate_routing_func_for_mst(p_lash, next_switch, &dest); + + i_dest = dest; + prev = i_dest; + + while(i_dest != NULL) { + if(switches[sw]->routing_table[i_dest->switch_id].out_link == NONE) { + 
switches[sw]->routing_table[i_dest->switch_id].out_link = + get_phys_connection(switches, sw, next_switch); + } + + prev = i_dest; + i_dest = i_dest->next; + } + + assert(prev->next == NULL); + prev->next = concat_dest; + concat_dest = dest; + } + + i_dest = (reachable_dest_t*) malloc(sizeof(reachable_dest_t)); + i_dest->switch_id = sw; + i_dest->next = concat_dest; + *destinations = i_dest; +} + + + +/************************************ + + GENERATE CDG FOR SP + +************************************/ + +static void generate_cdg_for_sp(lash_t*p_lash, int sw, int dest_switch, int lane) +{ + unsigned num_switches = p_lash->num_switches; + switch_t **switches = p_lash->switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int next_switch, output_link, j, exists; + cdg_vertex_t * v, * prev = NULL; + + output_link = switches[sw]->routing_table[dest_switch].out_link; + next_switch = switches[sw]->phys_connections[output_link]; + + while(sw != dest_switch) { + + if(cdg_vertex_matrix[lane][sw][next_switch] == NULL) { + v = create_cdg_vertex(num_switches); + + int i; + + for(i=0; idependency[i] = NULL; + v->num_using_this_depend[i] = 0; + } + + v->num_using_vertex = 0; + v->num_dependencies = 0; + v->from = sw; + v->to = next_switch; + v->seen = 0; + v->visiting_number = 0; + v->next = NULL; + v->temp = 1; + v->num_temp_depend = 0; + + cdg_vertex_matrix[lane][sw][next_switch] = v; + } else { + v = cdg_vertex_matrix[lane][sw][next_switch]; + } + + v->num_using_vertex++; + + if(prev!=NULL) { + exists = 0; + + for(j=0; jnum_dependencies; j++) + if(prev->dependency[j] == v) { + exists = 1; + prev->num_using_this_depend[j]++; + } + + if(exists == 0) { + prev->dependency[prev->num_dependencies] = v; + prev->num_using_this_depend[prev->num_dependencies]++; + prev->num_dependencies++; + + assert(prev->num_dependencies < num_switches); + + if(prev->temp==0) + prev->num_temp_depend++; + + } + } + + sw = next_switch; + output_link = 
switches[sw]->routing_table[dest_switch].out_link; + + if(sw != dest_switch) { + assert(output_link != NONE); + next_switch = switches[sw]->phys_connections[output_link]; + } + + prev = v; + } +} + + + +/************************************ + + SET TEMP DEPEND TO PERMANENT FOR SP + +************************************/ + +static void set_temp_depend_to_permanent_for_sp(lash_t *p_lash, int sw, int dest_switch, int lane) +{ + switch_t **switches = p_lash->switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int next_switch, output_link; + cdg_vertex_t * v; + + output_link = switches[sw]->routing_table[dest_switch].out_link; + next_switch = switches[sw]->phys_connections[output_link]; + + while(sw != dest_switch) { + v = cdg_vertex_matrix[lane][sw][next_switch]; + assert(v != NULL); + + if(v->temp == 1) { + v->temp = 0; + } else { + v->num_temp_depend = 0; + } + + sw = next_switch; + output_link = switches[sw]->routing_table[dest_switch].out_link; + + if(sw != dest_switch) + next_switch = switches[sw]->phys_connections[output_link]; + } + +} + + +/************************************ + + REMOVE TEMP DEPEND FOR SP + +************************************/ + +static void remove_temp_depend_for_sp(lash_t *p_lash, int sw, int dest_switch, int lane) +{ + switch_t **switches =p_lash->switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int next_switch, output_link, i; + cdg_vertex_t * v; + + output_link = switches[sw]->routing_table[dest_switch].out_link; + next_switch = switches[sw]->phys_connections[output_link]; + + while(sw != dest_switch) { + v = cdg_vertex_matrix[lane][sw][next_switch]; + assert(v != NULL); + + if(v->temp==1) { + cdg_vertex_matrix[lane][sw][next_switch] = NULL; + free(v); + } else { + assert(v->num_temp_depend <= v->num_dependencies); + v->num_dependencies = v->num_dependencies - v->num_temp_depend; + v->num_temp_depend = 0; + v->num_using_vertex--; + + for(i = v->num_dependencies; inum_switches-1; 
i++) + v->num_using_this_depend[i] = 0; + } + + sw = next_switch; + output_link = switches[sw]->routing_table[dest_switch].out_link; + + if(sw != dest_switch) + next_switch = switches[sw]->phys_connections[output_link]; + + } +} + + +/************************************ + + BALANCE VIRTUAL LANES + +************************************/ + +static void balance_virtual_lanes(lash_t *p_lash, unsigned lanes_needed) +{ + unsigned num_switches = p_lash->num_switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int *num_mst_in_lane = p_lash->num_mst_in_lane; + int ***virtual_location = p_lash->virtual_location; + int min_filled_lane, max_filled_lane, medium_filled_lane, trials; + int old_min_filled_lane, old_max_filled_lane, i, j, new_num_min_lane, new_num_max_lane; + int src, dest, start, next_switch, output_link; + int stop = 0, cycle_found; + + max_filled_lane = 0; + min_filled_lane = lanes_needed-1; + + if(max_filled_lane > 1) + medium_filled_lane = max_filled_lane-1; + + trials = num_mst_in_lane[max_filled_lane]; + if(lanes_needed == 1) + stop = 1; + + while(stop == 0) { + src = abs(rand())%(num_switches); + dest = abs(rand())%(num_switches); + + while(virtual_location[src][dest][max_filled_lane] != 1) { + start = dest; + if(dest == num_switches-1) + dest = 0; + else + dest++; + + while(dest != start && virtual_location[src][dest][max_filled_lane] != 1) { + if(dest == num_switches-1) + dest = 0; + else + dest++; + } + + if(virtual_location[src][dest][max_filled_lane] != 1) { + if(src == num_switches-1) + src = 0; + else + src++; + } + } + + generate_cdg_for_sp(p_lash, src, dest, min_filled_lane); + output_link = p_lash->switches[src]->routing_table[dest].out_link; + next_switch = p_lash->switches[src]->phys_connections[output_link]; + + assert(cdg_vertex_matrix[min_filled_lane][src][next_switch] != NULL); + cycle_found = cycle_exists(cdg_vertex_matrix[min_filled_lane][src][next_switch], NULL, NULL, 1); + + for(i=0; ivisiting_number = 0; + 
cdg_vertex_matrix[min_filled_lane][i][j]->seen = 0; + } + + if(cycle_found == 1) { + remove_temp_depend_for_sp(p_lash, src, dest, min_filled_lane); + virtual_location[src][dest][max_filled_lane] = 2; + trials--; + } else { + set_temp_depend_to_permanent_for_sp(p_lash, src, dest, min_filled_lane); + num_mst_in_lane[max_filled_lane]--; + num_mst_in_lane[min_filled_lane]++; + + remove_semipermanent_depend_for_sp(p_lash, src, dest,max_filled_lane); + virtual_location[src][dest][max_filled_lane] = 0; + virtual_location[src][dest][min_filled_lane] = 1; + p_lash->switches[src]->routing_table[dest].lane = min_filled_lane; + } + + if(trials==0) + stop = 1; + else { + if(num_mst_in_lane[max_filled_lane]-num_mst_in_lane[min_filled_lane] < p_lash->balance_limit) + stop = 1; + } + + old_min_filled_lane = min_filled_lane; + old_max_filled_lane = max_filled_lane; + + new_num_min_lane = MAX_INT; + new_num_max_lane = 0; + + for(i=0; i new_num_max_lane) { + new_num_max_lane = num_mst_in_lane[i]; + max_filled_lane = i; + } + } + + if(old_min_filled_lane != min_filled_lane) { + trials = num_mst_in_lane[max_filled_lane]; + for(i=0; inum_switches; + switch_t *sw; + int i; + + sw = malloc(sizeof(*sw)); + if (!sw) + return NULL; + + memset(sw, 0, sizeof(*sw)); + + sw->id = id; + sw->dist = MAX_INT; + + sw->dij_channels = malloc((num_switches)*sizeof(switch_t*)); + if (!sw->dij_channels) { + free(sw); + return NULL; + } + memset(sw->dij_channels, 0, (num_switches)*sizeof(switch_t*)); + + sw->virtual_physical_port_table = malloc(num_switches*sizeof(int)); + if (!sw->virtual_physical_port_table) { + free(sw->dij_channels); + free(sw); + return NULL; + } + + sw->phys_connections = malloc(num_switches*sizeof(int)); + if (!sw->phys_connections) + return NULL; + + sw->routing_table = malloc(num_switches*sizeof(sw->routing_table[0])); + if (!sw->routing_table) + return NULL; + + for (i = 0; i < num_switches; i++) { + sw->routing_table[i].out_link = NONE; + sw->routing_table[i].lane = NONE; + 
sw->virtual_physical_port_table[i] = -1; + if(i < num_switches-1) + sw->phys_connections[i] = NONE; + } + + sw->p_sw = p_sw; + if (p_sw) + p_sw->priv = sw; + + return sw; +} + +static void switch_delete(switch_t *sw) +{ + if (sw->dij_channels) + free(sw->dij_channels); + if (sw->virtual_physical_port_table) + free(sw->virtual_physical_port_table); + if (sw->phys_connections) + free(sw->phys_connections); + if (sw->routing_table) + free(sw->routing_table); + free(sw); +} + +static void free_lash_structures(lash_t *p_lash) +{ + int i,j,k; + unsigned num_switches = p_lash->num_switches; + osm_log_t *p_log = &p_lash->p_osm->log; + + OSM_LOG_ENTER( p_log, free_lash_structures); + + // free cdg_vertex_matrix + for (i = 0; i < p_lash->vl_min; i++) { + for (j = 0; j < num_switches; j++) { + for (k = 0; k < num_switches; k++) { + if (p_lash->cdg_vertex_matrix[i][j][k]) { + + if (p_lash->cdg_vertex_matrix[i][j][k]->dependency) + free(p_lash->cdg_vertex_matrix[i][j][k]->dependency); + + if (p_lash->cdg_vertex_matrix[i][j][k]->num_using_this_depend) + free(p_lash->cdg_vertex_matrix[i][j][k]->num_using_this_depend); + + free(p_lash->cdg_vertex_matrix[i][j][k]); + } + } + if (p_lash->cdg_vertex_matrix[i][j]) + free(p_lash->cdg_vertex_matrix[i][j]); + } + if (p_lash->cdg_vertex_matrix[i]) + free(p_lash->cdg_vertex_matrix[i]); + } + + if (p_lash->cdg_vertex_matrix) + free(p_lash->cdg_vertex_matrix); + + + // free virtual_location + for (i = 0; i < num_switches; i++) { + for (j = 0; j < num_switches; j++) { + if (p_lash->virtual_location[i][j]) + free(p_lash->virtual_location[i][j]); + } + if (p_lash->virtual_location[i]) + free(p_lash->virtual_location[i]); + } + if (p_lash->virtual_location) + free(p_lash->virtual_location); + + + for (i = 0; i < num_switches; i++) { + if (p_lash->adj_matrix[i]) { + free(p_lash->adj_matrix[i]); + } + } + free(p_lash->adj_matrix); + + if(p_lash->num_mst_in_lane) + free(p_lash->num_mst_in_lane); +} + + +static int init_lash_structures(lash_t 
*p_lash) +{ + unsigned vl_min = p_lash->vl_min; + unsigned num_switches = p_lash->num_switches; + osm_log_t *p_log = &p_lash->p_osm->log; + + OSM_LOG_ENTER( p_log, init_lash_structures); + + int status = IB_SUCCESS; + int i, j, k; + + // initialise cdg_vertex_matrix[num_switches][num_switches][num_switches] + p_lash->cdg_vertex_matrix = (cdg_vertex_t****)malloc(vl_min * sizeof(cdg_vertex_t ****)); + for (i = 0; i < vl_min; i++) { + p_lash->cdg_vertex_matrix[i] =(cdg_vertex_t***) malloc(num_switches * sizeof(cdg_vertex_t ***)); + + if (p_lash->cdg_vertex_matrix[i] == NULL) + goto Exit_Mem_Error; + } + + for (i = 0; i < vl_min; i++) { + for (j = 0; j < num_switches; j++) { + p_lash->cdg_vertex_matrix[i][j] = (cdg_vertex_t**)malloc(num_switches * sizeof(cdg_vertex_t**)); + if (p_lash->cdg_vertex_matrix[i][j] == NULL) + goto Exit_Mem_Error; + + for (k = 0; k < num_switches; k++) { + p_lash->cdg_vertex_matrix[i][j][k] = NULL; + } + } + } + + // initialise virtual_location[num_switches][num_switches][num_layers], + // default value = 0 + p_lash->virtual_location = (int***)malloc(num_switches * sizeof(int ***)); + if (p_lash->virtual_location == NULL) + goto Exit_Mem_Error; + + for (i = 0; i < num_switches; i++) { + p_lash->virtual_location[i] =(int**) malloc(num_switches * sizeof(int **)); + if (p_lash->virtual_location[i] == NULL) + goto Exit_Mem_Error; + } + + for (i = 0; i < num_switches; i++) { + for (j = 0; j < num_switches; j++) { + p_lash->virtual_location[i][j] = (int*)malloc(vl_min * sizeof(int*)); + if (p_lash->virtual_location[i][j] == NULL) + goto Exit_Mem_Error; + for (k = 0; k < vl_min; k++) { + p_lash->virtual_location[i][j][k] = 0; + } + } + } + + // initialise adj_matrix[num_switches][num_switches], default value + // = 0 + p_lash->adj_matrix = (int**)malloc(num_switches * sizeof(int **)); + if (p_lash->adj_matrix == NULL) + goto Exit_Mem_Error; + + for (i = 0; i < num_switches; i++) { + p_lash->adj_matrix[i] =(int*) malloc(num_switches * sizeof(int)); + 
if (p_lash->adj_matrix[i] == NULL) + goto Exit_Mem_Error; + + for (j = 0; j < num_switches; j++) + p_lash->adj_matrix[i][j] = 0; + } + + // initialise num_mst_in_lane[num_switches], default 0 + p_lash->num_mst_in_lane = (int*) malloc(num_switches * sizeof(int));; + if (p_lash->num_mst_in_lane == NULL) + goto Exit_Mem_Error; + memset(p_lash->num_mst_in_lane, 0, + num_switches*sizeof(p_lash->num_mst_in_lane[0])); + + goto Exit; + + Exit_Mem_Error: + status = IB_ERROR; + osm_log(p_log, OSM_LOG_DEBUG, + "lash_init_structures (ERROR) " + "Could not allocate required memeory for LASH errno = : %d, errno for lack of memory = %d\n", + errno, ENOMEM); + + Exit: + OSM_LOG_EXIT( p_log ); + return status; +} + + + + + +static int lash_core(lash_t *p_lash) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + unsigned num_switches = p_lash->num_switches; + switch_t **switches = p_lash->switches; + unsigned lanes_needed = 1; + int i, j, k, dest_switch = 0; + reachable_dest_t * dests, * idest; + int cycle_found = 0; + int v_lane, stop = 0, output_link, i_next_switch; + int status = IB_SUCCESS; + + OSM_LOG_ENTER( p_log, lash_core); + + + for(i=0; inext; + free(idest); + idest = dests; + } + + for(dest_switch=0; dest_switchrouting_table[dest_switch].out_link; + i_next_switch = switches[i]->phys_connections[output_link]; + + assert(p_lash->cdg_vertex_matrix[v_lane][i][i_next_switch] != NULL); + cycle_found = cycle_exists(p_lash->cdg_vertex_matrix[v_lane][i][i_next_switch], NULL, NULL, 1); + + for(j=0; jcdg_vertex_matrix[v_lane][j][k] != NULL) { + p_lash->cdg_vertex_matrix[v_lane][j][k]->visiting_number = 0; + p_lash->cdg_vertex_matrix[v_lane][j][k]->seen = 0; + } + + if(cycle_found == 1) { + remove_temp_depend_for_sp(p_lash, i, dest_switch, v_lane); + v_lane++; + } else { + set_temp_depend_to_permanent_for_sp(p_lash, i, dest_switch, v_lane); + stop = 1; + p_lash->num_mst_in_lane[v_lane]++; + } + } + + switches[i]->routing_table[dest_switch].lane = v_lane; + + if(cycle_found == 1) { + 
generate_cdg_for_sp(p_lash, i, dest_switch, v_lane); + set_temp_depend_to_permanent_for_sp(p_lash, i, dest_switch, v_lane); + + if (lanes_needed + 1 > p_lash->vl_min){ + lanes_needed++; + goto Error_Not_Enough_Lanes; + } + else + lanes_needed++; + + // goto error exit with message + p_lash->num_mst_in_lane[v_lane]++; + } + p_lash->virtual_location[i][dest_switch][v_lane] = 1; + } + + for(j=0; jdij_channels[k] = NULL; + switches[j]->used_channels = 0; + switches[j]->mst_member = switches[j]->q_member = 0; + switches[j]->dist = MAX_INT; + } + } + + osm_log(p_log, OSM_LOG_DEBUG, + "lash_core: " + "Lanes needed: %d, Balancing\n", lanes_needed); + + for(i = 0; inum_mst_in_lane[i]); + } + + balance_virtual_lanes(p_lash, lanes_needed); + + + for(i = 0; inum_mst_in_lane[i]); + } + + goto Exit; + + Error_Not_Enough_Lanes: + status = IB_ERROR; + osm_log(p_log, OSM_LOG_DEBUG, + "lash_core (ERROR): " + "Lane requirements (%d) exceeds available lanes (%d)\n", + p_lash->vl_min, lanes_needed); + Exit: + OSM_LOG_EXIT( p_log ); + return status; +} + + +static unsigned get_lash_id(osm_switch_t *p_sw) +{ + return ((switch_t *)p_sw->priv)->id; +} + + +static void populate_fwd_tbls(lash_t *p_lash) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + osm_subn_t *p_subn = &p_lash->p_osm->subn; + osm_opensm_t *p_osm = p_lash->p_osm; + osm_switch_t *p_sw, *p_next_sw, *p_dst_sw; + uint16_t max_lid_ho, lid = 0; + + OSM_LOG_ENTER( p_log, populate_fwd_tbls ); + + p_next_sw = (osm_switch_t*)cl_qmap_head( &p_subn->sw_guid_tbl ); + + // Go through each swtich individually + while(p_next_sw != (osm_switch_t*)cl_qmap_end( &p_subn->sw_guid_tbl )) { + p_sw = p_next_sw; + p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item ); + + max_lid_ho = osm_switch_get_max_lid_ho(p_sw); + uint64_t current_guid = p_sw->p_node->node_info.port_guid; + switch_t *sw = p_sw->priv; + + memset(p_osm->sm.ucast_mgr.lft_buf, 0xff, IB_LID_UCAST_END_HO + 1); + + for (lid = 1; lid <= max_lid_ho; lid++) { + p_dst_sw = 
get_osm_switch_from_lid(p_lash->p_osm, lid); + + if (p_dst_sw == NULL) { + osm_log(p_log, OSM_LOG_DEBUG, + "populate_fwd_tbls: ERROR " + "LASH fwd NULL Cannot find GUID 0x%016" PRIx64 + " src lash id (%d), src lid no(0x%04X)\n", + cl_ntoh64(current_guid), sw->id, lid); + } else if (p_dst_sw == p_sw) { + uint8_t egress_port = find_port_from_lid(cl_hton16(lid), p_sw); + p_osm->sm.ucast_mgr.lft_buf[lid] = egress_port; + osm_log(p_log, OSM_LOG_DEBUG, + "populate_fwd_tbls: " + "LASH fwd MY SRC SRC GUID 0x%016" PRIx64 + " src lash id (%d), src lid no(0x%04X) src lash port (%d) " + "DST GUID 0x%016" PRIx64 " src lash id (%d), src lash port (%d)\n", + cl_ntoh64(current_guid), -1, lid, egress_port, + cl_ntoh64(current_guid), -1, egress_port); + } else { + unsigned dst_lash_switch_id = get_lash_id(p_dst_sw); + uint8_t lash_egress_port = sw->routing_table[dst_lash_switch_id].out_link; + uint8_t physical_egress_port = sw->virtual_physical_port_table[lash_egress_port]; + + p_osm->sm.ucast_mgr.lft_buf[lid] = physical_egress_port; + osm_log(p_log, OSM_LOG_DEBUG, + "populate_fwd_tbls: " + "LASH fwd SRC GUID 0x%016" PRIx64 " src lash id (%d), " + "src lid no( 0x%04X ) src lash port (%d) " + "DST GUID 0x%016" PRIx64 " src lash id (%d), src lash port (%d)\n", + cl_ntoh64(current_guid), sw->id, lid, lash_egress_port, + cl_ntoh64(p_dst_sw->p_node->node_info.port_guid), + dst_lash_switch_id, physical_egress_port); + } + } // for + osm_ucast_mgr_set_fwd_table(&p_osm->sm.ucast_mgr, p_sw); + } + OSM_LOG_EXIT( p_log ); +} + + + +static void print_fwd_table(IN const osm_switch_t *p_sw) +{ + uint16_t max_lid_ho, lid_ho; + uint64_t switch_guid = osm_lash_get_switch_guid(p_sw); + + max_lid_ho = osm_switch_get_max_lid_ho(p_sw); + printf("FWDTBL: 0x%016" PRIx64 " max LID 0x%04X\n", cl_ntoh64(switch_guid), max_lid_ho); + + // starting at 1, not 0. 
Assuming no LID with an ID of 0 + for (lid_ho = 1; lid_ho <= max_lid_ho; lid_ho++) { + uint8_t port_num = osm_switch_get_port_by_lid( p_sw, lid_ho ); + + if (port_num == OSM_NO_PATH) + printf("0x%04X : UNREACHABLE\n", lid_ho); + else + printf("0x%04X : %d \n", lid_ho, port_num); + } + printf("\n"); +} + + + + +static void print_fwd_tables(lash_t *p_lash) +{ + osm_subn_t *p_subn = &p_lash->p_osm->subn; + osm_switch_t *p_next_sw, *p_sw; + + p_next_sw = (osm_switch_t*)cl_qmap_head( &p_subn->sw_guid_tbl ); + while(p_next_sw != (osm_switch_t*)cl_qmap_end( &p_subn->sw_guid_tbl ) ) { + p_sw = p_next_sw; + p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item ); + + if (p_sw && p_sw->p_node) { + print_fwd_table(p_sw); + } + } +} + + +static void osm_lash_process_switch(lash_t *p_lash, osm_switch_t *p_sw) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int i, port_count; + osm_physp_t *p_current_physp, *p_remote_physp; + unsigned switch_a_lash_id, switch_b_lash_id; + + OSM_LOG_ENTER(p_log, _osm_lash_process_switch); + + switch_a_lash_id = get_lash_id(p_sw); + port_count = osm_node_get_num_physp(p_sw->p_node); + + // starting at port 1, ignoring management port on switch + for (i=1; ip_node, i); + + if (osm_physp_is_valid (p_current_physp)) { + p_remote_physp = p_current_physp->p_remote_physp; + + if (p_remote_physp && osm_physp_is_valid ( p_remote_physp ) && + p_remote_physp->p_node->sw) { + int physical_port_a_num = osm_physp_get_port_num(p_current_physp); + int physical_port_b_num = osm_physp_get_port_num(p_remote_physp); + switch_b_lash_id = get_lash_id(p_remote_physp->p_node->sw); + + if(connect_switches(p_lash, switch_a_lash_id, switch_b_lash_id, + physical_port_a_num, physical_port_b_num) == TRUE) { + osm_log(p_log, OSM_LOG_DEBUG, + "osm_lash_process_switch: " + "LASH SUCSESS connected G 0x%016" PRIx64 + " , lash_id(%u), P(%u) " + " to G 0x%016" PRIx64 " , lash_id(%u) , P(%u)\n", + cl_ntoh64(osm_physp_get_port_guid(p_current_physp)), + switch_a_lash_id, 
physical_port_a_num, + cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)), + switch_b_lash_id, physical_port_b_num); + } + } + } + } + + OSM_LOG_EXIT(p_log); +} + + +static void lash_cleanup(lash_t *p_lash) +{ + osm_subn_t *p_subn = &p_lash->p_osm->subn; + osm_switch_t *p_next_sw, *p_sw; + + /* drop any existing references to old lash switches */ + p_next_sw = (osm_switch_t*)cl_qmap_head( &p_subn->sw_guid_tbl ); + while (p_next_sw != (osm_switch_t*)cl_qmap_end(&p_subn->sw_guid_tbl)) { + p_sw = p_next_sw; + p_next_sw = (osm_switch_t*)cl_qmap_next(&p_sw->map_item); + p_sw->priv = NULL; + } + + if (p_lash->switches) { + unsigned id; + for (id = 0; id < p_lash->num_switches ; id++) + if (p_lash->switches[id]) + switch_delete(p_lash->switches[id]); + free(p_lash->switches); + } + p_lash->switches = NULL; +} + +/* + static int discover_network_properties() + Traverse the topology of the network in order to determine + - the maximum number of switches, + - the minimum number of virtual layers +*/ + +static int discover_network_properties(lash_t *p_lash) +{ + int i = 0, id = 0; + uint8_t vl_min; + osm_subn_t *p_subn = &p_lash->p_osm->subn; + osm_switch_t *p_next_sw, *p_sw; + osm_log_t *p_log = &p_lash->p_osm->log; + + p_lash->num_switches = cl_qmap_count(&p_subn->sw_guid_tbl); + + p_lash->switches = malloc(p_lash->num_switches*sizeof(switch_t *)); + if (!p_lash->switches) + return -1; + memset(p_lash->switches, 0, p_lash->num_switches*sizeof(switch_t *)); + + vl_min = 128; // set to a high value + + p_next_sw = (osm_switch_t*)cl_qmap_head( &p_subn->sw_guid_tbl ); + while(p_next_sw != (osm_switch_t*)cl_qmap_end( &p_subn->sw_guid_tbl ) ) { + p_sw = p_next_sw; + p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item ); + + p_lash->switches[id] = switch_create(p_lash, id, p_sw); + if (!p_lash->switches[id]) + return -1; + id++; + + uint16_t port_count = osm_node_get_num_physp (p_sw->p_node); + + /// Note, ignoring port 0. 
management port + for (i=1; ip_node, i); + + if (p_current_physp && osm_physp_is_valid (p_current_physp) && + p_current_physp->p_remote_physp) { + + ib_port_info_t *p_port_info = &p_current_physp->port_info; + int port_vl_min = ib_port_info_get_op_vls(p_port_info); + if (port_vl_min && port_vl_min < vl_min) + vl_min = port_vl_min; + } + } // for + } // while + + vl_min = 1 << (vl_min - 1); + if (vl_min > 15) vl_min = 15; + + p_lash->vl_min = vl_min; + + osm_log(p_log, OSM_LOG_DEBUG, + "lash discover_network_properties: " + "min operatioanl vl(%d) max_switches(%d)\n", + p_lash->vl_min, p_lash->num_switches); + return 0; +} + + +static void process_switches(lash_t *p_lash) +{ + osm_switch_t *p_sw, *p_next_sw; + osm_subn_t *p_subn = &p_lash->p_osm->subn; + + /* Go through each swithc and process it. i.e build the connection + strucure required by LASH */ + p_next_sw = (osm_switch_t*)cl_qmap_head( &p_subn->sw_guid_tbl ); + while(p_next_sw != (osm_switch_t*)cl_qmap_end( &p_subn->sw_guid_tbl ) ) { + p_sw = p_next_sw; + p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item ); + + osm_lash_process_switch(p_lash, p_sw); + } +} + + +static int lash_process(void *context) +{ + lash_t *p_lash = context; + osm_log_t *p_log = &p_lash->p_osm->log; + int return_status = IB_SUCCESS; + + OSM_LOG_ENTER(p_log, lash_process); + p_lash->balance_limit = 3; + + // everything starts here + lash_cleanup(p_lash); + + discover_network_properties(p_lash); + + return_status = init_lash_structures(p_lash); + if (return_status != IB_SUCCESS) + goto Exit; + + process_switches(p_lash); + + return_status = lash_core(p_lash); + if (return_status != IB_SUCCESS) + goto Exit; + + populate_fwd_tbls(p_lash); + print_fwd_tables(p_lash); + + Exit: + free_lash_structures(p_lash); + OSM_LOG_EXIT(p_log); + + return return_status; +} + +static lash_t* lash_create(osm_opensm_t *p_osm) +{ + lash_t* p_lash; + + p_lash = malloc(sizeof(lash_t)); + if (!p_lash) + return NULL; + + memset(p_lash, 0, sizeof(lash_t)); 
+ p_lash->p_osm = p_osm; + + return(p_lash); +} + +static void lash_delete(void *context) +{ + lash_t *p_lash = context; + if (p_lash->switches) { + unsigned id; + for (id = 0; id < p_lash->num_switches ; id++) + if (p_lash->switches[id]) + switch_delete(p_lash->switches[id]); + free(p_lash->switches); + } + free(p_lash); +} + +uint8_t osm_get_lash_sl(osm_opensm_t *p_osm, + osm_port_t *p_src_port, osm_port_t *p_dst_port) +{ + unsigned dst_id; + osm_switch_t *p_sw; + + if (p_osm->routing_engine.ucast_build_fwd_tables != lash_process) + return OSM_DEFAULT_SL; + + p_sw = get_osm_switch_from_port(p_dst_port); + if (!p_sw) + return OSM_DEFAULT_SL; + dst_id = get_lash_id(p_sw); + + p_sw = get_osm_switch_from_port(p_src_port); + if (!p_sw || !p_sw->priv) + return OSM_DEFAULT_SL; + + return ((switch_t *)p_sw->priv)->routing_table[dst_id].lane; +} + +int osm_ucast_lash_setup(osm_opensm_t * p_osm) +{ + lash_t *p_lash = lash_create(p_osm); + if (!p_lash) + return -1; + + p_osm->routing_engine.context = p_lash; + p_osm->routing_engine.ucast_build_fwd_tables = lash_process; + p_osm->routing_engine.delete = lash_delete; + + return 0; +} -- 1.5.0.rc2.g11a3 From sashak at voltaire.com Mon Jan 29 15:53:25 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 30 Jan 2007 01:53:25 +0200 Subject: [openib-general] QoS in OSM In-Reply-To: <45BE7EDF.4090708@dev.mellanox.co.il> References: <45B768E2.9070604@dev.mellanox.co.il> <1169734241.13608.75454.camel@hal.voltaire.com> <45B8E089.5000804@dev.mellanox.co.il> <1169847771.4555.7676.camel@hal.voltaire.com> <45BE7EDF.4090708@dev.mellanox.co.il> Message-ID: <20070129235325.GG3837@sashak.voltaire.com> On 01:10 Tue 30 Jan , Yevgeny Kliteynik wrote: > Hi guys. > > I've finished the first implementation of QoS-aware PathRecord. > The path selection logic itself is implemented in a separate function > that is called only when QoS in OpenSM is on. 
> It causes some code duplication, but as we've discussed, the idea is to > minimize the changes in the existing logic in OSM. > Tonight the regression testing is running on this OSM version to make > sure that I didn't screw something up. > Since none of the QoS patches has made its way to the trunk yet, the > patch series will be pretty long. It will include: > - QoS policy file parser (Lex & Yacc files that implement grammar, > C & H files that implement parser auxiliary functions) > - Additional fields in path_record_t (instead of 'reserved' fields) > - Additional command line option for OpenSM to specify the QoS > policy file name > - QoS-aware selection of PathRecord. > I'll issue the patch series with all the details in the morning, and then > I'll start working on MultiPath Record. And what about integration with VLArb and SL2VL port's setup? Sasha > > In addition to all the questions that you already have and I haven't answered > yet, I'm sure you'll have many questions and remarks regarding these patches. > > I suggest that we set up a conference call to discuss all these questions - it > might save us a lot of time and clear some issues. > > How about tomorrow morning? (I mean Hal's morning). The earlier the better. > > Please let me know what you think about it. > > Thanks, > > -- Yevgeny > > Hal Rosenstock wrote: > > Hi again Yevgeny, > > > > On Thu, 2007-01-25 at 11:53, Yevgeny Kliteynik wrote: > >> Hi Hal. > >> > >> Hal Rosenstock wrote: > >>> Hi Yevgeny, > >>> > >>> On Wed, 2007-01-24 at 09:10, Yevgeny Kliteynik wrote: > >>>> Hi Hal, Sasha. > >>>> > >>>> Here's a description of the QoS policy file, and an > >>>> example of such file (with more comments inside). > >>> This makes the start of a good document on this. If you add this to > >>> osm/doc, I will incorporate it into the opensm man page. > >> OK, I'll do that.
> >> > >>>> QoS Policy file > >>>> -- > >>>> > >>>> The QoS policy file is divided into 4 sub sections: > >>>> > >>>> * Node Group: a set of HCAs, Routers or Switches that share the same settings. > >>>> A node group might be a partition defined by the partition manager policy in > >>>> terms of GUIDs. > >>> Are these Node or Port Groups ? It looks like port groups from the > >>> below. > >> Good point - it should be "Port Groups". > >> > >>>> Future implementations might provide support for NodeDescription > >>>> based definition of node groups. > >>>> > >>>> * Fabric Setup: > >>>> Defines how the SL2VL and VLArb tables should be setup. This policy definition > >>>> assumes the computation of target behavior should be performed outside of > >>>> OpenSM. > >>>> > >>>> * QoS-Levels Definition: > >>>> This section defines the possible sets of parameters for QoS that a client might > >>>> be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits > >>>> (in case LMC > 0 is used for QoS) and TClass. > >>> How does this relate to/interact with partition configuration ? Also, > >>> what about preexisting QoS ? > >> As I understand from the osm man or from the partition-config.txt, > >> partitions definition is intended to be used for IPoIB only. > >> [quote] > >> sl= - specifies SL for this IPoIB MC group > >> (default is 0) > >> [/quote] > >> > >> I think that QoS policy may only "tighten" the constraints and enforce > >> lower-than-requested values, both in case of partition and in case of > >> preexisting QoS settings. > > > > I'm not following you on this specific point. A specific SL is chosen by > > partition config so how can it be "tightened" ? Does it mean it might be > > changed to a different SL (in which case this QoS config supersedes the > > partition config for SL setting) ? Have you tried this to be sure ? > > > > Are multicast groups handled as part of the QoS definition in the XML syntax ? > > If not, might this be a future addition ?
If it is, how are they > > specified ? > > > > The other half of the original question was how a QoS request is handled > > if the original QoS support is enabled rather than this new QoS support > > in terms of the SA PR and MPR code. > > > >>>> * Matching Rules: > >>>> A list of rules that match an incoming PathRecord request to a QoS-Level. The > >>>> rules are processed in order such as the first match is applied. Each rule is > >>>> built out of set of match expressions which should all match for the rule to > >>>> apply. The matching expressions are defined for the following fields > >>>> - SRC and DST to lists of node groups > >>>> - Service-ID to a list of Service-ID or Service-ID ranges > >>>> - TClass to a list of TClass values or ranges > >>>> > >>>> QoS policy file example > >>>> -- > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> Storage > >>>> our SRP storage targets > >>> Is the use clause more than commentary ? How is it "used" ? > >> The 'use' clause is just a description of the port group that > >> can be used for logging. Other than for logging, it is just a > >> commentary. > >> > >>>> 0x1000000000000001 > >>>> 0x1000000000000002 > >>>> > >>>> > >>>> > >>>> Virtual Servers > >>>> node desc and IB port # > >>>> vs1/HCA-1/P1 > >>>> vs3/HCA-1/P1 > >>>> vs3/HCA-2/P1 > >>> How are port-names used ? > >> The syntax of the port name is as follows: > >> "hostname/CA-num/Pnum" > > > > What's it's purpose ? Is it used somewhere else in the syntax ? > > > >>>> > >>>> > >>>> > >>>> Partition 1 > >>>> default settings > >>>> Part1 > >>>> > >>>> > >>> Is this CA rather than HCA ? (What about TCAs ?) > >> Sure, it should be 'CA'. > > > > Will this be changed ? If so, when ? > > > >>>> > >>>> Routers > >>>> all routers > >>>> ROUTER > >>>> > >>>> > >>>> > >>>> > >>>> > >>> ^^ > >>> Actually, it is SL > >>> assuming the device supports SL2VL mapping as indicate by > >>> IsSLMappingSupported in the PortInfo:CapabilityMask. 
> >>> Will the syntax handle single data VL devices which only implement SL > >>> filtering ? > >> Yes, it should. > >> > >>> Will the QoS manager support this (SL2VL without VLArb > >>> settings) or are these required together ? > >> Yes, it should support sl2vl w/o vlarb settings as well. > >> > >>>> > >>>> > >>>> > >>>> Part1 > >>>> * > >>>> * > >>>> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > >>>> > >>>> > >>>> > >>>> Storage > >>>> > >>>> Storage2 > >>>> > >>>> Storage3 > >>> I don't quite follow across-from/to. > >> Right, the comments there are garbage. Here the explanation: > >> SL2VL table describes VL as function of from-port, to-port, and SL. > >> > >> group_name: > >> It defines sl2vl table where 'to-port's belong to group_name > >> group_name: > >> Same as above, only that this time 'from-port's belong to group_name > >> group_name: > >> sl2vl tables both for 'to-port's 'from-port's that belong to group_name > > > > I'm still not following what is going on here and how this is used. > > > >>>> * > >>>> 1 > >>>> 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> Storage > >>>> > >>>> 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 > >>>> 8:255,9:127,10:63,11:31,12:15,13:7,14:3 > >>>> 10 > >>> What happens if the shape of VLArb indicated here does not match the > >>> device ? > >> The part that sets up the QoS in SM (I'm not writing this part right now), > > > > What is the plan for this ? > > > >> should issue error message in case VLArb definition doesn't match the device > >> properties. > > > > Aside from the error message, is there any additional error handling for > > this ? > > > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> 1 > >>> What does sn mean ? What is it used for ? > >> 'sn' is an id of this qos level definition. > >> It is referenced later in by QoS match rules as 'qos-level-sn' > > > > What is 'sn' short for ? 
> > > >>>> for the lowest priority comm > >>>> 16 > >>>> > >>>> > >>>> > >>>> 2 > >>>> low latency best bandwidth > >>>> 0 > >>>> 7 > >>> What is class ? I saw TClass mentioned earlier. Is this TClass or > >>> something else ? > >> Instead of "TClass" there should be "QoS Class". > >> The value is the PathRecord.qos_class value that should be > >> returned in the path record query response when a certain > >> is applied to the returned path. > > > > So these names need to change to be more consistent ? > > > >>>> > >>>> > >>> If specified, do MTU limit and rate limit add extra limits to be imposed > >>> on what is selected (and realizable) ? > >> Yes > >> > >>> Strictly speaking, couldn't packet lifetime limit also be added to this > >>> syntax here ? I presume it was left out as being not "interesting" as > >>> yet. Is that correct ? > >> I can add packet lifetime limit - it's not a big deal > >> > >>> Also, how are path bits used ? > >> For now I don't do anything with them - we'll discuss this issue in the future. > > > > How are they envisioned to be used ? > > > > Why are they in the syntax now ? Seems inconsistent with PLL. > > > > Should there be a warning if they are specified now since they are not > > used ? > > > >>>> > >>>> 3 > >>>> just an example > >>>> 0 > >>>> 32 > >>>> 1 > >>>> 1 > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> 1 > >>>> low latency by class 7-9 or 11 > >>>> 7-9,11 > >>>> 1 > >>>> > >>>> > >>>> > >>>> 2 > >>>> Storage targets connection> > >>>> Storage > >>>> 22,4719 > >>> What is service ? What does 22.4719 mean ? > >> The syntax is service_id1,service_id1,..., so in the > >> example above these are actually two service ids. > > > > So you can create arbitrary lists of service IDs. What about ranges ? > > Does the syntax support that ? > > > >> As for the exact meaning of this, I'm not sure - I need to think about it... > > > > Let me know. I'd really like to understand the syntax. 
> > > >>>> 3 > >>> What are match-levels used for ? > >> Actually, they are not used - they shouldn't appear here. > >> Somehow it was copy-pasted here from one of the older versions > >> of the policy file. > > > > So can this be updated for what is current ? > > > > Thanks. > > > > -- Hal > > > >> -- Yevgeny > >> > >>> -- Hal > >>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> -- Yevgeny > >>>> > >>>> Yevgeny Kliteynik wrote: > >>>>> Hi Sasha, > >>>>> > >>>>> Sasha Khapyorsky wrote: > >>>>>> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: > >>>>>>> Hi Sasha. > >>>>>>> > >>>>>>> Sasha Khapyorsky wrote: > >>>>>>>> Hi Yevgeny, > >>>>>>>> > >>>>>>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >>>>>>>>> Hi Hal > >>>>>>>>> > >>>>>>>>> The following series of six patches implements QoS policy file parser: > >>>>>>>>> > >>>>>>>>> 1. QoS parser Lex file > >>>>>>>>> 2. QoS parser Lex-generated c file > >>>>>>>>> 3. QoS parser grammar (Yacc) file > >>>>>>>>> 4. QoS parser Yacc-generated grammar c and h file > >>>>>>>>> 5. QoS parser header file that defines parse tree data structures > >>>>>>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files > >>>>>>>> Is there any description of proposed format and functionality? > >>>>>>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few > >>>>>>> minor modifications. You can find the RFC here: > >>>>>>> http://openib.org/pipermail/openib-general/2006-May/022336.html > >>>>>> This was RFC and couple of issues were discussed then. Now you are about > >>>>>> implementation phase and exact format description would be desired. For > >>>>>> example what "few minor modifications" are? > >>>>> I'll prepare an example file with explanations. > >>>>> > >>>>> -- Yevgeny > >>>>> > >>>>>>>> Also what about using human readable formats? > >>>>>>> To me the xml-like format in the RFC looks pretty readable. 
> >>>>>>> It has very limited number of keywords (tags), so it's easy > >>>>>>> to follow and/or to modify. > >>>>>> It is your opinion, not everybody will agree with it (AFAIR this was > >>>>>> discussed too during RFC). > >>>>>> > >>>>>> I would not be care, but I don't know any example of really successful > >>>>>> XML using for configuration purposes (especially where advanced graphical > >>>>>> config editors/viewers were not used). Do you know? > >>>>>> > >>>>>> Sasha > >>>>>> > >>>>> _______________________________________________ > >>>>> openib-general mailing list > >>>>> openib-general at openib.org > >>>>> http://openib.org/mailman/listinfo/openib-general > >>>>> > >>>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >>>>> > > From halr at voltaire.com Mon Jan 29 17:11:07 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 29 Jan 2007 20:11:07 -0500 Subject: [openib-general] QoS in OSM In-Reply-To: <45BE7EDF.4090708@dev.mellanox.co.il> References: <45B768E2.9070604@dev.mellanox.co.il> <1169734241.13608.75454.camel@hal.voltaire.com> <45B8E089.5000804@dev.mellanox.co.il> <1169847771.4555.7676.camel@hal.voltaire.com> <45BE7EDF.4090708@dev.mellanox.co.il> Message-ID: <1170119419.15660.39919.camel@hal.voltaire.com> Hi Yevgeny, On Mon, 2007-01-29 at 18:10, Yevgeny Kliteynik wrote: > Hi guys. > > I've finished the first implementation of QoS-aware PathRecord. > The path selection logic itself is implemented in a separate function > that is called only when QoS in OpenSM is on. > It cases some code duplication, but as we've discussed, the idea is to > minimize the changes in the existing logic in OSM. > Tonight the regression testing is running on this OSM version to make > sure that I didn't screw something up. > Since none of the QoS patches has made its way to the trunk yet, the > patch series will be pretty long. 
It will include: > - QoS policy file parser (Lex & Yacc files that implement grammar, > C & H files that implements parser auxiliary functions) > - Additional fields is path_record_t (instead of 'reserved' fields) > - Additional command line option for OpenSM to specify the QoS > policy file name > - QoS-aware selection of PathRecord. > I'll issue the patch series with all the details in the morning, and then > I'll start working on MultiPath Record. > > In addition to all the questions that you already have and I haven't answered > yet, I'm sure you'll have many questions and remarks regarding these patches. I would like some time to go over the new set of patches. > I suggest that we set up a conference call to discuss all these questions - it > might save us a lot of time and clear some issues. Some might make sense via con call but there are a number of outstanding ones which could be answered ahead of time. I'm not sure why these can't be done on the list. > How about tomorrow morning? (I mean Hal's morning). The earlier the better. I'm not sure that leaves sufficient time for review. I'll look at them tomorrow and we'll figure out a plan from there. -- Hal > Please let me know what you think about it. > Thanks, > > -- Yevgeny > > Hal Rosenstock wrote: > > Hi again Yevgeny, > > > > On Thu, 2007-01-25 at 11:53, Yevgeny Kliteynik wrote: > >> Hi Hal. > >> > >> Hal Rosenstock wrote: > >>> Hi Yevgeny, > >>> > >>> On Wed, 2007-01-24 at 09:10, Yevgeny Kliteynik wrote: > >>>> Hi Hal, Sasha. > >>>> > >>>> Here's a description of the QoS policy file, and an > >>>> example of such file (with more comments inside). > >>> This makes the start of a good document on this. If you add this to > >>> osm/doc, I will incorporate it into the opensm man page. > >> OK, I'll do that. > >> > >>>> QoS Policy file > >>>> -- > >>>> > >>>> The QoS policy file is divided into 4 sub sections: > >>>> > >>>> * Node Group: a set of HCAs, Routers or Switches that share the same settings. 
> >>>> A node groups might be a partition defined by the partition manager policy in > >>>> terms of GUIDs. > >>> Are these Node or Port Groups ? It looks like port groups from the > >>> below. > >> Good point - it should be "Port Groups". > >> > >>>> Future implementations might provide support for NodeDescription > >>>> based definition of node groups. > >>>> > >>>> * Fabric Setup: > >>>> Defines how the SL2VL and VLArb tables should be setup. This policy definition > >>>> assumes the computation of target behavior should be performed outside of > >>>> OpenSM. > >>>> > >>>> * QoS-Levels Definition: > >>>> This section defines the possible sets of parameters for QoS that a client might > >>>> be mapped to. Each set holds: SL and optionally: Max MTU, Max Rate, Path Bits > >>>> (in case LMC > 0 is used for QoS) and TClass. > >>> How does this relate to/interact with partition configuration ? Also, > >>> what about preexisting QoS ? > >> As I understand from the osm man or from the partition-config.txt, > >> partitions definition is intended to be used for IPoIB only. > >> [quote] > >> sl= - specifies SL for this IPoIB MC group > >> (default is 0) > >> [/quote] > >> > >> I think that QoS policy may only "tighten" the constraints and enforce > >> lower-than-requested values, both in case of partition and in case of > >> preexisting QoS settings. > > > > I'm not following you on this specific point. A specific SL is chosen by > > partition config so how can it be "tightened" ? Does it mean it might be > > changed to a different SL (in which case this QoS config superceeds the > > partition config for SL setting) ? Have you tried this to be sure ? > > > > Are multicast groups handled as part QoS definition in the XML syntax ? > > If not, might this be a future addition ? If it is, how are they > > specified ? 
> > > > The other half of the original question was how a QoS request is handled > > if the original QoS support is enabled rather than this new QoS support > > in terms of the SA PR and MPR code. > > > >>>> * Matching Rules: > >>>> A list of rules that match an incoming PathRecord request to a QoS-Level. The > >>>> rules are processed in order such as the first match is applied. Each rule is > >>>> built out of set of match expressions which should all match for the rule to > >>>> apply. The matching expressions are defined for the following fields > >>>> - SRC and DST to lists of node groups > >>>> - Service-ID to a list of Service-ID or Service-ID ranges > >>>> - TClass to a list of TClass values or ranges > >>>> > >>>> QoS policy file example > >>>> -- > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> Storage > >>>> our SRP storage targets > >>> Is the use clause more than commentary ? How is it "used" ? > >> The 'use' clause is just a description of the port group that > >> can be used for logging. Other than for logging, it is just a > >> commentary. > >> > >>>> 0x1000000000000001 > >>>> 0x1000000000000002 > >>>> > >>>> > >>>> > >>>> Virtual Servers > >>>> node desc and IB port # > >>>> vs1/HCA-1/P1 > >>>> vs3/HCA-1/P1 > >>>> vs3/HCA-2/P1 > >>> How are port-names used ? > >> The syntax of the port name is as follows: > >> "hostname/CA-num/Pnum" > > > > What's it's purpose ? Is it used somewhere else in the syntax ? > > > >>>> > >>>> > >>>> > >>>> Partition 1 > >>>> default settings > >>>> Part1 > >>>> > >>>> > >>> Is this CA rather than HCA ? (What about TCAs ?) > >> Sure, it should be 'CA'. > > > > Will this be changed ? If so, when ? > > > >>>> > >>>> Routers > >>>> all routers > >>>> ROUTER > >>>> > >>>> > >>>> > >>>> > >>>> > >>> ^^ > >>> Actually, it is SL > >>> assuming the device supports SL2VL mapping as indicate by > >>> IsSLMappingSupported in the PortInfo:CapabilityMask. 
> >>> Will the syntax handle single data VL devices which only implement SL > >>> filtering ? > >> Yes, it should. > >> > >>> Will the QoS manager support this (SL2VL without VLArb > >>> settings) or are these required together ? > >> Yes, it should support sl2vl w/o vlarb settings as well. > >> > >>>> > >>>> > >>>> > >>>> Part1 > >>>> * > >>>> * > >>>> 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > >>>> > >>>> > >>>> > >>>> Storage > >>>> > >>>> Storage2 > >>>> > >>>> Storage3 > >>> I don't quite follow across-from/to. > >> Right, the comments there are garbage. Here the explanation: > >> SL2VL table describes VL as function of from-port, to-port, and SL. > >> > >> group_name: > >> It defines sl2vl table where 'to-port's belong to group_name > >> group_name: > >> Same as above, only that this time 'from-port's belong to group_name > >> group_name: > >> sl2vl tables both for 'to-port's 'from-port's that belong to group_name > > > > I'm still not following what is going on here and how this is used. > > > >>>> * > >>>> 1 > >>>> 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> Storage > >>>> > >>>> 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 > >>>> 8:255,9:127,10:63,11:31,12:15,13:7,14:3 > >>>> 10 > >>> What happens if the shape of VLArb indicated here does not match the > >>> device ? > >> The part that sets up the QoS in SM (I'm not writing this part right now), > > > > What is the plan for this ? > > > >> should issue error message in case VLArb definition doesn't match the device > >> properties. > > > > Aside from the error message, is there any additional error handling for > > this ? > > > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> 1 > >>> What does sn mean ? What is it used for ? > >> 'sn' is an id of this qos level definition. > >> It is referenced later in by QoS match rules as 'qos-level-sn' > > > > What is 'sn' short for ? 
> > > >>>> for the lowest priority comm > >>>> 16 > >>>> > >>>> > >>>> > >>>> 2 > >>>> low latency best bandwidth > >>>> 0 > >>>> 7 > >>> What is class ? I saw TClass mentioned earlier. Is this TClass or > >>> something else ? > >> Instead of "TClass" there should be "QoS Class". > >> The value is the PathRecord.qos_class value that should be > >> returned in the path record query response when a certain > >> is applied to the returned path. > > > > So these names need to change to be more consistent ? > > > >>>> > >>>> > >>> If specified, do MTU limit and rate limit add extra limits to be imposed > >>> on what is selected (and realizable) ? > >> Yes > >> > >>> Strictly speaking, couldn't packet lifetime limit also be added to this > >>> syntax here ? I presume it was left out as being not "interesting" as > >>> yet. Is that correct ? > >> I can add packet lifetime limit - it's not a big deal > >> > >>> Also, how are path bits used ? > >> For now I don't do anything with them - we'll discuss this issue in the future. > > > > How are they envisioned to be used ? > > > > Why are they in the syntax now ? Seems inconsistent with PLL. > > > > Should there be a warning if they are specified now since they are not > > used ? > > > >>>> > >>>> 3 > >>>> just an example > >>>> 0 > >>>> 32 > >>>> 1 > >>>> 1 > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> 1 > >>>> low latency by class 7-9 or 11 > >>>> 7-9,11 > >>>> 1 > >>>> > >>>> > >>>> > >>>> 2 > >>>> Storage targets connection> > >>>> Storage > >>>> 22,4719 > >>> What is service ? What does 22.4719 mean ? > >> The syntax is service_id1,service_id1,..., so in the > >> example above these are actually two service ids. > > > > So you can create arbitrary lists of service IDs. What about ranges ? > > Does the syntax support that ? > > > >> As for the exact meaning of this, I'm not sure - I need to think about it... > > > > Let me know. I'd really like to understand the syntax. 
> > > >>>> 3 > >>> What are match-levels used for ? > >> Actually, they are not used - they shouldn't appear here. > >> Somehow it was copy-pasted here from one of the older versions > >> of the policy file. > > > > So can this be updated for what is current ? > > > > Thanks. > > > > -- Hal > > > >> -- Yevgeny > >> > >>> -- Hal > >>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> -- Yevgeny > >>>> > >>>> Yevgeny Kliteynik wrote: > >>>>> Hi Sasha, > >>>>> > >>>>> Sasha Khapyorsky wrote: > >>>>>> On 10:46 Sun 21 Jan , Yevgeny Kliteynik wrote: > >>>>>>> Hi Sasha. > >>>>>>> > >>>>>>> Sasha Khapyorsky wrote: > >>>>>>>> Hi Yevgeny, > >>>>>>>> > >>>>>>>> On 17:01 Wed 17 Jan , Yevgeny Kliteynik wrote: > >>>>>>>>> Hi Hal > >>>>>>>>> > >>>>>>>>> The following series of six patches implements QoS policy file parser: > >>>>>>>>> > >>>>>>>>> 1. QoS parser Lex file > >>>>>>>>> 2. QoS parser Lex-generated c file > >>>>>>>>> 3. QoS parser grammar (Yacc) file > >>>>>>>>> 4. QoS parser Yacc-generated grammar c and h file > >>>>>>>>> 5. QoS parser header file that defines parse tree data structures > >>>>>>>>> 6. Changes in makefiles and configure.in file for compiling QoS parser files > >>>>>>>> Is there any description of proposed format and functionality? > >>>>>>> The parser is based on QoS RFC sent by Eitan in May 2006, with a few > >>>>>>> minor modifications. You can find the RFC here: > >>>>>>> http://openib.org/pipermail/openib-general/2006-May/022336.html > >>>>>> This was RFC and couple of issues were discussed then. Now you are about > >>>>>> implementation phase and exact format description would be desired. For > >>>>>> example what "few minor modifications" are? > >>>>> I'll prepare an example file with explanations. > >>>>> > >>>>> -- Yevgeny > >>>>> > >>>>>>>> Also what about using human readable formats? > >>>>>>> To me the xml-like format in the RFC looks pretty readable. 
> >>>>>>> It has very limited number of keywords (tags), so it's easy > >>>>>>> to follow and/or to modify. > >>>>>> It is your opinion, not everybody will agree with it (AFAIR this was > >>>>>> discussed too during RFC). > >>>>>> > >>>>>> I would not be care, but I don't know any example of really successful > >>>>>> XML using for configuration purposes (especially where advanced graphical > >>>>>> config editors/viewers were not used). Do you know? > >>>>>> > >>>>>> Sasha > >>>>>> > >>>>> _______________________________________________ > >>>>> openib-general mailing list > >>>>> openib-general at openib.org > >>>>> http://openib.org/mailman/listinfo/openib-general > >>>>> > >>>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >>>>> > > From HNGUYEN at de.ibm.com Mon Jan 29 19:42:15 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Mon, 29 Jan 2007 22:42:15 -0500 Subject: [openib-general] [openfabrics-ewg] [PATCH ofed1.2 2/2] libehca: change path to ehca.driver for make dist In-Reply-To: <200701291923.29730.ossrosch@linux.vnet.ibm.com> Message-ID: applied both patches From eitan at sw053.yok.mtl.com Mon Jan 29 21:42:57 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Tue, 30 Jan 2007 07:42:57 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-30:normal completion Message-ID: <200701300542.l0U5gvqp002504@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_29_10:06:23_2007 1f5e50 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=409 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 29 OsmStress IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree 
merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: 1 OsmStress IS1-16.topo From dotanb at dev.mellanox.co.il Tue Jan 30 00:44:37 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 30 Jan 2007 10:44:37 +0200 Subject: [openib-general] [mthca] Creation of a SRQ with many WR (> 16K) in kernel level fails Message-ID: <45BF0575.9020507@dev.mellanox.co.il> Hi Roland. I opened bug 331 in the bugzilla with the following content: When one tries to create a SRQ with many WR (> 16K WR), creation of the SRQ fails. The problem appears to be in the file: mthca_srq.c. Here is the problematic code: static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq) { struct mthca_data_seg *scatter; void *wqe; int err; int i; if (pd->ibpd.uobject) return 0; srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); if (!srq->wrid) return -ENOMEM; which means that creating a SRQ with 16K WRs (or more), the driver will try to allocate 16K*8=128K bytes using kmalloc. This is a very high amount of memory to be allocated using kmalloc. The fix can be replacing this kmalloc with a different type of memory allocation. 
Thanks Dotan From erezz at voltaire.com Tue Jan 30 01:08:26 2007 From: erezz at voltaire.com (Erez Zilber) Date: Tue, 30 Jan 2007 11:08:26 +0200 Subject: [openib-general] [openfabrics-ewg] [PATCH ofed-1.2 0/6] ehca (kernel space) patches for ofed-1.2 In-Reply-To: References: Message-ID: <45BF0B0A.2030003@voltaire.com> Hoang-Nam Nguyen wrote: > openfabrics-ewg-bounces at openib.org wrote on 27.01.2007 17:11:34: > >>> PS2: For backport on 2.6.16 resp. SLES10 I saw that there is a >>> hvcall.h under backport/2.6.16/include/linux. However that one >>> is not sufficient for ehca and include/linux is the wrong place. >>> Hence, I'm patching a new one under include/asm. If I'm right, >>> please remove include/linux/hvcall.h! >>> >> I remember this was needed for iser backport for some reason. >> Does someone remember? >> > Can someone from iser group please check this? > On pseries hvcall.h is placed under include/asm which is a link to > include/asm-ppc resp include/asm-powerpc. > Thanks > Nam > > I don't think that we did anything with this in iSER. Erez From ogerlitz at voltaire.com Tue Jan 30 02:05:26 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 30 Jan 2007 12:05:26 +0200 Subject: [openib-general] [mthca] Creation of a SRQ with many WR (> 16K) in kernel level fails In-Reply-To: <45BF0575.9020507@dev.mellanox.co.il> References: <45BF0575.9020507@dev.mellanox.co.il> Message-ID: <45BF1866.3010807@voltaire.com> Dotan Barak wrote: > When one tries to create a SRQ with many WR (> 16K WR), creation of the SRQ > fails. > static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, > struct mthca_srq *srq) > srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); > if (!srq->wrid) > return -ENOMEM; > which means that creating a SRQ with 16K WRs (or more), the driver will try to > allocate 16K*8=128K bytes using kmalloc. This is a very high amount of memory > to be allocated using kmalloc. 
mthca_alloc_wqe_buf has the same problem, as it does qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64), GFP_KERNEL); anyway, the solution that comes into my mind is to disable creating a QP/SRQ for which > 128KB allocations are needed. So mthca_query_device() will set the max_qp_wr and max_srq_wr attributes to values whose derived size still allows to use kmalloc. Or. From vlad at lists.openfabrics.org Tue Jan 30 02:21:20 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Tue, 30 Jan 2007 02:21:20 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070130-0200 daily build status Message-ID: <20070130102120.E7434E607F7@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.13 Passed on powerpc with linux-2.6.19 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.18 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.17 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on powerpc with linux-2.6.13 Passed on ppc64 with 
linux-2.6.16 Passed on ppc64 with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Failed: From jonas at bgm.lt Tue Jan 30 02:29:01 2007 From: jonas at bgm.lt (Jonas Mardosas) Date: Tue, 30 Jan 2007 12:29:01 +0200 Subject: [openib-general] topspin vs ofed ? Message-ID: <45BF1DED.1050607@bgm.lt> Hello, I need some information about InfiniBand drivers. I use Scientific Linux 4.4, and I have now installed the newest kernel, but the topspin drivers for my adapters don't work on the newest kernel. I looked on the Cisco website, and there is the same version of the InfiniBand host adapter drivers that was there before, 3.2.0 (118), so as I understand it I can use OFED-1.1. What are the differences between the topspin drivers and OFED? Which is better? What are your suggestions? Thank you for your responses. -- Jonas Mardosas BGM Sistemu inzinierius M.K.Ciurlionio 17, LT-03104 Vilnius mob.tel. +370 698 74002 mail:jonas at bgm.lt http://www.bgm.lt From bugzilla-daemon at lists.openfabrics.org Tue Jan 30 04:24:14 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Tue, 30 Jan 2007 04:24:14 -0800 (PST) Subject: [openib-general] [Bug 332] New: Data does not get to IPv6 sockets after driver reset Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=332 Summary: Data does not get to IPv6 sockets after driver reset Product: OpenFabrics Linux Version: gen2 Platform: All OS/Version: Other Status: NEW Severity: normal Priority: P3 Component: IPoIB AssignedTo: bugzilla at openib.org ReportedBy: yohadd at mellanox.co.il When there is a socket that sends data over IPv6, and at the same time the driver is doing a reset, after the reset the data does not get to the destination.
More Info: - Over IPv4 the data gets to the remote side after the driver reset has finished. - Running the application again, after the reset, succeeds in sending data. How to reproduce: 1) run opensm on remote side. run on local side: 2) ping6 -I 3) run in another shell: rmmod ib_mthca; modprobe -i ib_mthca You will see that after the reset ping6 continues to fail, even after the driver is up. Output: 64 bytes from fe80::202:c900:100:d061: icmp_seq=1 ttl=64 time=9.77 ms 64 bytes from fe80::202:c900:100:d061: icmp_seq=2 ttl=64 time=0.071 ms 64 bytes from fe80::202:c900:100:d061: icmp_seq=3 ttl=64 time=0.063 ms 64 bytes from fe80::202:c900:100:d061: icmp_seq=4 ttl=64 time=0.084 ms ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From tziporet at mellanox.co.il Tue Jan 30 06:23:07 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 30 Jan 2007 16:23:07 +0200 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures Message-ID: <6C2C79E72C305246B504CBA17B5500C9A0DCBE@mtlexch01.mtl.com> The meeting summary is also available on the Wiki: https://wiki.openfabrics.org/tiki-index.php?page=Teleconf+01-29-2007 Tziporet Abbreviated minutes / summary * We are going for code freeze on Feb 1.
* Alpha1 release will be ready at beginning of next week o There are 3 components that are not yet ready for the code freeze (see details) * Agreed on branching and tagging for the OFED 1.2 (see details) * Approved the way external components will be provided * Decided to have a daily build for the full OFED package Action Items: 1. Tziporet to verify QoS status with OSM 2. Or to review all multicast patches in current build to make sure that all needed changes are in. 3. Each git maintainer: open ofed_1_2 branch till Feb 1. 4. Vlad to have a daily build of the full OFED package 5. Roland to send branch name from which to take libibverbs and libmthca Detailed Minutes * The following components are still under development (to be completed after alpha1): o OSM: QoS and LASH routing o RDS: integration to OFED is ongoing o ipath: driver backport patches are not ready yet * Bonding module will support only SLES10 and RHEL up3 for now. Release tagging and branching: Sources developed in OFA: 1. Each git owner will open a branch with the name ofed_1_2. This branch should be opened on Feb 1. 2. Vlad will open a new directory /pub/ofed_1_2. 3. All ofed_1_2 branches will be cloned to this directory. (Note: libibverbs and libmthca will be cloned from kernel.org for Roland's trees.) 4. Any change that should be included in the next OFED package will be first check-in to the maintainer ofed_1_2 branch. A mail should be sent to Vlad (and cc the list) to pull this change. 5. A tag will be set before any package is build. Tag name convention: ofed_1_2_ where version will be the suffix of OFED package (e.g. 1.2-alpha1) 6. OFED release package will be built based on this tag. 7. There will be a build script (as in OFED 1.1) to enable each owner to build the package for testing. 8. A daily build of the OFED package will be provided on the OFA server MPI packages: 1. MPI packages are provided as source RPMs 2. 
Each MPI owner will have an account on the OFA server and will open a directory named ofed_1_2 3. The SRPM package will be placed in this directory, with version indication in the filename (e.g. ompi-1.2.1-xxx) 4. There will be a file named latest.txt that will contain the package that should be included in the OFED package External packages: Any other external packages that are supplied as SRPMs (e.g. bonding) and not source will use the same method as above. -------------- next part -------------- An HTML attachment was scrubbed... URL: From HNGUYEN at de.ibm.com Tue Jan 30 06:47:19 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Tue, 30 Jan 2007 09:47:19 -0500 Subject: [openib-general] [openfabrics-ewg] [PATCH ofed-1.2 0/6] ehca (kernel space) patches for ofed-1.2 In-Reply-To: <45BF0B0A.2030003@voltaire.com> Message-ID: > > Can someone from iser group please check this? > > On pseries hvcall.h is placed under include/asm which is a link to > > include/asm-ppc resp include/asm-powerpc. > I don't think that we did anything with this in iSER. > Erez Erez, thanks for this info. Vladimir, please remove include/linux/hvcall.h from backport/2.6.16 resp backport/2.6.16sles9. Thanks Nam From kliteyn at dev.mellanox.co.il Tue Jan 30 06:51:24 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 16:51:24 +0200 Subject: [openib-general] [PATCH 0/10] osm: QoS in OpenSM Message-ID: <45BF5B6C.30406@dev.mellanox.co.il> Hi Hal. The following is a series of 10 patches: 1. QoS policy file parser Yacc file 2. QoS policy file parser Lex file 3. QoS policy file parser Yacc & Lex generated files 4. QoS policy file parser header file 5. QoS policy file parser C file with auxiliary functions 6. Compilation changes for QoS policy file parser: Added new files to makefiles. Introduced new configuration switch '--enable-maintainer-mode', which will run Lex & Yacc instead of just using the generated files. 7.
Renamed static function find_prtn_by_name() to non-static osm_prtn_find_by_name() This function will be used later by the PathRecord logic. 8. Added QoS class and service id fields to the path record. 9. Added new command line option for OSM: '-Y' or '--qos_policy_file' 10.Checking PathRecord query for QoS constraints. -- Yevgeny Signed-off-by: Yevgeny Kliteynik From kliteyn at dev.mellanox.co.il Tue Jan 30 06:57:09 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 16:57:09 +0200 Subject: [openib-general] [PATCH 1/10] osm: QoS in OpenSM Message-ID: <45BF5CC5.5000405@dev.mellanox.co.il> QoS policy file parser Yacc file Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_qos_parser.y | 1653 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 1653 insertions(+), 0 deletions(-) diff --git a/osm/opensm/osm_qos_parser.y b/osm/opensm/osm_qos_parser.y new file mode 100644 index 0000000..379e8a1 --- /dev/null +++ b/osm/opensm/osm_qos_parser.y @@ -0,0 +1,1653 @@ +%{ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Grammar of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +static void __osm_qos_parse_tree_init(); + +static char * __osm_qos_parser_strip_white(char * str); + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str); + +static void __osm_qos_parser_port_group_start(); +static void __osm_qos_parser_port_group_end(); + +static void __osm_qos_parser_sl2vl_scope_start(); +static void __osm_qos_parser_sl2vl_scope_end(); + +static void __osm_qos_parser_vlarb_scope_start(); +static void __osm_qos_parser_vlarb_scope_end(); + +static void __osm_qos_parser_qos_level_start(); +static void __osm_qos_parser_qos_level_end(); + +static void __osm_qos_parser_match_rule_start(); +static void __osm_qos_parser_match_rule_end(); + +extern char * yytext; +extern void yyerror (char *s); +extern int yylex (void); +extern FILE * yyin; + +#define OSM_QOS_MAX_LINE_LEN 10000 +char str_buffer[OSM_QOS_MAX_LINE_LEN]; +#define RESET_BUFFER str_buffer[0] = '\0' + +int column_num; +int line_num; + +osm_qos_parse_tree_t * p_qos_parse_tree = NULL; +osm_qos_port_group_t * p_current_port_group = NULL; +osm_qos_sl2vl_scope_t * p_current_sl2vl_scope = NULL; 
+osm_qos_vlarb_scope_t * p_current_vlarb_scope = NULL; +osm_qos_level_t * p_current_qos_level = NULL; +osm_qos_match_rule_t * p_current_qos_match_rule = NULL; + +osm_log_t * p_osm_log = NULL; + +/***************************************************/ + +%} + +%token TK_HEX_NUMBER +%token TK_DEC_NUMBER +%token TK_WHITE +%token TK_DASH +%token TK_DOTDOT +%token TK_COMMA +%token TK_TEXT + +%token TK_XML_VERSION_START +%token TK_XML_VERSION_END +%token TK_QOS_POLICY_START +%token TK_QOS_POLICY_END +%token TK_PORT_GROUPS_START +%token TK_PORT_GROUPS_END +%token TK_PORT_GROUP_START +%token TK_PORT_GROUP_END +%token TK_NAME_START +%token TK_NAME_END +%token TK_USE_START +%token TK_USE_END +%token TK_PORT_GUID_START +%token TK_PORT_GUID_END +%token TK_PORT_NAME_START +%token TK_PORT_NAME_END +%token TK_PARTITION_START +%token TK_PARTITION_END +%token TK_NODE_TYPE_START +%token TK_NODE_TYPE_END +%token TK_QOS_SETUP_START +%token TK_QOS_SETUP_END +%token TK_SL2VL_TABLES_START +%token TK_SL2VL_TABLES_END +%token TK_SL2VL_SCOPE_START +%token TK_SL2VL_SCOPE_END +%token TK_GROUP_START +%token TK_GROUP_END +%token TK_FROM_START +%token TK_FROM_END +%token TK_TO_START +%token TK_TO_END +%token TK_SL2VL_TABLE_START +%token TK_SL2VL_TABLE_END +%token TK_ACROSS_START +%token TK_ACROSS_END +%token TK_ACROSS_FROM_START +%token TK_ACROSS_FROM_END +%token TK_ACROSS_TO_START +%token TK_ACROSS_TO_END +%token TK_VLARB_TABLES_START +%token TK_VLARB_TABLES_END +%token TK_VLARB_SCOPE_START +%token TK_VLARB_SCOPE_END +%token TK_VLARB_HIGH_START +%token TK_VLARB_HIGH_END +%token TK_VLARB_LOW_START +%token TK_VLARB_LOW_END +%token TK_VLARB_HIGH_LIMIT_START +%token TK_VLARB_HIGH_LIMIT_END +%token TK_QOS_LEVELS_START +%token TK_QOS_LEVELS_END +%token TK_QOS_LEVEL_START +%token TK_QOS_LEVEL_END +%token TK_SN_START +%token TK_SN_END +%token TK_SL_START +%token TK_SL_END +%token TK_CLASS_START +%token TK_CLASS_END +%token TK_MTU_LIMIT_START +%token TK_MTU_LIMIT_END +%token TK_RATE_LIMIT_START +%token 
TK_RATE_LIMIT_END +%token TK_PACKET_LIFE_START +%token TK_PACKET_LIFE_END +%token TK_PKEY_START +%token TK_PKEY_END +%token TK_QOS_MATCH_RULES_START +%token TK_QOS_MATCH_RULES_END +%token TK_QOS_MATCH_RULE_START +%token TK_QOS_MATCH_RULE_END +%token TK_QOS_LEVEL_SN_START +%token TK_QOS_LEVEL_SN_END +%token TK_SOURCE_START +%token TK_SOURCE_END +%token TK_DESTINATION_START +%token TK_DESTINATION_END +%token TK_SERVICE_START +%token TK_SERVICE_END + +%start head + +%% + +head: xml_version qos_policy + ; + +xml_version: /* empty */ + | TK_XML_VERSION_START any_text TK_XML_VERSION_END + ; + +qos_policy: /* empty */ + | qos_policy qos_policy_start qos_policy_entries qos_policy_end + ; + +qos_policy_start: TK_QOS_POLICY_START + ; + +qos_policy_end: TK_QOS_POLICY_END + ; + +qos_policy_entries: /* empty */ + | qos_policy_entries qos_policy_entry + ; + +qos_policy_entry: port_groups + | qos_setup + | qos_levels + | qos_match_rules + ; + + /* + * Parsing ... : + * + * + * + * Storage + * our SRP storage targets + * 0x1000000000000001 + * 0x1000000000000002 + * + * + * Virtual Servers + * node desc and IB port # + * vs1/CA-1/P1 + * vs3/CA-1/P1 + * vs3/CA-2/P1 + * + * + * Partition 1 + * default settings + * Part1 + * + * + * Routers + * all routers + * ROUTER + * + * + */ + +port_groups: port_groups_start port_group port_groups_end + ; + +port_groups_start: TK_PORT_GROUPS_START + ; + +port_groups_end: TK_PORT_GROUPS_END + ; + +port_group: single_port_group + | port_group single_port_group + ; + +single_port_group: port_group_start port_group_entries port_group_end + ; + +port_group_start: TK_PORT_GROUP_START { + __osm_qos_parser_port_group_start(); + } + ; + +port_group_end: TK_PORT_GROUP_END { + __osm_qos_parser_port_group_end(); + } + ; + +port_group_entries: /* empty */ + | port_group_entries port_group_entry + ; + +port_group_entry: name + | use + | port_guid + | port_name + | partition + | node_type + ; + + /* + * Parsing ... 
: + * + * + * + * + * Part1 + * * + * * + * 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 + * + * + * Storage + * StorageXX + * StorageYY + * * + * 1 + * 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1 + * + * + * + * + * + * Storage + * * + * 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 + * 8:255,9:127,10:63,11:31,12:15,13:7,14:3 + * 10 + * + * + * + */ + +qos_setup: qos_setup_start qos_setup_items qos_setup_end + ; + +qos_setup_start: TK_QOS_SETUP_START + ; + +qos_setup_end: TK_QOS_SETUP_END + ; + +qos_setup_items: /* empty */ + | qos_setup_items sl2vl_tables + | qos_setup_items vlarb_tables + ; + + /* + * + * + * ... + * + * ... + * + */ + +sl2vl_tables: sl2vl_tables_start sl2vl_scope_items sl2vl_tables_end + ; + +sl2vl_tables_start: TK_SL2VL_TABLES_START + ; + +sl2vl_tables_end: TK_SL2VL_TABLES_END + ; + +sl2vl_scope_items: /* empty */ + | sl2vl_scope_items sl2vl_scope + ; + +sl2vl_scope: sl2vl_scope_start sl2vl_scope_entries sl2vl_scope_end + ; + +sl2vl_scope_start: TK_SL2VL_SCOPE_START { + __osm_qos_parser_sl2vl_scope_start(); + } + ; + +sl2vl_scope_end: TK_SL2VL_SCOPE_END { + __osm_qos_parser_sl2vl_scope_end(); + } + ; + +sl2vl_scope_entries:/* empty */ + | sl2vl_scope_entries sl2vl_scope_entry + ; + +sl2vl_scope_entry: group + | across + | across_from + | across_to + | from + | to + | sl2vl_table + ; + + /* + * + * + * ... + * + * ... 
+ * + */ + +vlarb_tables: vlarb_tables_start vlarb_scope_items vlarb_tables_end + ; + +vlarb_tables_start: TK_VLARB_TABLES_START + ; + +vlarb_tables_end: TK_VLARB_TABLES_END + ; + +vlarb_scope_items: /* empty */ + | vlarb_scope_items vlarb_scope + ; + +vlarb_scope: vlarb_scope_start vlarb_scope_entries vlarb_scope_end + ; + +vlarb_scope_start: TK_VLARB_SCOPE_START { + __osm_qos_parser_vlarb_scope_start(); + } + ; + +vlarb_scope_end: TK_VLARB_SCOPE_END { + __osm_qos_parser_vlarb_scope_end(); + } + ; + +vlarb_scope_entries:/* empty */ + | vlarb_scope_entries vlarb_scope_entry + ; + +vlarb_scope_entry: group + | across + | vlarb_high + | vlarb_low + | vlarb_high_limit + ; + + /* + * Parsing ... : + * + * + * + * 1 + * for the lowest priority comm + * 16 + * 5 + * + * + * 2 + * low latency best bandwidth + * 0 + * 7 + * + * + * 3 + * just an example + * 0 + * 32 + * 1 + * 1 + * + * + */ + +qos_levels: qos_levels_start qos_level_items qos_levels_end + ; + +qos_levels_start: TK_QOS_LEVELS_START + ; + +qos_levels_end: TK_QOS_LEVELS_END + ; + +qos_level_items: /* empty */ + | qos_level_items qos_level_start qos_level_entries qos_level_end + ; + +qos_level_start: TK_QOS_LEVEL_START { + __osm_qos_parser_qos_level_start(); + } + ; + +qos_level_end: TK_QOS_LEVEL_END { + __osm_qos_parser_qos_level_end(); + } + ; + +qos_level_entries: /* empty */ + | qos_level_entries qos_level_entry + ; + +qos_level_entry: sn + | use + | sl + | mtu_limit + | rate_limit + | packet_life + | pkey + | qos_level_class + ; + + /* + * Parsing ... 
: + * + * + * + * + * 1 + * low latency by class 7-9 or 11> + * 7-9,11 + * + * + * 2 + * Storage targets connection> + * Storage + * 22,4719 + * + * + */ + +qos_match_rules: qos_match_rules_start qos_match_rule_items qos_match_rules_end + ; + +qos_match_rules_start: TK_QOS_MATCH_RULES_START + ; + +qos_match_rules_end: TK_QOS_MATCH_RULES_END + ; + +qos_match_rule_items: /* empty */ + | qos_match_rule_items qos_match_rule_start qos_match_rule_entries qos_match_rule_end + ; + +qos_match_rule_start: TK_QOS_MATCH_RULE_START { + __osm_qos_parser_match_rule_start(); + } + ; + +qos_match_rule_end: TK_QOS_MATCH_RULE_END { + __osm_qos_parser_match_rule_end(); + } + ; + +qos_match_rule_entries: /* empty */ + | qos_match_rule_entries qos_match_rule_entry + ; + +qos_match_rule_entry: use + | match_rule_class + | source + | destination + | service + | qos_level_sn + ; + + /* + * values + */ + +name: name_start any_text TK_NAME_END { + /* of - one instance */ + p_current_port_group->name = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +name_start: TK_NAME_START { + /* of - one instance */ + RESET_BUFFER; + if (p_current_port_group->name) + { + yyerror(" has multiple tags"); + return 1; + } + } + ; + +use: use_start any_text TK_USE_END { + /* of ,, - one instance */ + if (p_current_port_group) + p_current_port_group->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_level) + p_current_qos_level->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_match_rule) + p_current_qos_match_rule->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else + CL_ASSERT(0); + } + ; + +use_start: TK_USE_START { + RESET_BUFFER; + if (p_current_port_group) + { + /* of - one instance */ + if (p_current_port_group->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else if (p_current_qos_level) + { + /* of - one instance */ + if (p_current_qos_level->use) + { + yyerror(" has multiple tags"); + return 1; + } + } 
+ else if (p_current_qos_match_rule) + { + /* of - one instance */ + if (p_current_qos_match_rule->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else + CL_ASSERT(0); + } + ; + +port_name: port_name_start any_text TK_PORT_NAME_END { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_names, + p_str_item, + NULL); + } + ; + +port_name_start: TK_PORT_NAME_START { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +port_guid: port_guid_start hex_number TK_PORT_GUID_END { + osm_qos_uint64_vector_item_t * p_uint64_item = + (osm_qos_uint64_vector_item_t *)malloc(sizeof(osm_qos_uint64_vector_item_t)); + + __osm_qos_parser_str2uint64(&p_uint64_item->value, + __osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_guids, + p_uint64_item, + NULL); + } + ; + +port_guid_start: TK_PORT_GUID_START { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +partition: partition_start any_text TK_PARTITION_END { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->partitions, + p_str_item, + NULL); + } + ; + +partition_start: TK_PARTITION_START { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +node_type: node_type_start any_text TK_NODE_TYPE_END { + /* in - any num of instances */ + uint8_t tmp_node_type; + char * clean_str = __osm_qos_parser_strip_white(str_buffer); + + if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_ROUTER) == 0) + tmp_node_type = 
IB_NODE_TYPE_ROUTER; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_CA) == 0) + tmp_node_type = IB_NODE_TYPE_CA; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_SWITCH) == 0) + tmp_node_type = IB_NODE_TYPE_SWITCH; + else + { + yyerror("wrong value"); + return 1; + } + + osm_qos_uint8_vector_item_t * p_uint8_item = + (osm_qos_uint8_vector_item_t *)malloc(sizeof(osm_qos_uint8_vector_item_t)); + + p_uint8_item->value = tmp_node_type; + cl_ptr_vector_insert(&p_current_port_group->node_types, + p_uint8_item, + NULL); + } + ; + +node_type_start: TK_NODE_TYPE_START { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + ; + +group: group_start any_text TK_GROUP_END { + /* in and - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + if (p_current_sl2vl_scope) + cl_ptr_vector_insert(&p_current_sl2vl_scope->groups, + p_str_item, + NULL); + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->groups, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + ; + +group_start: TK_GROUP_START { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + ; + +across: across_start any_text TK_ACROSS_END { + /* in and - any num of instances. 
+ The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + + /* inserting this both to across_to and to across_from */ + if (p_current_sl2vl_scope) { + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->across, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + ; + +across_start: TK_ACROSS_START { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + ; + +across_from: across_from_start any_text TK_ACROSS_FROM_END { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + } + ; + +across_from_start: TK_ACROSS_FROM_START { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + ; + +across_to: across_to_start any_text TK_ACROSS_TO_END { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + ; + +across_to_start: TK_ACROSS_TO_START { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + ; + +from: from_start any_text TK_FROM_END { + /* in - any num of instances. 
+           The value refers to the in */
+        osm_qos_string_vector_item_t * p_str_item =
+            (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t));
+
+        p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer));
+        cl_ptr_vector_insert(&p_current_sl2vl_scope->from,
+                             p_str_item,
+                             NULL);
+    }
+    ;
+
+from_start: TK_FROM_START {
+        /* in - any num of instances.
+           The value refers to the in */
+        CL_ASSERT(p_current_sl2vl_scope);
+        RESET_BUFFER;
+    }
+    ;
+
+to: to_start any_text TK_TO_END {
+        /* in - any num of instances.
+           The value refers to the in */
+        osm_qos_string_vector_item_t * p_str_item =
+            (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t));
+
+        p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer));
+        cl_ptr_vector_insert(&p_current_sl2vl_scope->to,
+                             p_str_item,
+                             NULL);
+    }
+    ;
+
+to_start: TK_TO_START {
+        /* in - any num of instances.
+           The value refers to the in */
+        CL_ASSERT(p_current_sl2vl_scope);
+        RESET_BUFFER;
+    }
+    ;
+
+sl2vl_table: sl2vl_table_start whitespace
+             any_number num_list_wo_whites any_number
+             whitespace TK_SL2VL_TABLE_END {
+        /*
+         * SL2VL table of the current sl2vl scope.
+         *
+         * The text collected in str_buffer must consist of exactly
+         * 16 comma-separated numbers; they are stored, in order, into
+         * p_current_sl2vl_scope->sl2vl_table[].  Any violation is
+         * reported through yyerror() and aborts the parse (return 1).
+         *
+         * The counters are plain unsigned ints on purpose: str_buffer
+         * may hold up to OSM_QOS_MAX_LINE_LEN characters, and an 8-bit
+         * index wraps back to 0 before it can reach the terminator of
+         * a list that is 256 characters or longer, so the scan loops
+         * below would never end.
+         */
+        unsigned int num_commas = 0;
+        unsigned int num_values = 0;
+        unsigned int pos;
+        char * tok = NULL;
+        char * tmp_str = __osm_qos_parser_strip_white(str_buffer);
+
+        /* checking the syntax: 16 values are separated by 15 commas */
+        for (pos = 0; tmp_str[pos] != '\0'; pos++)
+            if (tmp_str[pos] == ',')
+                num_commas++;
+        if (num_commas != 15)
+        {
+            yyerror("wrong number of values in (should be 16)");
+            return 1;
+        }
+        /* two adjacent commas mean an empty value */
+        for (pos = 1; tmp_str[pos] != '\0'; pos++)
+            if ((tmp_str[pos-1] == ',') && (tmp_str[pos] == ','))
+            {
+                yyerror("wrong syntax in ");
+                return 1;
+            }
+
+        /* tokenizing the line */
+        tok = strtok (tmp_str," ,");
+        while (tok != NULL && num_values < 16)
+        {
+            p_current_sl2vl_scope->sl2vl_table[num_values++] = atoi(tok);
+            tok = strtok (NULL, " ,");
+        }
+        /* anything left after the 16th value is an error */
+        if (tok != NULL)
+        {
+            yyerror("wrong syntax in ");
+            return 1;
+        }
+    }
+    ;
+
+sl2vl_table_start: TK_SL2VL_TABLE_START {
+        /* in - one instance of 16 numbers */
+        RESET_BUFFER;
+ } + ; + +sn: sn_start any_number TK_SN_END { + /* in - one instance */ + p_current_qos_level->sn = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->sn_set = TRUE; + } + ; + +sn_start: TK_SN_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->sn_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + ; + +sl: sl_start any_number TK_SL_END { + /* in - one instance */ + p_current_qos_level->sl = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->sl_set = TRUE; + } + ; + +sl_start: TK_SL_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->sl_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + ; + +mtu_limit: mtu_limit_start any_number TK_MTU_LIMIT_END { + /* in - one instance */ + p_current_qos_level->mtu_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->mtu_limit_set = TRUE; + } + ; + +mtu_limit_start: TK_MTU_LIMIT_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->mtu_limit_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + ; + +rate_limit: rate_limit_start any_number TK_RATE_LIMIT_END { + /* in - one instance */ + p_current_qos_level->rate_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->rate_limit_set = TRUE; + } + ; + +rate_limit_start: TK_RATE_LIMIT_START { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->rate_limit_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + ; + +packet_life: packet_life_start any_number TK_PACKET_LIFE_END { + /* in - one instance */ + p_current_qos_level->pkt_life = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->pkt_life_set = TRUE; + } + ; + +packet_life_start: TK_PACKET_LIFE_START { + /* in - one instance */ + 
CL_ASSERT(p_current_qos_level);
+        if (p_current_qos_level->pkt_life_set)
+        {
+            yyerror(" has multiple tags");
+            return 1;
+        }
+        RESET_BUFFER;
+    }
+    ;
+
+pkey: pkey_start any_number TK_PKEY_END {
+        /* in - one instance */
+        p_current_qos_level->pkey = atoi(__osm_qos_parser_strip_white(str_buffer));
+    }
+    ;
+
+pkey_start: TK_PKEY_START {
+        /* in - one instance */
+        CL_ASSERT(p_current_qos_level);
+        RESET_BUFFER;
+    }
+    ;
+
+qos_level_class: qos_level_class_start any_number TK_CLASS_END {
+        /* in - one instance of one number */
+        p_current_qos_level->class = atoi(__osm_qos_parser_strip_white(str_buffer));
+    }
+    ;
+
+qos_level_class_start: TK_CLASS_START {
+        /* in - one instance of one number */
+        CL_ASSERT(p_current_qos_level);
+        RESET_BUFFER;
+    }
+    ;
+
+vlarb_high_limit: vlarb_high_limit_start any_number TK_VLARB_HIGH_LIMIT_END {
+        /* in - one instance of one number */
+        p_current_vlarb_scope->vl_high_limit = atoi(__osm_qos_parser_strip_white(str_buffer));
+    }
+    ;
+
+vlarb_high_limit_start: TK_VLARB_HIGH_LIMIT_START {
+        /* in - one instance of one number */
+        CL_ASSERT(p_current_vlarb_scope);
+        RESET_BUFFER;
+    }
+    ;
+
+vlarb_high: vlarb_high_start whitespace
+            any_number num_list_with_dotdot_wo_whites any_number
+            whitespace TK_VLARB_HIGH_END {
+        /*
+         * "High" arbitration list of the current vlarb scope.
+         *
+         * str_buffer holds pairs of numbers written as "a:b" and
+         * separated by ','.  Every pair becomes one item (sl = a,
+         * credits = b) appended to p_current_vlarb_scope->vlarb_highs.
+         * Malformed input is reported through yyerror() and aborts
+         * the parse (return 1).
+         */
+        unsigned int pos; /* an 8-bit index would wrap on text of 256+ chars */
+        char * tok = NULL;
+        char * tmp_str = __osm_qos_parser_strip_white(str_buffer);
+        osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL;
+
+        /* checking the syntax: two separators must never be adjacent */
+        for (pos = 1; tmp_str[pos] != '\0'; pos++)
+            if ( (tmp_str[pos-1] == ',' || tmp_str[pos-1] == ':') &&
+                 (tmp_str[pos] == ',' || tmp_str[pos] == ':') )
+            {
+                yyerror("wrong syntax in ");
+                return 1;
+            }
+
+        /* tokenizing the line - tokens are consumed two at a time */
+        tok = strtok (tmp_str,":,");
+        while (tok != NULL)
+        {
+            p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t));
+            p_hl_item->sl = atoi(tok);
+            tok = strtok (NULL, ":,");
+            if (tok == NULL)
+            {
+                /* odd number of values: the last one has no partner,
+                   and passing NULL to atoi() is undefined */
+                free(p_hl_item);
+                yyerror("wrong syntax in ");
+                return 1;
+            }
+            p_hl_item->credits = atoi(tok);
+            tok = strtok (NULL, ":,");
+            cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_highs,
+                                 p_hl_item,
+                                 NULL);
+        }
+
+    }
+    ;
+
+vlarb_high_start: TK_VLARB_HIGH_START {
+        /* in - list of pairs of numbers with ':' and ',' */
+        CL_ASSERT(p_current_vlarb_scope);
+        RESET_BUFFER;
+    }
+    ;
+
+vlarb_low: vlarb_low_start whitespace
+           any_number num_list_with_dotdot_wo_whites any_number
+           whitespace TK_VLARB_LOW_END {
+        /*
+         * "Low" arbitration list of the current vlarb scope.
+         *
+         * Same text format and same checks as the "high" list above;
+         * the items are appended to p_current_vlarb_scope->vlarb_lows.
+         */
+        unsigned int pos; /* an 8-bit index would wrap on text of 256+ chars */
+        char * tok = NULL;
+        char * tmp_str = __osm_qos_parser_strip_white(str_buffer);
+        osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL;
+
+        /* checking the syntax: two separators must never be adjacent */
+        for (pos = 1; tmp_str[pos] != '\0'; pos++)
+            if ( (tmp_str[pos-1] == ',' || tmp_str[pos-1] == ':') &&
+                 (tmp_str[pos] == ',' || tmp_str[pos] == ':') )
+            {
+                yyerror("wrong syntax in ");
+                return 1;
+            }
+
+        /* tokenizing the line - tokens are consumed two at a time */
+        tok = strtok (tmp_str,":,");
+        while (tok != NULL)
+        {
+            p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t));
+            p_hl_item->sl = atoi(tok);
+            tok = strtok (NULL, ":,");
+            if (tok == NULL)
+            {
+                /* odd number of values: the last one has no partner,
+                   and passing NULL to atoi() is undefined */
+                free(p_hl_item);
+                yyerror("wrong syntax in ");
+                return 1;
+            }
+            p_hl_item->credits = atoi(tok);
+            tok = strtok (NULL, ":,");
+            cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_lows,
+                                 p_hl_item,
+                                 NULL);
+        }
+
+    }
+    ;
+
+vlarb_low_start: TK_VLARB_LOW_START {
+        /* in - list of pairs of numbers with ':' and ',' */
+        CL_ASSERT(p_current_vlarb_scope);
+        RESET_BUFFER;
+    }
+    ;
+
+source: source_start any_text TK_SOURCE_END {
+        /* in - text */
+        p_current_qos_match_rule->source = strdup(__osm_qos_parser_strip_white(str_buffer));
+    }
+    ;
+
+source_start: TK_SOURCE_START {
+        /* in - text */
+        CL_ASSERT(p_current_qos_match_rule);
+        RESET_BUFFER;
+    }
+    ;
+
+destination: destination_start any_text TK_DESTINATION_END {
+        /* in - text */
+        p_current_qos_match_rule->destination = strdup(__osm_qos_parser_strip_white(str_buffer));
+    }
+    ;
+
+destination_start: TK_DESTINATION_START {
+        /* in - text */
+        CL_ASSERT(p_current_qos_match_rule);
+        RESET_BUFFER;
+    }
+    ;
+
+match_rule_class: match_rule_class_start
whitespace
+                  any_number num_list_with_dash_wo_whites any_number
+                  whitespace TK_CLASS_END {
+        /*
+         * Class list of the current match rule, e.g. "7-9,11".
+         *
+         * str_buffer holds ','-separated entries; each entry is either
+         * a single number or a "low-high" range (given in either
+         * order).  Every number, and every member of every range, is
+         * appended as its own item to
+         * p_current_qos_match_rule->classes.
+         */
+        unsigned int pos;
+        unsigned int class_num; /* wider than uint16_t, see the range loop */
+        uint16_t range_low;
+        uint16_t range_high;
+        char * dash_ptr = NULL;
+        char * tok = NULL;
+        char * tmp_str = __osm_qos_parser_strip_white(str_buffer);
+        osm_qos_uint16_vector_item_t * p_uint16_item = NULL;
+
+        /* checking the syntax: two separators must never be adjacent */
+        for (pos = 1; tmp_str[pos] != '\0'; pos++)
+            if ( (tmp_str[pos] == ',' || tmp_str[pos] == '-') &&
+                 (tmp_str[pos-1] == ',' || tmp_str[pos-1] == '-') )
+            {
+                yyerror("wrong syntax in ");
+                return 1;
+            }
+
+        /* tokenizing the line with ',' being a delimiter; every entry
+           is handled as soon as strtok() returns it */
+        for (tok = strtok (tmp_str," ,"); tok != NULL; tok = strtok (NULL, " ,"))
+        {
+            dash_ptr = strstr(tok,"-");
+            if (dash_ptr != NULL)
+            {
+                /* this is number range: i-j */
+                *dash_ptr = '\0';
+                range_low = atoi(tok);
+                *dash_ptr = '-';
+                range_high = atoi(dash_ptr + 1);
+                if (range_low > range_high)
+                {
+                    uint16_t tmp_num = range_low;
+                    range_low = range_high;
+                    range_high = tmp_num;
+                }
+                /* a uint16_t counter can never exceed a range_high of
+                   65535, so it would wrap and loop forever */
+                for (class_num = range_low; class_num <= range_high; class_num++)
+                {
+                    p_uint16_item = (osm_qos_uint16_vector_item_t *)
+                        malloc(sizeof(osm_qos_uint16_vector_item_t));
+                    p_uint16_item->value = (uint16_t)class_num;
+                    cl_ptr_vector_insert(&p_current_qos_match_rule->classes,
+                                         p_uint16_item,
+                                         NULL);
+                }
+            }
+            else
+            {
+                /* this is a single number */
+                p_uint16_item = (osm_qos_uint16_vector_item_t *)
+                    malloc(sizeof(osm_qos_uint16_vector_item_t));
+                p_uint16_item->value = atoi(tok);
+                cl_ptr_vector_insert(&p_current_qos_match_rule->classes,
+                                     p_uint16_item,
+                                     NULL);
+            }
+        }
+    }
+    ;
+
+match_rule_class_start: TK_CLASS_START {
+        CL_ASSERT(p_current_qos_match_rule);
+        RESET_BUFFER;
+    }
+    ;
+
+service: service_start whitespace
+         any_number num_list_wo_whites any_number
+         whitespace TK_SERVICE_END {
+        /* in - list of ',' delimited numbers */
+        uint32_t i;
+        char * tok = NULL;
+        char * tmp_str =
__osm_qos_parser_strip_white(str_buffer); + osm_qos_uint64_vector_item_t * p_uint64_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ((tmp_str[i-1] == ',') && (tmp_str[i] == ',')) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + tok = strtok (tmp_str," ,"); + while (tok != NULL) + { + p_uint64_item = (osm_qos_uint64_vector_item_t *)malloc(sizeof(osm_qos_uint64_vector_item_t)); + __osm_qos_parser_str2uint64(&p_uint64_item->value,tok); + cl_ptr_vector_insert(&p_current_qos_match_rule->services, + p_uint64_item, + NULL); + tok = strtok (NULL, " ,"); + } + + } + ; + +service_start: TK_SERVICE_START { + /* in - list of ',' delimited numbers */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + ; + +qos_level_sn: qos_level_sn_start any_number TK_QOS_LEVEL_SN_END { + /* in - single number */ + p_current_qos_match_rule->qos_level_sn = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + ; + +qos_level_sn_start: TK_QOS_LEVEL_SN_START { + /* in - single number */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + ; + + /* + * Common part + */ + + +any_text: /* empty */ + | any_text text + ; + +text: pure_text + | any_number + ; + +pure_text: TK_TEXT { + strcat(str_buffer,$1); + free($1); + } + | TK_WHITE { + strcat(str_buffer,$1); + free($1); + } + | TK_DASH { + strcat(str_buffer,$1); + free($1); + } + | TK_DOTDOT { + strcat(str_buffer,$1); + free($1); + } + | TK_COMMA { + strcat(str_buffer,$1); + free($1); + } + ; + +num_list_wo_whites: whitespace + | num_list_wo_whites whitespace + | num_list_wo_whites dec_number + | num_list_wo_whites hex_number + | num_list_wo_whites comma + ; + +num_list_with_dotdot_wo_whites: whitespace + | num_list_with_dotdot_wo_whites whitespace + | num_list_with_dotdot_wo_whites dec_number + | num_list_with_dotdot_wo_whites hex_number + | num_list_with_dotdot_wo_whites comma + | num_list_with_dotdot_wo_whites dotdot + ; + +num_list_with_dash_wo_whites: 
whitespace + | num_list_with_dash_wo_whites whitespace + | num_list_with_dash_wo_whites dec_number + | num_list_with_dash_wo_whites hex_number + | num_list_with_dash_wo_whites comma + | num_list_with_dash_wo_whites dash + ; + +comma: TK_COMMA { + strcat(str_buffer,$1); + free($1); + } + ; + +dotdot: TK_DOTDOT { + strcat(str_buffer,$1); + free($1); + } + ; + +dash: TK_DASH { + strcat(str_buffer,$1); + free($1); + } + ; + +any_number: dec_number + | hex_number + ; + +dec_number: TK_DEC_NUMBER { + strcat(str_buffer,$1); + free($1); + } + ; + +hex_number: TK_HEX_NUMBER { + strcat(str_buffer,$1); + free($1); + } + ; + +whitespace: /* empty */ + | TK_WHITE { + /* swallow whitespace */ + free($1); + } + ; + +%% + +/*************************************************** + ***************************************************/ + +int osm_qos_parse_policy_file( + IN osm_log_t * p_log, + IN const char * policy_file) +{ + int res = 0; + p_osm_log = p_log; + + OSM_LOG_ENTER(p_osm_log, osm_qos_parse); + + p_qos_parse_tree = NULL; + yyin = fopen (policy_file, "r"); + if (!yyin) + { + osm_log(p_osm_log, OSM_LOG_ERROR, + "osm_qos_parse: ERR AC01: " + "Failed opening QoS policy file (%s)\n", + policy_file); + res = 1; + goto Exit; + } + column_num = 1; + line_num = 1; + __osm_qos_parse_tree_init(); + str_buffer[0] = '\0'; + + res = yyparse(); + + if (res != 0) + { + osm_log(p_osm_log, OSM_LOG_ERROR, + "osm_qos_parse: ERR AC02: " + "Failed parsing QoS policy file (%s)\n", + policy_file); + p_qos_parse_tree = NULL; + res = 1; + goto Exit; + } + + Exit: + if (yyin) + fclose(yyin); + OSM_LOG_EXIT(p_osm_log); + return res; +} + +/*************************************************** + ***************************************************/ + +int yywrap() +{ + return(1); +} + +/*************************************************** + ***************************************************/ + +void yyerror (char *s) +{ + OSM_LOG_ENTER(p_osm_log, yyerror); + osm_log(p_osm_log, OSM_LOG_ERROR, + "yyerror: 
ERR AC03: " + "Syntax error (line %d:%d): %s. " + "Last text read: \"%s\"\n", + line_num, column_num, s, __osm_qos_parser_strip_white(yytext)); + OSM_LOG_EXIT(p_osm_log); +} + +/*************************************************** + ***************************************************/ + +static char * __osm_qos_parser_strip_white(char * str) +{ + int i; + for (i = (strlen(str)-1); i >= 0; i--) + { + if (isspace(str[i])) + str[i] = '\0'; + else + break; + } + for (i = 0; i < strlen(str); i++) + { + if (!isspace(str[i])) + break; + } + return &(str[i]); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str) +{ +#if __WORDSIZE == 64 + *p_val = strtoul(str, NULL, 0); +#else + *p_val = strtoull(str, NULL, 0); +#endif +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parse_tree_init() +{ + p_qos_parse_tree = (osm_qos_parse_tree_t *) + malloc(sizeof(osm_qos_parse_tree_t)); + + memset(p_qos_parse_tree, 0, sizeof(osm_qos_parse_tree_t)); + + cl_ptr_vector_init(&p_qos_parse_tree->port_groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->sl2vl_tables, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->vlarb_tables, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->qos_levels, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->qos_match_rules, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_start() +{ + p_current_port_group = (osm_qos_port_group_t *) + malloc(sizeof(osm_qos_port_group_t)); + memset(p_current_port_group, 0, sizeof(osm_qos_port_group_t)); + + 
cl_ptr_vector_init(&p_current_port_group->port_guids, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->port_names, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->partitions, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->node_types, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->port_groups, + p_current_port_group, + NULL); + p_current_port_group = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_start() +{ + p_current_sl2vl_scope = (osm_qos_sl2vl_scope_t *) + malloc(sizeof(osm_qos_sl2vl_scope_t)); + memset(p_current_sl2vl_scope, 0, sizeof(osm_qos_sl2vl_scope_t)); + + cl_ptr_vector_init(&p_current_sl2vl_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->to, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_to, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->sl2vl_tables, + p_current_sl2vl_scope, + NULL); + p_current_sl2vl_scope = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_start() +{ + p_current_vlarb_scope = (osm_qos_vlarb_scope_t *) + 
malloc(sizeof(osm_qos_vlarb_scope_t)); + memset(p_current_vlarb_scope, 0, sizeof(osm_qos_vlarb_scope_t)); + + cl_ptr_vector_init(&p_current_vlarb_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->across, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_highs, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_lows, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->vlarb_tables, + p_current_vlarb_scope, + NULL); + p_current_vlarb_scope = NULL; +} + + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_start() +{ + p_current_qos_level = (osm_qos_level_t *) + malloc(sizeof(osm_qos_level_t)); + memset(p_current_qos_level, 0, sizeof(osm_qos_level_t)); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->qos_levels, + p_current_qos_level, + NULL); + p_current_qos_level = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_match_rule_start() +{ + p_current_qos_match_rule = (osm_qos_match_rule_t *) + malloc(sizeof(osm_qos_match_rule_t)); + memset(p_current_qos_match_rule, 0, sizeof(osm_qos_match_rule_t)); + + cl_ptr_vector_init(&p_current_qos_match_rule->services, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_qos_match_rule->classes, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + 
***************************************************/ + +static void __osm_qos_parser_match_rule_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->qos_match_rules, + p_current_qos_match_rule, + NULL); + p_current_qos_match_rule = NULL; +} + +/*************************************************** + ***************************************************/ + -- 1.4.4.1.GIT From vlad at dev.mellanox.co.il Tue Jan 30 06:58:34 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 30 Jan 2007 16:58:34 +0200 Subject: [openib-general] [openfabrics-ewg] [PATCH ofed-1.2 0/6] ehca (kernel space) patches for ofed-1.2 In-Reply-To: References: Message-ID: <1170169114.13511.0.camel@vladsk-laptop> On Tue, 2007-01-30 at 09:47 -0500, Hoang-Nam Nguyen wrote: > > > Can someone from iser group please check this? > > > On pseries hvcall.h is placed under include/asm which is a link to > > > include/asm-ppc resp include/asm-powerpc. > > I don't think that we did anything with this in iSER. > > Erez > Erez, thanks for this info. > Vladimir, please remove include/linux/hvcall.h from backport/2.6.16 > resp backport/2.6.16sles9. > Thanks > Nam Done. -- Vladimir Sokolovsky Mellanox Technologies Ltd. From kliteyn at dev.mellanox.co.il Tue Jan 30 07:01:44 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:01:44 +0200 Subject: [openib-general] [PATCH 2/10] osm: QoS in OpenSM Message-ID: <45BF5DD8.8070308@dev.mellanox.co.il> QoS policy file parser Lex file Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_qos_parser.l | 253 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 253 insertions(+), 0 deletions(-) diff --git a/osm/opensm/osm_qos_parser.l b/osm/opensm/osm_qos_parser.l new file mode 100644 index 0000000..e57eb9f --- /dev/null +++ b/osm/opensm/osm_qos_parser.l @@ -0,0 +1,253 @@ +%{ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Lexer of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +#define SAVE_POS save_pos() +static void save_pos(); + +extern int column_num; +extern int line_num; +extern FILE * yyin; + +%} + +XML_VERSION_START <\? 
+XML_VERSION_END \?> +QOS_POLICY_START +QOS_POLICY_END <\/qos\-policy> +PORT_GROUPS_START +PORT_GROUPS_END <\/port\-groups> +PORT_GROUP_START +PORT_GROUP_END <\/port\-group> +NAME_START +NAME_END <\/name> +USE_START +USE_END <\/use> +PORT_GUID_START +PORT_GUID_END <\/port\-guid> +PORT_NAME_START +PORT_NAME_END <\/port\-name> +PARTITION_START +PARTITION_END <\/partition> +NODE_TYPE_START +NODE_TYPE_END <\/node\-type> +QOS_SETUP_START +QOS_SETUP_END <\/qos\-setup> +SL2VL_TABLES_START +SL2VL_TABLES_END <\/sl2vl\-tables> +SL2VL_SCOPE_START +SL2VL_SCOPE_END <\/sl2vl\-scope> +GROUP_START +GROUP_END <\/group> +FROM_START +FROM_END <\/from> +TO_START +TO_END <\/to> +SL2VL_TABLE_START +SL2VL_TABLE_END <\/sl2vl\-table> +ACROSS_START +ACROSS_END <\/across> +ACROSS_FROM_START +ACROSS_FROM_END <\/across\-from> +ACROSS_TO_START +ACROSS_TO_END <\/across\-to> +VLARB_TABLES_START +VLARB_TABLES_END <\/vlarb\-tables> +VLARB_SCOPE_START +VLARB_SCOPE_END <\/vlarb\-scope> +VLARB_HIGH_START +VLARB_HIGH_END <\/vlarb\-high> +VLARB_LOW_START +VLARB_LOW_END <\/vlarb\-low> +VLARB_HIGH_LIMIT_START +VLARB_HIGH_LIMIT_END <\/vl\-high\-limit> +QOS_LEVELS_START +QOS_LEVELS_END <\/qos\-levels> +QOS_LEVEL_START +QOS_LEVEL_END <\/qos\-level> +SN_START +SN_END <\/sn> +SL_START +SL_END <\/sl> +CLASS_START +CLASS_END <\/class> +MTU_LIMIT_START +MTU_LIMIT_END <\/mtu\-limit> +RATE_LIMIT_START +RATE_LIMIT_END <\/rate\-limit> +PACKET_LIFE_START +PACKET_LIFE_END <\/packet\-life> +PKEY_START +PKEY_END <\/pkey> +QOS_MATCH_RULES_START +QOS_MATCH_RULES_END <\/qos\-match\-rules> +QOS_MATCH_RULE_START +QOS_MATCH_RULE_END <\/qos\-match\-rule> +QOS_LEVEL_SN_START +QOS_LEVEL_SN_END <\/qos\-level\-sn> +SOURCE_START +SOURCE_END <\/source> +DESTINATION_START +DESTINATION_END <\/destination> +SERVICE_START +SERVICE_END <\/service> + +WHITE [ \t\n]* +COMMENT + +%% + +{WHITE}{XML_VERSION_START}{WHITE} { SAVE_POS; return TK_XML_VERSION_START; } +{WHITE}{XML_VERSION_END}{WHITE} { SAVE_POS; return TK_XML_VERSION_END; } 
+{WHITE}{QOS_POLICY_START}{WHITE} { SAVE_POS; return TK_QOS_POLICY_START; } +{WHITE}{QOS_POLICY_END}{WHITE} { SAVE_POS; return TK_QOS_POLICY_END; } +{WHITE}{PORT_GROUPS_START}{WHITE} { SAVE_POS; return TK_PORT_GROUPS_START; } +{WHITE}{PORT_GROUPS_END}{WHITE} { SAVE_POS; return TK_PORT_GROUPS_END; } +{WHITE}{PORT_GROUP_START}{WHITE} { SAVE_POS; return TK_PORT_GROUP_START; } +{WHITE}{PORT_GROUP_END}{WHITE} { SAVE_POS; return TK_PORT_GROUP_END; } +{WHITE}{NAME_START}{WHITE} { SAVE_POS; return TK_NAME_START; } +{WHITE}{NAME_END}{WHITE} { SAVE_POS; return TK_NAME_END; } +{WHITE}{USE_START}{WHITE} { SAVE_POS; return TK_USE_START; } +{WHITE}{USE_END}{WHITE} { SAVE_POS; return TK_USE_END; } +{WHITE}{PORT_GUID_START}{WHITE} { SAVE_POS; return TK_PORT_GUID_START; } +{WHITE}{PORT_GUID_END}{WHITE} { SAVE_POS; return TK_PORT_GUID_END; } +{WHITE}{PORT_NAME_START}{WHITE} { SAVE_POS; return TK_PORT_NAME_START; } +{WHITE}{PORT_NAME_END}{WHITE} { SAVE_POS; return TK_PORT_NAME_END; } +{WHITE}{PARTITION_START}{WHITE} { SAVE_POS; return TK_PARTITION_START; } +{WHITE}{PARTITION_END}{WHITE} { SAVE_POS; return TK_PARTITION_END; } +{WHITE}{NODE_TYPE_START}{WHITE} { SAVE_POS; return TK_NODE_TYPE_START; } +{WHITE}{NODE_TYPE_END}{WHITE} { SAVE_POS; return TK_NODE_TYPE_END; } +{WHITE}{QOS_SETUP_START}{WHITE} { SAVE_POS; return TK_QOS_SETUP_START; } +{WHITE}{QOS_SETUP_END}{WHITE} { SAVE_POS; return TK_QOS_SETUP_END; } +{WHITE}{SL2VL_TABLES_START}{WHITE} { SAVE_POS; return TK_SL2VL_TABLES_START; } +{WHITE}{SL2VL_TABLES_END}{WHITE} { SAVE_POS; return TK_SL2VL_TABLES_END; } +{WHITE}{SL2VL_SCOPE_START}{WHITE} { SAVE_POS; return TK_SL2VL_SCOPE_START; } +{WHITE}{SL2VL_SCOPE_END}{WHITE} { SAVE_POS; return TK_SL2VL_SCOPE_END; } +{WHITE}{GROUP_START}{WHITE} { SAVE_POS; return TK_GROUP_START; } +{WHITE}{GROUP_END}{WHITE} { SAVE_POS; return TK_GROUP_END; } +{WHITE}{FROM_START}{WHITE} { SAVE_POS; return TK_FROM_START; } +{WHITE}{FROM_END}{WHITE} { SAVE_POS; return TK_FROM_END; } +{WHITE}{TO_START}{WHITE} { 
SAVE_POS; return TK_TO_START; } +{WHITE}{TO_END}{WHITE} { SAVE_POS; return TK_TO_END; } +{WHITE}{SL2VL_TABLE_START}{WHITE} { SAVE_POS; return TK_SL2VL_TABLE_START; } +{WHITE}{SL2VL_TABLE_END}{WHITE} { SAVE_POS; return TK_SL2VL_TABLE_END; } +{WHITE}{ACROSS_START}{WHITE} { SAVE_POS; return TK_ACROSS_START; } +{WHITE}{ACROSS_END}{WHITE} { SAVE_POS; return TK_ACROSS_END; } +{WHITE}{ACROSS_FROM_START}{WHITE} { SAVE_POS; return TK_ACROSS_FROM_START; } +{WHITE}{ACROSS_FROM_END}{WHITE} { SAVE_POS; return TK_ACROSS_FROM_END; } +{WHITE}{ACROSS_TO_START}{WHITE} { SAVE_POS; return TK_ACROSS_TO_START; } +{WHITE}{ACROSS_TO_END}{WHITE} { SAVE_POS; return TK_ACROSS_TO_END; } +{WHITE}{VLARB_TABLES_START}{WHITE} { SAVE_POS; return TK_VLARB_TABLES_START; } +{WHITE}{VLARB_TABLES_END}{WHITE} { SAVE_POS; return TK_VLARB_TABLES_END; } +{WHITE}{VLARB_SCOPE_START}{WHITE} { SAVE_POS; return TK_VLARB_SCOPE_START; } +{WHITE}{VLARB_SCOPE_END}{WHITE} { SAVE_POS; return TK_VLARB_SCOPE_END; } +{WHITE}{VLARB_HIGH_START}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_START; } +{WHITE}{VLARB_HIGH_END}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_END; } +{WHITE}{VLARB_LOW_START}{WHITE} { SAVE_POS; return TK_VLARB_LOW_START; } +{WHITE}{VLARB_LOW_END}{WHITE} { SAVE_POS; return TK_VLARB_LOW_END; } +{WHITE}{VLARB_HIGH_LIMIT_START}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_LIMIT_START; } +{WHITE}{VLARB_HIGH_LIMIT_END}{WHITE} { SAVE_POS; return TK_VLARB_HIGH_LIMIT_END; } +{WHITE}{QOS_LEVELS_START}{WHITE} { SAVE_POS; return TK_QOS_LEVELS_START; } +{WHITE}{QOS_LEVELS_END}{WHITE} { SAVE_POS; return TK_QOS_LEVELS_END; } +{WHITE}{QOS_LEVEL_START}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_START; } +{WHITE}{QOS_LEVEL_END}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_END; } +{WHITE}{SN_START}{WHITE} { SAVE_POS; return TK_SN_START; } +{WHITE}{SN_END}{WHITE} { SAVE_POS; return TK_SN_END; } +{WHITE}{SL_START}{WHITE} { SAVE_POS; return TK_SL_START; } +{WHITE}{SL_END}{WHITE} { SAVE_POS; return TK_SL_END; } +{WHITE}{CLASS_START}{WHITE} { 
SAVE_POS; return TK_CLASS_START; } +{WHITE}{CLASS_END}{WHITE} { SAVE_POS; return TK_CLASS_END; } +{WHITE}{MTU_LIMIT_START}{WHITE} { SAVE_POS; return TK_MTU_LIMIT_START; } +{WHITE}{MTU_LIMIT_END}{WHITE} { SAVE_POS; return TK_MTU_LIMIT_END; } +{WHITE}{RATE_LIMIT_START}{WHITE} { SAVE_POS; return TK_RATE_LIMIT_START; } +{WHITE}{RATE_LIMIT_END}{WHITE} { SAVE_POS; return TK_RATE_LIMIT_END; } +{WHITE}{PACKET_LIFE_START}{WHITE} { SAVE_POS; return TK_PACKET_LIFE_START; } +{WHITE}{PACKET_LIFE_END}{WHITE} { SAVE_POS; return TK_PACKET_LIFE_END; } +{WHITE}{PKEY_START}{WHITE} { SAVE_POS; return TK_PKEY_START; } +{WHITE}{PKEY_END}{WHITE} { SAVE_POS; return TK_PKEY_END; } +{WHITE}{QOS_MATCH_RULES_START}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULES_START; } +{WHITE}{QOS_MATCH_RULES_END}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULES_END; } +{WHITE}{QOS_MATCH_RULE_START}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULE_START; } +{WHITE}{QOS_MATCH_RULE_END}{WHITE} { SAVE_POS; return TK_QOS_MATCH_RULE_END; } +{WHITE}{QOS_LEVEL_SN_START}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_SN_START; } +{WHITE}{QOS_LEVEL_SN_END}{WHITE} { SAVE_POS; return TK_QOS_LEVEL_SN_END; } +{WHITE}{SOURCE_START}{WHITE} { SAVE_POS; return TK_SOURCE_START; } +{WHITE}{SOURCE_END}{WHITE} { SAVE_POS; return TK_SOURCE_END; } +{WHITE}{DESTINATION_START}{WHITE} { SAVE_POS; return TK_DESTINATION_START; } +{WHITE}{DESTINATION_END}{WHITE} { SAVE_POS; return TK_DESTINATION_END; } +{WHITE}{SERVICE_START}{WHITE} { SAVE_POS; return TK_SERVICE_START; } +{WHITE}{SERVICE_END}{WHITE} { SAVE_POS; return TK_SERVICE_END; } + +{WHITE}{COMMENT}{WHITE} { SAVE_POS; } /* swallow comment */ + +0[xX][0-9a-fA-F]+ { SAVE_POS; yylval = strdup(yytext); return TK_HEX_NUMBER; } +[0-9]+ { SAVE_POS; yylval = strdup(yytext); return TK_DEC_NUMBER; } +[ \t\n]+ { SAVE_POS; yylval = strdup(yytext); return TK_WHITE; } +- { SAVE_POS; yylval = strdup(yytext); return TK_DASH; } +: { SAVE_POS; yylval = strdup(yytext); return TK_DOTDOT; } +, { SAVE_POS; yylval = 
strdup(yytext); return TK_COMMA; } +. { SAVE_POS; yylval = strdup(yytext); return TK_TEXT;} + +%% + +static void save_pos() +{ + int i; + for (i = 0; i < yyleng; i++) + { + if (yytext[i] == '\n') + { + line_num ++; + column_num = 1; + } + else + column_num ++; + } +} + -- 1.4.4.1.GIT From dotanb at dev.mellanox.co.il Tue Jan 30 07:26:25 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 30 Jan 2007 17:26:25 +0200 Subject: [openib-general] I created a git tree for the libibverbs man pages Message-ID: <45BF63A1.6090402@dev.mellanox.co.il> Hi all. I created a git tree for the libibverbs man pages in the path: ~dotanb/libibverbs_man_pages.git Roland, can you please take those files and add them to libibverbs? I will be more than happy to see those man pages in OFED 1.2 thanks Dotan From afriedle at open-mpi.org Tue Jan 30 07:27:00 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Tue, 30 Jan 2007 10:27:00 -0500 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <45BE747A.4060900@ichips.intel.com> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> <45BE747A.4060900@ichips.intel.com> Message-ID: <45BF63C4.9040205@open-mpi.org> Sean Hefty wrote: > I believe that this patch lets you can do what you're trying to do. The > group handle would be the returned mgid from the initial join that > created the group. The mgid would need to be passed to other processes > as an IPv6 address, who issue a join request on that group. (The mgid > is available from the rdma_cm_event.param.ud.ah_attr.grh.dgid.) > > Typically, the rdma_cm maps IP addresses to mgids using the ipoib ip > mapping algorithm. This patch avoids that mapping if the upper 32-bits > of the IP address match a specific pattern. Excellent -- is this in a git tree somewhere that I can grab (I'm new to git)? Or, what would be an appropriate tree to apply this to? 
Andrew From jsquyres at cisco.com Tue Jan 30 07:32:27 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 30 Jan 2007 10:32:27 -0500 Subject: [openib-general] I created a git tree for the libibverbs man pages In-Reply-To: <45BF63A1.6090402@dev.mellanox.co.il> References: <45BF63A1.6090402@dev.mellanox.co.il> Message-ID: Ooohhh -- yes, this would be wonderful! I would suggest s/OpenIB/OpenFabrics/ throughout the man pages, though. On Jan 30, 2007, at 10:26 AM, Dotan Barak wrote: > Hi all. > > I created a git tree for the libibverbs man pages in the path: > ~dotanb/libibverbs_man_pages.git > > Roland, can you please take those files and add them to libibverbs? > > I will be more than happy to see those man pages in OFED 1.2 > > > thanks > Dotan > > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From kliteyn at dev.mellanox.co.il Tue Jan 30 07:31:27 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:31:27 +0200 Subject: [openib-general] [PATCH 4/10] osm: QoS in OpenSM Message-ID: <45BF64CF.9000205@dev.mellanox.co.il> QoS policy file parser header file Signed-off-by: Yevgeny Kliteynik --- osm/include/opensm/osm_qos_parser.h | 218 +++++++++++++++++++++++++++++++++++ 1 files changed, 218 insertions(+), 0 deletions(-) diff --git a/osm/include/opensm/osm_qos_parser.h b/osm/include/opensm/osm_qos_parser.h new file mode 100644 index 0000000..4f0e598 --- /dev/null +++ b/osm/include/opensm/osm_qos_parser.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of OSM QoS parser. 
+ * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#ifndef OSM_QOS_PARSER_H +#define OSM_QOS_PARSER_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#define YYSTYPE char * + +#define OSM_QOS_NODE_TYPE_CA "CA" +#define OSM_QOS_NODE_TYPE_SWITCH "SWITCH" +#define OSM_QOS_NODE_TYPE_ROUTER "ROUTER" + +typedef enum {UNDEF, ROUTER, CA, SWITCH} node_type_t; + +/***************************************************/ + +typedef struct osm_qos_string_vector_item_t_ { + cl_map_item_t map_item; + char * str; +} osm_qos_string_vector_item_t; + +typedef struct osm_qos_uint64_vector_item_t_ { + cl_map_item_t map_item; + uint64_t value; +} osm_qos_uint64_vector_item_t; + +typedef struct osm_qos_uint32_vector_item_t_ { + cl_map_item_t map_item; + uint32_t value; +} osm_qos_uint32_vector_item_t; + +typedef struct osm_qos_uint16_vector_item_t_ { + cl_map_item_t map_item; + uint16_t value; +} osm_qos_uint16_vector_item_t; + +typedef struct osm_qos_uint8_vector_item_t_ { + cl_map_item_t map_item; + uint8_t value; +} osm_qos_uint8_vector_item_t; + +typedef struct osm_qos_vlarb_hl_vector_item_t_ { + cl_map_item_t map_item; + uint32_t sl; + uint32_t credits; +} osm_qos_vlarb_hl_vector_item_t; + +/***************************************************/ + +typedef struct osm_qos_port_group_t_ { + cl_map_item_t map_item; + char * name; + char * use; + cl_ptr_vector_t port_guids; /* vector of uint64_t */ + cl_ptr_vector_t port_names; /* vector of string */ + cl_ptr_vector_t partitions; /* vector of string */ + cl_ptr_vector_t node_types; /* vector of uint8_t */ +} osm_qos_port_group_t; + +/***************************************************/ + +typedef struct osm_qos_sl2vl_scope_t_ { + cl_map_item_t map_item; + cl_ptr_vector_t groups; /* vector of string */ + cl_ptr_vector_t from; /* vector of string */ + cl_ptr_vector_t to; /* vector of string */ + cl_ptr_vector_t across_from; /* vector of string */ + 
cl_ptr_vector_t across_to; /* vector of string */ + uint8_t sl2vl_table[16]; +} osm_qos_sl2vl_scope_t; + +/***************************************************/ + +typedef struct osm_qos_vlarb_scope_t_ { + cl_map_item_t map_item; + cl_ptr_vector_t groups; /* vector of string */ + cl_ptr_vector_t across; /* vector of string */ + cl_ptr_vector_t vlarb_highs; /* vector of string */ + cl_ptr_vector_t vlarb_lows; /* vector of string */ + uint32_t vl_high_limit; +} osm_qos_vlarb_scope_t; + +/***************************************************/ + +typedef struct osm_qos_level_t_ { + cl_map_item_t map_item; + char * use; + uint32_t sn; + boolean_t sn_set; + uint32_t sl; + boolean_t sl_set; + uint8_t mtu_limit; + boolean_t mtu_limit_set; + uint8_t rate_limit; + boolean_t rate_limit_set; + uint8_t pkt_life; + boolean_t pkt_life_set; + uint16_t pkey; + boolean_t pkey_set; + uint16_t class; + boolean_t class_set; +} osm_qos_level_t; + +/***************************************************/ + +typedef struct osm_qos_match_rule_t_ { + cl_map_item_t map_item; + char * use; + char * source; + char * destination; + cl_ptr_vector_t services; /* vector of uint64 */ + cl_ptr_vector_t classes; /* vector of uint32 */ + uint32_t qos_level_sn; +} osm_qos_match_rule_t; + +/***************************************************/ + +typedef struct osm_qos_parse_tree_t_ { + cl_map_item_t map_item; + cl_ptr_vector_t port_groups; /* vector of osm_qos_port_group_t */ + cl_ptr_vector_t sl2vl_tables; /* vector of osm_qos_sl2vl_scope_t */ + cl_ptr_vector_t vlarb_tables; /* vector of osm_qos_vlarb_scope_t */ + cl_ptr_vector_t qos_levels; /* vector of osm_qos_level_t */ + cl_ptr_vector_t qos_match_rules; /* vector of osm_qos_match_rule_t */ +} osm_qos_parse_tree_t; + +/***************************************************/ + +int osm_qos_parse_policy_file( + IN osm_log_t * p_log, + IN const char * policy_file); + +void osm_qos_parser_destroy_parse_tree( + IN osm_qos_parse_tree_t * p_qos_parse_tree); + +void 
osm_qos_parser_get_port_group_by_name( + IN osm_qos_parse_tree_t * p_qos_parse_tree, + IN const char * group_name, + OUT osm_qos_port_group_t ** pp_port_group); + +void osm_qos_parser_get_qos_level_by_sn( + IN osm_qos_parse_tree_t * p_qos_parse_tree, + IN uint32_t sn, + OUT osm_qos_level_t ** pp_qos_level); + +boolean_t osm_qos_parser_is_port_in_group( + IN const osm_pr_rcv_t * const p_rcv, + IN osm_physp_t * p_physp, + IN osm_qos_port_group_t * p_port_group); + +void osm_qos_parser_get_qos_level_by_pr( + IN const osm_pr_rcv_t * const p_rcv, + IN const ib_path_rec_t * const p_pr, + IN const osm_port_t* const p_src_port, + IN const osm_port_t* const p_dest_port, + IN const uint16_t dest_lid_ho, + IN const ib_net64_t comp_mask, + OUT osm_qos_level_t ** pp_qos_level); + +/***************************************************/ + +extern osm_qos_parse_tree_t * p_qos_parse_tree; + +/***************************************************/ + +#endif /* ifndef OSM_QOS_PARSER_H */ -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:31:42 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:31:42 +0200 Subject: [openib-general] [PATCH 5/10] osm: QoS in OpenSM Message-ID: <45BF64DE.3010009@dev.mellanox.co.il> QoS policy file parser C file with auxiliary functions Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_qos_parser.c | 548 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 548 insertions(+), 0 deletions(-) diff --git a/osm/opensm/osm_qos_parser.c b/osm/opensm/osm_qos_parser.c new file mode 100644 index 0000000..690535a --- /dev/null +++ b/osm/opensm/osm_qos_parser.c @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * OSM QoS parser functions. 
+ * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include +#include + +/********************************************************************** + **********************************************************************/ + +boolean_t osm_qos_parser_is_port_in_group( + IN const osm_pr_rcv_t * const p_rcv, + IN osm_physp_t * p_physp, + IN osm_qos_port_group_t * p_port_group) +{ + uint16_t i; + osm_node_t * p_node = osm_physp_get_node_ptr(p_physp); + osm_qos_uint8_vector_item_t * p_uint8_vector_item = NULL; + osm_qos_uint64_vector_item_t * p_uint64_vector_item = NULL; + osm_qos_string_vector_item_t * p_string_vector_item = NULL; + osm_prtn_t * p_prtn = NULL; + ib_net64_t port_guid = osm_physp_get_port_guid(p_physp); + uint64_t port_guid_ho = cl_ntoh64(port_guid); + uint8_t node_type = osm_node_get_type(p_node); + char desc[IB_NODE_DESCRIPTION_SIZE + 1]; + + for (i = 0; i < cl_ptr_vector_get_size(&p_port_group->port_guids); i++) + { + cl_ptr_vector_at(&p_port_group->port_guids, i, (void **)&p_uint64_vector_item); + if ( memcmp(&p_uint64_vector_item->value, &port_guid_ho, sizeof(uint64_t)) == 0 ) + return TRUE; + } + + for (i = 0; i < cl_ptr_vector_get_size(&p_port_group->node_types); i++) + { + cl_ptr_vector_at(&p_port_group->node_types, i, (void **)&p_uint8_vector_item); + if (p_uint8_vector_item->value == node_type) + return TRUE; + } + + for (i = 0; i < cl_ptr_vector_get_size(&p_port_group->partitions); i++) + { + cl_ptr_vector_at(&p_port_group->partitions, i, (void **)&p_string_vector_item); + p_prtn = osm_prtn_find_by_name(p_rcv->p_subn, p_string_vector_item->str); + if (!p_prtn) + continue; + if (osm_prtn_is_guid(p_prtn, port_guid)) + return TRUE; + } + + memcpy(desc, p_node->node_desc.description, IB_NODE_DESCRIPTION_SIZE); + desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; + printf("Description: <%s>\n\n",desc); + + for (i = 0; i < cl_ptr_vector_get_size(&p_port_group->port_names); i++) + { + 
cl_ptr_vector_at(&p_port_group->port_names, i, (void **)&p_string_vector_item); + //p_string_vector_item->str; + } + + /* + * TODO: //cl_ptr_vector_t port_names; (vector of string) + * + * + * vs1/HCA-1/P1 + * vs3/HCA-1/P1 + */ + + return FALSE; +} + +/********************************************************************** + **********************************************************************/ + +void osm_qos_parser_get_qos_level_by_pr( + IN const osm_pr_rcv_t * const p_rcv, + IN const ib_path_rec_t * const p_pr, + IN const osm_port_t* const p_src_port, + IN const osm_port_t* const p_dest_port, + IN const uint16_t dest_lid_ho, + IN const ib_net64_t comp_mask, + OUT osm_qos_level_t ** pp_qos_level) +{ + osm_qos_port_group_t * p_port_group = NULL; + osm_qos_match_rule_t * p_qos_match_rule = NULL; + osm_qos_level_t * p_qos_level = NULL; + osm_node_t * p_src_node; + osm_node_t * p_dest_node; + osm_physp_t * p_src_physp; + osm_physp_t * p_dest_physp; + uint8_t i; + + OSM_LOG_ENTER( p_rcv->p_log, osm_qos_parser_get_qos_level_by_pr ); + + *pp_qos_level = NULL; + if (!p_qos_parse_tree) + goto Exit; + + p_src_physp = osm_port_get_default_phys_ptr( p_src_port ); + p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); + + p_src_node = osm_physp_get_node_ptr( p_src_physp ); + p_dest_node = osm_physp_get_node_ptr( p_dest_physp ); + + /* if destination node is switch, the dest port should be port 0 */ + if( p_dest_node->sw ) + p_dest_physp = osm_switch_get_route_by_lid(p_dest_node->sw, cl_ntoh16(dest_lid_ho)); + + + /* Go over all QoS match rules. + For every rule, check whether this pathrecord request + (src/dst ports) should comply a certain QoS match rule. 
*/ + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_match_rules); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->qos_match_rules, i, (void **)&p_qos_match_rule); + + if (p_qos_match_rule->source) + { + osm_qos_parser_get_port_group_by_name(p_qos_parse_tree, + p_qos_match_rule->source, + &p_port_group); + if (p_port_group) + { + /* Port group with the given name found. + Now check if the src port is member of this group */ + if ( !osm_qos_parser_is_port_in_group(p_rcv, p_src_physp, p_port_group) ) + { + /* Src port is not a member of the specified port group. + Go to the next match rule. */ + continue; + } + } + } + + if (p_qos_match_rule->destination) + { + osm_qos_parser_get_port_group_by_name(p_qos_parse_tree, + p_qos_match_rule->destination, + &p_port_group); + if (p_port_group) + { + /* Port group with the given name found. + Now check if the dest port is member of this group */ + if ( !osm_qos_parser_is_port_in_group(p_rcv, p_dest_physp, p_port_group) ) + { + /* Dest port is not a member of the specified port group. + Go to the next match rule. */ + continue; + } + } + } + + /* Now check QoS class of the path record */ + + if (cl_ptr_vector_get_size(&p_qos_match_rule->classes) > 0) + { + /* This match rule specifies list of QoS classes. + Check whether this path record request contains one these classes. */ + if (comp_mask & IB_PR_COMPMASK_QOS_CLASS) + { + boolean_t found = FALSE; + osm_qos_uint32_vector_item_t * p_uint32_vector_item = NULL; + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_match_rule->classes); i++) + { + cl_ptr_vector_at(&p_qos_match_rule->classes, i, (void **)&p_uint32_vector_item); + if (p_uint32_vector_item->value == ib_path_rec_qos_class(p_pr)) + { + found = TRUE; + break; + } + } + if (!found) + { + /* Path record QoS class doesn't match anything in the match rule. + Go to the next match rule. */ + continue; + } + } + else + { + /* Path record doesn't have QoS class. + Go to the next match rule. 
*/ + continue; + } + } + + /* Done checking QoS class of the path record. + Now check service id. */ + + if (cl_ptr_vector_get_size(&p_qos_match_rule->services) > 0) + { + /* This match rule specifies list of services. + Check whether this path record request contains one these services. */ + if (comp_mask & IB_PR_COMPMASK_SERVICEID) + { + boolean_t found = FALSE; + osm_qos_uint64_vector_item_t * p_uint64_vector_item = NULL; + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_match_rule->services); i++) + { + cl_ptr_vector_at(&p_qos_match_rule->services, i, (void **)&p_uint64_vector_item); + if (memcmp(&p_uint64_vector_item->value,&p_pr->service_id,sizeof(uint64_t)) == 0) + { + found = TRUE; + break; + } + } + if (!found) + { + /* Path record service id doesn't match anything in the match rule. + Go to the next match rule. */ + continue; + } + } + else + { + /* Path record doesn't have service id. + Go to the next match rule. */ + continue; + } + } + + /* Done checking service id of the path record. + We had match on every criteria, so the query matches this match rule. + Now get the QoS Level that has to be applied on this path record. */ + osm_qos_parser_get_qos_level_by_sn(p_qos_parse_tree, + p_qos_match_rule->qos_level_sn, + &p_qos_level); + break; + } + + if (p_qos_level) + { + /* there is a QoS Level that should be applied to this query */ + *pp_qos_level = p_qos_level; + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "osm_qos_parser_get_qos_level_by_pr: " + "PathRecord request:" + "Src port 0x%016" PRIx64 ", " + "Dst port 0x%016" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + cl_ntoh64(osm_physp_get_port_guid(p_dest_physp)) ); + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "osm_qos_parser_get_qos_level_by_pr: " + "Applying QoS Level %d (%s)\n", + p_qos_level->sn, + (p_qos_level->use)? 
p_qos_level->use : "no description" ); + } + + Exit: + OSM_LOG_EXIT( p_rcv->p_log ); +} + +/*************************************************** + ***************************************************/ + +void osm_qos_parser_get_port_group_by_name( + IN osm_qos_parse_tree_t * p_qos_parse_tree, + IN const char * group_name, + OUT osm_qos_port_group_t ** pp_port_group) +{ + uint32_t i; + osm_qos_port_group_t * p_port_group = NULL; + *pp_port_group = NULL; + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->port_groups); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->port_groups, i, (void **)&p_port_group); + + if (!p_port_group->name) + continue; + + if (strcasecmp(p_port_group->name,group_name) == 0) + *pp_port_group = p_port_group; + return; + } +} + +/*************************************************** + ***************************************************/ + +void osm_qos_parser_get_qos_level_by_sn( + IN osm_qos_parse_tree_t * p_qos_parse_tree, + IN uint32_t sn, + OUT osm_qos_level_t ** pp_qos_level) +{ + uint32_t i; + osm_qos_level_t * p_qos_level = NULL; + + *pp_qos_level = NULL; + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_levels); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->qos_levels, i, (void **)&p_qos_level); + + if (p_qos_level->sn == sn) + *pp_qos_level = p_qos_level; + return; + } +} + +/*************************************************** + ***************************************************/ + +/* free all the memory occupied by the parse tree data structure */ +void osm_qos_parser_destroy_parse_tree( + IN osm_qos_parse_tree_t * p_qos_parse_tree) +{ + uint32_t i; + uint32_t j; + + osm_qos_port_group_t * p_port_group = NULL; + osm_qos_sl2vl_scope_t * p_sl2vl_scope = NULL; + osm_qos_vlarb_scope_t * p_vlarb_scope = NULL; + osm_qos_level_t * p_qos_level = NULL; + osm_qos_match_rule_t * p_qos_match_rule = NULL; + + osm_qos_string_vector_item_t * p_str_vector_item = NULL; + osm_qos_uint64_vector_item_t * p_uint64_vector_item = 
NULL; + osm_qos_uint32_vector_item_t * p_uint32_vector_item = NULL; + osm_qos_vlarb_hl_vector_item_t * p_vlarb_hl_vector_item = NULL; + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->port_groups); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->port_groups, i, (void **)&p_port_group); + + if (p_port_group->name) + free(p_port_group->name); + if (p_port_group->use) + free(p_port_group->use); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->port_guids); j++) + { + cl_ptr_vector_at(&p_port_group->port_guids, j, (void **)&p_uint64_vector_item); + free(p_uint64_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->port_guids); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->port_names); j++) + { + cl_ptr_vector_at(&p_port_group->port_names, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->port_names); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->partitions); j++) + { + cl_ptr_vector_at(&p_port_group->partitions, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->partitions); + + for (j = 0; j < cl_ptr_vector_get_size(&p_port_group->node_types); j++) + { + cl_ptr_vector_at(&p_port_group->node_types, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_port_group->node_types); + + free(p_port_group); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->port_groups); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->sl2vl_tables); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->sl2vl_tables, i, (void **)&p_sl2vl_scope); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->groups); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->groups, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + 
free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->groups); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->from); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->from, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->from); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->to); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->to, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->to); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->across_from); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->across_from, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->across_from); + + for (j = 0; j < cl_ptr_vector_get_size(&p_sl2vl_scope->across_to); j++) + { + cl_ptr_vector_at(&p_sl2vl_scope->across_to, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_sl2vl_scope->across_to); + + free(p_sl2vl_scope); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->sl2vl_tables); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->vlarb_tables); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->vlarb_tables, i, (void **)&p_vlarb_scope); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->groups); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->groups, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->groups); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->across); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->across, j, (void **)&p_str_vector_item); + if (p_str_vector_item->str) + 
free(p_str_vector_item->str); + free(p_str_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->across); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->vlarb_highs); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->vlarb_highs, j, (void **)&p_vlarb_hl_vector_item); + free(p_vlarb_hl_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->vlarb_highs); + + for (j = 0; j < cl_ptr_vector_get_size(&p_vlarb_scope->vlarb_lows); j++) + { + cl_ptr_vector_at(&p_vlarb_scope->vlarb_lows, j, (void **)&p_vlarb_hl_vector_item); + free(p_vlarb_hl_vector_item); + } + cl_ptr_vector_destroy(&p_vlarb_scope->vlarb_lows); + + free(p_vlarb_scope); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->vlarb_tables); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_levels); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->qos_levels, i, (void **)&p_qos_level); + if (p_qos_level->use) + free(p_qos_level->use); + free(p_qos_level); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->qos_levels); + + for (i = 0; i < cl_ptr_vector_get_size(&p_qos_parse_tree->qos_match_rules); i++) + { + cl_ptr_vector_at(&p_qos_parse_tree->qos_match_rules, i, (void **)&p_qos_match_rule); + + if (p_qos_match_rule->use) + free(p_qos_match_rule->use); + if (p_qos_match_rule->source) + free(p_qos_match_rule->source); + if (p_qos_match_rule->destination) + free(p_qos_match_rule->destination); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->services); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->services, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->services); + + for (j = 0; j < cl_ptr_vector_get_size(&p_qos_match_rule->classes); j++) + { + cl_ptr_vector_at(&p_qos_match_rule->classes, j, (void **)&p_uint32_vector_item); + free(p_uint32_vector_item); + } + cl_ptr_vector_destroy(&p_qos_match_rule->classes); + + free(p_qos_match_rule); + } + cl_ptr_vector_destroy(&p_qos_parse_tree->qos_match_rules); + + 
free(p_qos_parse_tree); +} + +/*************************************************** + ***************************************************/ + + -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:31:54 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:31:54 +0200 Subject: [openib-general] [PATCH 6/10] osm: QoS in OpenSM Message-ID: <45BF64EA.8050103@dev.mellanox.co.il> Compilation changes for QoS policy file parser - Added new files to makefiles. - Introduced new configuration switch '--enable-maintainer-mode', which will run Lex & Yacc instead of just using the generated files. Signed-off-by: Yevgeny Kliteynik --- osm/include/Makefile.am | 2 ++ osm/opensm/Makefile.am | 16 +++++++++++++++- osm/opensm/configure.in | 4 ++++ 3 files changed, 21 insertions(+), 1 deletions(-) diff --git a/osm/include/Makefile.am b/osm/include/Makefile.am index 5efc11a..2b5a17e 100644 --- a/osm/include/Makefile.am +++ b/osm/include/Makefile.am @@ -87,6 +87,8 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_drop_mgr.h \ $(srcdir)/opensm/osm_port_info_rcv.h \ $(srcdir)/opensm/osm_state_mgr_ctrl.h \ + $(srcdir)/opensm/osm_qos_parser.h \ + $(srcdir)/opensm/osm_qos_parser_y.h \ $(srcdir)/complib/cl_thread_osd.h \ $(srcdir)/complib/cl_packon.h \ $(srcdir)/complib/cl_atomic_osd.h \ diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am index b1028d8..5898d66 100644 --- a/osm/opensm/Makefile.am +++ b/osm/opensm/Makefile.am @@ -56,7 +56,21 @@ opensm_SOURCES = main.c osm_console.c os osm_trap_rcv.c osm_ucast_mgr.c osm_ucast_updn.c \ osm_ucast_file.c osm_ucast_ftree.c \ osm_vl15intf.c osm_vl_arb_rcv.c \ - st.c + st.c \ + osm_qos_parser_y.c osm_qos_parser_l.c osm_qos_parser.c + +osm_qos_parser_y.c: @MAINTAINER_MODE_TRUE@ $(srcdir)/osm_qos_parser.y + $(YACC) -y -d $(srcdir)/osm_qos_parser.y + mv y.tab.c osm_qos_parser_y.c + mv y.tab.h osm_qos_parser_y.h + cp -f osm_qos_parser_y.c $(srcdir)/ + cp -f osm_qos_parser_y.h $(srcdir)/../include/opensm/ + 
+osm_qos_parser_l.c: @MAINTAINER_MODE_TRUE@ $(srcdir)/osm_qos_parser.l + $(LEX) $(srcdir)/osm_qos_parser.l + mv lex.yy.c osm_qos_parser_l.c + cp -f osm_qos_parser_l.c $(srcdir)/ + if OSMV_OPENIB opensm_CFLAGS = -Wall $(OSMV_CFLAGS) -fno-strict-aliasing -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1 opensm_CXXFLAGS = -Wall $(OSMV_CFLAGS) -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1 diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in index 893e3b1..01340a5 100644 --- a/osm/opensm/configure.in +++ b/osm/opensm/configure.in @@ -14,6 +14,8 @@ if test -z $opensm_api_version; then fi AC_SUBST(opensm_api_version) +AM_MAINTAINER_MODE + dnl Checks for programs AC_PROG_CXX AC_PROG_CC @@ -22,6 +24,8 @@ AC_PROG_INSTALL AC_PROG_LN_S AC_PROG_MAKE_SET AC_PROG_LIBTOOL +AC_PROG_LEX +AC_PROG_YACC dnl Checks for libraries -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:32:00 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:32:00 +0200 Subject: [openib-general] [PATCH 7/10] osm: QoS in OpenSM Message-ID: <45BF64F0.4060006@dev.mellanox.co.il> Renamed static function find_prtn_by_name() to non-static osm_prtn_find_by_name() This function will be used later by the PathRecord logic. Signed-off-by: Yevgeny Kliteynik --- osm/include/opensm/osm_partition.h | 29 +++++++++++++++++++++++++++++ osm/opensm/osm_prtn.c | 4 ++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/osm/include/opensm/osm_partition.h b/osm/include/opensm/osm_partition.h index 41f18fa..6e123e6 100644 --- a/osm/include/opensm/osm_partition.h +++ b/osm/include/opensm/osm_partition.h @@ -251,6 +251,35 @@ ib_api_status_t osm_prtn_make_partitions * SEE ALSO *********/ +/****f* OpenSM: Partition/osm_prtn_find_by_name +* NAME +* osm_prtn_find_by_name +* +* DESCRIPTION +* Fides partition by name. 
+* +* SYNOPSIS +*/ +osm_prtn_t * +osm_prtn_find_by_name( + IN osm_subn_t *p_subn, + IN const char *name); +/* +* PARAMETERS +* p_subn +* [in] Pointer to a subnet object. +* +* name +* [in] Required partition name. +* +* RETURN VALUES +* Pointer to the partition object on success. +* +* NOTES +* +* SEE ALSO +*********/ + END_C_DECLS #endif /* _OSM_PARTITION_H_ */ diff --git a/osm/opensm/osm_prtn.c b/osm/opensm/osm_prtn.c index 4099cee..4b2cf23 100644 --- a/osm/opensm/osm_prtn.c +++ b/osm/opensm/osm_prtn.c @@ -264,7 +264,7 @@ static uint16_t __generate_pkey(osm_subn return 0; } -static osm_prtn_t *find_prtn_by_name(osm_subn_t *p_subn, const char *name) +osm_prtn_t *osm_prtn_find_by_name(osm_subn_t *p_subn, const char *name) { cl_map_item_t *p_next; osm_prtn_t *p; @@ -288,7 +288,7 @@ osm_prtn_t *osm_prtn_make_new(osm_log_t pkey &= cl_hton16((uint16_t)~0x8000); if (!pkey) { - if (name && (p = find_prtn_by_name(p_subn, name))) + if (name && (p = osm_prtn_find_by_name(p_subn, name))) return p; if(!(pkey = __generate_pkey(p_subn))) return NULL; -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:32:10 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:32:10 +0200 Subject: [openib-general] [PATCH 8/10] osm: QoS in OpenSM Message-ID: <45BF64FA.2030800@dev.mellanox.co.il> Added QoS class and service id fields to the path record Signed-off-by: Yevgeny Kliteynik --- diags/src/saquery.c | 6 +- osm/include/iba/ib_types.h | 148 +++++++++++++++++++++++++++++++--- osm/opensm/osm_helper.c | 8 +- osm/opensm/osm_sa_multipath_record.c | 2 +- osm/osmtest/osmtest.c | 2 +- 5 files changed, 146 insertions(+), 20 deletions(-) diff --git a/diags/src/saquery.c b/diags/src/saquery.c index b69bd63..baf8736 100644 --- a/diags/src/saquery.c +++ b/diags/src/saquery.c @@ -168,7 +168,7 @@ static void print_path_record(ib_path_rec_t *p_pr) { printf("PathRecord dump:\n" - "\t\tresv0...................0x%016" PRIx64 "\n" + 
"\t\tservice_id..............0x%016" PRIx64 "\n" "\t\tdgid....................0x%016" PRIx64 " : " "0x%016" PRIx64 "\n" "\t\tsgid....................0x%016" PRIx64 " : " @@ -187,7 +187,7 @@ print_path_record(ib_path_rec_t *p_pr) "\t\tresv2...................0x%X\n" "\t\tresv3...................0x%X\n" "", - *(uint64_t*)p_pr->resv0, + cl_ntoh64( p_pr->service_id ), cl_ntoh64( p_pr->dgid.unicast.prefix ), cl_ntoh64( p_pr->dgid.unicast.interface_id ), cl_ntoh64( p_pr->sgid.unicast.prefix ), @@ -198,7 +198,7 @@ print_path_record(ib_path_rec_t *p_pr) p_pr->tclass, p_pr->num_path, cl_ntoh16( p_pr->pkey ), - cl_ntoh16( p_pr->sl ), + cl_ntoh16( p_pr->qos_class_sl ), p_pr->mtu, p_pr->rate, p_pr->pkt_life, diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h index 22f7f62..5399ea6 100644 --- a/osm/include/iba/ib_types.h +++ b/osm/include/iba/ib_types.h @@ -1700,6 +1700,28 @@ ib_class_is_rmpp( #define IB_SMINFO_STATE_MASTER 3 /**********/ +/****d* IBA Base: Constants/IB_PATH_REC_SL_MASK +* NAME +* IB_PATH_REC_SL_MASK +* +* DESCRIPTION +* Mask for the sl field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_SL_MASK 0xF + +/****d* IBA Base: Constants/IB_PATH_REC_QOS_CLASS_MASK +* NAME +* IB_PATH_REC_QOS_CLASS_MASK +* +* DESCRIPTION +* Mask for the QoS class field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_QOS_CLASS_MASK 0xFFF0 + /****d* IBA Base: Constants/IB_PATH_REC_SELECTOR_MASK * NAME * IB_PATH_REC_SELECTOR_MASK @@ -2314,7 +2336,7 @@ ib_gid_get_guid( #include typedef struct _ib_path_rec { - uint8_t resv0[8]; + ib_net64_t service_id; ib_gid_t dgid; ib_gid_t sgid; ib_net16_t dlid; @@ -2323,7 +2345,7 @@ typedef struct _ib_path_rec uint8_t tclass; uint8_t num_path; ib_net16_t pkey; - ib_net16_t sl; + ib_net16_t qos_class_sl; uint8_t mtu; uint8_t rate; uint8_t pkt_life; @@ -2363,11 +2385,8 @@ typedef struct _ib_path_rec * pkey * Partition key (P_Key) to use on this path. * -* resv1 -* Reserved byte. -* -* sl -* Service level to use on this path. 
+* qos_class_sl +* QoS class and service level to use on this path. * * mtu * MTU and MTU selector fields to use on this path @@ -2388,6 +2407,7 @@ typedef struct _ib_path_rec *********/ /* Path Record Component Masks */ +#define IB_PR_COMPMASK_SERVICEID (CL_HTON64(((uint64_t)1)<<1)) #define IB_PR_COMPMASK_DGID (CL_HTON64(((uint64_t)1)<<2)) #define IB_PR_COMPMASK_SGID (CL_HTON64(((uint64_t)1)<<3)) #define IB_PR_COMPMASK_DLID (CL_HTON64(((uint64_t)1)<<4)) @@ -2400,7 +2420,7 @@ typedef struct _ib_path_rec #define IB_PR_COMPMASK_REVERSIBLE (CL_HTON64(((uint64_t)1)<<11)) #define IB_PR_COMPMASK_NUMBPATH (CL_HTON64(((uint64_t)1)<<12)) #define IB_PR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<13)) -#define IB_PR_COMPMASK_RESV1 (CL_HTON64(((uint64_t)1)<<14)) +#define IB_PR_COMPMASK_QOS_CLASS (CL_HTON64(((uint64_t)1)<<14)) #define IB_PR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<15)) #define IB_PR_COMPMASK_MTUSELEC (CL_HTON64(((uint64_t)1)<<16)) #define IB_PR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<17)) @@ -2658,6 +2678,7 @@ ib_path_rec_init_local( IN ib_net16_t slid, IN uint8_t num_path, IN ib_net16_t pkey, + IN uint16_t qos_class, IN uint8_t sl, IN uint8_t mtu_selector, IN uint8_t mtu, @@ -2673,8 +2694,8 @@ ib_path_rec_init_local( p_rec->slid = slid; p_rec->num_path = num_path; p_rec->pkey = pkey; - /* Lower 4 bits of path rec's SL are reserved. */ - p_rec->sl = cl_ntoh16( sl ); + p_rec->qos_class_sl = cl_hton16( (sl & IB_PATH_REC_SL_MASK) | + (qos_class << 4) ); p_rec->mtu = (uint8_t)((mtu & IB_PATH_REC_BASE_MASK) | (uint8_t)(mtu_selector << 6)); p_rec->rate = (uint8_t)((rate & IB_PATH_REC_BASE_MASK) | @@ -2686,8 +2707,8 @@ ib_path_rec_init_local( /* Clear global routing fields for local path records */ p_rec->hop_flow_raw = 0; p_rec->tclass = 0; + p_rec->service_id = 0; - *((uint64_t*)p_rec->resv0) = 0; *((uint32_t*)p_rec->resv2) = 0; *((uint16_t*)p_rec->resv2 + 2) = 0; } @@ -2716,6 +2737,9 @@ ib_path_rec_init_local( * pkey * [in] Partition key (P_Key) to use on this path. 
* +* qos_class +* [in] QoS class to use on this path. Lower 12-bits are valid. +* * sl * [in] Service level to use on this path. Lower 4-bits are valid. * @@ -2779,6 +2803,41 @@ ib_path_rec_num_path( * ib_path_rec_t *********/ +/****f* IBA Base: Types/ib_path_rec_set_sl +* NAME +* ib_path_rec_set_sl +* +* DESCRIPTION +* Set path service level. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_sl( + IN ib_path_rec_t* const p_rec, + IN const uint8_t sl ) +{ + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & + IB_PATH_REC_QOS_CLASS_MASK ) | + ( sl & IB_PATH_REC_SL_MASK) ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* sl +* [in] Service level to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + /****f* IBA Base: Types/ib_path_rec_sl * NAME * ib_path_rec_sl @@ -2792,7 +2851,7 @@ static inline uint8_t OSM_API ib_path_rec_sl( IN const ib_path_rec_t* const p_rec ) { - return( (uint8_t)((cl_ntoh16( p_rec->sl )) & 0xF) ); + return( (uint8_t)((cl_ntoh16( p_rec->qos_class_sl )) & IB_PATH_REC_SL_MASK) ); } /* * PARAMETERS @@ -2808,6 +2867,71 @@ ib_path_rec_sl( * ib_path_rec_t *********/ +/****f* IBA Base: Types/ib_path_rec_set_qos_class +* NAME +* ib_path_rec_set_qos_class +* +* DESCRIPTION +* Set path QoS class. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_qos_class( + IN ib_path_rec_t* const p_rec, + IN const uint16_t qos_class ) +{ + p_rec->qos_class_sl = cl_hton16( ( cl_ntoh16(p_rec->qos_class_sl) & + IB_PATH_REC_SL_MASK ) | + ( qos_class << 4) ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* qos_class +* [in] QoS class to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_qos_class +* NAME +* ib_path_rec_qos_class +* +* DESCRIPTION +* Get QoS class. 
+* +* SYNOPSIS +*/ +static inline uint16_t OSM_API +ib_path_rec_qos_class( + IN const ib_path_rec_t* const p_rec ) +{ + return( cl_ntoh16( p_rec->qos_class_sl ) >> 4 ); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* QoS class of the path record. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + + /****f* IBA Base: Types/ib_path_rec_mtu * NAME * ib_path_rec_mtu diff --git a/osm/opensm/osm_helper.c b/osm/opensm/osm_helper.c index e2a3e8a..103853b 100644 --- a/osm/opensm/osm_helper.c +++ b/osm/opensm/osm_helper.c @@ -1095,7 +1095,7 @@ osm_dump_path_record( { osm_log( p_log, log_level, "PathRecord dump:\n" - "\t\t\t\tresv0...................0x%016" PRIx64 "\n" + "\t\t\t\tservice_id..............0x%016" PRIx64 "\n" "\t\t\t\tdgid....................0x%016" PRIx64 " : " "0x%016" PRIx64 "\n" "\t\t\t\tsgid....................0x%016" PRIx64 " : " @@ -1106,6 +1106,7 @@ osm_dump_path_record( "\t\t\t\ttclass..................0x%X\n" "\t\t\t\tnum_path_revers.........0x%X\n" "\t\t\t\tpkey....................0x%X\n" + "\t\t\t\tqos_class...............0x%X\n" "\t\t\t\tsl......................0x%X\n" "\t\t\t\tmtu.....................0x%X\n" "\t\t\t\trate....................0x%X\n" @@ -1114,7 +1115,7 @@ osm_dump_path_record( "\t\t\t\tresv2...................0x%X\n" "\t\t\t\tresv3...................0x%X\n" "", - *(uint64_t*)p_pr->resv0, + cl_ntoh64(p_pr->service_id), cl_ntoh64( p_pr->dgid.unicast.prefix ), cl_ntoh64( p_pr->dgid.unicast.interface_id ), cl_ntoh64( p_pr->sgid.unicast.prefix ), @@ -1125,7 +1126,8 @@ osm_dump_path_record( p_pr->tclass, p_pr->num_path, cl_ntoh16( p_pr->pkey ), - cl_ntoh16( p_pr->sl ), + ib_path_rec_qos_class(p_pr), + ib_path_rec_sl(p_pr), p_pr->mtu, p_pr->rate, p_pr->pkt_life, diff --git a/osm/opensm/osm_sa_multipath_record.c b/osm/opensm/osm_sa_multipath_record.c index 3256a45..44fe453 100644 --- a/osm/opensm/osm_sa_multipath_record.c +++ b/osm/opensm/osm_sa_multipath_record.c @@ -759,7 +759,7 @@ 
__osm_mpr_rcv_build_pr( p_pr->hop_flow_raw &= cl_hton32(1<<31); p_pr->pkey = p_parms->pkey; - p_pr->sl = cl_hton16( p_parms->sl ); + ib_path_rec_set_sl(p_pr, p_parms->sl); p_pr->mtu = (uint8_t)( p_parms->mtu | 0x80 ); p_pr->rate = (uint8_t)( p_parms->rate | 0x80 ); diff --git a/osm/osmtest/osmtest.c b/osm/osmtest/osmtest.c index 3c16a6f..f00a9da 100644 --- a/osm/osmtest/osmtest.c +++ b/osm/osmtest/osmtest.c @@ -1982,7 +1982,7 @@ osmtest_write_path_info( IN osmtest_t * cl_ntoh64( p_rec->sgid.unicast.interface_id ), cl_ntoh16( p_rec->dlid ), cl_ntoh16( p_rec->slid ), cl_ntoh32( p_rec->hop_flow_raw ), p_rec->tclass, - p_rec->num_path, cl_ntoh16( p_rec->pkey ), p_rec->sl, + p_rec->num_path, cl_ntoh16( p_rec->pkey ), ib_path_rec_sl(p_rec), p_rec->mtu, p_rec->rate, p_rec->pkt_life, p_rec->preference ); -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:32:16 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:32:16 +0200 Subject: [openib-general] [PATCH 9/10] osm: QoS in OpenSM Message-ID: <45BF6500.9000107@dev.mellanox.co.il> Added new command line option for OSM: '-Y' or '--qos_policy_file' Signed-off-by: Yevgeny Kliteynik --- osm/include/opensm/osm_subnet.h | 4 ++++ osm/opensm/main.c | 10 +++++++++- osm/opensm/osm_opensm.c | 10 ++++++++++ osm/opensm/osm_subnet.c | 10 ++++++++++ 4 files changed, 33 insertions(+), 1 deletions(-) diff --git a/osm/include/opensm/osm_subnet.h b/osm/include/opensm/osm_subnet.h index a6ffd45..58be7c9 100644 --- a/osm/include/opensm/osm_subnet.h +++ b/osm/include/opensm/osm_subnet.h @@ -264,6 +264,7 @@ typedef struct _osm_subn_opt char * partition_config_file; boolean_t no_partition_enforcement; boolean_t no_qos; + char * qos_policy_file; boolean_t accum_log_file; char * console; uint16_t console_port; @@ -397,6 +398,9 @@ typedef struct _osm_subn_opt * specified the log file will be truncated upon reaching * this limit. * +* qos_policy_file +* Name of the QoS policy file. 
+* * accum_log_file * If TRUE (default) - the log file will be accumulated. * If FALSE - the log file will be erased before starting current opensm run. diff --git a/osm/opensm/main.c b/osm/opensm/main.c index 69c940c..8db1938 100644 --- a/osm/opensm/main.c +++ b/osm/opensm/main.c @@ -259,6 +259,9 @@ show_usage(void) printf( "-Q\n" "--qos\n" " This option enables QoS setup.\n\n"); + printf( "-Y\n" + "--qos_policy_file\n" + " This option defines the optional QoS policy file.\n\n"); printf( "-N\n" "--no_part_enforce\n" " This option disables partition enforcement on switch external ports.\n\n"); @@ -551,7 +554,7 @@ main( boolean_t cache_options = FALSE; char *ignore_guids_file_name = NULL; uint32_t val; - const char * const short_option = "i:f:ed:g:l:L:s:t:a:R:M:U:S:P:NQvVhorcyx"; + const char * const short_option = "i:f:ed:g:l:L:s:t:a:R:M:U:S:P:Y:NQvVhorcyx"; /* In the array below, the 2nd parameter specified the number @@ -576,6 +579,7 @@ main( { "Pconfig", 1, NULL, 'P'}, { "no_part_enforce",0,NULL, 'N'}, { "qos", 0, NULL, 'Q'}, + { "qos_policy_file",1, NULL, 'Y'}, { "maxsmps", 1, NULL, 'n'}, { "console", 1, NULL, 'q'}, { "V", 0, NULL, 'V'}, @@ -793,6 +797,10 @@ main( opt.no_qos = FALSE; break; + case 'Y': + opt.qos_policy_file = optarg; + break; + case 'y': opt.exit_on_fatal = FALSE; printf(" Staying on fatal initialization errors\n"); diff --git a/osm/opensm/osm_opensm.c b/osm/opensm/osm_opensm.c index 1c17979..52923b7 100644 --- a/osm/opensm/osm_opensm.c +++ b/osm/opensm/osm_opensm.c @@ -63,6 +63,7 @@ #include #include #include +#include struct routing_engine_module { const char *name; @@ -290,6 +291,15 @@ osm_opensm_init( goto Exit; } + if( p_opt->qos_policy_file ) + { + if (!osm_qos_parse_policy_file(&p_osm->log, + p_opt->qos_policy_file)) + osm_log(&p_osm->log, OSM_LOG_VERBOSE, + "osm_opensm_init: QoS policy file %s parsed successfully\n", + p_opt->qos_policy_file); + } + Exit: osm_log( &p_osm->log, OSM_LOG_FUNCS, "osm_opensm_init: ]\n" ); /* Format Waived */ 
return ( status ); diff --git a/osm/opensm/osm_subnet.c b/osm/opensm/osm_subnet.c index f2e909b..9905335 100644 --- a/osm/opensm/osm_subnet.c +++ b/osm/opensm/osm_subnet.c @@ -470,6 +470,7 @@ osm_subn_set_default_opt( p_opt->partition_config_file = OSM_DEFAULT_PARTITION_CONFIG_FILE; p_opt->no_partition_enforcement = FALSE; p_opt->no_qos = TRUE; + p_opt->qos_policy_file = NULL; p_opt->accum_log_file = TRUE; p_opt->port_profile_switch_nodes = FALSE; p_opt->pfn_ui_pre_lid_assign = NULL; @@ -977,6 +978,9 @@ osm_subn_parse_conf_file( "no_qos", p_key, p_val, &p_opts->no_qos); + __osm_subn_opts_unpack_charp( + "qos_policy_file", p_key, p_val, &p_opts->qos_policy_file); + __osm_subn_opts_unpack_boolean( "accum_log_file", p_key, p_val, &p_opts->accum_log_file); @@ -1281,6 +1285,12 @@ osm_subn_write_conf_file( "no_qos %s\n\n", p_opts->no_qos ? "TRUE" : "FALSE"); + if (p_opts->qos_policy_file) + fprintf( opts_file, + "# QoS policy file name\n" + "qos_policy_file %s\n\n", + p_opts->qos_policy_file); + subn_dump_qos_options(opts_file, "QoS default options", "qos", &p_opts->qos_options); fprintf(opts_file, "\n"); -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:33:28 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:33:28 +0200 Subject: [openib-general] [PATCH 10/10] osm: QoS in OpenSM Message-ID: <45BF6548.80104@dev.mellanox.co.il> Checking PathRecord query for QoS constraints The QoS-aware path selection logic is implemented in a separate function that is called only when QoS in OpenSM is on. It causes some code duplication, but the idea is to minimize the changes in the existing logic in OSM. Eventually, these two functions (the old path selection and the new QoS-aware path selection) will be merged into a single function. 
Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_sa_path_record.c | 822 ++++++++++++++++++++++++++++++++++++++- 1 files changed, 816 insertions(+), 6 deletions(-) diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c index a0dbb07..2ff7a42 100644 --- a/osm/opensm/osm_sa_path_record.c +++ b/osm/opensm/osm_sa_path_record.c @@ -70,6 +70,7 @@ #include #include #endif +#include #define OSM_PR_RCV_POOL_MIN_SIZE 64 #define OSM_PR_RCV_POOL_GROW_SIZE 64 @@ -87,6 +88,7 @@ typedef struct _osm_path_parms uint8_t rate; uint8_t sl; uint8_t pkt_life; + uint16_t class; boolean_t reversible; } osm_path_parms_t; @@ -716,6 +718,799 @@ __osm_pr_rcv_get_path_parms( /********************************************************************** **********************************************************************/ + +static ib_api_status_t +__osm_pr_rcv_get_path_parms_qos( + IN osm_pr_rcv_t* const p_rcv, + IN const ib_path_rec_t* const p_pr, + IN const osm_port_t* const p_src_port, + IN const osm_port_t* const p_dest_port, + IN const uint16_t dest_lid_ho, + IN const ib_net64_t comp_mask, + OUT osm_path_parms_t* const p_parms ) +{ + const osm_node_t* p_node; + const osm_physp_t* p_physp; + const osm_physp_t* p_src_physp; + const osm_physp_t* p_dest_physp; + const osm_prtn_t* p_prtn; + const ib_port_info_t* p_pi; + ib_api_status_t status = IB_SUCCESS; + ib_net16_t pkey = 0; + ib_net16_t shared_pkey = 0; + uint8_t mtu = 0; + uint8_t rate = 0; + uint8_t pkt_life = 0; + uint8_t sl = 0; + uint16_t class = 0; + uint8_t required_mtu; + uint8_t required_rate; + uint8_t required_pkt_life; + uint8_t in_port_num; + uint8_t out_port_num; + ib_net16_t dest_lid; + uint8_t i; + uint8_t vl; + ib_slvl_table_t * p_slvl_tbl = NULL; + boolean_t valid_sls[IB_MAX_NUM_VLS]; + boolean_t sl2vl_valid_path = FALSE; + uint8_t first_valid_sl; + osm_qos_level_t * p_qos_level = NULL; + + OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_get_path_parms_qos ); + + memset(valid_sls,TRUE,sizeof(valid_sls)); + 
dest_lid = cl_hton16( dest_lid_ho ); + + p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); + p_physp = osm_port_get_default_phys_ptr( p_src_port ); + p_src_physp = p_physp; + p_pi = &p_physp->port_info; + + mtu = ib_port_info_get_mtu_cap( p_pi ); + rate = ib_port_info_compute_rate( p_pi ); + + /* + * Mellanox Tavor device performance is better using 1K MTU. + * If required MTU and MTU selector are such that 1K is OK + * and at least one end of the path is Tavor we override the + * port MTU with 1K. + */ + if ( p_rcv->p_subn->opt.enable_quirks && + __osm_sa_path_rec_apply_tavor_mtu_limit( + p_pr, p_src_port, p_dest_port, comp_mask) ) + { + if (mtu > IB_MTU_LEN_1024) + { + mtu = IB_MTU_LEN_1024; + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "Optimized Path MTU to 1K for Mellanox Tavor device\n"); + } + } + + /* + * Walk the subnet object from source to destination, + * tracking the most restrictive rate and mtu values along the way... + * + * If source port node is a switch, then p_physp should + * point to the port that routes the destination lid + */ + + p_node = osm_physp_get_node_ptr( p_physp ); + + if( p_node->sw ) + { + /* source node is a switch */ + in_port_num = osm_physp_get_port_num(p_physp); + /* + * If the dest_lid_ho is equal to the lid of the switch pointed by + * p_sw then p_physp will be the physical port of the switch port zero, + * and out_port_num will be 0. 
+ */ + p_physp = osm_switch_get_route_by_lid(p_node->sw, cl_hton16( dest_lid_ho ) ); + if ( p_physp == 0 ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_path_parms_qos: ERR 1F02: " + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", + dest_lid_ho, + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); + status = IB_ERROR; + goto Exit; + } + p_src_physp = p_physp; + out_port_num = osm_physp_get_port_num(p_physp); + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); + } + else + { + /* source node is CA or Router */ + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); + } + + for (i = 0; i < IB_MAX_NUM_VLS; i++) + { + if (valid_sls[i]) + { + vl = ib_slvl_table_get(p_slvl_tbl,i); + if (vl == IB_DROP_VL) + valid_sls[i] = FALSE; + } + } + + /* + * now get pointer to the destination port (same as above) + */ + p_node = osm_physp_get_node_ptr( p_dest_physp ); + + if( p_node->sw ) + { + p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); + if ( p_dest_physp == 0 ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_path_parms_qos: ERR 1F03: " + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", + dest_lid_ho, + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); + status = IB_ERROR; + goto Exit; + } + } + + /* + * Now go through the path step by step + */ + + while( p_physp != p_dest_physp ) + { + p_physp = osm_physp_get_remote( p_physp ); + if ( p_physp == 0 ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_path_parms_qos: ERR 1F04: " + "Cannot find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n", + dest_lid_ho, + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); + status = IB_ERROR; + goto Exit; + } + + in_port_num = osm_physp_get_port_num(p_physp); + + /* this is point to point case (no switch in between) */ + if( p_physp == p_dest_physp ) + break; + + /* Check parameters for the ingress port in this switch */ + p_pi = 
&p_physp->port_info; + + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) + { + mtu = ib_port_info_get_mtu_cap( p_pi ); + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "New smallest MTU = %u at intervening port 0x%016" PRIx64 + " port num 0x%X\n", + mtu, + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), + osm_physp_get_port_num( p_physp ) ); + } + } + + if( rate > ib_port_info_compute_rate( p_pi ) ) + { + rate = ib_port_info_compute_rate( p_pi ); + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "New smallest rate = %u at intervening port 0x%016" PRIx64 + " port num 0x%X\n", + rate, + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), + osm_physp_get_port_num( p_physp ) ); + } + } + + p_node = osm_physp_get_node_ptr( p_physp ); + if( !p_node->sw ) + { + /* + There is some sort of problem in the subnet object! + If this isn't a switch, we should have reached + the destination by now! 
+ */ + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_path_parms_qos: ERR 1F05: " + "Internal error, bad path\n" ); + status = IB_ERROR; + goto Exit; + } + + /* Continue with the egress port on this switch */ + p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid ); + + if ( p_physp == 0 ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_path_parms_qos: ERR 1F06: " + "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", + dest_lid_ho, + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); + status = IB_ERROR; + goto Exit; + } + + CL_ASSERT( p_physp ); + CL_ASSERT( osm_physp_is_valid( p_physp ) ); + + p_pi = &p_physp->port_info; + + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) + { + mtu = ib_port_info_get_mtu_cap( p_pi ); + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "New smallest MTU = %u at intervening port 0x%016" PRIx64 + " port num 0x%X\n", + mtu, + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), + osm_physp_get_port_num( p_physp ) ); + } + } + + if( rate > ib_port_info_compute_rate( p_pi ) ) + { + rate = ib_port_info_compute_rate( p_pi ); + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "New smallest rate = %u at intervening port 0x%016" PRIx64 + " port num 0x%X\n", + rate, + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), + osm_physp_get_port_num( p_physp ) ); + } + } + + out_port_num = osm_physp_get_port_num(p_physp); + + /* + * Check SL2VL table of the switch + */ + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); + for ( i = 0; i < IB_MAX_NUM_VLS; i++ ) + { + if (valid_sls[i]) + { + vl = ib_slvl_table_get(p_slvl_tbl,i); + if (vl == IB_DROP_VL) + valid_sls[i] = FALSE; + } + } + + /* go to the next step in the path */ + } + + /* p_physp now points to the destination */ + + p_pi = &p_physp->port_info; + + if( mtu > 
ib_port_info_get_mtu_cap( p_pi ) ) + { + mtu = ib_port_info_get_mtu_cap( p_pi ); + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "New smallest MTU = %u at destination port 0x%016" PRIx64 "\n", + mtu, + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); + } + } + + if( rate > ib_port_info_compute_rate( p_pi ) ) + { + rate = ib_port_info_compute_rate( p_pi ); + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "New smallest rate = %u at destination port 0x%016" PRIx64 "\n", + rate, + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); + } + } + + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "Path min MTU = %u, min rate = %u\n", mtu, rate ); + } + + /* check whether there is some SL that won't lead to VL15 eventually */ + for (i = 0; i < IB_MAX_NUM_VLS; i++) + { + if (valid_sls[i]) + { + sl2vl_valid_path = TRUE; + first_valid_sl = i; + break; + } + } + + if (!sl2vl_valid_path) + { + /* all the SLs will eventually lead to VL15 on this path */ + status = IB_NOT_FOUND; + goto Exit; + } + + /* + * According to spec definition IBA 1.2 Table 205 PacketLifeTime + * description, for loopback paths, packetLifeTime shall be zero. 
+ */ + if ( p_src_port == p_dest_port ) + pkt_life = 0; /* loopback */ + else + pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; + + /* + * Get QoS Level object according to the path request + */ + osm_qos_parser_get_qos_level_by_pr(p_rcv, + p_pr, + p_src_port, + p_dest_port, + dest_lid_ho, + comp_mask, + &p_qos_level); + + if (p_qos_level) + { + /* + * Found QoS level that should be applied to this path record request + */ + + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "PathRecord request matches QoS Level %u (%s)\n", + p_qos_level->sn, + (p_qos_level->use)? p_qos_level->use : "no description" ) ; + } + + /* adjust MTU limit according to QoS constraints */ + if (p_qos_level->mtu_limit_set && (mtu > p_qos_level->mtu_limit) ) + { + mtu = p_qos_level->mtu_limit; + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: new smallest MTU = %u\n", + mtu); + } + } + + /* adjust Rate limit according to QoS constraints */ + if (p_qos_level->rate_limit_set && (rate > p_qos_level->rate_limit) ) + { + rate = p_qos_level->rate_limit; + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: new smallest Rate = %u\n", + rate); + } + } + + /* adjust Packet Lifetime according to QoS constraints */ + if (p_qos_level->pkt_life_set && (pkt_life > p_qos_level->pkt_life) ) + { + pkt_life = p_qos_level->pkt_life; + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: new smallest Packet Lifetime = %u\n", + pkt_life); + } + } + + /* adjust SL according to QoS constraints */ + if (p_qos_level->sl_set) + { + if (!valid_sls[p_qos_level->sl]) + { + status = IB_NOT_FOUND; + goto Exit; + } + else + { + sl 
= p_qos_level->sl; + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: new SL = %u\n", + sl); + } + } + } + + /* adjust PKey according to QoS constraints */ + if (p_qos_level->pkey_set) + { + pkey = p_qos_level->pkey; + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: new PKey = %u\n", + pkey); + } + } + + /* adjust Class according to QoS constraints */ + if (p_qos_level->class_set) + { + class = p_qos_level->class; + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: new Class = %u\n", + class); + } + } + + } /*if (p_qos_level)*/ + + /* + * Determine if these values meet the user criteria + * and adjust appropriately + */ + + /* we silently ignore cases where only the MTU selector is defined */ + if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && + ( comp_mask & IB_PR_COMPMASK_MTU ) ) + { + required_mtu = ib_path_rec_mtu( p_pr ); + switch( ib_path_rec_mtu_sel( p_pr ) ) + { + case 0: /* must be greater than */ + if( mtu <= required_mtu ) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if( mtu >= required_mtu ) + { + /* adjust to use the highest mtu + lower then the required one */ + if( required_mtu > 1 ) + mtu = required_mtu - 1; + else + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if( mtu < required_mtu ) + status = IB_NOT_FOUND; + else + mtu = required_mtu; + break; + + case 3: /* largest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ + CL_ASSERT( FALSE ); + status = IB_ERROR; + break; + } + } + if (status != IB_SUCCESS) + goto Exit; + + /* we silently ignore cases where only the Rate selector is defined */ + 
if ( ( comp_mask & IB_PR_COMPMASK_RATESELEC ) && + ( comp_mask & IB_PR_COMPMASK_RATE ) ) + { + required_rate = ib_path_rec_rate( p_pr ); + switch( ib_path_rec_rate_sel( p_pr ) ) + { + case 0: /* must be greater than */ + if( rate <= required_rate ) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if( rate >= required_rate ) + { + /* adjust the rate to use the highest rate + lower then the required one */ + if( required_rate > 2 ) + rate = required_rate - 1; + else + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if( rate < required_rate ) + status = IB_NOT_FOUND; + else + rate = required_rate; + break; + + case 3: /* largest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ + CL_ASSERT( FALSE ); + status = IB_ERROR; + break; + } + } + if (status != IB_SUCCESS) + goto Exit; + + /* we silently ignore cases where only the PktLife selector is defined */ + if ( ( comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC ) && + ( comp_mask & IB_PR_COMPMASK_PKTLIFETIME ) ) + { + required_pkt_life = ib_path_rec_pkt_life( p_pr ); + switch( ib_path_rec_pkt_life_sel( p_pr ) ) + { + case 0: /* must be greater than */ + if( pkt_life <= required_pkt_life ) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if( pkt_life >= required_pkt_life ) + { + /* adjust the lifetime to use the highest possible + lower then the required one */ + if( required_pkt_life > 1 ) + pkt_life = required_pkt_life - 1; + else + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if( pkt_life < required_pkt_life ) + status = IB_NOT_FOUND; + else + pkt_life = required_pkt_life; + break; + + case 3: /* smallest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ + CL_ASSERT( FALSE ); + status = IB_ERROR; + break; + } + } + if (status != IB_SUCCESS) + goto Exit; + 
+ /* + * set Pkey for this path record request + */ + + shared_pkey = osm_physp_find_common_pkey( p_src_physp, p_dest_physp ); + if ( !shared_pkey ) + { + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_path_parms_qos: ERR 1F07: " + "Ports do not have any shared PKeys\n"); + status = IB_NOT_FOUND; + goto Exit; + } + + if( (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) + { + if (p_qos_level && p_qos_level->pkey_set) + pkey = p_qos_level->pkey; + else + pkey = shared_pkey; + } + else if (comp_mask & IB_PR_COMPMASK_PKEY) + { + /* PathRecord requires specific PKey */ + if (p_qos_level && p_qos_level->pkey_set) + { + /* check that QoS pkey matches the required pkey */ + if (p_qos_level->pkey != p_pr->pkey) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS PKey constraint (0x%04x) doesn't match required PKey (0x%04x)\n", + cl_ntoh16(p_qos_level->pkey), cl_ntoh16(p_pr->pkey)); + status = IB_NOT_FOUND; + goto Exit; + } + pkey = p_qos_level->pkey; + } + else + pkey = p_pr->pkey; + } + else + { + if (p_qos_level && p_qos_level->pkey_set) + pkey = p_qos_level->pkey; + else + pkey = shared_pkey; + } + + /* + * PKey has been set. Now check that ports share this PKey. 
+ */ + + if ( (pkey != shared_pkey) && + (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "Ports do not share specified PKey 0x%04x\n", + cl_ntoh16(pkey)); + status = IB_NOT_FOUND; + goto Exit; + } + + /* + * Done selecting PKey - Now select valid SL + */ + + if (pkey) + { + p_prtn = (osm_prtn_t *)cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl, + pkey & cl_ntoh16((uint16_t)~0x8000)); + if ( p_prtn == (osm_prtn_t *)cl_qmap_end(&p_rcv->p_subn->prtn_pkey_tbl) ) + { + /* this may be possible when pkey tables are created somehow in + previous runs or things are going wrong here */ + osm_log( p_rcv->p_log, OSM_LOG_ERROR, + "__osm_pr_rcv_get_path_parms_qos: ERR 1F08: " + "No partition found for PKey 0x%04x - using default SL %d\n", + cl_ntoh16(pkey), sl); + } + else + { + if (!valid_sls[p_prtn->sl]) + { + /* partition forces to use SL that eventually leads to VL15 */ + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "Partition constraints (pkey=%u, sl=%u) lead to VL15\n", + pkey, p_prtn->sl); + } + status = IB_NOT_FOUND; + goto Exit; + } + else + { + /* partition's SL is valid */ + if ( p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != p_prtn->sl) ) + { + /* partition's SL doesn't match QoS Level SL */ + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "Partition constraints (pkey=%u, sl=%u): SL doesn't match QoS SL(%u)\n", + pkey, p_prtn->sl, p_qos_level->sl); + } + status = IB_NOT_FOUND; + goto Exit; + } + else + { + /* set SL to be partition's SL */ + sl = p_prtn->sl; + } + } + } + } + else + { + /* + * No pkey (no partition) + */ + if (p_qos_level && p_qos_level->sl_set) + { + /* sl has been already set */ + } + else + { + if (comp_mask & IB_PR_COMPMASK_SL) + { + /* specific SL was 
requested */ + if (!valid_sls[ib_path_rec_sl(p_pr)]) + { + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: required SL (%u) leads to VL15\n", + ib_path_rec_sl(p_pr)); + } + status = IB_NOT_FOUND; + goto Exit; + } + /* set sl to whatever requested */ + sl = ib_path_rec_sl(p_pr); + } + else + { + /* set sl to the first valid sl that won't lead to VL15 */ + sl = first_valid_sl; + } + } + } + + if ( (comp_mask & IB_PR_COMPMASK_SL) && + (ib_path_rec_sl( p_pr ) != sl) ) + { + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, + "__osm_pr_rcv_get_path_parms_qos: " + "QoS constaraints: required SL (%u) doesn't match QoS constraints\n", + ib_path_rec_sl(p_pr)); + } + status = IB_NOT_FOUND; + goto Exit; + } + + /* reset pkey when raw traffic */ + if( (pkey) && + (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) + { + pkey = 0; + } + + p_parms->mtu = mtu; + p_parms->rate = rate; + p_parms->pkt_life = pkt_life; + p_parms->pkey = pkey; + p_parms->sl = sl; + p_parms->class = class; + + Exit: + OSM_LOG_EXIT( p_rcv->p_log ); + return( status ); +} + +/********************************************************************** + **********************************************************************/ static void __osm_pr_rcv_build_pr( IN osm_pr_rcv_t* const p_rcv, @@ -774,7 +1569,8 @@ __osm_pr_rcv_build_pr( #endif p_pr->pkey = p_parms->pkey; - p_pr->sl = cl_hton16(p_parms->sl); + ib_path_rec_set_qos_class(p_pr,p_parms->class); + ib_path_rec_set_sl(p_pr,p_parms->sl); p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80); p_pr->rate = (uint8_t)(p_parms->rate | 0x80); @@ -832,10 +1628,14 @@ __osm_pr_rcv_get_lid_pair_path( goto Exit; } - status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, - p_dest_port, dest_lid_ho, - comp_mask, &path_parms ); - + if (p_rcv->p_subn->opt.no_qos) + status = 
__osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, + p_dest_port, dest_lid_ho, + comp_mask, &path_parms ); + else + status = __osm_pr_rcv_get_path_parms_qos( p_rcv, p_pr, p_src_port, + p_dest_port, dest_lid_ho, + comp_mask, &path_parms ); if( status != IB_SUCCESS ) { cl_qlock_pool_put( &p_rcv->pr_pool, &p_pr_item->pool_item ); @@ -849,6 +1649,16 @@ __osm_pr_rcv_get_lid_pair_path( comp_mask, &rev_path_parms ); path_parms.reversible = ( rev_path_status == IB_SUCCESS ); + /* + * ToDo: + * ToDo: The whole implementation of reversible path is wrong. + * ToDo: It is not enough to know that the reversed path exist. + * ToDo: We should also adjust limits (mtu, rate, etc...) by + * ToDo: comparing path and reversed path's limits. + * ToDo: Also, need to think about reversible path in QoS. + * ToDo: + */ + /* did we get a Reversible Path compmask ? */ /* NOTE that if the reversible component = 0, it is a don't care @@ -2053,7 +2863,7 @@ osm_pr_rcv_process( /* SL, Hop Limit, and Flow Label */ ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, &sl, &flow_label, &hop_limit ); - p_pr_item->path_rec.sl = cl_hton16( sl ); + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); #ifndef ROUTER_EXP p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | (flow_label << 8); -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:55:44 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:55:44 +0200 Subject: [openib-general] [PATCH 3.1/10] osm: QoS in OpenSM Message-ID: <45BF6A80.4040005@dev.mellanox.co.il> QoS policy file parser Yacc & Lex generated files Signed-off-by: Yevgeny Kliteynik --- osm/include/opensm/osm_qos_parser_y.h | 222 +++ osm/opensm/osm_qos_parser_l.c | 2686 ++++++++++++++++++++++++++++ 2 files changed, 2908 insertions(+), 0 deletions(-) diff --git a/osm/include/opensm/osm_qos_parser_y.h b/osm/include/opensm/osm_qos_parser_y.h new file mode 100644 index 0000000..e58507c --- /dev/null +++ 
b/osm/include/opensm/osm_qos_parser_y.h @@ -0,0 +1,222 @@ +/* A Bison parser, made by GNU Bison 1.875. */ + +/* Skeleton parser for Yacc-like parsing with Bison, + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, when this file is copied by Bison into a + Bison output file, you may use that output file without restriction. + This special exception was added by the Free Software Foundation + in version 1.24 of Bison. */ + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. 
*/ + enum yytokentype { + TK_HEX_NUMBER = 258, + TK_DEC_NUMBER = 259, + TK_WHITE = 260, + TK_DASH = 261, + TK_DOTDOT = 262, + TK_COMMA = 263, + TK_TEXT = 264, + TK_XML_VERSION_START = 265, + TK_XML_VERSION_END = 266, + TK_QOS_POLICY_START = 267, + TK_QOS_POLICY_END = 268, + TK_PORT_GROUPS_START = 269, + TK_PORT_GROUPS_END = 270, + TK_PORT_GROUP_START = 271, + TK_PORT_GROUP_END = 272, + TK_NAME_START = 273, + TK_NAME_END = 274, + TK_USE_START = 275, + TK_USE_END = 276, + TK_PORT_GUID_START = 277, + TK_PORT_GUID_END = 278, + TK_PORT_NAME_START = 279, + TK_PORT_NAME_END = 280, + TK_PARTITION_START = 281, + TK_PARTITION_END = 282, + TK_NODE_TYPE_START = 283, + TK_NODE_TYPE_END = 284, + TK_QOS_SETUP_START = 285, + TK_QOS_SETUP_END = 286, + TK_SL2VL_TABLES_START = 287, + TK_SL2VL_TABLES_END = 288, + TK_SL2VL_SCOPE_START = 289, + TK_SL2VL_SCOPE_END = 290, + TK_GROUP_START = 291, + TK_GROUP_END = 292, + TK_FROM_START = 293, + TK_FROM_END = 294, + TK_TO_START = 295, + TK_TO_END = 296, + TK_SL2VL_TABLE_START = 297, + TK_SL2VL_TABLE_END = 298, + TK_ACROSS_START = 299, + TK_ACROSS_END = 300, + TK_ACROSS_FROM_START = 301, + TK_ACROSS_FROM_END = 302, + TK_ACROSS_TO_START = 303, + TK_ACROSS_TO_END = 304, + TK_VLARB_TABLES_START = 305, + TK_VLARB_TABLES_END = 306, + TK_VLARB_SCOPE_START = 307, + TK_VLARB_SCOPE_END = 308, + TK_VLARB_HIGH_START = 309, + TK_VLARB_HIGH_END = 310, + TK_VLARB_LOW_START = 311, + TK_VLARB_LOW_END = 312, + TK_VLARB_HIGH_LIMIT_START = 313, + TK_VLARB_HIGH_LIMIT_END = 314, + TK_QOS_LEVELS_START = 315, + TK_QOS_LEVELS_END = 316, + TK_QOS_LEVEL_START = 317, + TK_QOS_LEVEL_END = 318, + TK_SN_START = 319, + TK_SN_END = 320, + TK_SL_START = 321, + TK_SL_END = 322, + TK_CLASS_START = 323, + TK_CLASS_END = 324, + TK_MTU_LIMIT_START = 325, + TK_MTU_LIMIT_END = 326, + TK_RATE_LIMIT_START = 327, + TK_RATE_LIMIT_END = 328, + TK_PACKET_LIFE_START = 329, + TK_PACKET_LIFE_END = 330, + TK_PKEY_START = 331, + TK_PKEY_END = 332, + TK_QOS_MATCH_RULES_START = 333, + 
TK_QOS_MATCH_RULES_END = 334, + TK_QOS_MATCH_RULE_START = 335, + TK_QOS_MATCH_RULE_END = 336, + TK_QOS_LEVEL_SN_START = 337, + TK_QOS_LEVEL_SN_END = 338, + TK_SOURCE_START = 339, + TK_SOURCE_END = 340, + TK_DESTINATION_START = 341, + TK_DESTINATION_END = 342, + TK_SERVICE_START = 343, + TK_SERVICE_END = 344 + }; +#endif +#define TK_HEX_NUMBER 258 +#define TK_DEC_NUMBER 259 +#define TK_WHITE 260 +#define TK_DASH 261 +#define TK_DOTDOT 262 +#define TK_COMMA 263 +#define TK_TEXT 264 +#define TK_XML_VERSION_START 265 +#define TK_XML_VERSION_END 266 +#define TK_QOS_POLICY_START 267 +#define TK_QOS_POLICY_END 268 +#define TK_PORT_GROUPS_START 269 +#define TK_PORT_GROUPS_END 270 +#define TK_PORT_GROUP_START 271 +#define TK_PORT_GROUP_END 272 +#define TK_NAME_START 273 +#define TK_NAME_END 274 +#define TK_USE_START 275 +#define TK_USE_END 276 +#define TK_PORT_GUID_START 277 +#define TK_PORT_GUID_END 278 +#define TK_PORT_NAME_START 279 +#define TK_PORT_NAME_END 280 +#define TK_PARTITION_START 281 +#define TK_PARTITION_END 282 +#define TK_NODE_TYPE_START 283 +#define TK_NODE_TYPE_END 284 +#define TK_QOS_SETUP_START 285 +#define TK_QOS_SETUP_END 286 +#define TK_SL2VL_TABLES_START 287 +#define TK_SL2VL_TABLES_END 288 +#define TK_SL2VL_SCOPE_START 289 +#define TK_SL2VL_SCOPE_END 290 +#define TK_GROUP_START 291 +#define TK_GROUP_END 292 +#define TK_FROM_START 293 +#define TK_FROM_END 294 +#define TK_TO_START 295 +#define TK_TO_END 296 +#define TK_SL2VL_TABLE_START 297 +#define TK_SL2VL_TABLE_END 298 +#define TK_ACROSS_START 299 +#define TK_ACROSS_END 300 +#define TK_ACROSS_FROM_START 301 +#define TK_ACROSS_FROM_END 302 +#define TK_ACROSS_TO_START 303 +#define TK_ACROSS_TO_END 304 +#define TK_VLARB_TABLES_START 305 +#define TK_VLARB_TABLES_END 306 +#define TK_VLARB_SCOPE_START 307 +#define TK_VLARB_SCOPE_END 308 +#define TK_VLARB_HIGH_START 309 +#define TK_VLARB_HIGH_END 310 +#define TK_VLARB_LOW_START 311 +#define TK_VLARB_LOW_END 312 +#define TK_VLARB_HIGH_LIMIT_START 313 
+#define TK_VLARB_HIGH_LIMIT_END 314 +#define TK_QOS_LEVELS_START 315 +#define TK_QOS_LEVELS_END 316 +#define TK_QOS_LEVEL_START 317 +#define TK_QOS_LEVEL_END 318 +#define TK_SN_START 319 +#define TK_SN_END 320 +#define TK_SL_START 321 +#define TK_SL_END 322 +#define TK_CLASS_START 323 +#define TK_CLASS_END 324 +#define TK_MTU_LIMIT_START 325 +#define TK_MTU_LIMIT_END 326 +#define TK_RATE_LIMIT_START 327 +#define TK_RATE_LIMIT_END 328 +#define TK_PACKET_LIFE_START 329 +#define TK_PACKET_LIFE_END 330 +#define TK_PKEY_START 331 +#define TK_PKEY_END 332 +#define TK_QOS_MATCH_RULES_START 333 +#define TK_QOS_MATCH_RULES_END 334 +#define TK_QOS_MATCH_RULE_START 335 +#define TK_QOS_MATCH_RULE_END 336 +#define TK_QOS_LEVEL_SN_START 337 +#define TK_QOS_LEVEL_SN_END 338 +#define TK_SOURCE_START 339 +#define TK_SOURCE_END 340 +#define TK_DESTINATION_START 341 +#define TK_DESTINATION_END 342 +#define TK_SERVICE_START 343 +#define TK_SERVICE_END 344 + + + + +#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED) +typedef int YYSTYPE; +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +# define YYSTYPE_IS_TRIVIAL 1 +#endif + +extern YYSTYPE yylval; + + + diff --git a/osm/opensm/osm_qos_parser_l.c b/osm/opensm/osm_qos_parser_l.c new file mode 100644 index 0000000..f614ed7 --- /dev/null +++ b/osm/opensm/osm_qos_parser_l.c @@ -0,0 +1,2686 @@ + +#line 3 "lex.yy.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 33 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. 
Non-C99 systems may or may not. */ + +#if __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; +#endif /* ! C99 */ + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. 
This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN (yy_start) = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START (((yy_start) - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart(yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE 16384 +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +extern int yyleng; + +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = (yy_hold_char); \ + YY_RESTORE_YY_MORE_OFFSET \ + (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, (yytext_ptr) ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). 
+ */ + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef unsigned int yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* Stack of input buffers. 
*/ +static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ +static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ +static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ + ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ + : NULL) + +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; +static int yy_n_chars; /* number of characters read into yy_ch_buf */ +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 0; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart (FILE *input_file ); +void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ); +YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ); +void yy_delete_buffer (YY_BUFFER_STATE b ); +void yy_flush_buffer (YY_BUFFER_STATE b ); +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ); +void yypop_buffer_state (void ); + +static void yyensure_buffer_stack (void ); +static void yy_load_buffer_state (void ); +static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ); + +#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ) + +YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ); +YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ); +YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len ); + +void *yyalloc (yy_size_t ); +void *yyrealloc (void *,yy_size_t ); +void yyfree (void * ); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +typedef unsigned char YY_CHAR; + +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; + +typedef int yy_state_type; + +extern int yylineno; + +int yylineno = 1; + +extern char *yytext; +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state (void ); +static yy_state_type yy_try_NUL_trans (yy_state_type current_state ); +static int yy_get_next_buffer (void ); +static void yy_fatal_error (yyconst char msg[] ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + (yytext_ptr) = yy_bp; \ + yyleng = (size_t) (yy_cp - yy_bp); \ + (yy_hold_char) = *yy_cp; \ + *yy_cp = '\0'; \ + (yy_c_buf_p) = yy_cp; + +#define YY_NUM_RULES 89 +#define YY_END_OF_BUFFER 90 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static yyconst flex_int16_t yy_accept[612] = + { 0, + 0, 0, 90, 88, 84, 84, 87, 85, 83, 83, + 86, 88, 88, 84, 0, 0, 83, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 82, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 57, 55, 0, 31, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 58, 56, 0, 32, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, + 55, 0, 31, 11, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 58, 56, 0, 32, 12, 0, 0, + 0, 0, 0, 29, 0, 0, 9, 0, 0, 0, + + 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 11, 0, 0, 81, 0, 0, 0, 30, 0, 0, + 10, 0, 0, 0, 68, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 12, 0, 0, 0, 59, 0, + 29, 27, 0, 9, 0, 0, 0, 67, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 81, 0, 60, 0, 30, 28, 0, 10, 0, 0, + 0, 68, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 35, 59, 0, 27, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, + 0, 36, 60, 0, 28, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 76, 0, 0, 0, 0, 0, 0, 0, 35, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 79, 0, 0, 75, 0, 0, 0, 0, + 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, + 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, + 0, 0, 0, 39, 0, 61, 19, 0, 17, 0, + 13, 15, 0, 53, 0, 0, 0, 21, 0, 0, + 0, 0, 0, 47, 0, 0, 0, 40, 0, 62, + 20, 0, 18, 0, 14, 16, 0, 54, 0, 0, + 0, 22, 0, 0, 0, 0, 0, 48, 0, 0, + 0, 39, 0, 61, 19, 0, 17, 7, 0, 13, + 15, 0, 53, 51, 0, 3, 21, 63, 0, 0, + + 0, 45, 47, 0, 0, 0, 40, 0, 62, 20, + 0, 18, 8, 0, 14, 16, 0, 54, 52, 0, + 4, 22, 64, 0, 0, 0, 46, 48, 0, 0, + 37, 77, 65, 7, 5, 0, 51, 0, 3, 63, + 25, 33, 0, 0, 
45, 43, 0, 38, 78, 66, + 8, 6, 0, 52, 0, 4, 64, 26, 34, 0, + 0, 46, 44, 0, 37, 77, 65, 5, 73, 0, + 25, 33, 23, 0, 43, 41, 38, 78, 66, 6, + 74, 0, 26, 34, 24, 0, 44, 42, 73, 0, + 23, 49, 41, 74, 0, 24, 50, 42, 71, 0, + + 49, 72, 0, 50, 71, 69, 72, 70, 69, 70, + 0 + } ; + +static yyconst flex_int32_t yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 5, 6, 1, 7, 8, 9, 10, + 9, 9, 9, 9, 9, 9, 9, 11, 1, 12, + 1, 13, 14, 1, 15, 15, 15, 15, 15, 15, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, + 1, 1, 1, 1, 1, 1, 17, 18, 19, 20, + + 21, 22, 23, 24, 25, 1, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 16, + 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst flex_int32_t yy_meta[40] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, + 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, + 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1 + } ; + +static yyconst flex_int16_t yy_base[615] = + { 0, + 0, 13, 885, 886, 26, 29, 886, 886, 26, 36, + 886, 43, 871, 56, 0, 870, 43, 0, 876, 64, + 46, 862, 853, 858, 845, 844, 841, 0, 77, 845, + 857, 61, 843, 838, 844, 52, 0, 864, 850, 841, + 846, 833, 832, 829, 3, 85, 833, 845, 83, 831, + 826, 832, 103, 825, 840, 822, 825, 824, 817, 824, + 831, 89, 829, 816, 814, 812, 813, 51, 832, 808, + 830, 821, 20, 114, 835, 807, 822, 804, 807, 806, + 799, 806, 813, 90, 811, 798, 796, 794, 795, 108, + 814, 790, 812, 803, 50, 793, 788, 786, 792, 783, + + 812, 796, 795, 789, 779, 774, 777, 805, 789, 772, + 771, 117, 122, 774, 124, 793, 781, 
771, 797, 796, + 771, 766, 764, 770, 761, 790, 774, 773, 767, 757, + 752, 755, 783, 767, 750, 749, 126, 128, 752, 130, + 771, 759, 749, 747, 746, 754, 765, 746, 749, 762, + 768, 752, 747, 758, 764, 107, 763, 743, 740, 134, + 137, 747, 140, 142, 740, 746, 750, 728, 727, 735, + 746, 727, 730, 743, 749, 733, 728, 739, 745, 119, + 744, 724, 721, 146, 149, 728, 152, 154, 721, 727, + 710, 730, 713, 156, 728, 715, 158, 704, 703, 702, + + 160, 141, 715, 718, 704, 712, 705, 712, 724, 708, + 163, 705, 721, 165, 692, 712, 695, 169, 710, 697, + 171, 686, 685, 684, 173, 154, 697, 700, 686, 694, + 687, 694, 706, 690, 176, 687, 703, 174, 179, 691, + 182, 186, 679, 188, 667, 699, 679, 190, 161, 686, + 665, 666, 673, 664, 673, 676, 161, 683, 671, 174, + 197, 197, 202, 677, 204, 209, 665, 211, 653, 685, + 665, 213, 184, 672, 651, 652, 659, 650, 659, 662, + 184, 669, 657, 197, 200, 223, 225, 645, 227, 654, + 647, 650, 646, 645, 649, 645, 651, 652, 645, 633, + + 640, 654, 647, 648, 231, 658, 638, 632, 642, 643, + 201, 235, 237, 624, 239, 633, 626, 629, 625, 624, + 628, 624, 630, 631, 624, 612, 619, 633, 626, 627, + 241, 637, 617, 611, 621, 622, 605, 607, 243, 611, + 600, 613, 608, 603, 595, 610, 608, 601, 603, 607, + 594, 599, 245, 593, 604, 247, 594, 597, 581, 588, + 599, 583, 585, 249, 589, 578, 591, 586, 581, 573, + 588, 586, 579, 581, 585, 572, 577, 251, 571, 582, + 253, 572, 575, 559, 566, 577, 564, 580, 562, 578, + 577, 567, 575, 556, 573, 572, 251, 578, 544, 569, + + 546, 256, 549, 552, 553, 553, 563, 544, 547, 543, + 559, 541, 557, 556, 546, 554, 535, 552, 551, 254, + 557, 523, 548, 525, 259, 528, 531, 532, 532, 542, + 523, 526, 524, 263, 522, 266, 268, 529, 270, 250, + 272, 274, 515, 276, 535, 514, 533, 278, 532, 523, + 522, 514, 528, 280, 519, 518, 510, 284, 508, 287, + 289, 515, 291, 282, 294, 296, 501, 298, 521, 500, + 519, 300, 518, 509, 508, 500, 514, 302, 505, 504, + 511, 304, 510, 306, 308, 509, 310, 312, 508, 315, + 317, 491, 319, 321, 483, 323, 325, 327, 505, 318, + 
+ 492, 330, 332, 503, 481, 501, 334, 500, 336, 338, + 499, 340, 342, 498, 344, 346, 481, 348, 351, 473, + 353, 355, 357, 495, 348, 482, 360, 362, 493, 471, + 364, 366, 368, 370, 372, 491, 374, 476, 376, 378, + 381, 383, 489, 466, 385, 387, 487, 389, 391, 393, + 395, 397, 464, 399, 175, 401, 403, 405, 407, 173, + 134, 409, 411, 101, 413, 415, 417, 419, 421, 68, + 423, 425, 427, 72, 429, 431, 433, 435, 437, 439, + 441, 52, 443, 445, 447, 56, 449, 451, 453, 444, + 456, 458, 460, 462, 453, 465, 467, 469, 471, 26, + + 473, 477, 0, 479, 481, 483, 486, 488, 490, 492, + 886, 495, 2, 497 + } ; + +static yyconst flex_int16_t yy_def[615] = + { 0, + 612, 612, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 12, 611, 611, 613, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 613, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 614, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 614, 614, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 614, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, 
+ 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 0, 611, 611, 611 + } ; + +static yyconst flex_int16_t yy_nxt[926] = + { 0, + 611, 5, 6, 37, 7, 8, 611, 9, 10, 10, + 11, 12, 608, 13, 5, 6, 60, 7, 8, 82, + 9, 10, 10, 11, 
12, 117, 13, 14, 14, 61, + 14, 14, 83, 17, 17, 17, 118, 15, 606, 16, + 15, 18, 16, 17, 17, 17, 19, 53, 53, 20, + 17, 17, 17, 74, 74, 142, 21, 14, 14, 22, + 111, 23, 24, 112, 25, 26, 143, 15, 597, 16, + 27, 28, 595, 29, 30, 31, 32, 33, 34, 35, + 39, 67, 40, 41, 592, 42, 43, 68, 590, 69, + 70, 44, 45, 62, 46, 47, 48, 49, 50, 51, + + 52, 84, 63, 89, 53, 53, 64, 104, 129, 90, + 85, 91, 92, 588, 86, 74, 74, 136, 160, 160, + 137, 105, 130, 161, 161, 163, 163, 184, 184, 185, + 185, 187, 187, 203, 204, 160, 160, 205, 161, 161, + 206, 163, 163, 211, 211, 227, 228, 184, 184, 229, + 185, 185, 230, 187, 187, 235, 235, 241, 241, 244, + 244, 248, 248, 249, 211, 211, 261, 261, 586, 250, + 265, 265, 268, 268, 272, 272, 273, 235, 235, 285, + 287, 287, 274, 241, 241, 585, 286, 289, 289, 244, + 244, 248, 248, 294, 303, 304, 295, 307, 261, 261, + + 308, 582, 311, 313, 313, 265, 265, 309, 310, 312, + 315, 315, 268, 268, 272, 272, 320, 329, 330, 321, + 333, 337, 362, 334, 339, 339, 287, 287, 289, 289, + 335, 336, 356, 356, 338, 363, 364, 364, 313, 313, + 315, 315, 381, 381, 339, 339, 402, 402, 356, 356, + 364, 364, 425, 425, 381, 381, 443, 402, 402, 467, + 425, 425, 488, 444, 482, 482, 468, 484, 484, 485, + 485, 487, 487, 490, 490, 491, 491, 493, 493, 497, + 497, 503, 503, 489, 445, 507, 507, 469, 509, 509, + 510, 510, 512, 512, 513, 515, 515, 516, 516, 518, + + 518, 522, 522, 528, 528, 482, 482, 484, 484, 485, + 485, 487, 487, 534, 534, 514, 490, 490, 491, 491, + 493, 493, 537, 537, 539, 539, 497, 497, 540, 540, + 542, 545, 545, 503, 503, 507, 507, 509, 509, 510, + 510, 512, 512, 551, 551, 515, 515, 516, 516, 518, + 518, 543, 554, 554, 556, 556, 522, 522, 557, 557, + 559, 562, 562, 528, 528, 565, 565, 566, 566, 567, + 567, 534, 534, 568, 568, 537, 537, 539, 539, 540, + 540, 560, 571, 571, 572, 572, 545, 545, 575, 575, + 577, 577, 578, 578, 579, 579, 551, 551, 580, 580, + + 554, 554, 556, 556, 557, 557, 583, 583, 584, 584, + 562, 562, 587, 587, 565, 565, 566, 566, 567, 567, + 568, 568, 
589, 589, 571, 571, 572, 572, 591, 591, + 575, 575, 593, 593, 577, 577, 578, 578, 579, 579, + 580, 580, 594, 594, 583, 583, 584, 584, 596, 596, + 587, 587, 598, 598, 589, 589, 599, 591, 591, 601, + 601, 593, 593, 594, 594, 602, 596, 596, 604, 604, + 598, 598, 605, 605, 601, 601, 581, 600, 607, 607, + 604, 604, 605, 605, 609, 609, 603, 607, 607, 610, + 610, 609, 609, 610, 610, 4, 4, 119, 119, 576, + + 574, 573, 570, 569, 564, 563, 561, 558, 555, 553, + 552, 550, 549, 548, 547, 546, 544, 541, 538, 536, + 535, 533, 532, 531, 530, 529, 527, 526, 525, 524, + 523, 521, 520, 519, 517, 511, 508, 506, 505, 504, + 502, 501, 500, 499, 498, 496, 495, 494, 492, 486, + 483, 481, 480, 479, 478, 477, 476, 475, 474, 473, + 472, 471, 470, 466, 465, 464, 463, 462, 461, 460, + 459, 458, 457, 456, 455, 454, 453, 452, 451, 450, + 449, 448, 447, 446, 442, 441, 440, 439, 438, 437, + 436, 435, 434, 433, 432, 431, 430, 429, 428, 427, + + 426, 424, 423, 422, 421, 420, 419, 418, 417, 416, + 415, 414, 413, 412, 411, 410, 409, 408, 407, 406, + 405, 404, 403, 401, 400, 399, 398, 397, 396, 395, + 394, 393, 392, 391, 390, 389, 388, 387, 386, 385, + 384, 383, 382, 380, 379, 378, 377, 376, 375, 374, + 373, 372, 371, 370, 369, 368, 367, 366, 365, 361, + 360, 359, 358, 357, 355, 354, 353, 352, 351, 350, + 349, 348, 347, 346, 345, 344, 343, 342, 341, 340, + 332, 331, 328, 327, 326, 325, 324, 323, 322, 319, + 318, 317, 316, 314, 306, 305, 302, 301, 300, 299, + + 298, 297, 296, 293, 292, 291, 290, 288, 284, 283, + 282, 281, 280, 279, 278, 277, 276, 275, 271, 270, + 269, 267, 266, 264, 263, 262, 260, 259, 258, 257, + 256, 255, 254, 253, 252, 251, 247, 246, 245, 243, + 242, 240, 239, 238, 237, 236, 234, 233, 232, 231, + 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, + 216, 215, 214, 213, 212, 210, 209, 208, 207, 202, + 201, 200, 199, 198, 197, 196, 195, 194, 193, 192, + 191, 190, 189, 188, 186, 183, 182, 181, 180, 179, + 178, 177, 176, 175, 174, 173, 172, 171, 170, 169, + + 168, 167, 120, 166, 165, 164, 
162, 159, 158, 157, + 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, + 146, 145, 144, 141, 140, 139, 138, 135, 134, 133, + 132, 131, 128, 127, 126, 125, 124, 123, 122, 121, + 120, 116, 115, 114, 113, 110, 109, 108, 107, 106, + 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, + 93, 88, 87, 81, 80, 79, 78, 77, 76, 75, + 73, 72, 71, 66, 65, 59, 58, 57, 56, 55, + 54, 38, 36, 36, 611, 3, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611 + } ; + +static yyconst flex_int16_t yy_chk[926] = + { 0, + 0, 1, 1, 613, 1, 1, 0, 1, 1, 1, + 1, 1, 603, 1, 2, 2, 28, 2, 2, 45, + 2, 2, 2, 2, 2, 73, 2, 5, 5, 28, + 6, 6, 45, 9, 9, 9, 73, 5, 600, 5, + 6, 9, 6, 10, 10, 10, 12, 21, 21, 12, + 17, 17, 17, 36, 36, 95, 12, 14, 14, 12, + 68, 12, 12, 68, 12, 12, 95, 14, 586, 14, + 12, 12, 582, 12, 12, 12, 12, 12, 12, 12, + 20, 32, 20, 20, 574, 20, 20, 32, 570, 32, + 32, 20, 20, 29, 20, 20, 20, 20, 20, 20, + + 20, 46, 29, 49, 53, 53, 29, 62, 84, 49, + 46, 49, 49, 564, 46, 74, 74, 90, 112, 112, + 90, 62, 84, 113, 113, 115, 115, 137, 137, 138, + 138, 140, 140, 156, 156, 160, 160, 156, 161, 161, + 156, 163, 163, 164, 164, 180, 180, 184, 184, 180, + 185, 185, 180, 187, 187, 188, 188, 194, 194, 197, + 197, 201, 201, 202, 211, 211, 214, 214, 561, 202, + 218, 218, 221, 221, 225, 225, 226, 235, 235, 238, + 239, 239, 226, 241, 241, 560, 238, 242, 242, 244, + 244, 248, 248, 249, 257, 257, 249, 260, 261, 261, + + 260, 555, 262, 263, 263, 265, 265, 260, 260, 262, + 266, 266, 268, 268, 272, 272, 273, 281, 281, 273, + 284, 285, 311, 284, 286, 286, 287, 287, 289, 289, + 284, 284, 305, 305, 285, 311, 312, 312, 313, 313, + 315, 315, 331, 331, 339, 339, 353, 353, 356, 356, + 364, 364, 378, 378, 381, 381, 397, 402, 402, 420, + 425, 425, 440, 397, 434, 434, 420, 436, 436, 437, + 437, 439, 439, 441, 441, 442, 442, 444, 444, 448, + 448, 454, 454, 440, 397, 458, 458, 
420, 460, 460, + 461, 461, 463, 463, 464, 465, 465, 466, 466, 468, + + 468, 472, 472, 478, 478, 482, 482, 484, 484, 485, + 485, 487, 487, 488, 488, 464, 490, 490, 491, 491, + 493, 493, 494, 494, 496, 496, 497, 497, 498, 498, + 500, 502, 502, 503, 503, 507, 507, 509, 509, 510, + 510, 512, 512, 513, 513, 515, 515, 516, 516, 518, + 518, 500, 519, 519, 521, 521, 522, 522, 523, 523, + 525, 527, 527, 528, 528, 531, 531, 532, 532, 533, + 533, 534, 534, 535, 535, 537, 537, 539, 539, 540, + 540, 525, 541, 541, 542, 542, 545, 545, 546, 546, + 548, 548, 549, 549, 550, 550, 551, 551, 552, 552, + + 554, 554, 556, 556, 557, 557, 558, 558, 559, 559, + 562, 562, 563, 563, 565, 565, 566, 566, 567, 567, + 568, 568, 569, 569, 571, 571, 572, 572, 573, 573, + 575, 575, 576, 576, 577, 577, 578, 578, 579, 579, + 580, 580, 581, 581, 583, 583, 584, 584, 585, 585, + 587, 587, 588, 588, 589, 589, 590, 591, 591, 592, + 592, 593, 593, 594, 594, 595, 596, 596, 597, 597, + 598, 598, 599, 599, 601, 601, 553, 590, 602, 602, + 604, 604, 605, 605, 606, 606, 595, 607, 607, 608, + 608, 609, 609, 610, 610, 612, 612, 614, 614, 547, + + 544, 543, 538, 536, 530, 529, 526, 524, 520, 517, + 514, 511, 508, 506, 505, 504, 501, 499, 495, 492, + 489, 486, 483, 481, 480, 479, 477, 476, 475, 474, + 473, 471, 470, 469, 467, 462, 459, 457, 456, 455, + 453, 452, 451, 450, 449, 447, 446, 445, 443, 438, + 435, 433, 432, 431, 430, 429, 428, 427, 426, 424, + 423, 422, 421, 419, 418, 417, 416, 415, 414, 413, + 412, 411, 410, 409, 408, 407, 406, 405, 404, 403, + 401, 400, 399, 398, 396, 395, 394, 393, 392, 391, + 390, 389, 388, 387, 386, 385, 384, 383, 382, 380, + + 379, 377, 376, 375, 374, 373, 372, 371, 370, 369, + 368, 367, 366, 365, 363, 362, 361, 360, 359, 358, + 357, 355, 354, 352, 351, 350, 349, 348, 347, 346, + 345, 344, 343, 342, 341, 340, 338, 337, 336, 335, + 334, 333, 332, 330, 329, 328, 327, 326, 325, 324, + 323, 322, 321, 320, 319, 318, 317, 316, 314, 310, + 309, 308, 307, 306, 304, 303, 302, 301, 300, 299, 
+ 298, 297, 296, 295, 294, 293, 292, 291, 290, 288, + 283, 282, 280, 279, 278, 277, 276, 275, 274, 271, + 270, 269, 267, 264, 259, 258, 256, 255, 254, 253, + + 252, 251, 250, 247, 246, 245, 243, 240, 237, 236, + 234, 233, 232, 231, 230, 229, 228, 227, 224, 223, + 222, 220, 219, 217, 216, 215, 213, 212, 210, 209, + 208, 207, 206, 205, 204, 203, 200, 199, 198, 196, + 195, 193, 192, 191, 190, 189, 186, 183, 182, 181, + 179, 178, 177, 176, 175, 174, 173, 172, 171, 170, + 169, 168, 167, 166, 165, 162, 159, 158, 157, 155, + 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, + 144, 143, 142, 141, 139, 136, 135, 134, 133, 132, + 131, 130, 129, 128, 127, 126, 125, 124, 123, 122, + + 121, 120, 119, 118, 117, 116, 114, 111, 110, 109, + 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, + 98, 97, 96, 94, 93, 92, 91, 89, 88, 87, + 86, 85, 83, 82, 81, 80, 79, 78, 77, 76, + 75, 72, 71, 70, 69, 67, 66, 65, 64, 63, + 61, 60, 59, 58, 57, 56, 55, 54, 52, 51, + 50, 48, 47, 44, 43, 42, 41, 40, 39, 38, + 35, 34, 33, 31, 30, 27, 26, 25, 24, 23, + 22, 19, 16, 13, 3, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611 + } ; + +static yy_state_type yy_last_accepting_state; +static char *yy_last_accepting_cpos; + +extern int yy_flex_debug; +int yy_flex_debug = 0; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +#line 2 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Lexer of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +#define SAVE_POS save_pos() +static void save_pos(); + +extern int column_num; +extern int line_num; +extern FILE * yyin; + +#line 916 "lex.yy.c" + +#define INITIAL 0 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. 
+ */ +#include +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +static int yy_init_globals (void ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap (void ); +#else +extern int yywrap (void ); +#endif +#endif + + static void yyunput (int c,char *buf_ptr ); + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ); +#endif + +#ifndef YY_NO_INPUT + +#ifdef __cplusplus +static int yyinput (void ); +#else +static int input (void ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + size_t n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex (void); + +#define YY_DECL int yylex (void) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. 
+ */ +YY_DECL +{ + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + +#line 144 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" + + +#line 1072 "lex.yy.c" + + if ( !(yy_init) ) + { + (yy_init) = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! (yy_start) ) + (yy_start) = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ); + } + + yy_load_buffer_state( ); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = (yy_c_buf_p); + + /* Support of yytext. */ + *yy_cp = (yy_hold_char); + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = (yy_start); +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 612 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 886 ); + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + if ( yy_act == 0 ) + { /* have to back up */ + yy_cp = (yy_last_accepting_cpos); + yy_current_state = (yy_last_accepting_state); + yy_act = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. 
*/ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = (yy_hold_char); + yy_cp = (yy_last_accepting_cpos); + yy_current_state = (yy_last_accepting_state); + goto yy_find_action; + +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 146 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_XML_VERSION_START; } + YY_BREAK +case 2: +/* rule 2 can match eol */ +YY_RULE_SETUP +#line 147 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_XML_VERSION_END; } + YY_BREAK +case 3: +/* rule 3 can match eol */ +YY_RULE_SETUP +#line 148 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_POLICY_START; } + YY_BREAK +case 4: +/* rule 4 can match eol */ +YY_RULE_SETUP +#line 149 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_POLICY_END; } + YY_BREAK +case 5: +/* rule 5 can match eol */ +YY_RULE_SETUP +#line 150 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUPS_START; } + YY_BREAK +case 6: +/* rule 6 can match eol */ +YY_RULE_SETUP +#line 151 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUPS_END; } + YY_BREAK +case 7: +/* rule 7 can match eol */ +YY_RULE_SETUP +#line 152 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUP_START; } + YY_BREAK +case 8: +/* rule 8 can match eol */ +YY_RULE_SETUP +#line 153 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GROUP_END; } + YY_BREAK +case 9: +/* rule 9 can match eol */ +YY_RULE_SETUP +#line 154 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NAME_START; } + YY_BREAK +case 10: +/* rule 10 can match eol */ +YY_RULE_SETUP +#line 155 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NAME_END; } + YY_BREAK +case 11: +/* rule 11 can match eol */ +YY_RULE_SETUP +#line 156 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_USE_START; } + YY_BREAK +case 12: +/* rule 12 can match eol */ +YY_RULE_SETUP +#line 157 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_USE_END; } + YY_BREAK +case 13: +/* rule 13 can match eol */ +YY_RULE_SETUP +#line 158 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GUID_START; } + YY_BREAK +case 14: +/* rule 14 can match eol */ +YY_RULE_SETUP +#line 159 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_GUID_END; } + YY_BREAK +case 15: +/* rule 15 can match eol */ +YY_RULE_SETUP +#line 160 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_NAME_START; } + YY_BREAK +case 16: +/* rule 16 can match eol */ +YY_RULE_SETUP +#line 161 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PORT_NAME_END; } + YY_BREAK +case 17: +/* rule 17 can match eol */ +YY_RULE_SETUP +#line 162 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PARTITION_START; } + YY_BREAK +case 18: +/* rule 18 can match eol */ +YY_RULE_SETUP +#line 163 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PARTITION_END; } + YY_BREAK +case 19: +/* rule 19 can match eol */ +YY_RULE_SETUP +#line 164 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NODE_TYPE_START; } + YY_BREAK +case 20: +/* rule 20 can match eol */ +YY_RULE_SETUP +#line 165 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_NODE_TYPE_END; } + YY_BREAK +case 21: +/* rule 21 can match eol */ +YY_RULE_SETUP +#line 166 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_SETUP_START; } + YY_BREAK +case 22: +/* rule 22 can match eol */ +YY_RULE_SETUP +#line 167 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_SETUP_END; } + YY_BREAK +case 23: +/* rule 23 can match eol */ +YY_RULE_SETUP +#line 168 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLES_START; } + YY_BREAK +case 24: +/* rule 24 can match eol */ +YY_RULE_SETUP +#line 169 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLES_END; } + YY_BREAK +case 25: +/* rule 25 can match eol */ +YY_RULE_SETUP +#line 170 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_SCOPE_START; } + YY_BREAK +case 26: +/* rule 26 can match eol */ +YY_RULE_SETUP +#line 171 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_SCOPE_END; } + YY_BREAK +case 27: +/* rule 27 can match eol */ +YY_RULE_SETUP +#line 172 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_GROUP_START; } + YY_BREAK +case 28: +/* rule 28 can match eol */ +YY_RULE_SETUP +#line 173 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_GROUP_END; } + YY_BREAK +case 29: +/* rule 29 can match eol */ +YY_RULE_SETUP +#line 174 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_FROM_START; } + YY_BREAK +case 30: +/* rule 30 can match eol */ +YY_RULE_SETUP +#line 175 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_FROM_END; } + YY_BREAK +case 31: +/* rule 31 can match eol */ +YY_RULE_SETUP +#line 176 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_TO_START; } + YY_BREAK +case 32: +/* rule 32 can match eol */ +YY_RULE_SETUP +#line 177 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_TO_END; } + YY_BREAK +case 33: +/* rule 33 can match eol */ +YY_RULE_SETUP +#line 178 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLE_START; } + YY_BREAK +case 34: +/* rule 34 can match eol */ +YY_RULE_SETUP +#line 179 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL2VL_TABLE_END; } + YY_BREAK +case 35: +/* rule 35 can match eol */ +YY_RULE_SETUP +#line 180 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_START; } + YY_BREAK +case 36: +/* rule 36 can match eol */ +YY_RULE_SETUP +#line 181 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_END; } + YY_BREAK +case 37: +/* rule 37 can match eol */ +YY_RULE_SETUP +#line 182 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_FROM_START; } + YY_BREAK +case 38: +/* rule 38 can match eol */ +YY_RULE_SETUP +#line 183 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_FROM_END; } + YY_BREAK +case 39: +/* rule 39 can match eol */ +YY_RULE_SETUP +#line 184 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_TO_START; } + YY_BREAK +case 40: +/* rule 40 can match eol */ +YY_RULE_SETUP +#line 185 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_ACROSS_TO_END; } + YY_BREAK +case 41: +/* rule 41 can match eol */ +YY_RULE_SETUP +#line 186 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_TABLES_START; } + YY_BREAK +case 42: +/* rule 42 can match eol */ +YY_RULE_SETUP +#line 187 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_TABLES_END; } + YY_BREAK +case 43: +/* rule 43 can match eol */ +YY_RULE_SETUP +#line 188 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_SCOPE_START; } + YY_BREAK +case 44: +/* rule 44 can match eol */ +YY_RULE_SETUP +#line 189 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_SCOPE_END; } + YY_BREAK +case 45: +/* rule 45 can match eol */ +YY_RULE_SETUP +#line 190 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_START; } + YY_BREAK +case 46: +/* rule 46 can match eol */ +YY_RULE_SETUP +#line 191 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_END; } + YY_BREAK +case 47: +/* rule 47 can match eol */ +YY_RULE_SETUP +#line 192 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_LOW_START; } + YY_BREAK +case 48: +/* rule 48 can match eol */ +YY_RULE_SETUP +#line 193 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_LOW_END; } + YY_BREAK +case 49: +/* rule 49 can match eol */ +YY_RULE_SETUP +#line 194 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_LIMIT_START; } + YY_BREAK +case 50: +/* rule 50 can match eol */ +YY_RULE_SETUP +#line 195 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_VLARB_HIGH_LIMIT_END; } + YY_BREAK +case 51: +/* rule 51 can match eol */ +YY_RULE_SETUP +#line 196 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVELS_START; } + YY_BREAK +case 52: +/* rule 52 can match eol */ +YY_RULE_SETUP +#line 197 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVELS_END; } + YY_BREAK +case 53: +/* rule 53 can match eol */ +YY_RULE_SETUP +#line 198 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_START; } + YY_BREAK +case 54: +/* rule 54 can match eol */ 
+YY_RULE_SETUP +#line 199 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_END; } + YY_BREAK +case 55: +/* rule 55 can match eol */ +YY_RULE_SETUP +#line 200 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SN_START; } + YY_BREAK +case 56: +/* rule 56 can match eol */ +YY_RULE_SETUP +#line 201 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SN_END; } + YY_BREAK +case 57: +/* rule 57 can match eol */ +YY_RULE_SETUP +#line 202 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL_START; } + YY_BREAK +case 58: +/* rule 58 can match eol */ +YY_RULE_SETUP +#line 203 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SL_END; } + YY_BREAK +case 59: +/* rule 59 can match eol */ +YY_RULE_SETUP +#line 204 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_CLASS_START; } + YY_BREAK +case 60: +/* rule 60 can match eol */ +YY_RULE_SETUP +#line 205 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_CLASS_END; } + YY_BREAK +case 61: +/* rule 61 can match eol */ +YY_RULE_SETUP +#line 206 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_MTU_LIMIT_START; } + YY_BREAK +case 62: +/* rule 62 can match eol */ +YY_RULE_SETUP +#line 207 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_MTU_LIMIT_END; } + YY_BREAK +case 63: +/* rule 63 can match eol */ +YY_RULE_SETUP +#line 208 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_RATE_LIMIT_START; } + YY_BREAK +case 64: +/* rule 64 can match eol */ +YY_RULE_SETUP +#line 209 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_RATE_LIMIT_END; } + YY_BREAK +case 65: +/* rule 65 can match eol */ +YY_RULE_SETUP +#line 210 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PACKET_LIFE_START; } + YY_BREAK +case 66: +/* rule 66 can match eol */ +YY_RULE_SETUP +#line 211 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PACKET_LIFE_END; } + YY_BREAK +case 67: +/* rule 67 can match eol */ +YY_RULE_SETUP +#line 212 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PKEY_START; } + YY_BREAK +case 68: +/* rule 68 can match eol */ +YY_RULE_SETUP +#line 213 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_PKEY_END; } + YY_BREAK +case 69: +/* rule 69 can match eol */ +YY_RULE_SETUP +#line 214 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULES_START; } + YY_BREAK +case 70: +/* rule 70 can match eol */ +YY_RULE_SETUP +#line 215 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULES_END; } + YY_BREAK +case 71: +/* rule 71 can match eol */ +YY_RULE_SETUP +#line 216 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULE_START; } + YY_BREAK +case 72: +/* rule 72 can match eol */ +YY_RULE_SETUP +#line 217 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_MATCH_RULE_END; } + YY_BREAK +case 73: +/* rule 73 can match eol */ +YY_RULE_SETUP +#line 218 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_SN_START; } + YY_BREAK +case 74: +/* rule 74 can match eol */ +YY_RULE_SETUP +#line 219 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_QOS_LEVEL_SN_END; } + YY_BREAK +case 75: +/* rule 75 can match eol */ +YY_RULE_SETUP +#line 220 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SOURCE_START; } + YY_BREAK +case 76: +/* rule 76 can match eol */ +YY_RULE_SETUP 
+#line 221 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SOURCE_END; } + YY_BREAK +case 77: +/* rule 77 can match eol */ +YY_RULE_SETUP +#line 222 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_DESTINATION_START; } + YY_BREAK +case 78: +/* rule 78 can match eol */ +YY_RULE_SETUP +#line 223 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_DESTINATION_END; } + YY_BREAK +case 79: +/* rule 79 can match eol */ +YY_RULE_SETUP +#line 224 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SERVICE_START; } + YY_BREAK +case 80: +/* rule 80 can match eol */ +YY_RULE_SETUP +#line 225 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; return TK_SERVICE_END; } + YY_BREAK +case 81: +/* rule 81 can match eol */ +YY_RULE_SETUP +#line 227 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; } /* swallow comment */ + YY_BREAK +case 82: +YY_RULE_SETUP +#line 229 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_HEX_NUMBER; } + YY_BREAK +case 83: +YY_RULE_SETUP +#line 230 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_DEC_NUMBER; } + YY_BREAK +case 84: +/* rule 84 can match eol */ +YY_RULE_SETUP +#line 231 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_WHITE; } + YY_BREAK +case 85: +YY_RULE_SETUP +#line 232 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_DASH; } + YY_BREAK +case 86: +YY_RULE_SETUP +#line 233 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_DOTDOT; } + YY_BREAK +case 87: +YY_RULE_SETUP +#line 234 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_COMMA; } + YY_BREAK +case 88: +YY_RULE_SETUP +#line 235 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +{ SAVE_POS; yylval = strdup(yytext); return TK_TEXT;} + YY_BREAK +case 89: +YY_RULE_SETUP +#line 237 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" +ECHO; + YY_BREAK +#line 1682 "lex.yy.c" +case YY_STATE_EOF(INITIAL): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = (yy_hold_char); + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + { /* This was really a NUL. 
*/ + yy_state_type yy_next_state; + + (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++(yy_c_buf_p); + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = (yy_c_buf_p); + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_END_OF_FILE: + { + (yy_did_buffer_switch_on_eof) = 0; + + if ( yywrap( ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = + (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + (yy_c_buf_p) = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (void) +{ + register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + register char *source = (yytext_ptr); + register int number_to_move, i; + int ret_val; + + if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER; + + int yy_c_buf_p_offset = + (int) ((yy_c_buf_p) - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + (yy_n_chars), num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + if ( (yy_n_chars) == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart(yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + (yy_n_chars) += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; + + (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (void) +{ + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = (yy_start); + + for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 612 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) +{ + register int yy_is_jam; + register char *yy_cp = (yy_c_buf_p); + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 612 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 611); + + return yy_is_jam ? 0 : yy_current_state; +} + + static void yyunput (int c, register char * yy_bp ) +{ + register char *yy_cp; + + yy_cp = (yy_c_buf_p); + + /* undo effects of setting up yytext */ + *yy_cp = (yy_hold_char); + + if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. 
*/ + register int number_to_move = (yy_n_chars) + 2; + register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2]; + register char *source = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]; + + while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size; + + if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + (yytext_ptr) = yy_bp; + (yy_hold_char) = *yy_cp; + (yy_c_buf_p) = yy_cp; +} + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (void) +#else + static int input (void) +#endif + +{ + int c; + + *(yy_c_buf_p) = (yy_hold_char); + + if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + /* This was really a NUL. */ + *(yy_c_buf_p) = '\0'; + + else + { /* need more input */ + int offset = (yy_c_buf_p) - (yytext_ptr); + ++(yy_c_buf_p); + + switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart(yyin ); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( ) ) + return EOF; + + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = (yytext_ptr) + offset; + break; + } + } + } + + c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ + *(yy_c_buf_p) = '\0'; /* preserve yytext */ + (yy_hold_char) = *++(yy_c_buf_p); + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file ) +{ + + if ( ! YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ); + } + + yy_init_buffer(YY_CURRENT_BUFFER,input_file ); + yy_load_buffer_state( ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * + */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) +{ + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + (yy_did_buffer_switch_on_eof) = 1; +} + +static void yy_load_buffer_state (void) +{ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + (yy_hold_char) = *(yy_c_buf_p); +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer(b,file ); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * + */ + void yy_delete_buffer (YY_BUFFER_STATE b ) +{ + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree((void *) b->yy_ch_buf ); + + yyfree((void *) b ); +} + +#ifndef __cplusplus +extern int isatty (int ); +#endif /* __cplusplus */ + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. 
+ */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) + +{ + int oerrno = errno; + + yy_flush_buffer(b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * + */ + void yy_flush_buffer (YY_BUFFER_STATE b ) +{ + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) +{ + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + /* Only push if top exists. Otherwise, replace top. 
*/ + if (YY_CURRENT_BUFFER) + (yy_buffer_stack_top)++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * + */ +void yypop_buffer_state (void) +{ + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + if ((yy_buffer_stack_top) > 0) + --(yy_buffer_stack_top); + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (void) +{ + int num_to_alloc; + + if (!(yy_buffer_stack)) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; + (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + ); + + memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + (yy_buffer_stack_max) = num_to_alloc; + (yy_buffer_stack_top) = 0; + return; + } + + if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + int grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = (yy_buffer_stack_max) + grow_size; + (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc + ((yy_buffer_stack), + num_to_alloc * sizeof(struct yy_buffer_state*) + ); + + /* zero only the new slots.*/ + memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); + (yy_buffer_stack_max) = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. 
+ * @param base the character buffer + * @param size the size in bytes of the character buffer + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size ) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer(b ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param str a NUL-terminated string to scan + * + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (yyconst char * yystr ) +{ + + return yy_scan_bytes(yystr,strlen(yystr) ); +} + +/** Setup the input buffer state to scan the given bytes. The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param bytes the byte buffer to scan + * @param len the number of bytes in the buffer pointed to by @a bytes. + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, int _yybytes_len ) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = _yybytes_len + 2; + buf = (char *) yyalloc(n ); + if ( ! 
buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer(buf,n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yy_fatal_error (yyconst char* msg ) +{ + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = (yy_hold_char); \ + (yy_c_buf_p) = yytext + yyless_macro_arg; \ + (yy_hold_char) = *(yy_c_buf_p); \ + *(yy_c_buf_p) = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the current line number. + * + */ +int yyget_lineno (void) +{ + + return yylineno; +} + +/** Get the input stream. + * + */ +FILE *yyget_in (void) +{ + return yyin; +} + +/** Get the output stream. + * + */ +FILE *yyget_out (void) +{ + return yyout; +} + +/** Get the length of the current token. + * + */ +int yyget_leng (void) +{ + return yyleng; +} + +/** Get the current token. + * + */ + +char *yyget_text (void) +{ + return yytext; +} + +/** Set the current line number. + * @param line_number + * + */ +void yyset_lineno (int line_number ) +{ + + yylineno = line_number; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param in_str A readable stream. 
+ * + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * in_str ) +{ + yyin = in_str ; +} + +void yyset_out (FILE * out_str ) +{ + yyout = out_str ; +} + +int yyget_debug (void) +{ + return yy_flex_debug; +} + +void yyset_debug (int bdebug ) +{ + yy_flex_debug = bdebug ; +} + +static int yy_init_globals (void) +{ + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. + */ + + (yy_buffer_stack) = 0; + (yy_buffer_stack_top) = 0; + (yy_buffer_stack_max) = 0; + (yy_c_buf_p) = (char *) 0; + (yy_init) = 0; + (yy_start) = 0; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = (FILE *) 0; + yyout = (FILE *) 0; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (void) +{ + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(); + } + + /* Destroy the stack itself. */ + yyfree((yy_buffer_stack) ); + (yy_buffer_stack) = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( ); + + return 0; +} + +/* + * Internal utility routines. 
+ */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n ) +{ + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * s ) +{ + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *yyalloc (yy_size_t size ) +{ + return (void *) malloc( size ); +} + +void *yyrealloc (void * ptr, yy_size_t size ) +{ + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); +} + +void yyfree (void * ptr ) +{ + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 237 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.l" + + + +static void save_pos() +{ + int i; + for (i = 0; i < yyleng; i++) + { + if (yytext[i] == '\n') + { + line_num ++; + column_num = 1; + } + else + column_num ++; + } +} + + -- 1.4.4.1.GIT From kliteyn at dev.mellanox.co.il Tue Jan 30 07:57:02 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 30 Jan 2007 17:57:02 +0200 Subject: [openib-general] [PATCH 3.2/10] osm: QoS in OpenSM Message-ID: <45BF6ACE.6050708@dev.mellanox.co.il> QoS policy file parser Yacc & Lex generated files Signed-off-by: Yevgeny Kliteynik --- osm/opensm/osm_qos_parser_y.c | 3070 +++++++++++++++++++++++++++++++++ 1 file changed, 3070 insertions(+), 0 deletions(-) diff --git a/osm/opensm/osm_qos_parser_y.c b/osm/opensm/osm_qos_parser_y.c new file mode 100644 index 0000000..88198a2 --- /dev/null +++ b/osm/opensm/osm_qos_parser_y.c @@ -0,0 +1,3070 @@ +/* A Bison parser, made by GNU Bison 1.875. 
*/ + +/* Skeleton parser for Yacc-like parsing with Bison, + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, when this file is copied by Bison into a + Bison output file, you may use that output file without restriction. + This special exception was added by the Free Software Foundation + in version 1.24 of Bison. */ + +/* Written by Richard Stallman by simplifying the original so called + ``semantic'' parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 0 + +/* Using locations. */ +#define YYLSP_NEEDED 0 + + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. 
*/ + enum yytokentype { + TK_HEX_NUMBER = 258, + TK_DEC_NUMBER = 259, + TK_WHITE = 260, + TK_DASH = 261, + TK_DOTDOT = 262, + TK_COMMA = 263, + TK_TEXT = 264, + TK_XML_VERSION_START = 265, + TK_XML_VERSION_END = 266, + TK_QOS_POLICY_START = 267, + TK_QOS_POLICY_END = 268, + TK_PORT_GROUPS_START = 269, + TK_PORT_GROUPS_END = 270, + TK_PORT_GROUP_START = 271, + TK_PORT_GROUP_END = 272, + TK_NAME_START = 273, + TK_NAME_END = 274, + TK_USE_START = 275, + TK_USE_END = 276, + TK_PORT_GUID_START = 277, + TK_PORT_GUID_END = 278, + TK_PORT_NAME_START = 279, + TK_PORT_NAME_END = 280, + TK_PARTITION_START = 281, + TK_PARTITION_END = 282, + TK_NODE_TYPE_START = 283, + TK_NODE_TYPE_END = 284, + TK_QOS_SETUP_START = 285, + TK_QOS_SETUP_END = 286, + TK_SL2VL_TABLES_START = 287, + TK_SL2VL_TABLES_END = 288, + TK_SL2VL_SCOPE_START = 289, + TK_SL2VL_SCOPE_END = 290, + TK_GROUP_START = 291, + TK_GROUP_END = 292, + TK_FROM_START = 293, + TK_FROM_END = 294, + TK_TO_START = 295, + TK_TO_END = 296, + TK_SL2VL_TABLE_START = 297, + TK_SL2VL_TABLE_END = 298, + TK_ACROSS_START = 299, + TK_ACROSS_END = 300, + TK_ACROSS_FROM_START = 301, + TK_ACROSS_FROM_END = 302, + TK_ACROSS_TO_START = 303, + TK_ACROSS_TO_END = 304, + TK_VLARB_TABLES_START = 305, + TK_VLARB_TABLES_END = 306, + TK_VLARB_SCOPE_START = 307, + TK_VLARB_SCOPE_END = 308, + TK_VLARB_HIGH_START = 309, + TK_VLARB_HIGH_END = 310, + TK_VLARB_LOW_START = 311, + TK_VLARB_LOW_END = 312, + TK_VLARB_HIGH_LIMIT_START = 313, + TK_VLARB_HIGH_LIMIT_END = 314, + TK_QOS_LEVELS_START = 315, + TK_QOS_LEVELS_END = 316, + TK_QOS_LEVEL_START = 317, + TK_QOS_LEVEL_END = 318, + TK_SN_START = 319, + TK_SN_END = 320, + TK_SL_START = 321, + TK_SL_END = 322, + TK_CLASS_START = 323, + TK_CLASS_END = 324, + TK_MTU_LIMIT_START = 325, + TK_MTU_LIMIT_END = 326, + TK_RATE_LIMIT_START = 327, + TK_RATE_LIMIT_END = 328, + TK_PACKET_LIFE_START = 329, + TK_PACKET_LIFE_END = 330, + TK_PKEY_START = 331, + TK_PKEY_END = 332, + TK_QOS_MATCH_RULES_START = 333, + 
TK_QOS_MATCH_RULES_END = 334, + TK_QOS_MATCH_RULE_START = 335, + TK_QOS_MATCH_RULE_END = 336, + TK_QOS_LEVEL_SN_START = 337, + TK_QOS_LEVEL_SN_END = 338, + TK_SOURCE_START = 339, + TK_SOURCE_END = 340, + TK_DESTINATION_START = 341, + TK_DESTINATION_END = 342, + TK_SERVICE_START = 343, + TK_SERVICE_END = 344 + }; +#endif +#define TK_HEX_NUMBER 258 +#define TK_DEC_NUMBER 259 +#define TK_WHITE 260 +#define TK_DASH 261 +#define TK_DOTDOT 262 +#define TK_COMMA 263 +#define TK_TEXT 264 +#define TK_XML_VERSION_START 265 +#define TK_XML_VERSION_END 266 +#define TK_QOS_POLICY_START 267 +#define TK_QOS_POLICY_END 268 +#define TK_PORT_GROUPS_START 269 +#define TK_PORT_GROUPS_END 270 +#define TK_PORT_GROUP_START 271 +#define TK_PORT_GROUP_END 272 +#define TK_NAME_START 273 +#define TK_NAME_END 274 +#define TK_USE_START 275 +#define TK_USE_END 276 +#define TK_PORT_GUID_START 277 +#define TK_PORT_GUID_END 278 +#define TK_PORT_NAME_START 279 +#define TK_PORT_NAME_END 280 +#define TK_PARTITION_START 281 +#define TK_PARTITION_END 282 +#define TK_NODE_TYPE_START 283 +#define TK_NODE_TYPE_END 284 +#define TK_QOS_SETUP_START 285 +#define TK_QOS_SETUP_END 286 +#define TK_SL2VL_TABLES_START 287 +#define TK_SL2VL_TABLES_END 288 +#define TK_SL2VL_SCOPE_START 289 +#define TK_SL2VL_SCOPE_END 290 +#define TK_GROUP_START 291 +#define TK_GROUP_END 292 +#define TK_FROM_START 293 +#define TK_FROM_END 294 +#define TK_TO_START 295 +#define TK_TO_END 296 +#define TK_SL2VL_TABLE_START 297 +#define TK_SL2VL_TABLE_END 298 +#define TK_ACROSS_START 299 +#define TK_ACROSS_END 300 +#define TK_ACROSS_FROM_START 301 +#define TK_ACROSS_FROM_END 302 +#define TK_ACROSS_TO_START 303 +#define TK_ACROSS_TO_END 304 +#define TK_VLARB_TABLES_START 305 +#define TK_VLARB_TABLES_END 306 +#define TK_VLARB_SCOPE_START 307 +#define TK_VLARB_SCOPE_END 308 +#define TK_VLARB_HIGH_START 309 +#define TK_VLARB_HIGH_END 310 +#define TK_VLARB_LOW_START 311 +#define TK_VLARB_LOW_END 312 +#define TK_VLARB_HIGH_LIMIT_START 313 
+#define TK_VLARB_HIGH_LIMIT_END 314 +#define TK_QOS_LEVELS_START 315 +#define TK_QOS_LEVELS_END 316 +#define TK_QOS_LEVEL_START 317 +#define TK_QOS_LEVEL_END 318 +#define TK_SN_START 319 +#define TK_SN_END 320 +#define TK_SL_START 321 +#define TK_SL_END 322 +#define TK_CLASS_START 323 +#define TK_CLASS_END 324 +#define TK_MTU_LIMIT_START 325 +#define TK_MTU_LIMIT_END 326 +#define TK_RATE_LIMIT_START 327 +#define TK_RATE_LIMIT_END 328 +#define TK_PACKET_LIFE_START 329 +#define TK_PACKET_LIFE_END 330 +#define TK_PKEY_START 331 +#define TK_PKEY_END 332 +#define TK_QOS_MATCH_RULES_START 333 +#define TK_QOS_MATCH_RULES_END 334 +#define TK_QOS_MATCH_RULE_START 335 +#define TK_QOS_MATCH_RULE_END 336 +#define TK_QOS_LEVEL_SN_START 337 +#define TK_QOS_LEVEL_SN_END 338 +#define TK_SOURCE_START 339 +#define TK_SOURCE_END 340 +#define TK_DESTINATION_START 341 +#define TK_DESTINATION_END 342 +#define TK_SERVICE_START 343 +#define TK_SERVICE_END 344 + + + + +/* Copy the first part of user declarations. */ +#line 1 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Grammar of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include + +static void __osm_qos_parse_tree_init(); + +static char * __osm_qos_parser_strip_white(char * str); + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str); + +static void __osm_qos_parser_port_group_start(); +static void __osm_qos_parser_port_group_end(); + +static void __osm_qos_parser_sl2vl_scope_start(); +static void __osm_qos_parser_sl2vl_scope_end(); + +static void __osm_qos_parser_vlarb_scope_start(); +static void __osm_qos_parser_vlarb_scope_end(); + +static void __osm_qos_parser_qos_level_start(); +static void __osm_qos_parser_qos_level_end(); + +static void __osm_qos_parser_match_rule_start(); +static void __osm_qos_parser_match_rule_end(); + +extern char * yytext; +extern void yyerror (char *s); +extern int yylex (void); +extern FILE * yyin; + +#define OSM_QOS_MAX_LINE_LEN 10000 +char str_buffer[OSM_QOS_MAX_LINE_LEN]; +#define RESET_BUFFER str_buffer[0] = '\0' + +int column_num; +int line_num; + +osm_qos_parse_tree_t * p_qos_parse_tree = NULL; +osm_qos_port_group_t * p_current_port_group = NULL; +osm_qos_sl2vl_scope_t * p_current_sl2vl_scope = NULL; 
+osm_qos_vlarb_scope_t * p_current_vlarb_scope = NULL; +osm_qos_level_t * p_current_qos_level = NULL; +osm_qos_match_rule_t * p_current_qos_match_rule = NULL; + +osm_log_t * p_osm_log = NULL; + +/***************************************************/ + + + +/* Enabling traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif + +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED) +typedef int YYSTYPE; +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +# define YYSTYPE_IS_TRIVIAL 1 +#endif + + + +/* Copy the second part of user declarations. */ + + +/* Line 214 of yacc.c. */ +#line 360 "y.tab.c" + +#if ! defined (yyoverflow) || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# if YYSTACK_USE_ALLOCA +# define YYSTACK_ALLOC alloca +# else +# ifndef YYSTACK_USE_ALLOCA +# if defined (alloca) || defined (_ALLOCA_H) +# define YYSTACK_ALLOC alloca +# else +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's `empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# else +# if defined (__STDC__) || defined (__cplusplus) +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# endif +# define YYSTACK_ALLOC malloc +# define YYSTACK_FREE free +# endif +#endif /* ! defined (yyoverflow) || YYERROR_VERBOSE */ + + +#if (! defined (yyoverflow) \ + && (! defined (__cplusplus) \ + || (YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + short yyss; + YYSTYPE yyvs; + }; + +/* The size of the maximum gap between one aligned stack and the next. 
*/ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (short) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +/* Copy COUNT objects from FROM to TO. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if 1 < __GNUC__ +# define YYCOPY(To, From, Count) \ + __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# else +# define YYCOPY(To, From, Count) \ + do \ + { \ + register YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (To)[yyi] = (From)[yyi]; \ + } \ + while (0) +# endif +# endif + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack, Stack, yysize); \ + Stack = &yyptr->Stack; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (0) + +#endif + +#if defined (__STDC__) || defined (__cplusplus) + typedef signed char yysigned_char; +#else + typedef short yysigned_char; +#endif + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 5 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 310 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 90 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 124 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 191 +/* YYNRULES -- Number of states. */ +#define YYNSTATES 281 + +/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 344 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? 
yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */ +static const unsigned char yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89 +}; + +#if YYDEBUG +/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in + YYRHS. 
*/ +static const unsigned short yyprhs[] = +{ + 0, 0, 3, 6, 7, 11, 12, 17, 19, 21, + 22, 25, 27, 29, 31, 33, 37, 39, 41, 43, + 46, 50, 52, 54, 55, 58, 60, 62, 64, 66, + 68, 70, 74, 76, 78, 79, 82, 85, 89, 91, + 93, 94, 97, 101, 103, 105, 106, 109, 111, 113, + 115, 117, 119, 121, 123, 127, 129, 131, 132, 135, + 139, 141, 143, 144, 147, 149, 151, 153, 155, 157, + 161, 163, 165, 166, 171, 173, 175, 176, 179, 181, + 183, 185, 187, 189, 191, 193, 195, 199, 201, 203, + 204, 209, 211, 213, 214, 217, 219, 221, 223, 225, + 227, 229, 233, 235, 239, 241, 245, 247, 251, 253, + 257, 259, 263, 265, 269, 271, 275, 277, 281, 283, + 287, 289, 293, 295, 299, 301, 309, 311, 315, 317, + 321, 323, 327, 329, 333, 335, 339, 341, 345, 347, + 351, 353, 357, 359, 367, 369, 377, 379, 383, 385, + 389, 391, 399, 401, 409, 411, 415, 417, 418, 421, + 423, 425, 427, 429, 431, 433, 435, 437, 440, 443, + 446, 449, 451, 454, 457, 460, 463, 466, 468, 471, + 474, 477, 480, 483, 485, 487, 489, 491, 493, 495, + 497, 498 +}; + +/* YYRHS -- A `-1'-separated list of the rules' RHS. 
*/ +static const short yyrhs[] = +{ + 91, 0, -1, 92, 93, -1, -1, 10, 201, 11, + -1, -1, 93, 94, 96, 95, -1, 12, -1, 13, + -1, -1, 96, 97, -1, 98, -1, 107, -1, 129, + -1, 137, -1, 99, 101, 100, -1, 14, -1, 15, + -1, 102, -1, 101, 102, -1, 103, 105, 104, -1, + 16, -1, 17, -1, -1, 105, 106, -1, 145, -1, + 147, -1, 151, -1, 149, -1, 153, -1, 155, -1, + 108, 110, 109, -1, 30, -1, 31, -1, -1, 110, + 111, -1, 110, 120, -1, 112, 114, 113, -1, 32, + -1, 33, -1, -1, 114, 115, -1, 116, 118, 117, + -1, 34, -1, 35, -1, -1, 118, 119, -1, 157, + -1, 159, -1, 161, -1, 163, -1, 165, -1, 167, + -1, 169, -1, 121, 123, 122, -1, 50, -1, 51, + -1, -1, 123, 124, -1, 125, 127, 126, -1, 52, + -1, 53, -1, -1, 127, 128, -1, 157, -1, 159, + -1, 187, -1, 189, -1, 185, -1, 130, 132, 131, + -1, 60, -1, 61, -1, -1, 132, 133, 135, 134, + -1, 62, -1, 63, -1, -1, 135, 136, -1, 171, + -1, 147, -1, 173, -1, 175, -1, 177, -1, 179, + -1, 181, -1, 183, -1, 138, 140, 139, -1, 78, + -1, 79, -1, -1, 140, 141, 143, 142, -1, 80, + -1, 81, -1, -1, 143, 144, -1, 147, -1, 195, + -1, 191, -1, 193, -1, 197, -1, 199, -1, 146, + 201, 19, -1, 18, -1, 148, 201, 21, -1, 20, + -1, 150, 201, 25, -1, 24, -1, 152, 212, 23, + -1, 22, -1, 154, 201, 27, -1, 26, -1, 156, + 201, 29, -1, 28, -1, 158, 201, 37, -1, 36, + -1, 160, 201, 45, -1, 44, -1, 162, 201, 47, + -1, 46, -1, 164, 201, 49, -1, 48, -1, 166, + 201, 39, -1, 38, -1, 168, 201, 41, -1, 40, + -1, 170, 213, 210, 204, 210, 213, 43, -1, 42, + -1, 172, 210, 65, -1, 64, -1, 174, 210, 67, + -1, 66, -1, 176, 210, 71, -1, 70, -1, 178, + 210, 73, -1, 72, -1, 180, 210, 75, -1, 74, + -1, 182, 210, 77, -1, 76, -1, 184, 210, 69, + -1, 68, -1, 186, 210, 59, -1, 58, -1, 188, + 213, 210, 205, 210, 213, 55, -1, 54, -1, 190, + 213, 210, 205, 210, 213, 57, -1, 56, -1, 192, + 201, 85, -1, 84, -1, 194, 201, 87, -1, 86, + -1, 196, 213, 210, 206, 210, 213, 69, -1, 68, + -1, 198, 213, 210, 204, 210, 213, 89, -1, 88, + -1, 200, 210, 83, -1, 82, -1, -1, 201, 202, + -1, 203, -1, 210, -1, 9, -1, 
5, -1, 6, + -1, 7, -1, 8, -1, 213, -1, 204, 213, -1, + 204, 211, -1, 204, 212, -1, 204, 207, -1, 213, + -1, 205, 213, -1, 205, 211, -1, 205, 212, -1, + 205, 207, -1, 205, 208, -1, 213, -1, 206, 213, + -1, 206, 211, -1, 206, 212, -1, 206, 207, -1, + 206, 209, -1, 8, -1, 7, -1, 6, -1, 211, + -1, 212, -1, 4, -1, 3, -1, -1, 5, -1 +}; + +/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ +static const unsigned short yyrline[] = +{ + 0, 190, 190, 193, 194, 197, 198, 201, 204, 207, + 208, 211, 212, 213, 214, 247, 250, 253, 256, 257, + 260, 263, 268, 273, 274, 277, 278, 279, 280, 281, + 282, 318, 321, 324, 327, 328, 329, 341, 344, 347, + 350, 351, 354, 357, 362, 367, 368, 371, 372, 373, + 374, 375, 376, 377, 389, 392, 395, 398, 399, 402, + 405, 410, 415, 416, 419, 420, 421, 422, 423, 453, + 456, 459, 462, 463, 466, 471, 476, 477, 480, 481, + 482, 483, 484, 485, 486, 487, 509, 512, 515, 518, + 519, 522, 527, 532, 533, 536, 537, 538, 539, 540, + 541, 548, 554, 565, 578, 612, 624, 631, 643, 650, + 662, 669, 696, 703, 723, 730, 755, 762, 775, 783, + 796, 804, 817, 825, 838, 846, 887, 893, 900, 912, + 919, 931, 938, 950, 957, 969, 976, 988, 994, 1001, + 1007, 1014, 1020, 1027, 1063, 1070, 1106, 1113, 1119, 1126, + 1132, 1139, 1207, 1213, 1245, 1252, 1258, 1270, 1271, 1274, + 1275, 1278, 1282, 1286, 1290, 1294, 1300, 1301, 1302, 1303, + 1304, 1307, 1308, 1309, 1310, 1311, 1312, 1315, 1316, 1317, + 1318, 1319, 1320, 1323, 1329, 1335, 1341, 1342, 1345, 1351, + 1357, 1358 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE +/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "TK_HEX_NUMBER", "TK_DEC_NUMBER", + "TK_WHITE", "TK_DASH", "TK_DOTDOT", "TK_COMMA", "TK_TEXT", + "TK_XML_VERSION_START", "TK_XML_VERSION_END", "TK_QOS_POLICY_START", + "TK_QOS_POLICY_END", "TK_PORT_GROUPS_START", "TK_PORT_GROUPS_END", + "TK_PORT_GROUP_START", "TK_PORT_GROUP_END", "TK_NAME_START", + "TK_NAME_END", "TK_USE_START", "TK_USE_END", "TK_PORT_GUID_START", + "TK_PORT_GUID_END", "TK_PORT_NAME_START", "TK_PORT_NAME_END", + "TK_PARTITION_START", "TK_PARTITION_END", "TK_NODE_TYPE_START", + "TK_NODE_TYPE_END", "TK_QOS_SETUP_START", "TK_QOS_SETUP_END", + "TK_SL2VL_TABLES_START", "TK_SL2VL_TABLES_END", "TK_SL2VL_SCOPE_START", + "TK_SL2VL_SCOPE_END", "TK_GROUP_START", "TK_GROUP_END", "TK_FROM_START", + "TK_FROM_END", "TK_TO_START", "TK_TO_END", "TK_SL2VL_TABLE_START", + "TK_SL2VL_TABLE_END", "TK_ACROSS_START", "TK_ACROSS_END", + "TK_ACROSS_FROM_START", "TK_ACROSS_FROM_END", "TK_ACROSS_TO_START", + "TK_ACROSS_TO_END", "TK_VLARB_TABLES_START", "TK_VLARB_TABLES_END", + "TK_VLARB_SCOPE_START", "TK_VLARB_SCOPE_END", "TK_VLARB_HIGH_START", + "TK_VLARB_HIGH_END", "TK_VLARB_LOW_START", "TK_VLARB_LOW_END", + "TK_VLARB_HIGH_LIMIT_START", "TK_VLARB_HIGH_LIMIT_END", + "TK_QOS_LEVELS_START", "TK_QOS_LEVELS_END", "TK_QOS_LEVEL_START", + "TK_QOS_LEVEL_END", "TK_SN_START", "TK_SN_END", "TK_SL_START", + "TK_SL_END", "TK_CLASS_START", "TK_CLASS_END", "TK_MTU_LIMIT_START", + "TK_MTU_LIMIT_END", "TK_RATE_LIMIT_START", "TK_RATE_LIMIT_END", + "TK_PACKET_LIFE_START", "TK_PACKET_LIFE_END", "TK_PKEY_START", + "TK_PKEY_END", "TK_QOS_MATCH_RULES_START", "TK_QOS_MATCH_RULES_END", + "TK_QOS_MATCH_RULE_START", "TK_QOS_MATCH_RULE_END", + "TK_QOS_LEVEL_SN_START", "TK_QOS_LEVEL_SN_END", "TK_SOURCE_START", + "TK_SOURCE_END", "TK_DESTINATION_START", "TK_DESTINATION_END", + "TK_SERVICE_START", "TK_SERVICE_END", "$accept", "head", "xml_version", + "qos_policy", "qos_policy_start", "qos_policy_end", + "qos_policy_entries", 
"qos_policy_entry", "port_groups", + "port_groups_start", "port_groups_end", "port_group", + "single_port_group", "port_group_start", "port_group_end", + "port_group_entries", "port_group_entry", "qos_setup", + "qos_setup_start", "qos_setup_end", "qos_setup_items", "sl2vl_tables", + "sl2vl_tables_start", "sl2vl_tables_end", "sl2vl_scope_items", + "sl2vl_scope", "sl2vl_scope_start", "sl2vl_scope_end", + "sl2vl_scope_entries", "sl2vl_scope_entry", "vlarb_tables", + "vlarb_tables_start", "vlarb_tables_end", "vlarb_scope_items", + "vlarb_scope", "vlarb_scope_start", "vlarb_scope_end", + "vlarb_scope_entries", "vlarb_scope_entry", "qos_levels", + "qos_levels_start", "qos_levels_end", "qos_level_items", + "qos_level_start", "qos_level_end", "qos_level_entries", + "qos_level_entry", "qos_match_rules", "qos_match_rules_start", + "qos_match_rules_end", "qos_match_rule_items", "qos_match_rule_start", + "qos_match_rule_end", "qos_match_rule_entries", "qos_match_rule_entry", + "name", "name_start", "use", "use_start", "port_name", + "port_name_start", "port_guid", "port_guid_start", "partition", + "partition_start", "node_type", "node_type_start", "group", + "group_start", "across", "across_start", "across_from", + "across_from_start", "across_to", "across_to_start", "from", + "from_start", "to", "to_start", "sl2vl_table", "sl2vl_table_start", + "sn", "sn_start", "sl", "sl_start", "mtu_limit", "mtu_limit_start", + "rate_limit", "rate_limit_start", "packet_life", "packet_life_start", + "pkey", "pkey_start", "qos_level_class", "qos_level_class_start", + "vlarb_high_limit", "vlarb_high_limit_start", "vlarb_high", + "vlarb_high_start", "vlarb_low", "vlarb_low_start", "source", + "source_start", "destination", "destination_start", "match_rule_class", + "match_rule_class_start", "service", "service_start", "qos_level_sn", + "qos_level_sn_start", "any_text", "text", "pure_text", + "num_list_wo_whites", "num_list_with_dotdot_wo_whites", + "num_list_with_dash_wo_whites", "comma", 
"dotdot", "dash", "any_number", + "dec_number", "hex_number", "whitespace", 0 +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to + token YYLEX-NUM. */ +static const unsigned short yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, + 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, + 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, + 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, + 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, + 335, 336, 337, 338, 339, 340, 341, 342, 343, 344 +}; +# endif + +/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +static const unsigned char yyr1[] = +{ + 0, 90, 91, 92, 92, 93, 93, 94, 95, 96, + 96, 97, 97, 97, 97, 98, 99, 100, 101, 101, + 102, 103, 104, 105, 105, 106, 106, 106, 106, 106, + 106, 107, 108, 109, 110, 110, 110, 111, 112, 113, + 114, 114, 115, 116, 117, 118, 118, 119, 119, 119, + 119, 119, 119, 119, 120, 121, 122, 123, 123, 124, + 125, 126, 127, 127, 128, 128, 128, 128, 128, 129, + 130, 131, 132, 132, 133, 134, 135, 135, 136, 136, + 136, 136, 136, 136, 136, 136, 137, 138, 139, 140, + 140, 141, 142, 143, 143, 144, 144, 144, 144, 144, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, + 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, + 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, + 194, 195, 196, 197, 198, 199, 200, 201, 201, 202, + 202, 203, 203, 203, 203, 203, 204, 204, 204, 204, + 204, 205, 205, 205, 205, 205, 205, 206, 206, 206, + 206, 206, 206, 207, 208, 209, 210, 210, 211, 212, + 213, 213 +}; + +/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. 
*/ +static const unsigned char yyr2[] = +{ + 0, 2, 2, 0, 3, 0, 4, 1, 1, 0, + 2, 1, 1, 1, 1, 3, 1, 1, 1, 2, + 3, 1, 1, 0, 2, 1, 1, 1, 1, 1, + 1, 3, 1, 1, 0, 2, 2, 3, 1, 1, + 0, 2, 3, 1, 1, 0, 2, 1, 1, 1, + 1, 1, 1, 1, 3, 1, 1, 0, 2, 3, + 1, 1, 0, 2, 1, 1, 1, 1, 1, 3, + 1, 1, 0, 4, 1, 1, 0, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 1, 1, 0, + 4, 1, 1, 0, 2, 1, 1, 1, 1, 1, + 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, + 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, + 1, 3, 1, 3, 1, 7, 1, 3, 1, 3, + 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, + 1, 3, 1, 7, 1, 7, 1, 3, 1, 3, + 1, 7, 1, 7, 1, 3, 1, 0, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, + 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, + 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 0, 1 +}; + +/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state + STATE-NUM when YYTABLE doesn't specify something else to do. Zero + means the default is an error. */ +static const unsigned char yydefact[] = +{ + 3, 157, 0, 5, 0, 1, 2, 189, 188, 162, + 163, 164, 165, 161, 4, 158, 159, 160, 186, 187, + 7, 9, 0, 8, 16, 32, 70, 87, 6, 10, + 11, 0, 12, 34, 13, 72, 14, 89, 21, 0, + 18, 23, 0, 0, 0, 17, 15, 19, 0, 33, + 38, 55, 31, 35, 40, 36, 57, 71, 74, 69, + 76, 88, 91, 86, 93, 22, 102, 104, 108, 106, + 110, 112, 20, 24, 25, 157, 26, 157, 28, 157, + 27, 0, 29, 157, 30, 157, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 39, 43, 37, 41, + 45, 56, 60, 54, 58, 62, 75, 128, 130, 140, + 132, 134, 136, 138, 73, 77, 79, 78, 0, 80, + 0, 81, 0, 82, 0, 83, 0, 84, 0, 85, + 0, 152, 92, 156, 148, 150, 154, 90, 94, 95, + 97, 157, 98, 157, 96, 190, 99, 190, 100, 0, + 101, 103, 105, 107, 109, 111, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 191, 0, 0, + 0, 44, 114, 122, 124, 126, 116, 118, 120, 42, + 46, 47, 157, 48, 157, 49, 157, 50, 157, 51, + 157, 52, 157, 53, 190, 61, 144, 146, 142, 59, + 63, 64, 65, 68, 0, 66, 190, 67, 190, 127, + 129, 131, 133, 135, 137, 139, 147, 149, 190, 190, + 155, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 177, 0, 166, 113, 115, 117, 119, 121, + 123, 190, 141, 190, 190, 185, 183, 181, 182, 190, + 179, 180, 178, 170, 190, 168, 169, 
167, 0, 0, + 171, 0, 0, 0, 190, 184, 175, 176, 190, 173, + 174, 172, 190, 151, 153, 0, 0, 0, 125, 143, + 145 +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const short yydefgoto[] = +{ + -1, 2, 3, 6, 21, 28, 22, 29, 30, 31, + 46, 39, 40, 41, 72, 48, 73, 32, 33, 52, + 42, 53, 54, 98, 86, 99, 100, 179, 156, 180, + 55, 56, 103, 87, 104, 105, 199, 157, 200, 34, + 35, 59, 43, 60, 114, 88, 115, 36, 37, 63, + 44, 64, 137, 89, 138, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, + 194, 117, 118, 119, 120, 121, 122, 123, 124, 125, + 126, 127, 128, 129, 130, 203, 204, 205, 206, 207, + 208, 140, 141, 142, 143, 144, 145, 146, 147, 148, + 149, 4, 15, 16, 233, 259, 231, 253, 267, 248, + 17, 18, 19, 234 +}; + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +#define YYPACT_NINF -192 +static const short yypact[] = +{ + 0, -192, 2, -192, 293, -192, 4, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, 1, -192, -192, -192, -192, -192, -192, -192, + -192, 14, -192, -192, -192, -192, -192, -192, -192, 56, + -192, -192, 34, 70, 66, -192, -192, -192, 267, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, 15, -192, -192, -192, -192, 139, 144, 22, -8, + 257, 247, 234, 18, 209, 202, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, 216, -192, + 216, -192, 216, -192, 216, -192, 216, -192, 216, -192, + 216, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, 48, -192, 48, -192, 216, + -192, -192, -192, -192, -192, -192, 235, 191, -10, 8, + -14, -6, -16, 5, 26, 30, 20, -192, 216, 216, + 28, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, 
-192, -192, -192, + -192, -192, -192, -192, 48, -192, -192, -192, -192, -192, + -192, -192, -192, -192, 216, -192, 48, -192, 48, -192, + -192, -192, -192, -192, -192, -192, -192, -192, 48, 48, + -192, 195, 178, 150, 40, 185, 96, 216, 32, 216, + 216, 122, -192, 171, -192, -192, -192, -192, -192, -192, + -192, 48, -192, 48, 48, -192, -192, -192, -192, 48, + 69, 71, -192, -192, 48, -39, -26, -192, 171, 302, + -192, 302, 91, -12, 48, -192, -192, -192, 48, -49, + -36, -192, 48, -192, -192, 54, 62, 52, -192, -192, + -192 +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const short yypgoto[] = +{ + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, 123, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, 133, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -22, -192, 7, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -192, -192, -192, -192, -192, -192, -192, -192, -192, + -192, -21, -192, -192, -75, -76, -192, -191, -192, -192, + -117, -110, -81, -125 +}; + +/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule which + number is the opposite. If zero, do what YYDEFACT says. + If YYTABLE_NINF, syntax error. 
*/ +#define YYTABLE_NINF -188 +static const short yytable[] = +{ + 93, 158, 5, 159, -186, 160, -186, 161, -186, 162, + 1, 163, 67, 164, 23, 24, 20, -187, 7, -187, + 168, -187, 169, 7, 8, 9, 10, 11, 12, 13, + 38, 25, 170, 7, 8, 9, 10, 11, 12, 13, + 247, 153, 67, 7, 8, 9, 10, 11, 12, 13, + -186, 218, 219, 167, 90, 209, 91, 211, 92, 213, + 131, 26, 94, -187, 95, 49, 50, 212, 266, 227, + 266, 45, 38, 132, 133, 210, 134, 274, 135, 27, + 136, 229, 214, 230, 51, 106, 107, 228, 108, 238, + 109, 242, 110, 232, 111, 215, 112, 278, 113, 7, + 8, 9, 10, 11, 12, 13, 252, 217, 257, 280, + 241, 220, 243, 244, 249, 216, 254, 279, 260, 260, + 165, 250, 166, 255, 262, 7, 8, 167, 245, 263, + 246, 57, 58, 257, 271, 201, 271, 240, -186, 275, + -187, 264, 268, 276, 272, 61, 62, 277, 255, 269, + 251, 269, 256, 7, 8, 9, 10, 11, 12, 13, + 273, 221, 47, 222, 202, 223, 258, 224, 261, 225, + 0, 226, 96, 97, 7, 8, 167, 256, 270, 246, + 270, 7, 8, 9, 10, 11, 12, 13, 7, 8, + 9, 10, 11, 12, 13, 101, 102, 237, 7, 8, + 9, 10, 11, 12, 13, 7, 8, 9, 10, 11, + 12, 13, 7, 8, 9, 10, 11, 12, 13, 7, + 8, 116, 139, 236, 239, 0, 0, 172, 0, 0, + 0, 155, 235, 0, 0, 176, 154, 7, 8, 9, + 10, 11, 12, 13, 195, 196, 0, 197, 0, 198, + 7, 8, 9, 10, 11, 12, 13, 0, 0, 152, + 7, 8, 9, 10, 11, 12, 13, 0, 151, 0, + 171, 172, 0, 173, 0, 174, 150, 175, 0, 176, + 0, 177, 0, 178, 65, 66, 0, 67, 0, 68, + 0, 69, 0, 70, 0, 71, 7, 8, 9, 10, + 11, 12, 13, 0, 14, 7, 8, 167, 0, 265, + 246 +}; + +static const short yycheck[] = +{ + 81, 118, 0, 120, 43, 122, 55, 124, 57, 126, + 10, 128, 20, 130, 13, 14, 12, 43, 3, 55, + 145, 57, 147, 3, 4, 5, 6, 7, 8, 9, + 16, 30, 149, 3, 4, 5, 6, 7, 8, 9, + 231, 23, 20, 3, 4, 5, 6, 7, 8, 9, + 89, 168, 169, 5, 75, 65, 77, 71, 79, 75, + 68, 60, 83, 89, 85, 31, 32, 73, 259, 194, + 261, 15, 16, 81, 82, 67, 84, 89, 86, 78, + 88, 206, 77, 208, 50, 63, 64, 204, 66, 49, + 68, 59, 70, 218, 72, 69, 74, 43, 76, 3, + 4, 5, 6, 7, 8, 9, 231, 87, 233, 57, + 227, 83, 229, 230, 231, 85, 233, 55, 243, 244, + 141, 231, 
143, 233, 249, 3, 4, 5, 6, 254, + 8, 61, 62, 258, 259, 157, 261, 41, 69, 264, + 69, 258, 259, 268, 261, 79, 80, 272, 258, 259, + 231, 261, 233, 3, 4, 5, 6, 7, 8, 9, + 69, 182, 39, 184, 157, 186, 241, 188, 244, 190, + -1, 192, 33, 34, 3, 4, 5, 258, 259, 8, + 261, 3, 4, 5, 6, 7, 8, 9, 3, 4, + 5, 6, 7, 8, 9, 51, 52, 47, 3, 4, + 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, + 8, 9, 3, 4, 5, 6, 7, 8, 9, 3, + 4, 88, 89, 45, 39, -1, -1, 36, -1, -1, + -1, 29, 37, -1, -1, 44, 27, 3, 4, 5, + 6, 7, 8, 9, 53, 54, -1, 56, -1, 58, + 3, 4, 5, 6, 7, 8, 9, -1, -1, 25, + 3, 4, 5, 6, 7, 8, 9, -1, 21, -1, + 35, 36, -1, 38, -1, 40, 19, 42, -1, 44, + -1, 46, -1, 48, 17, 18, -1, 20, -1, 22, + -1, 24, -1, 26, -1, 28, 3, 4, 5, 6, + 7, 8, 9, -1, 11, 3, 4, 5, -1, 7, + 8 +}; + +/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const unsigned char yystos[] = +{ + 0, 10, 91, 92, 201, 0, 93, 3, 4, 5, + 6, 7, 8, 9, 11, 202, 203, 210, 211, 212, + 12, 94, 96, 13, 14, 30, 60, 78, 95, 97, + 98, 99, 107, 108, 129, 130, 137, 138, 16, 101, + 102, 103, 110, 132, 140, 15, 100, 102, 105, 31, + 32, 50, 109, 111, 112, 120, 121, 61, 62, 131, + 133, 79, 80, 139, 141, 17, 18, 20, 22, 24, + 26, 28, 104, 106, 145, 146, 147, 148, 149, 150, + 151, 152, 153, 154, 155, 156, 114, 123, 135, 143, + 201, 201, 201, 212, 201, 201, 33, 34, 113, 115, + 116, 51, 52, 122, 124, 125, 63, 64, 66, 68, + 70, 72, 74, 76, 134, 136, 147, 171, 172, 173, + 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, + 184, 68, 81, 82, 84, 86, 88, 142, 144, 147, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, + 19, 21, 25, 23, 27, 29, 118, 127, 210, 210, + 210, 210, 210, 210, 210, 201, 201, 5, 213, 213, + 210, 35, 36, 38, 40, 42, 44, 46, 48, 117, + 119, 157, 158, 159, 160, 161, 162, 163, 164, 165, + 166, 167, 168, 169, 170, 53, 54, 56, 58, 126, + 128, 157, 159, 185, 186, 187, 188, 189, 190, 65, + 67, 71, 73, 75, 77, 69, 85, 87, 210, 210, + 83, 201, 201, 201, 201, 201, 201, 213, 210, 213, + 213, 206, 213, 
204, 213, 37, 45, 47, 49, 39, + 41, 210, 59, 210, 210, 6, 8, 207, 209, 210, + 211, 212, 213, 207, 210, 211, 212, 213, 204, 205, + 213, 205, 213, 213, 210, 7, 207, 208, 210, 211, + 212, 213, 210, 69, 89, 213, 213, 213, 43, 55, + 57 +}; + +#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__) +# define YYSIZE_T __SIZE_TYPE__ +#endif +#if ! defined (YYSIZE_T) && defined (size_t) +# define YYSIZE_T size_t +#endif +#if ! defined (YYSIZE_T) +# if defined (__STDC__) || defined (__cplusplus) +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# endif +#endif +#if ! defined (YYSIZE_T) +# define YYSIZE_T unsigned int +#endif + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrlab1 + +/* Like YYERROR except do call yyerror. This remains here temporarily + to ease the transition to the new meaning of YYERROR, for GCC. + Once GCC version 2 has supplanted version 1, this can go. */ + +#define YYFAIL goto yyerrlab + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY && yylen == 1) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + yytoken = YYTRANSLATE (yychar); \ + YYPOPSTACK; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror ("syntax error: cannot back up");\ + YYERROR; \ + } \ +while (0) + +#define YYTERROR 1 +#define YYERRCODE 256 + +/* YYLLOC_DEFAULT -- Compute the default location (before the actions + are run). */ + +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + Current.first_line = Rhs[1].first_line; \ + Current.first_column = Rhs[1].first_column; \ + Current.last_line = Rhs[N].last_line; \ + Current.last_column = Rhs[N].last_column; +#endif + +/* YYLEX -- calling `yylex' with the right arguments. 
*/ + +#ifdef YYLEX_PARAM +# define YYLEX yylex (YYLEX_PARAM) +#else +# define YYLEX yylex () +#endif + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + +# define YYDSYMPRINT(Args) \ +do { \ + if (yydebug) \ + yysymprint Args; \ +} while (0) + +# define YYDSYMPRINTF(Title, Token, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yysymprint (stderr, \ + Token, Value); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (cinluded). | +`------------------------------------------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yy_stack_print (short *bottom, short *top) +#else +static void +yy_stack_print (bottom, top) + short *bottom; + short *top; +#endif +{ + YYFPRINTF (stderr, "Stack now"); + for (/* Nothing. */; bottom <= top; ++bottom) + YYFPRINTF (stderr, " %d", *bottom); + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yy_reduce_print (int yyrule) +#else +static void +yy_reduce_print (yyrule) + int yyrule; +#endif +{ + int yyi; + unsigned int yylineno = yyrline[yyrule]; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %u), ", + yyrule - 1, yylineno); + /* Print the symbols being reduced, and their result. 
*/ + for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++) + YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]); + YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]); +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (Rule); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YYDSYMPRINT(Args) +# define YYDSYMPRINTF(Title, Token, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#if YYMAXDEPTH == 0 +# undef YYMAXDEPTH +#endif + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined (__GLIBC__) && defined (_STRING_H) +# define yystrlen strlen +# else +/* Return the length of YYSTR. */ +static YYSIZE_T +# if defined (__STDC__) || defined (__cplusplus) +yystrlen (const char *yystr) +# else +yystrlen (yystr) + const char *yystr; +# endif +{ + register const char *yys = yystr; + + while (*yys++ != '\0') + continue; + + return yys - yystr - 1; +} +# endif +# endif + +# ifndef yystpcpy +# if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE) +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. 
*/ +static char * +# if defined (__STDC__) || defined (__cplusplus) +yystpcpy (char *yydest, const char *yysrc) +# else +yystpcpy (yydest, yysrc) + char *yydest; + const char *yysrc; +# endif +{ + register char *yyd = yydest; + register const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +#endif /* !YYERROR_VERBOSE */ + + + +#if YYDEBUG +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yysymprint (FILE *yyoutput, int yytype, YYSTYPE *yyvaluep) +#else +static void +yysymprint (yyoutput, yytype, yyvaluep) + FILE *yyoutput; + int yytype; + YYSTYPE *yyvaluep; +#endif +{ + /* Pacify ``unused variable'' warnings. */ + (void) yyvaluep; + + if (yytype < YYNTOKENS) + { + YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); +# ifdef YYPRINT + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# endif + } + else + YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); + + switch (yytype) + { + default: + break; + } + YYFPRINTF (yyoutput, ")"); +} + +#endif /* ! YYDEBUG */ +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +#if defined (__STDC__) || defined (__cplusplus) +static void +yydestruct (int yytype, YYSTYPE *yyvaluep) +#else +static void +yydestruct (yytype, yyvaluep) + int yytype; + YYSTYPE *yyvaluep; +#endif +{ + /* Pacify ``unused variable'' warnings. */ + (void) yyvaluep; + + switch (yytype) + { + + default: + break; + } +} + + +/* Prevent warnings from -Wmissing-prototypes. */ + +#ifdef YYPARSE_PARAM +# if defined (__STDC__) || defined (__cplusplus) +int yyparse (void *YYPARSE_PARAM); +# else +int yyparse (); +# endif +#else /* ! YYPARSE_PARAM */ +#if defined (__STDC__) || defined (__cplusplus) +int yyparse (void); +#else +int yyparse (); +#endif +#endif /* ! 
YYPARSE_PARAM */ + + + +/* The lookahead symbol. */ +int yychar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; + +/* Number of syntax errors so far. */ +int yynerrs; + + + +/*----------. +| yyparse. | +`----------*/ + +#ifdef YYPARSE_PARAM +# if defined (__STDC__) || defined (__cplusplus) +int yyparse (void *YYPARSE_PARAM) +# else +int yyparse (YYPARSE_PARAM) + void *YYPARSE_PARAM; +# endif +#else /* ! YYPARSE_PARAM */ +#if defined (__STDC__) || defined (__cplusplus) +int +yyparse (void) +#else +int +yyparse () + +#endif +#endif +{ + + register int yystate; + register int yyn; + int yyresult; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + /* Lookahead token as an internal (translated) token number. */ + int yytoken = 0; + + /* Three stacks and their tools: + `yyss': related to states, + `yyvs': related to semantic values, + `yyls': related to locations. + + Refer to the stacks thru separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + short yyssa[YYINITDEPTH]; + short *yyss = yyssa; + register short *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + register YYSTYPE *yyvsp; + + + +#define YYPOPSTACK (yyvsp--, yyssp--) + + YYSIZE_T yystacksize = YYINITDEPTH; + + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + + + /* When reducing, the number of symbols on the RHS of the reduced + rule. */ + int yylen; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + + /* Initialize stack pointers. + Waste one element of value and location stack + so that they stay on the same level as the state stack. + The wasted elements are never initialized. 
*/ + + yyssp = yyss; + yyvsp = yyvs; + + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. so pushing a state here evens the stacks. + */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + short *yyss1 = yyss; + + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow ("parser stack overflow", + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyoverflowlab; +# else + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyoverflowlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + short *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! 
yyptr) + goto yyoverflowlab; + YYSTACK_RELOCATE (yyss); + YYSTACK_RELOCATE (yyvs); + +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + +/* Do appropriate processing given the current state. */ +/* Read a lookahead token if we need one and don't already have one. */ +/* yyresume: */ + + /* First try to decide what to do without reference to lookahead token. */ + + yyn = yypact[yystate]; + if (yyn == YYPACT_NINF) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = YYLEX; + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yyn == 0 || yyn == YYTABLE_NINF) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + if (yyn == YYFINAL) + YYACCEPT; + + /* Shift the lookahead token. */ + YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken])); + + /* Discard the token being shifted unless it is eof. */ + if (yychar != YYEOF) + yychar = YYEMPTY; + + *++yyvsp = yylval; + + + /* Count tokens shifted since error; after three, turn off error + status. 
*/ + if (yyerrstatus) + yyerrstatus--; + + yystate = yyn; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + `$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 21: +#line 263 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_port_group_start(); + } + break; + + case 22: +#line 268 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_port_group_end(); + } + break; + + case 43: +#line 357 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_sl2vl_scope_start(); + } + break; + + case 44: +#line 362 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_sl2vl_scope_end(); + } + break; + + case 60: +#line 405 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_vlarb_scope_start(); + } + break; + + case 61: +#line 410 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_vlarb_scope_end(); + } + break; + + case 74: +#line 466 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_qos_level_start(); + } + break; + + case 75: 
+#line 471 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_qos_level_end(); + } + break; + + case 91: +#line 522 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_match_rule_start(); + } + break; + + case 92: +#line 527 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + __osm_qos_parser_match_rule_end(); + } + break; + + case 101: +#line 548 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* of - one instance */ + p_current_port_group->name = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 102: +#line 554 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* of - one instance */ + RESET_BUFFER; + if (p_current_port_group->name) + { + yyerror(" has multiple tags"); + return 1; + } + } + break; + + case 103: +#line 565 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* of ,, - one instance */ + if (p_current_port_group) + p_current_port_group->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_level) + p_current_qos_level->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else if (p_current_qos_match_rule) + p_current_qos_match_rule->use = strdup(__osm_qos_parser_strip_white(str_buffer)); + else + CL_ASSERT(0); + } + break; + + case 104: +#line 578 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + RESET_BUFFER; + if (p_current_port_group) + { + /* of - one instance */ + if (p_current_port_group->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else if (p_current_qos_level) + { + /* of - one instance */ + if (p_current_qos_level->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else if (p_current_qos_match_rule) + { + /* of - one instance */ + if (p_current_qos_match_rule->use) + { + yyerror(" has multiple tags"); + return 1; + } + } + else + CL_ASSERT(0); + } + break; + + case 105: +#line 612 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_names, + p_str_item, + NULL); + } + break; + + case 106: +#line 624 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 107: +#line 631 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + osm_qos_uint64_vector_item_t * p_uint64_item = + (osm_qos_uint64_vector_item_t *)malloc(sizeof(osm_qos_uint64_vector_item_t)); + + __osm_qos_parser_str2uint64(&p_uint64_item->value, + __osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->port_guids, + p_uint64_item, + NULL); + } + break; + + case 108: +#line 643 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 109: +#line 650 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_port_group->partitions, + p_str_item, + NULL); + } + break; + + case 110: +#line 662 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 111: +#line 669 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + uint8_t tmp_node_type; + char * clean_str = 
__osm_qos_parser_strip_white(str_buffer); + + if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_ROUTER) == 0) + tmp_node_type = IB_NODE_TYPE_ROUTER; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_CA) == 0) + tmp_node_type = IB_NODE_TYPE_CA; + else if (strcasecmp(clean_str,OSM_QOS_NODE_TYPE_SWITCH) == 0) + tmp_node_type = IB_NODE_TYPE_SWITCH; + else + { + yyerror("wrong value"); + return 1; + } + + osm_qos_uint8_vector_item_t * p_uint8_item = + (osm_qos_uint8_vector_item_t *)malloc(sizeof(osm_qos_uint8_vector_item_t)); + + p_uint8_item->value = tmp_node_type; + cl_ptr_vector_insert(&p_current_port_group->node_types, + p_uint8_item, + NULL); + } + break; + + case 112: +#line 696 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances */ + CL_ASSERT(p_current_port_group); + RESET_BUFFER; + } + break; + + case 113: +#line 703 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + if (p_current_sl2vl_scope) + cl_ptr_vector_insert(&p_current_sl2vl_scope->groups, + p_str_item, + NULL); + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->groups, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + break; + + case 114: +#line 723 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + break; + + case 115: +#line 730 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. 
+ The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + + /* inserting this both to across_to and to across_from */ + if (p_current_sl2vl_scope) { + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + else if (p_current_vlarb_scope) + cl_ptr_vector_insert(&p_current_vlarb_scope->across, + p_str_item, + NULL); + else + CL_ASSERT(0); + } + break; + + case 116: +#line 755 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in and - any num of instances. + The value refers to the in */ + RESET_BUFFER; + } + break; + + case 117: +#line 762 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_from, + p_str_item, + NULL); + } + break; + + case 118: +#line 775 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 119: +#line 783 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. 
+ The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->across_to, + p_str_item, + NULL); + } + break; + + case 120: +#line 796 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 121: +#line 804 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->from, + p_str_item, + NULL); + } + break; + + case 122: +#line 817 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 123: +#line 825 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. + The value refers to the in */ + osm_qos_string_vector_item_t * p_str_item = + (osm_qos_string_vector_item_t *)malloc(sizeof(osm_qos_string_vector_item_t)); + + p_str_item->str = strdup(__osm_qos_parser_strip_white(str_buffer)); + cl_ptr_vector_insert(&p_current_sl2vl_scope->to, + p_str_item, + NULL); + } + break; + + case 124: +#line 838 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - any num of instances. 
+ The value refers to the in */ + CL_ASSERT(p_current_sl2vl_scope); + RESET_BUFFER; + } + break; + + case 125: +#line 848 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of 16 numbers */ + uint8_t counter = 0; + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + + /* checking the syntax */ + for (i = 0; tmp_str[i] != '\0'; i++) + if (tmp_str[i] == ',') + counter ++; + if (counter != 15) + { + yyerror("wrong number of values in (should be 16)"); + return 1; + } + for (i = 1; tmp_str[i] != '\0'; i++) + if ((tmp_str[i-1] == ',') && (tmp_str[i] == ',')) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str," ,"); + while (tok != NULL && i < 16) + { + p_current_sl2vl_scope->sl2vl_table[i++] = atoi(tok); + tok = strtok (NULL, " ,"); + } + if (tok != NULL) + { + yyerror("wrong syntax in "); + return 1; + } + } + break; + + case 126: +#line 887 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of 16 numbers */ + RESET_BUFFER; + } + break; + + case 127: +#line 893 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->sn = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->sn_set = TRUE; + } + break; + + case 128: +#line 900 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->sn_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + break; + + case 129: +#line 912 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->sl = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->sl_set = TRUE; + } + break; + + case 130: +#line 919 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - 
one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->sl_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + break; + + case 131: +#line 931 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->mtu_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->mtu_limit_set = TRUE; + } + break; + + case 132: +#line 938 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->mtu_limit_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + break; + + case 133: +#line 950 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->rate_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->rate_limit_set = TRUE; + } + break; + + case 134: +#line 957 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->rate_limit_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + break; + + case 135: +#line 969 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->pkt_life = atoi(__osm_qos_parser_strip_white(str_buffer)); + p_current_qos_level->pkt_life_set = TRUE; + } + break; + + case 136: +#line 976 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + if (p_current_qos_level->pkt_life_set) + { + yyerror(" has multiple tags"); + return 1; + } + RESET_BUFFER; + } + break; + + case 137: +#line 988 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + p_current_qos_level->pkey = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 
138: +#line 994 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + break; + + case 139: +#line 1001 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + p_current_qos_level->class = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 140: +#line 1007 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + CL_ASSERT(p_current_qos_level); + RESET_BUFFER; + } + break; + + case 141: +#line 1014 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + p_current_vlarb_scope->vl_high_limit = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 142: +#line 1020 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - one instance of one number */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + break; + + case 143: +#line 1029 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i-1] == ',' || tmp_str[i-1] == ':') && + (tmp_str[i] == ',' || tmp_str[i] == ':') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str,":,"); + while (tok != NULL) + { + p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t)); + p_hl_item->sl = atoi(tok); + tok = strtok (NULL, ":,"); + p_hl_item->credits = atoi(tok); + tok = strtok (NULL, ":,"); + cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_highs, + p_hl_item, + NULL); + } + + } + break; + + case 144: +#line 1063 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + break; + + case 145: +#line 1072 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + uint8_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_vlarb_hl_vector_item_t * p_hl_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i-1] == ',' || tmp_str[i-1] == ':') && + (tmp_str[i] == ',' || tmp_str[i] == ':') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + i = 0; + tok = strtok (tmp_str,":,"); + while (tok != NULL) + { + p_hl_item = (osm_qos_vlarb_hl_vector_item_t *)malloc(sizeof(osm_qos_vlarb_hl_vector_item_t)); + p_hl_item->sl = atoi(tok); + tok = strtok (NULL, ":,"); + p_hl_item->credits = atoi(tok); + tok = strtok (NULL, ":,"); + cl_ptr_vector_insert(&p_current_vlarb_scope->vlarb_lows, + p_hl_item, + NULL); + } + + } + break; + + case 146: +#line 1106 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of pairs of numbers with ':' and ',' */ + CL_ASSERT(p_current_vlarb_scope); + RESET_BUFFER; + } + break; + + case 147: +#line 1113 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + p_current_qos_match_rule->source = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 148: +#line 1119 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 149: +#line 1126 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + p_current_qos_match_rule->destination = strdup(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 150: +#line 1132 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - text */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 151: +#line 1141 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - "7-9,11" */ + uint16_t i; + uint16_t j; + uint16_t range_low; + uint16_t range_high; + char * dash_ptr = NULL; + char * tok = NULL; + char * tokens[10000] = {NULL}; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_uint16_vector_item_t * p_uint16_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ( (tmp_str[i] == ',' || tmp_str[i] == '-') && + (tmp_str[i-1] == ',' || tmp_str[i-1] == '-') ) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line with ',' being a delimiter */ + tok = strtok (tmp_str," ,"); + for (i = 0; tok != NULL; tok = strtok (NULL, " ,")) + tokens[i++] = tok; + + for (i = 0; tokens[i] != NULL; i++) + { + dash_ptr = strstr(tokens[i],"-"); + if (dash_ptr != NULL) + { + /* this is number range: i-j */ + *dash_ptr = '\0'; + range_low = atoi(tokens[i]); + *dash_ptr = '-'; + range_high = atoi(dash_ptr + 1); + if (range_low > range_high) + { + uint16_t tmp_num = range_low; + range_low = range_high; + range_high = tmp_num; + } + for (j = range_low; j <= range_high; j++) + { + p_uint16_item = (osm_qos_uint16_vector_item_t *) + malloc(sizeof(osm_qos_uint16_vector_item_t)); + p_uint16_item->value = j; + cl_ptr_vector_insert(&p_current_qos_match_rule->classes, + p_uint16_item, + NULL); + } + } + else + { + /* this is a single number */ + p_uint16_item = (osm_qos_uint16_vector_item_t *) + malloc(sizeof(osm_qos_uint16_vector_item_t)); + p_uint16_item->value = atoi(tokens[i]); + cl_ptr_vector_insert(&p_current_qos_match_rule->classes, + p_uint16_item, + NULL); + } + } + } + break; + + case 152: +#line 1207 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } 
+ break; + + case 153: +#line 1215 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of ',' delimited numbers */ + uint32_t i; + char * tok = NULL; + char * tmp_str = __osm_qos_parser_strip_white(str_buffer); + osm_qos_uint64_vector_item_t * p_uint64_item = NULL; + + /* checking the syntax */ + for (i = 1; tmp_str[i] != '\0'; i++) + if ((tmp_str[i-1] == ',') && (tmp_str[i] == ',')) + { + yyerror("wrong syntax in "); + return 1; + } + + /* tokenizing the line */ + tok = strtok (tmp_str," ,"); + while (tok != NULL) + { + p_uint64_item = (osm_qos_uint64_vector_item_t *)malloc(sizeof(osm_qos_uint64_vector_item_t)); + __osm_qos_parser_str2uint64(&p_uint64_item->value,tok); + cl_ptr_vector_insert(&p_current_qos_match_rule->services, + p_uint64_item, + NULL); + tok = strtok (NULL, " ,"); + } + + } + break; + + case 154: +#line 1245 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - list of ',' delimited numbers */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 155: +#line 1252 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - single number */ + p_current_qos_match_rule->qos_level_sn = atoi(__osm_qos_parser_strip_white(str_buffer)); + } + break; + + case 156: +#line 1258 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* in - single number */ + CL_ASSERT(p_current_qos_match_rule); + RESET_BUFFER; + } + break; + + case 161: +#line 1278 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 162: +#line 1282 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 163: +#line 1286 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 164: +#line 1290 
"/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 165: +#line 1294 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 183: +#line 1323 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 184: +#line 1329 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 185: +#line 1335 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 188: +#line 1345 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 189: +#line 1351 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + strcat(str_buffer,yyvsp[0]); + free(yyvsp[0]); + } + break; + + case 191: +#line 1358 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + { + /* swallow whitespace */ + free(yyvsp[0]); + } + break; + + + } + +/* Line 991 of yacc.c. */ +#line 2569 "y.tab.c" + + yyvsp -= yylen; + yyssp -= yylen; + + + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + + /* Now `shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*------------------------------------. +| yyerrlab -- here on detecting error | +`------------------------------------*/ +yyerrlab: + /* If not already recovering from an error, report this error. 
*/ + if (!yyerrstatus) + { + ++yynerrs; +#if YYERROR_VERBOSE + yyn = yypact[yystate]; + + if (YYPACT_NINF < yyn && yyn < YYLAST) + { + YYSIZE_T yysize = 0; + int yytype = YYTRANSLATE (yychar); + char *yymsg; + int yyx, yycount; + + yycount = 0; + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. */ + for (yyx = yyn < 0 ? -yyn : 0; + yyx < (int) (sizeof (yytname) / sizeof (char *)); yyx++) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) + yysize += yystrlen (yytname[yyx]) + 15, yycount++; + yysize += yystrlen ("syntax error, unexpected ") + 1; + yysize += yystrlen (yytname[yytype]); + yymsg = (char *) YYSTACK_ALLOC (yysize); + if (yymsg != 0) + { + char *yyp = yystpcpy (yymsg, "syntax error, unexpected "); + yyp = yystpcpy (yyp, yytname[yytype]); + + if (yycount < 5) + { + yycount = 0; + for (yyx = yyn < 0 ? -yyn : 0; + yyx < (int) (sizeof (yytname) / sizeof (char *)); + yyx++) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) + { + const char *yyq = ! yycount ? ", expecting " : " or "; + yyp = yystpcpy (yyp, yyq); + yyp = yystpcpy (yyp, yytname[yyx]); + yycount++; + } + } + yyerror (yymsg); + YYSTACK_FREE (yymsg); + } + else + yyerror ("syntax error; also virtual memory exhausted"); + } + else +#endif /* YYERROR_VERBOSE */ + yyerror ("syntax error"); + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + /* Return failure if at end of input. */ + if (yychar == YYEOF) + { + /* Pop the error token. */ + YYPOPSTACK; + /* Pop the rest of the stack. */ + while (yyss < yyssp) + { + YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); + yydestruct (yystos[*yyssp], yyvsp); + YYPOPSTACK; + } + YYABORT; + } + + YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc); + yydestruct (yytoken, &yylval); + yychar = YYEMPTY; + + } + + /* Else will try to reuse lookahead token after shifting the error + token. 
*/ + goto yyerrlab2; + + +/*----------------------------------------------------. +| yyerrlab1 -- error raised explicitly by an action. | +`----------------------------------------------------*/ +yyerrlab1: + + /* Suppress GCC warning that yyerrlab1 is unused when no action + invokes YYERROR. */ +#if defined (__GNUC_MINOR__) && 2093 <= (__GNUC__ * 1000 + __GNUC_MINOR__) \ + && !defined __cplusplus + __attribute__ ((__unused__)) +#endif + + + goto yyerrlab2; + + +/*---------------------------------------------------------------. +| yyerrlab2 -- pop states until the error token can be shifted. | +`---------------------------------------------------------------*/ +yyerrlab2: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (yyn != YYPACT_NINF) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); + yydestruct (yystos[yystate], yyvsp); + yyvsp--; + yystate = *--yyssp; + + YY_STACK_PRINT (yyss, yyssp); + } + + if (yyn == YYFINAL) + YYACCEPT; + + YYDPRINTF ((stderr, "Shifting error token, ")); + + *++yyvsp = yylval; + + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#ifndef yyoverflow +/*----------------------------------------------. +| yyoverflowlab -- parser overflow comes here. | +`----------------------------------------------*/ +yyoverflowlab: + yyerror ("parser stack overflow"); + yyresult = 2; + /* Fall through. 
*/ +#endif + +yyreturn: +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif + return yyresult; +} + + +#line 1364 "/mswg/work/kliteyn/git/management/osm/opensm/osm_qos_parser.y" + + +/*************************************************** + ***************************************************/ + +int osm_qos_parse_policy_file( + IN osm_log_t * p_log, + IN const char * policy_file) +{ + int res = 0; + p_osm_log = p_log; + + OSM_LOG_ENTER(p_osm_log, osm_qos_parse); + + p_qos_parse_tree = NULL; + yyin = fopen (policy_file, "r"); + if (!yyin) + { + osm_log(p_osm_log, OSM_LOG_ERROR, + "osm_qos_parse: ERR AC01: " + "Failed opening QoS policy file (%s)\n", + policy_file); + res = 1; + goto Exit; + } + column_num = 1; + line_num = 1; + __osm_qos_parse_tree_init(); + str_buffer[0] = '\0'; + + res = yyparse(); + + if (res != 0) + { + osm_log(p_osm_log, OSM_LOG_ERROR, + "osm_qos_parse: ERR AC02: " + "Failed parsing QoS policy file (%s)\n", + policy_file); + p_qos_parse_tree = NULL; + res = 1; + goto Exit; + } + + Exit: + if (yyin) + fclose(yyin); + OSM_LOG_EXIT(p_osm_log); + return res; +} + +/*************************************************** + ***************************************************/ + +int yywrap() +{ + return(1); +} + +/*************************************************** + ***************************************************/ + +void yyerror (char *s) +{ + OSM_LOG_ENTER(p_osm_log, yyerror); + osm_log(p_osm_log, OSM_LOG_ERROR, + "yyerror: ERR AC03: " + "Syntax error (line %d:%d): %s. 
" + "Last text read: \"%s\"\n", + line_num, column_num, s, __osm_qos_parser_strip_white(yytext)); + OSM_LOG_EXIT(p_osm_log); +} + +/*************************************************** + ***************************************************/ + +static char * __osm_qos_parser_strip_white(char * str) +{ + int i; + for (i = (strlen(str)-1); i >= 0; i--) + { + if (isspace(str[i])) + str[i] = '\0'; + else + break; + } + for (i = 0; i < strlen(str); i++) + { + if (!isspace(str[i])) + break; + } + return &(str[i]); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_str2uint64(uint64_t * p_val, char * str) +{ +#if __WORDSIZE == 64 + *p_val = strtoul(str, NULL, 0); +#else + *p_val = strtoull(str, NULL, 0); +#endif +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parse_tree_init() +{ + p_qos_parse_tree = (osm_qos_parse_tree_t *) + malloc(sizeof(osm_qos_parse_tree_t)); + + memset(p_qos_parse_tree, 0, sizeof(osm_qos_parse_tree_t)); + + cl_ptr_vector_init(&p_qos_parse_tree->port_groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->sl2vl_tables, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->vlarb_tables, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->qos_levels, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_qos_parse_tree->qos_match_rules, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_start() +{ + p_current_port_group = (osm_qos_port_group_t *) + malloc(sizeof(osm_qos_port_group_t)); + memset(p_current_port_group, 0, sizeof(osm_qos_port_group_t)); + + cl_ptr_vector_init(&p_current_port_group->port_guids, + 0, /* min size */ 
+ 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->port_names, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->partitions, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_port_group->node_types, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_port_group_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->port_groups, + p_current_port_group, + NULL); + p_current_port_group = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_start() +{ + p_current_sl2vl_scope = (osm_qos_sl2vl_scope_t *) + malloc(sizeof(osm_qos_sl2vl_scope_t)); + memset(p_current_sl2vl_scope, 0, sizeof(osm_qos_sl2vl_scope_t)); + + cl_ptr_vector_init(&p_current_sl2vl_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->to, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_from, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_sl2vl_scope->across_to, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_sl2vl_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->sl2vl_tables, + p_current_sl2vl_scope, + NULL); + p_current_sl2vl_scope = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_start() +{ + p_current_vlarb_scope = (osm_qos_vlarb_scope_t *) + malloc(sizeof(osm_qos_vlarb_scope_t)); + memset(p_current_vlarb_scope, 0, 
sizeof(osm_qos_vlarb_scope_t)); + + cl_ptr_vector_init(&p_current_vlarb_scope->groups, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->across, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_highs, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_vlarb_scope->vlarb_lows, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_vlarb_scope_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->vlarb_tables, + p_current_vlarb_scope, + NULL); + p_current_vlarb_scope = NULL; +} + + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_start() +{ + p_current_qos_level = (osm_qos_level_t *) + malloc(sizeof(osm_qos_level_t)); + memset(p_current_qos_level, 0, sizeof(osm_qos_level_t)); +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_qos_level_end() +{ + cl_ptr_vector_insert(&p_qos_parse_tree->qos_levels, + p_current_qos_level, + NULL); + p_current_qos_level = NULL; +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_match_rule_start() +{ + p_current_qos_match_rule = (osm_qos_match_rule_t *) + malloc(sizeof(osm_qos_match_rule_t)); + memset(p_current_qos_match_rule, 0, sizeof(osm_qos_match_rule_t)); + + cl_ptr_vector_init(&p_current_qos_match_rule->services, + 0, /* min size */ + 4); /* grow size */ + cl_ptr_vector_init(&p_current_qos_match_rule->classes, + 0, /* min size */ + 4); /* grow size */ +} + +/*************************************************** + ***************************************************/ + +static void __osm_qos_parser_match_rule_end() +{ + 
cl_ptr_vector_insert(&p_qos_parse_tree->qos_match_rules, + p_current_qos_match_rule, + NULL); + p_current_qos_match_rule = NULL; +} + +/*************************************************** + ***************************************************/ + + -- 1.4.4.1.GIT From vlad at mellanox.co.il Tue Jan 30 08:17:37 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 30 Jan 2007 18:17:37 +0200 Subject: [openib-general] [PATCH 00/12] ofed_1_2 - Neighbour update support In-Reply-To: <1169845867.2996.48.camel@stevo-desktop> References: <20070125191321.30934.74542.stgit@dell3.ogc.int> <1169827215.2996.14.camel@stevo-desktop> <1169845867.2996.48.camel@stevo-desktop> Message-ID: <1170173857.13511.7.camel@vladsk-laptop> On Fri, 2007-01-26 at 15:11 -0600, Steve Wise wrote: > Michael/Vlad: > > I've pushed these up to my git tree. Can you merge them in? > > git://staging.openfabrics.com/~swise/ofed_1_2.git cxgb3 > > Here is the short log of the commits: > > aedc0b3c1681fb550ec4b8d1021caa2ce3dcbfd7 iw_cxgb3: allow doorbell mappings with VM_READ set. > 5ea83b9e3ec6f9c74040944adb83e4faf6613fe1 Backport Chelsio to rhel5 (2.6.18_FC6). 
> ff38246f6f07ff25609eaa304a707748904bf2bf Backport sles9sp3: Simulate neigh update events by snooping ARP packets > b88d46d10ce15f8ee725454f4998af6497cc13e1 Backport rhel4u4: Simulate neigh update events by snooping ARP packets > ab3a817b10da2df2e3d5bf08018be3d0212dc5bd Backport 2.6.11: Simulate neigh update events by snooping ARP packets > e545001a94c180c32b8b15d4ca4351506bd50fc2 Backport 2.6.12: Simulate neigh update events by snooping ARP packets > 8ddafe035c1a997c7625ae1bd42767deed148cb7 Backport 2.6.13: Simulate neigh update events by snooping ARP packets > ef260b8242d90edcabdc3153b829eda65d451672 Backport 2.6.14: Simulate neigh update events by snooping ARP packets > 84c78965a7c6a2a831fb2a49c6936321e2566904 Backport ubuntu606: Simulate neigh update events by snooping ARP packets > eb09f52a33471613fc29f898dfad8d9a57238d3e Backport 2.6.15: Simulate neigh update events by snooping ARP packets > ddc3ec432bd1898005ab52241d125dd4a71436aa Backport sles10: Simulate neigh update events by snooping ARP packets > b4af429744ff06545b2941fc5ef1ab4d6f0c0e77 Backport 2.6.16: Simulate neigh update events by snooping ARP packets > fe1a597f3aa409465d5b1b577a3b28c4a002f143 Backport 2.6.17: Simulate neighbour update events by snooping ARP packets > 9b3bfe5696aa417d38ce903eb345a03d65743dd2 Handle Ethernet neighbour updates during route resolution. > Done. -- Vladimir Sokolovsky Mellanox Technologies Ltd. 
From rdreier at cisco.com Tue Jan 30 08:26:34 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 30 Jan 2007 08:26:34 -0800 Subject: [openib-general] I created a git tree for the libibverbs man pages In-Reply-To: <45BF63A1.6090402@dev.mellanox.co.il> (Dotan Barak's message of "Tue, 30 Jan 2007 17:26:25 +0200") References: <45BF63A1.6090402@dev.mellanox.co.il> Message-ID: > I created a git tree for the libibverbs man pages in the path: > ~dotanb/libibverbs_man_pages.git Great, I was just about to ask for your latest work so that I can start reviewing the man pages and merging them. However, what's the URL of your git tree... never mind, git://git.openfabrics.org/~dotanb/libibverbs_man_pages.git worked for me. So just starting in alphabetical order, I see $ cat man3/ibv_ack_async_event.3 .so man3/ibv_get_async_event.3 $ And it looks like many other files are stubs too. What are your plans for continuing to write man pages? - R. From rdreier at cisco.com Tue Jan 30 08:28:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 30 Jan 2007 08:28:30 -0800 Subject: [openib-general] I created a git tree for the libibverbs man pages In-Reply-To: (Jeff Squyres's message of "Tue, 30 Jan 2007 10:32:27 -0500") References: <45BF63A1.6090402@dev.mellanox.co.il> Message-ID: > I would suggest s/OpenIB/OpenFabrics/ throughout the man pages, though. Actually I think I'll just do s/OpenIB// since I see no reason to plaster the OpenFabrics brand all over libibverbs. I guess I'll change them to say "Libibverbs Programmer's Manual". - R. 
From dotanb at dev.mellanox.co.il Tue Jan 30 08:42:19 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 30 Jan 2007 18:42:19 +0200 Subject: [openib-general] I created a git tree for the libibverbs man pages In-Reply-To: References: <45BF63A1.6090402@dev.mellanox.co.il> Message-ID: <45BF756B.1060500@dev.mellanox.co.il> Roland Dreier wrote: > > I created a git tree for the libibverbs man pages in the path: > > ~dotanb/libibverbs_man_pages.git > > Great, I was just about to ask for your latest work so that I can > start reviewing the man pages and merging them. > > However, what's the URL of your git tree... > never mind, git://git.openfabrics.org/~dotanb/libibverbs_man_pages.git > worked for me. > > Sorry, I thought this information was obvious, so I didn't send the full URL. > So just starting in alphabetical order, I see > > $ cat man3/ibv_ack_async_event.3 > .so man3/ibv_get_async_event.3 > $ > > Instead of duplicating the file, this is the best way to do it (other man pages use the same trick). > And it looks like many other files are stubs too. What are your plans > for continuing to write man pages? > > - R. > I have the following issues in my "to do list": * I still need to update the man page for ibv_get_cq_event * I plan to add an example of non-blocking polling of events to the man pages for ibv_get_cq_event and ibv_get_async_event * If you are planning to add the MR reregister verb, I will add a man page for it as well. * If I get any request / problem report from anyone, I will fix it I hope to finish those issues soon. Thanks Dotan From sweitzen at cisco.com Tue Jan 30 08:54:10 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Tue, 30 Jan 2007 08:54:10 -0800 Subject: [openib-general] topspin vs ofed ? 
In-Reply-To: <45BF1DED.1050607@bgm.lt> References: <45BF1DED.1050607@bgm.lt> Message-ID: If you have a Cisco support contract, you can use either stack and get support from Cisco, such as RPMs for some errata kernels. With OFED you can compile the source yourself for errata kernels. Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems > -----Original Message----- > From: openib-general-bounces at openib.org > [mailto:openib-general-bounces at openib.org] On Behalf Of Jonas Mardosas > Sent: Tuesday, January 30, 2007 2:29 AM > To: openib-general at openib.org > Subject: [openib-general] topspin vs ofed ? > > Hello, > > I need some information about infiniband drivers. I use > Scientific linux > 4.4, and now i installed newest kernel, but topspin drivers for my > adapters don't work on newest kernel, i looked in cisco > website, there > is the same version of infiniband host adapters drivers, that > was before > 3.2.0 (118), so how i understand i can use OFED-1.1, what are > differences between topspin drivers and Ofed? which is better? > what are > your suggestions? > Thank you for your responses. > > > -- > Jonas Mardosas > BGM > Sistemu inzinierius > M.K.Ciurlionio 17, LT-03104 Vilnius > mob.tel. 
+370 698 74002 > mail:jonas at bgm.lt > http://www.bgm.lt > > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From vlad at mellanox.co.il Tue Jan 30 08:59:31 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 30 Jan 2007 18:59:31 +0200 Subject: [openib-general] [PATCH ofed-1.2 0/6] ehca (kernel space) patches for ofed-1.2 In-Reply-To: <200701270159.21073.hnguyen@linux.vnet.ibm.com> References: <200701270159.21073.hnguyen@linux.vnet.ibm.com> Message-ID: <1170176371.13511.11.camel@vladsk-laptop> On Sat, 2007-01-27 at 01:59 +0100, Hoang-Nam Nguyen wrote: > Hello Vladimir! > Here is a patch set for ehca device driver with the following changes: > * Fix yield with spinlock held (this has been queued for 2.6.20) > * Fix mismatched unlock in irq handler (this has been queued for 2.6.20) > * Remove mmap (this has been queued for 2.6.21) > * Remove obsolete prototypes (this has been queued for 2.6.21) > * Backport for 2.6.16, 2.6.16sles10, 2.6.18, 2.6.18_FC6 (RHEL5) > With those patches we are able to support the following platforms: > * 2.6.20, 2.6.19, 2.6.18, RHEL5, 2.6.16, SLES10 > Thanks > Nam > PS1: The first two patches are actually not required since ofed-1.2 > is based on 2.6.20. Since I did not see them in daily build yesterday > I'm sending here for completeness. > PS2: For backport on 2.6.16 resp. SLES10 I saw that there is a > hvcall.h under backport/2.6.16/include/linux. However that one > is not sufficient for ehca and include/linux is the wrong place. > Hence, I'm patching a new one under include/asm. If I'm right, > please remove include/linux/hvcall.h! > Hi Nam, Applied patches 2-6. -- Vladimir Sokolovsky Mellanox Technologies Ltd. 
From mshefty at ichips.intel.com Tue Jan 30 10:27:35 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 30 Jan 2007 10:27:35 -0800 Subject: [openib-general] please pull for 2.6.21: fix + add IB multicast support In-Reply-To: <000701c741a6$16dc4760$ff0da8c0@amr.corp.intel.com> References: <000701c741a6$16dc4760$ff0da8c0@amr.corp.intel.com> Message-ID: <45BF8E17.2010805@ichips.intel.com> Roland, I've created a 'for-roland' branch off of my git tree: git://git.openfabrics.org/~shefty/rdma-dev.git with the following changes: Sean Hefty (3): rdma_cm: Increment port number after close to avoid re-use. ib_sa: track multicast join/leave requests rdma_cm: add multicast communication support The multicast patches combine additional patches posted to the list, including the fix for the multiple HCA issue reported by Michael. - Sean From mshefty at ichips.intel.com Tue Jan 30 10:29:37 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 30 Jan 2007 10:29:37 -0800 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <45BF63C4.9040205@open-mpi.org> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> <45BE747A.4060900@ichips.intel.com> <45BF63C4.9040205@open-mpi.org> Message-ID: <45BF8E91.2050500@ichips.intel.com> > Excellent -- is this in a git tree somewhere that I can grab (I'm new to > git)? Or, what would be an appropriate tree to apply this to? This is now available from my rdma-dev.git tree on openfabrics. The patch is included in the multicast and ofed_1_2 branches. - Sean From sean.hefty at intel.com Tue Jan 30 10:40:57 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 30 Jan 2007 10:40:57 -0800 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <20070129162015.GD20398@mellanox.co.il> Message-ID: <000001c7449e$2e5396a0$ff0da8c0@amr.corp.intel.com> > *Sources developed in OFA:* > 1. 
Each git owner will open a branch with the name ofed_1_2. This branch > should be opened on 31-Jan (based on code readiness we will review today). I've added ofed_1_2 branches to my libibcm.git, librdmacm.git, and rdma-dev.git trees. - Sean From hnguyen at linux.vnet.ibm.com Tue Jan 30 11:52:28 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Tue, 30 Jan 2007 20:52:28 +0100 Subject: [openib-general] [PATCH ofed-1.2 alpha rel] ehca: reworked irq handler to support NAPI consistently Message-ID: <200701302052.28962.hnguyen@linux.vnet.ibm.com> Hi Vladimir, here is a patch for ehca with reworked irq handler. With those changes the performance result without/with scaling code and with NAPI (scaling code turned off) is consistent. They also reduce the rate of drop packets (when scaling code is turned off) significantly. Thanks Nam PS: Roland, this patch is aligned with ofed-1.2 "only". I'l send this patch for 2.6.21 separately next week. Signed-off-by: Hoang-Nam Nguyen --- ehca_classes.h | 18 +++-- ehca_eq.c | 1 ehca_irq.c | 202 ++++++++++++++++++++++++++++++++++++--------------------- ehca_irq.h | 1 ehca_main.c | 24 +++++- ipz_pt_fn.h | 9 ++ 6 files changed, 173 insertions(+), 82 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index cf95ee4..8888002 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -42,8 +42,6 @@ #ifndef __EHCA_CLASSES_H__ #define __EHCA_CLASSES_H__ -#include "ehca_classes.h" -#include "ipz_pt_fn.h" struct ehca_module; struct ehca_qp; @@ -54,14 +52,22 @@ struct ehca_mw; struct ehca_pd; struct ehca_av; +#include +#include + #ifdef CONFIG_PPC64 #include "ehca_classes_pSeries.h" #endif +#include "ipz_pt_fn.h" +#include "ehca_qes.h" +#include "ehca_irq.h" -#include -#include +#define EHCA_EQE_CACHE_SIZE 20 -#include "ehca_irq.h" +struct ehca_eqe_cache_entry { + struct ehca_eqe *eqe; + struct ehca_cq *cq; +}; struct ehca_eq 
{ u32 length; @@ -74,6 +80,8 @@ struct ehca_eq { spinlock_t spinlock; struct tasklet_struct interrupt_task; u32 ist; + spinlock_t irq_spinlock; + struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; }; struct ehca_sport { diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 5281dec..33c822e 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shc struct ib_device *ib_dev = &shca->ib_device; spin_lock_init(&eq->spinlock); + spin_lock_init(&eq->irq_spinlock); eq->is_initialized = 0; if (type != EHCA_EQ && type != EHCA_NEQ) { diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index c069be8..1a9ec79 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -401,87 +401,143 @@ irqreturn_t ehca_interrupt_eq(int irq, v return IRQ_HANDLED; } -void ehca_tasklet_eq(unsigned long data) -{ - struct ehca_shca *shca = (struct ehca_shca*)data; - struct ehca_eqe *eqe; - int int_state; - int query_cnt = 0; - do { - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); - - if ((shca->hw_level >= 2) && eqe) - int_state = 1; - else - int_state = 0; - - while ((int_state == 1) || eqe) { - while (eqe) { - u64 eqe_value = eqe->entry; - - ehca_dbg(&shca->ib_device, - "eqe_value=%lx", eqe_value); - - /* TODO: better structure */ - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, - eqe_value)) { - unsigned long flags; - u32 token; - struct ehca_cq *cq; - - ehca_dbg(&shca->ib_device, - "... 
completion event"); - token = - EHCA_BMASK_GET(EQE_CQ_TOKEN, - eqe_value); - spin_lock_irqsave(&ehca_cq_idr_lock, - flags); - cq = idr_find(&ehca_cq_idr, token); - - if (cq == NULL) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); - break; - } - - reset_eq_pending(cq); +static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) +{ + u64 eqe_value; + u32 token; + unsigned long flags; + struct ehca_cq *cq; + eqe_value = eqe->entry; + ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value); + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + ehca_dbg(&shca->ib_device, "... completion event"); + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, token); + if (cq == NULL) { + spin_unlock(&ehca_cq_idr_lock); + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq token=%x", + token); + return; + } + reset_eq_pending(cq); #ifdef CONFIG_INFINIBAND_EHCA_SCALING - queue_comp_task(cq); - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); + queue_comp_task(cq); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); #else - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); - comp_event_callback(cq); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + comp_event_callback(cq); #endif - } else { - ehca_dbg(&shca->ib_device, - "... non completion event"); - parse_identifier(shca, eqe_value); - } - eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, - &shca->eq); - } + } else { + ehca_dbg(&shca->ib_device, + "... 
non completion event"); + parse_identifier(shca, eqe_value); + } +} - if (shca->hw_level >= 2) { - int_state = - hipz_h_query_int_state(shca->ipz_hca_handle, - shca->eq.ist); - query_cnt++; - iosync(); - if (query_cnt >= 100) { - query_cnt = 0; - int_state = 0; - } - } - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); +void ehca_process_eq(struct ehca_shca *shca, int is_irq) +{ + struct ehca_eq *eq = &shca->eq; + struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; + u64 eqe_value; + unsigned long flags; + unsigned long irq_flags; + int eqe_cnt, i; + int eq_empty = 0; + + spin_lock_irqsave(&eq->irq_spinlock, irq_flags); + if (is_irq) { + const int max_query_cnt = 100; + int query_cnt = 0; + int int_state = 1; + do { + int_state = hipz_h_query_int_state( + shca->ipz_hca_handle, eq->ist); + query_cnt++; + iosync(); + } while (int_state && query_cnt < max_query_cnt); + if (unlikely((query_cnt == max_query_cnt))) + ehca_err(&shca->ib_device, "int_state=%x query_cnt=%x", + int_state, query_cnt); + } + /* read out all eqes */ + eqe_cnt = 0; + do { + u32 token; + eqe_cache[eqe_cnt].eqe = + (struct ehca_eqe *)ehca_poll_eq(shca, eq); + if (!eqe_cache[eqe_cnt].eqe) + break; + eqe_value = eqe_cache[eqe_cnt].eqe->entry; + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (!eqe_cache[eqe_cnt].cq) { + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq " + "token=%x", token); + continue; + } + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + } else + eqe_cache[eqe_cnt].cq = NULL; + eqe_cnt++; + } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); + if (!eqe_cnt) { + if (is_irq) + ehca_dbg(&shca->ib_device, + "No eqe found for irq event"); + goto unlock_irq_spinlock; + } else if (!is_irq) + ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + 
if (eqe_cnt == EHCA_EQE_CACHE_SIZE) + ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); + /* enable irq for new packets */ + for (i = 0; i < eqe_cnt; i++) { + if (eq->eqe_cache[i].cq) + reset_eq_pending(eq->eqe_cache[i].cq); + } + /* check eq */ + spin_lock_irqsave(&eq->spinlock, flags); + eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); + spin_unlock_irqrestore(&eq->spinlock, flags); + /* call completion handler for cached eqes */ + for (i = 0; i < eqe_cnt; i++) + if (eq->eqe_cache[i].cq) +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + queue_comp_task(eq->eqe_cache[i].cq); +#else + comp_event_callback(eq->eqe_cache[i].cq); +#endif + else { + ehca_dbg(&shca->ib_device, "got non completion event"); + parse_identifier(shca, eq->eqe_cache[i].eqe->entry); } - } while (int_state != 0); + /* poll eq if not empty */ + if (eq_empty) + goto unlock_irq_spinlock; + do { + struct ehca_eqe *eqe; + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + if (!eqe) + break; + process_eqe(shca, eqe); + eqe_cnt++; + } while (1); + + unlock_irq_spinlock: + spin_unlock_irqrestore(&eq->irq_spinlock, irq_flags); +} - return; +void ehca_tasklet_eq(unsigned long data) +{ + ehca_process_eq((struct ehca_shca*)data, 1); } #ifdef CONFIG_INFINIBAND_EHCA_SCALING diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index be579cc..6ed06ee 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -56,6 +56,7 @@ void ehca_tasklet_neq(unsigned long data irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); void ehca_tasklet_eq(unsigned long data); +void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { wait_queue_head_t wait_queue; diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index f2c328f..49fa5b6 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ #include 
"hcp_if.h" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0020"); +MODULE_VERSION("SVNEHCA_0021"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -778,8 +777,24 @@ void ehca_poll_eqs(unsigned long data) spin_lock(&shca_list_lock); list_for_each_entry(shca, &shca_list, shca_list) { - if (shca->eq.is_initialized) - ehca_tasklet_eq((unsigned long)(void*)shca); + if (shca->eq.is_initialized) { + /* call deadman proc only if eq ptr does not change */ + struct ehca_eq *eq = &shca->eq; + int max = 3; + volatile u64 q_ofs, q_ofs2; + u64 flags; + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + do { + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs2 = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + max--; + } while (q_ofs == q_ofs2 && max > 0); + if (q_ofs == q_ofs2) + ehca_process_eq(shca, 0); + } } mod_timer(&poll_eqs_timer, jiffies + HZ); spin_unlock(&shca_list_lock); @@ -790,7 +805,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0020)\n"); + "(Rel.: SVNEHCA_0021)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index dc3bda2..4501f75 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -247,6 +247,15 @@ static inline void *ipz_eqit_eq_get_inc_ return ret; } +static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *) ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + return ret; +} + /* returns address (GX) of first queue entry */ static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) { From rdreier at 
cisco.com Tue Jan 30 15:11:10 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 30 Jan 2007 15:11:10 -0800 Subject: [openib-general] [mthca] Creation of a SRQ with many WR (> 16K) in kernel level fails In-Reply-To: <45BF1866.3010807@voltaire.com> (Or Gerlitz's message of "Tue, 30 Jan 2007 12:05:26 +0200") References: <45BF0575.9020507@dev.mellanox.co.il> <45BF1866.3010807@voltaire.com> Message-ID: > anyway, the solution that comes into my mind is to disable creating a > QP/SRQ for which > 128KB allocations are needed. So > mthca_query_device() will set the max_qp_wr and max_srq_wr attributes > to values whose derived size still allows to use kmalloc. But that will limit the size of the queues that userspace can create too. I guess we could allocate kernel wrid arrays with vmalloc(), but I wonder if anyone actually cares about this limit... - R. From sean.hefty at intel.com Tue Jan 30 17:15:33 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 30 Jan 2007 17:15:33 -0800 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <45BF63C4.9040205@open-mpi.org> Message-ID: <000201c744d5$4e514420$ff0da8c0@amr.corp.intel.com> >Excellent -- is this in a git tree somewhere that I can grab (I'm new to >git)? Or, what would be an appropriate tree to apply this to? I've committed changes to the librdmacm multicast test program (mckey) that provides an example of using this functionality. The changes are in the librdmacm.git tree master branch. Execute the server side with something like: mckey -m 0.0.0.0 -b local_ip_address (e.g. 
192.168.0.1) this will display something similar to: mckey: joined dgid: ff12:a01b:fe80:c007:c007:0 The client can join the group with the -M option: mckey -M ff12:a01b:fe80:c007:c007:0 -b local_ip_address - Sean From rowland at cse.ohio-state.edu Tue Jan 30 17:36:22 2007 From: rowland at cse.ohio-state.edu (Shaun Rowland) Date: Tue, 30 Jan 2007 20:36:22 -0500 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <45BDFF11.9080901@mellanox.co.il> References: <45BDFF11.9080901@mellanox.co.il> Message-ID: <45BFF296.8000908@cse.ohio-state.edu> Tziporet Koren wrote: > *MPI packages: > *1. MPI packages are provided as source RPMs > 2. Each MPI owner will have an account on the OFA server and will open a > directory named ofed_1_2 Hi. I am not exactly sure where the ofed_1_2 directory for MPI SRPMs is supposed to go. I assume from previous meetings this is just a filesystem directory. Should it be a directory in my home directory on staging.openfabrics.org, in ~/public_html, or is there something else I need to do to put this into place? I think from the previous MPI specific meeting, this was supposed to be done in a web directory. Since I am unclear, I wanted to ask here. -- Shaun Rowland rowland at cse.ohio-state.edu http://www.cse.ohio-state.edu/~rowland/ From jsquyres at cisco.com Tue Jan 30 18:00:54 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 30 Jan 2007 21:00:54 -0500 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <45BFF296.8000908@cse.ohio-state.edu> References: <45BDFF11.9080901@mellanox.co.il> <45BFF296.8000908@cse.ohio-state.edu> Message-ID: It would be helpful to see the MVAPICH1 distribution for OFED 1.2 somewhere on the OFA server (under ~vlad/ofed_1_2 or ~vlad/ public_html/ofed_1_2...?) for comparison / example purposes. On Jan 30, 2007, at 8:36 PM, Shaun Rowland wrote: > Tziporet Koren wrote: > >> *MPI packages: >> *1. 
MPI packages are provided as source RPMs >> 2. Each MPI owner will have an account on the OFA server and will >> open a >> directory named ofed_1_2 > > Hi. I am not exactly sure where the ofed_1_2 directory for MPI > SRPMs is > supposed to go. I assume from previous meetings this is just a > filesystem directory. Should it be a directory in my home directory on > staging.openfabrics.org, in ~/public_html, or is there something > else I > need to do to put this into place? I think from the previous MPI > specific meeting, this was supposed to be done in a web directory. > Since > I am unclear, I wanted to ask here. > -- > Shaun Rowland rowland at cse.ohio-state.edu > http://www.cse.ohio-state.edu/~rowland/ > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/ > openib-general -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From jsquyres at cisco.com Tue Jan 30 18:28:40 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 30 Jan 2007 21:28:40 -0500 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: <6C2C79E72C305246B504CBA17B5500C9A0DCBE@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C9A0DCBE@mtlexch01.mtl.com> Message-ID: On Jan 30, 2007, at 9:23 AM, Tziporet Koren wrote: > 4. Vlad to have a daily build of the full OFED package Where is this build available from? Thanks. 
-- Jeff Squyres Server Virtualization Business Unit Cisco Systems From afriedle at open-mpi.org Tue Jan 30 22:46:45 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Tue, 30 Jan 2007 22:46:45 -0800 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <000201c744d5$4e514420$ff0da8c0@amr.corp.intel.com> References: <000201c744d5$4e514420$ff0da8c0@amr.corp.intel.com> Message-ID: <45C03B55.1000906@open-mpi.org> Sean Hefty wrote: > I've committed changes to the librdmacm multicast test program (mckey) that > provides an example of using this functionality. The changes are in the > librdmacm.git tree master branch. Great, thanks for working on this! Definitely a newbie here -- I'm seeing the patches and mckey code in git in your ofed_1_2 branches, does this mean it'll be in the upcoming 1.2 alpha tarballs? Andrew From eitan at sw053.yok.mtl.com Tue Jan 30 21:18:36 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Wed, 31 Jan 2007 07:18:36 +0200 Subject: [openib-general] nightly osm_sim report 2007-01-31:normal completion Message-ID: <200701310518.l0V5IaVc029948@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Mon_Jan_29_10:06:23_2007 1f5e50 ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 
4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From mst at mellanox.co.il Tue Jan 30 22:14:51 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 08:14:51 +0200 Subject: [openib-general] IPOIB CM with Non SRQ support In-Reply-To: References: Message-ID: <20070131061451.GA3405@mellanox.co.il> > -One artifact of the current send side implementation is that for every > message we create a new set of tx qps. I do not believe this describes the implementation correctly - ipoib_cm_tx is cached in ipoib_neigh structure so that once a connection is set up, it is reused for all messages to the same neighbour. -- MST From mst at mellanox.co.il Tue Jan 30 22:58:53 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 08:58:53 +0200 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: References: <6C2C79E72C305246B504CBA17B5500C9A0DCBE@mtlexch01.mtl.com> Message-ID: <20070131065853.GH3405@mellanox.co.il> > Quoting Jeff Squyres : > Subject: Re: Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures > > On Jan 30, 2007, at 9:23 AM, Tziporet Koren wrote: > > > 4. Vlad to have a daily build of the full OFED package > > Where is this build available from? http://www.openfabrics.org/builds/ -- MST From mst at mellanox.co.il Tue Jan 30 23:40:15 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 31 Jan 2007 09:40:15 +0200 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <45BF8E91.2050500@ichips.intel.com> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> <45BE747A.4060900@ichips.intel.com> <45BF63C4.9040205@open-mpi.org> <45BF8E91.2050500@ichips.intel.com> Message-ID: <20070131074015.GI3405@mellanox.co.il> > Quoting Sean Hefty : > Subject: Re: [RFC][PATCH] rdma_cm: allow joins to return a unique address > > > Excellent -- is this in a git tree somewhere that I can grab (I'm new to > > git)? Or, what would be an appropriate tree to apply this to? > > This is now available from my rdma-dev.git tree on openfabrics. The patch is > included in the multicast and ofed_1_2 branches. Sean, when you put something in ofed_1_2 branch I assume this means that you want it included in OFED 1.2? In this case, you should let Vlad know (Cc list). -- MST From tziporet at mellanox.co.il Wed Jan 31 01:44:49 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 31 Jan 2007 11:44:49 +0200 Subject: [openib-general] [openfabrics-ewg] OFED 1.2 release - to be reviewed in the meeting today Message-ID: <6C2C79E72C305246B504CBA17B5500C9A0DCDC@mtlexch01.mtl.com> thanks -----Original Message----- From: openfabrics-ewg-bounces at openib.org [mailto:openfabrics-ewg-bounces at openib.org] On Behalf Of Sean Hefty Sent: Tuesday, January 30, 2007 8:41 PM To: Michael S. Tsirkin; Sasha Khapyorsky; vlad at dev.mellanox.co.il Cc: EWG; OPENIB Subject: Re: [openfabrics-ewg] [openib-general] OFED 1.2 release - to be reviewed in the meeting today > *Sources developed in OFA:* > 1. Each git owner will open a branch with the name ofed_1_2. This branch > should be opened on 31-Jan (based on code readiness we will review today). I've added ofed_1_2 branches to my libibcm.git, librdmacm.git, and rdma-dev.git trees. 
- Sean _______________________________________________ openfabrics-ewg mailing list openfabrics-ewg at openib.org http://openib.org/mailman/listinfo/openfabrics-ewg From dotanb at dev.mellanox.co.il Wed Jan 31 01:49:30 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 31 Jan 2007 11:49:30 +0200 Subject: [openib-general] [mthca] Creation of a SRQ with many WR (> 16K) in kernel level fails In-Reply-To: References: <45BF0575.9020507@dev.mellanox.co.il> <45BF1866.3010807@voltaire.com> Message-ID: <45C0662A.7050203@dev.mellanox.co.il> Roland Dreier wrote: > > anyway, the solution that comes into my mind is to disable creating a > > QP/SRQ for which > 128KB allocations are needed. So > > mthca_query_device() will set the max_qp_wr and max_srq_wr attributes > > to values whose derived size still allows to use kmalloc. > > But that will limit the size of the queues that userspace can create > too. I guess we could allocate kernel wrid arrays with vmalloc(), but > I wonder if anyone actually cares about this limit... > I think that now, when implementation of IPoIB CM is available and SRQ is being used, one may need to use a SRQ with more than 16K WRs. thanks Dotan From mst at mellanox.co.il Wed Jan 31 02:09:07 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 12:09:07 +0200 Subject: [openib-general] [mthca] Creation of a SRQ with many WR (> 16K) in kernel level fails In-Reply-To: <45C0662A.7050203@dev.mellanox.co.il> References: <45BF0575.9020507@dev.mellanox.co.il> <45BF1866.3010807@voltaire.com> <45C0662A.7050203@dev.mellanox.co.il> Message-ID: <20070131100907.GA24831@mellanox.co.il> > Quoting Dotan Barak : > Subject: Re: [mthca] Creation of a SRQ with many WR (> 16K) in kernel level fails > > Roland Dreier wrote: > > > anyway, the solution that comes into my mind is to disable creating a > > > QP/SRQ for which > 128KB allocations are needed. 
So > > > mthca_query_device() will set the max_qp_wr and max_srq_wr attributes > > > to values whose derived size still allows to use kmalloc. > > > > But that will limit the size of the queues that userspace can create > > too. I guess we could allocate kernel wrid arrays with vmalloc(), but > > I wonder if anyone actually cares about this limit... > > I think that now, when implementation of IPoIB CM is available and SRQ > is being used, one may need to use a SRQ with more than 16K WRs. Not really: IPoIB CM uses a common CQ for all recv completions, so it does not make sense for IPoIB CM to create a SRQ bigger than the max CQ size. -- MST From dotanb at dev.mellanox.co.il Wed Jan 31 02:11:10 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 31 Jan 2007 12:11:10 +0200 Subject: [openib-general] [libibverbs] destroying an AH causes a seg fault (this failure appeared during the last night) Message-ID: <45C06B3E.80103@dev.mellanox.co.il> Hi Roland. During the last night many tests failed in our regression (new failure that appeared only during the last night). It seems that destroying an AH causes a seg fault; I reproduced it using the ibv_ud_pingpong. 
Here are the machine props: ************************************************************* Host Architecture : x86_64 Linux Distribution: SUSE Linux Enterprise Server 10 (x86_64) VERSION = 10 Kernel Version : 2.6.16.21-0.8-smp GCC Version : gcc (GCC) 4.1.0 (SUSE Linux) Memory size : 5081168 kB Driver Version : gen2_devel-20070130-1817 HCA ID(s) : mthca0 HCA model(s) : 25218 FW version(s) : 5.2.0 Board(s) : MT_0150000002 ************************************************************* Driver Checksums: gen2_devel-20070130-1817 Kernel: Git: git://git.openfabrics.org/~vlad/ofed_1_2/.git commit ab8b772956b6178ef14c983fd215d0dda3fb6842 Kernel: Git: git://git.openfabrics.org/~vlad/ofed_1_2/.git commit ab8b772956b6178ef14c983fd215d0dda3fb6842 Here is the backtrace of the core dump: # gdb ibv_ud_pingpong core GNU gdb 6.4 Copyright 2005 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "x86_64-suse-linux"...Using host libthread_db library "/lib64/libthread_db.so.1". Core was generated by `ibv_ud_pingpong sw031'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/local/lib64/libibverbs.so.1...done. Loaded symbols for /usr/local//lib64/libibverbs.so.1 Reading symbols from /lib64/libpthread.so.0...done. Loaded symbols for /lib64/libpthread.so.0 Reading symbols from /lib64/libdl.so.2...done. Loaded symbols for /lib64/libdl.so.2 Reading symbols from /lib64/libc.so.6...done. Loaded symbols for /lib64/libc.so.6 Reading symbols from /lib64/ld-linux-x86-64.so.2...done. Loaded symbols for /lib64/ld-linux-x86-64.so.2 Reading symbols from /usr/local/lib64/libcxgb3-rdmav2.so...done. 
Loaded symbols for /usr/local/lib64/libcxgb3-rdmav2.so Reading symbols from /usr/local/lib64/libmthca-rdmav2.so...done. Loaded symbols for /usr/local/lib64/libmthca-rdmav2.so #0 0x00002b94b6612263 in __ibv_destroy_ah (ah=0x504e60) at src/verbs.c:475 475 return ah->context->ops.destroy_ah(ah); (gdb) bt #0 0x00002b94b6612263 in __ibv_destroy_ah (ah=0x504e60) at src/verbs.c:475 #1 0x0000000000401cb8 in pp_close_ctx (ctx=0x505340) at examples/ud_pingpong.c:387 #2 0x0000000000402a2b in main (argc=, argv=) at examples/ud_pingpong.c:749 thanks Dotan From dotanb at dev.mellanox.co.il Wed Jan 31 02:13:36 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 31 Jan 2007 12:13:36 +0200 Subject: [openib-general] [mthca] Creation of a SRQ with many WR (> 16K) in kernel level fails In-Reply-To: <20070131100907.GA24831@mellanox.co.il> References: <45BF0575.9020507@dev.mellanox.co.il> <45BF1866.3010807@voltaire.com> <45C0662A.7050203@dev.mellanox.co.il> <20070131100907.GA24831@mellanox.co.il> Message-ID: <45C06BD0.7030806@dev.mellanox.co.il> Michael S. Tsirkin wrote: >> I think that now, when implementation of IPoIB CM is available and SRQ >> is being used, one may need to use a SRQ with more than 16K WRs. >> > > Not really: IPoIB CM uses a common CQ for all recv completions, so > it does not make sense for IPoIB CM to create a SRQ bigger than > the max CQ size. > > In many HCAs, the maximum CQ size is 128K entries. Dotan From kliteyn at dev.mellanox.co.il Wed Jan 31 02:16:33 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 31 Jan 2007 12:16:33 +0200 Subject: [openib-general] Bugzilla Bug 329: HCA_FATAL_EVENT cause to OpenSM to stop functioning Message-ID: <45C06C81.4060500@dev.mellanox.co.il> Hi Hal. 
I noticed the following bug in Bugzilla: Bugzilla Bug 329: HCA_FATAL_EVENT cause to opensm to stop functioning https://bugs.openfabrics.org/show_bug.cgi?id=329 When there is an HCA fatal event on the host that opensm is running on, opensm stops functioning (after the event, the driver restarts the device, and the port does not return to the active state). If opensm runs in sweep mode, after the event you can see that opensm stops sweeping. I remember that a couple of months ago I sent a patch that takes care of this problem: - in case of IBV_EVENT_DEVICE_FATAL, osm was forced to exit - in case of IBV_EVENT_PORT_ERROR, osm initiated heavy sweep The problem with my patch was that it made osm depend on the uverbs module. To resolve this problem, support should be added in umad, and then osm could use this support. Do you know if some work in this area was done in umad? -- Yevgeny From vlad at lists.openfabrics.org Wed Jan 31 02:20:59 2007 From: vlad at lists.openfabrics.org (vlad at lists.openfabrics.org) Date: Wed, 31 Jan 2007 02:20:59 -0800 (PST) Subject: [openib-general] ofa_1_2_kernel 20070131-0200 daily build status Message-ID: <20070131102059.86F73E607F7@openfabrics.org> This email was generated automatically, please do not reply Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-core-mod --with-addr_trans-mod --with-cxgb3-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on powerpc with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on powerpc with 
linux-2.6.17 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.16 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.15 Failed: From mst at mellanox.co.il Wed Jan 31 02:24:53 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 12:24:53 +0200 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. In-Reply-To: References: <20070119211659.18220.52930.stgit@dell3.ogc.int> Message-ID: <20070131102453.GA25270@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH] The ibv_cmd_* create functions need to set the context. > > Thanks, applied to master and stable branches. Did you test it? This patch (8b3d225476c99ea29a68109a7d40e5ef353d4388) causes ibv_ud_pingpong to segfault on libmthca: libmthca never calls ibv_cmd_create_ah to context is now never set. Starting program: /usr/local/ofed/bin/ibv_ud_pingpong sw069 [Thread debugging using libthread_db enabled] [New Thread 47299578320592 (LWP 5085)] local address: LID 0x0002, QPN 0x090406, PSN 0x71bffb remote address: LID 0x0001, QPN 0x040406, PSN 0x92316a 4096000 bytes in 0.02 seconds = 1893.99 Mbit/sec 1000 iters in 0.02 seconds = 17.30 usec/iter Program received signal SIGSEGV, Segmentation fault. 
[Switching to Thread 47299578320592 (LWP 5085)] 0x00002b04ca3b7263 in __ibv_destroy_ah (ah=0x5050b0) at src/verbs.c:475 475 return ah->context->ops.destroy_ah(ah); (gdb) p ah->context $1 = (struct ibv_context *) 0x0 I actually think this approach is a wrong one: context should be set in common code like ibv_create_ah, not in ibv_cmd_ which is a library function low level driver might or might not call. And certainly this kind of change does not seem appropriate for stable branch. I think the proper thing is for low level driver not to assume that fields such as contex are intialized until create functions have returned. Steve, pls fix your low level driver not to rely on this. Roland, I have reverted this in OFED, please revert on master and stable. -- MST From bugzilla-daemon at lists.openfabrics.org Wed Jan 31 03:42:39 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Wed, 31 Jan 2007 03:42:39 -0800 (PST) Subject: [openib-general] [Bug 334] New: Problems with build OFED-1.1.1-ib_local_sa Message-ID: https://bugs.openfabrics.org/show_bug.cgi?id=334 Summary: Problems with build OFED-1.1.1-ib_local_sa Product: OpenFabrics Linux Version: gen2 Platform: X86-64 OS/Version: SLES 10 Status: NEW Severity: critical Priority: P1 Component: IB Core AssignedTo: bugzilla at openib.org ReportedBy: dmitry.yulov at intel.com I have a problem with build RPM packages on SLES10. 
The output of the problem is as follows:
Jul 3 18:25:39 UTC 2006 x86_64 x86_64 x86_64 GNU/Linux OS: SUSE Linux Enterprise Server 10 (x86_64) gcc version: gcc (GCC) 4.1.0 (SUSE Linux) -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at lists.openfabrics.org Wed Jan 31 03:43:25 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Wed, 31 Jan 2007 03:43:25 -0800 (PST) Subject: [openib-general] [Bug 334] Problems with build OFED-1.1.1-ib_local_sa In-Reply-To: Message-ID: <20070131114326.0852FE607F7@openfabrics.org> https://bugs.openfabrics.org/show_bug.cgi?id=334 dmitry.yulov at intel.com changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |dmitry.yulov at intel.com -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From jsquyres at cisco.com Wed Jan 31 03:48:48 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 31 Jan 2007 06:48:48 -0500 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: <20070131065853.GH3405@mellanox.co.il> References: <6C2C79E72C305246B504CBA17B5500C9A0DCBE@mtlexch01.mtl.com> <20070131065853.GH3405@mellanox.co.il> Message-ID: <0A6B39C2-2CAB-436E-BE96-44FE30F7210C@cisco.com> On Jan 31, 2007, at 1:58 AM, Michael S. Tsirkin wrote: >>> 4. Vlad to have a daily build of the full OFED package >> >> Where is this build available from? > > http://www.openfabrics.org/builds/ All I see at that URL is nightly tarballs of the OFA kernel sources and the OFA user sources. I was under the impression from the above text that there would be an **OFED** nightly tarball generated. Is this incorrect? 
-- Jeff Squyres Server Virtualization Business Unit Cisco Systems From mst at mellanox.co.il Wed Jan 31 03:51:38 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 13:51:38 +0200 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: <0A6B39C2-2CAB-436E-BE96-44FE30F7210C@cisco.com> References: <0A6B39C2-2CAB-436E-BE96-44FE30F7210C@cisco.com> Message-ID: <20070131115138.GA25697@mellanox.co.il> > Quoting Jeff Squyres : > Subject: Re: Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures > > On Jan 31, 2007, at 1:58 AM, Michael S. Tsirkin wrote: > > >>> 4. Vlad to have a daily build of the full OFED package > >> > >> Where is this build available from? > > > > http://www.openfabrics.org/builds/ > > All I see at that URL is nightly tarballs of the OFA kernel sources > and the OFA user sources. > > I was under the impression from the above text that there would be an > **OFED** nightly tarball generated. > > Is this incorrect? OFED didn't branch yet so there's no difference. -- MST From jsquyres at cisco.com Wed Jan 31 03:56:51 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 31 Jan 2007 06:56:51 -0500 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: <20070131115138.GA25697@mellanox.co.il> References: <0A6B39C2-2CAB-436E-BE96-44FE30F7210C@cisco.com> <20070131115138.GA25697@mellanox.co.il> Message-ID: <9A0B8FA7-007B-45B8-B67F-D3AA806AA2E1@cisco.com> On Jan 31, 2007, at 6:51 AM, Michael S. Tsirkin wrote: >> I was under the impression from the above text that there would be an >> **OFED** nightly tarball generated. > > OFED didn't branch yet so there's no difference. 
So are you saying that starting tomorrow (or shortly after tomorrow -- whatever), there will be a nightly OFED tarball (with all the OFED build scripts and sources and whatnot -- quite different than just bundling the OFA sources together) available at that URL as well? -- Jeff Squyres Server Virtualization Business Unit Cisco Systems From tziporet at mellanox.co.il Wed Jan 31 04:29:46 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 31 Jan 2007 14:29:46 +0200 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: <9A0B8FA7-007B-45B8-B67F-D3AA806AA2E1@cisco.com> References: <0A6B39C2-2CAB-436E-BE96-44FE30F7210C@cisco.com> <20070131115138.GA25697@mellanox.co.il> <9A0B8FA7-007B-45B8-B67F-D3AA806AA2E1@cisco.com> Message-ID: <45C08BBA.8020609@mellanox.co.il> Jeff Squyres wrote: > On Jan 31, 2007, at 6:51 AM, Michael S. Tsirkin wrote: > > >>> I was under the impression from the above text that there would be an >>> **OFED** nightly tarball generated. >>> >> OFED didn't branch yet so there's no difference. >> > > So are you saying that starting tomorrow (or shortly after tomorrow > -- whatever), there will be a nightly OFED tarball (with all the OFED > build scripts and sources and whatnot -- quite different than just > bundling the OFA sources together) available at that URL as well? > > There is a misunderstanding here: Michale pointed you to the current daily build of OFA SW. The build of the full OFED tarball will available early next week (hope on Monday). When this will happened Vlad will send a mail to all with the packages location. 
Tziporet From tziporet at mellanox.co.il Wed Jan 31 04:32:17 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 31 Jan 2007 14:32:17 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: References: <45BDFF11.9080901@mellanox.co.il> <45BFF296.8000908@cse.ohio-state.edu> Message-ID: <45C08C51.50607@mellanox.co.il> Jeff Squyres wrote: > It would be helpful to see the MVAPICH1 distribution for OFED 1.2 > somewhere on the OFA server (under ~vlad/ofed_1_2 or > ~vlad/public_html/ofed_1_2...?) for comparison / example purposes. Pasha will place his SRPM on ~pasha/ofed_1_2 today Tziporet From jsquyres at cisco.com Wed Jan 31 04:32:42 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Wed, 31 Jan 2007 07:32:42 -0500 Subject: [openib-general] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: <45C08BBA.8020609@mellanox.co.il> References: <0A6B39C2-2CAB-436E-BE96-44FE30F7210C@cisco.com> <20070131115138.GA25697@mellanox.co.il> <9A0B8FA7-007B-45B8-B67F-D3AA806AA2E1@cisco.com> <45C08BBA.8020609@mellanox.co.il> Message-ID: <75E342F0-847B-4F1A-9894-5FBD322AA6F9@cisco.com> On Jan 31, 2007, at 7:29 AM, Tziporet Koren wrote: > There is a misunderstanding here: > Michale pointed you to the current daily build of OFA SW. > The build of the full OFED tarball will available early next week > (hope on Monday). > When this will happened Vlad will send a mail to all with the > packages location. Great -- thanks! 
-- Jeff Squyres Server Virtualization Business Unit Cisco Systems From tziporet at mellanox.co.il Wed Jan 31 04:40:39 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 31 Jan 2007 14:40:39 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <45BFF296.8000908@cse.ohio-state.edu> References: <45BDFF11.9080901@mellanox.co.il> <45BFF296.8000908@cse.ohio-state.edu> Message-ID: <45C08E47.2040506@mellanox.co.il> Shaun Rowland wrote: > > Hi. I am not exactly sure where the ofed_1_2 directory for MPI SRPMs is > supposed to go. I assume from previous meetings this is just a > filesystem directory. Should it be a directory in my home directory on > staging.openfabrics.org, in ~/public_html, or is there something else I > need to do to put this into place? I think from the previous MPI > specific meeting, this was supposed to be done in a web directory. Since > I am unclear, I wanted to ask here. Please place your SRPM under your home directory at ofed_1_2 directory. Then you can make this directory accessible to the web in this way: 1. mkdir public_html 2. chmod 755 public_html Now you can put any stuff under public_html (also symbolic links) and it will be available via web www.openfabrics.org/~/ Tziporet From or.gerlitz at gmail.com Wed Jan 31 05:33:12 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 31 Jan 2007 15:33:12 +0200 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <45BE747A.4060900@ichips.intel.com> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> <45BE747A.4060900@ichips.intel.com> Message-ID: <15ddcffd0701310533n3301de00g36c8015dccf6a6d1@mail.gmail.com> On 1/30/07, Sean Hefty wrote: > I believe that this patch lets you can do what you're trying to do. The group > handle would be the returned mgid from the initial join that created the group. 
> The mgid would need to be passed to other processes as an IPv6 address, who > issue a join request on that group. (The mgid is available from the > rdma_cm_event.param.ud.ah_attr.grh.dgid.) Sean, I understand that your approach relies on the uniqueness of the MGID being generated. This means that to have different MPI jobs use different MGIDs , the MGIDs must be generated --always-- on the same NODE and be propagated to other nodes/ranks participating in that MPI job - correct? Andrew - can you fulfil this demand? that is having the rank which generated MGIDs always run on the same node of the cluster??? Or. From afriedle at open-mpi.org Wed Jan 31 06:12:20 2007 From: afriedle at open-mpi.org (Andrew Friedley) Date: Wed, 31 Jan 2007 09:12:20 -0500 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <15ddcffd0701310533n3301de00g36c8015dccf6a6d1@mail.gmail.com> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> <45BE747A.4060900@ichips.intel.com> <15ddcffd0701310533n3301de00g36c8015dccf6a6d1@mail.gmail.com> Message-ID: <45C0A3C4.5020407@open-mpi.org> Or Gerlitz wrote: > Sean, > > I understand that your approach relies on the uniqueness of the MGID > being generated. This means that to have different MPI jobs use > different MGIDs , the MGIDs must be generated --always-- on the same > NODE and be propagated to other nodes/ranks participating in that MPI > job - correct? > > Andrew - can you fulfil this demand? that is having the rank which > generated MGIDs always run on the same node of the cluster??? Not across multiple MPI jobs, no -- MPI jobs have no awareness of each other whatsoever. Andrew From swise at opengridcomputing.com Wed Jan 31 06:15:02 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 08:15:02 -0600 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. 
In-Reply-To: <20070131102453.GA25270@mellanox.co.il> References: <20070119211659.18220.52930.stgit@dell3.ogc.int> <20070131102453.GA25270@mellanox.co.il> Message-ID: <1170252902.20525.7.camel@stevo-desktop> On Wed, 2007-01-31 at 12:24 +0200, Michael S. Tsirkin wrote: > > Quoting Roland Dreier : > > Subject: Re: [PATCH] The ibv_cmd_* create functions need to set the context. > > > > Thanks, applied to master and stable branches. > > Did you test it? > This patch (8b3d225476c99ea29a68109a7d40e5ef353d4388) causes ibv_ud_pingpong > to segfault on libmthca: libmthca never calls ibv_cmd_create_ah to context is now > never set. > > I didn't test UD. > Starting program: /usr/local/ofed/bin/ibv_ud_pingpong sw069 > [Thread debugging using libthread_db enabled] > [New Thread 47299578320592 (LWP 5085)] > local address: LID 0x0002, QPN 0x090406, PSN 0x71bffb > remote address: LID 0x0001, QPN 0x040406, PSN 0x92316a > 4096000 bytes in 0.02 seconds = 1893.99 Mbit/sec > 1000 iters in 0.02 seconds = 17.30 usec/iter > > Program received signal SIGSEGV, Segmentation fault. > [Switching to Thread 47299578320592 (LWP 5085)] > 0x00002b04ca3b7263 in __ibv_destroy_ah (ah=0x5050b0) at src/verbs.c:475 > 475 return ah->context->ops.destroy_ah(ah); > (gdb) p ah->context > $1 = (struct ibv_context *) 0x0 > > I actually think this approach is a wrong one: context should be > set in common code like ibv_create_ah, not in ibv_cmd_ which is > a library function low level driver might or might not call. > And certainly this kind of change does not seem appropriate for stable branch. > > I think the proper thing is for low level driver not to assume that > fields such as contex are intialized until create functions have returned. > Steve, pls fix your low level driver not to rely on this. > The issue is that the provider lib calls ibv_cmd_create_blah to create the object, then some failure happens (like a failure mmap()ing the object's DMA area to the process). 
At this point the provider lib must destroy this object that is created from the perspective of the ibv_cmd* interface. The only way to do that is to call the ibv_cmd_destroy_blah call, which needs the context field. So I don't think solving this in the provider lib is the right thing to do. > Roland, I have reverted this in OFED, please revert on master and stable. > I think we should fix the bug introduced: set the context field in the ibv_create_blah service if its not set after calling the provider method. Steve. From vlad at mellanox.co.il Wed Jan 31 06:48:21 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 31 Jan 2007 16:48:21 +0200 Subject: [openib-general] [PATCH ofed-1.2 alpha rel] ehca: reworked irq handler to support NAPI consistently In-Reply-To: <200701302052.28962.hnguyen@linux.vnet.ibm.com> References: <200701302052.28962.hnguyen@linux.vnet.ibm.com> Message-ID: <1170254901.6206.39.camel@vladsk-laptop> On Tue, 2007-01-30 at 20:52 +0100, Hoang-Nam Nguyen wrote: > Hi Vladimir, > here is a patch for ehca with reworked irq handler. With those changes > the performance result without/with scaling code and with NAPI (scaling > code turned off) is consistent. They also reduce the rate of drop > packets (when scaling code is turned off) significantly. > Thanks > Nam > PS: Roland, this patch is aligned with ofed-1.2 "only". I'l send this > patch for 2.6.21 separately next week. > > > Signed-off-by: Hoang-Nam Nguyen > --- > Added to kernel_patches/fixes in ~vlad/ofed_1_2/.git -- Vladimir Sokolovsky Mellanox Technologies Ltd. 
From halr at voltaire.com Wed Jan 31 06:48:15 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 09:48:15 -0500 Subject: [openib-general] Bugzilla Bug 329: HCA_FATAL_EVENT cause to OpenSM to stop functioning In-Reply-To: <45C06C81.4060500@dev.mellanox.co.il> References: <45C06C81.4060500@dev.mellanox.co.il> Message-ID: <1170254523.15660.179944.camel@hal.voltaire.com> Hi Yevgeny, On Wed, 2007-01-31 at 05:16, Yevgeny Kliteynik wrote: > Hi Hal. > > I noticed the following bug in Bugzilla: > > Bugzilla Bug 329: HCA_FATAL_EVENT cause to opensm to stop functioning > https://bugs.openfabrics.org/show_bug.cgi?id=329 > > When there is a HCA fatal event on the host that opensm is running on it, > the opensm stop to function (After the event, the driver restart the device, > and the port does not return to active state). > > If the opensm run in sweep mode , after the event you can see that the opensm > stop sweeping. > > I remember that a couple of months ago I sent a patch that takes care of this problem: > - in case of IBV_EVENT_DEVICE_FATAL, osm was forced to exit > - in case of IBV_EVENT_PORT_ERROR, osm initiated heavy sweep > > The problem with my patch was that it made osm to depend on uverbs module. > To resolve this problem, support should be added in umad, and then osm could > use this support. > > Do you know if some work in this area was done in umad? This has been on the list but unfortunately there has been no time yet to work on the local events support in libibumad. 
-- Hal > -- Yevgeny From pasha at dev.mellanox.co.il Wed Jan 31 07:10:13 2007 From: pasha at dev.mellanox.co.il (Pavel Shamis (Pasha)) Date: Wed, 31 Jan 2007 17:10:13 +0200 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <45C08C51.50607@mellanox.co.il> References: <45BDFF11.9080901@mellanox.co.il> <45BFF296.8000908@cse.ohio-state.edu> <45C08C51.50607@mellanox.co.il> Message-ID: <45C0B155.2090002@dev.mellanox.co.il> Tziporet Koren wrote: > Jeff Squyres wrote: >> It would be helpful to see the MVAPICH1 distribution for OFED 1.2 >> somewhere on the OFA server (under ~vlad/ofed_1_2 or >> ~vlad/public_html/ofed_1_2...?) for comparison / example purposes. > Pasha will place his SRPM on ~pasha/ofed_1_2 today I just finished to prepare the SRPM stuff. So you may find it: mvapich - http://www.openfabrics.org/~pasha/ofed_1_2/mvapich/ mpitests - http://www.openfabrics.org/~pasha/ofed_1_2/mpitests/ Pasha > > Tziporet > > _______________________________________________ > openib-general mailing list > openib-general at openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From HNGUYEN at de.ibm.com Wed Jan 31 07:14:07 2007 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Wed, 31 Jan 2007 10:14:07 -0500 Subject: [openib-general] [openfabrics-ewg] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures In-Reply-To: <6C2C79E72C305246B504CBA17B5500C9A0DCBE@mtlexch01.mtl.com> Message-ID: Hi, > 3. Each git maintainer: open ofed_1_2 branch till Feb 1. created branch ofed_1_2 for libehca. 
Regards Nam From swise at opengridcomputing.com Wed Jan 31 07:27:55 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 09:27:55 -0600 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <45BDFF11.9080901@mellanox.co.il> References: <45BDFF11.9080901@mellanox.co.il> Message-ID: <1170257275.20525.13.camel@stevo-desktop> On Mon, 2007-01-29 at 16:05 +0200, Tziporet Koren wrote: > Hi, > > This is the proposal for OFED 1.2 branching and tagging: > > Sources developed in OFA: > 1. Each git owner will open a branch with the name ofed_1_2. This > branch should be opened on 31-Jan (based on code readiness we will > review today). ofed_1_2 branch created for libcxgb3.git. Steve. From tziporet at mellanox.co.il Wed Jan 31 07:30:43 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Wed, 31 Jan 2007 17:30:43 +0200 Subject: [openib-general] [openfabrics-ewg] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures Message-ID: <6C2C79E72C305246B504CBA17B5500C9A0DCE1@mtlexch01.mtl.com> thanks -----Original Message----- From: Hoang-Nam Nguyen [mailto:HNGUYEN at de.ibm.com] Sent: Wednesday, January 31, 2007 5:14 PM To: Tziporet Koren Cc: EWG; openfabrics-ewg-bounces at openib.org; openib Subject: Re: [openfabrics-ewg] Minutes for January 29, 2007 teleconference about OFED 1.2 release integration and build procedures Hi, > 3. Each git maintainer: open ofed_1_2 branch till Feb 1. created branch ofed_1_2 for libehca. Regards Nam From swise at opengridcomputing.com Wed Jan 31 08:31:57 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 10:31:57 -0600 Subject: [openib-general] regression in ofed 1.2 Message-ID: <1170261117.24454.4.camel@stevo-desktop> Sean, I think librdmacm commit 1fd83b0bbbfc7fadba45390b98d5f9c944b42bdc broke iwarp usermode. 
I'm debugging now, but basically the change in rdma_create_qp() to call into the kernel to setup the qp init attributes doesn't work for iwarp because the iwcm hasn't been created at this point. So we fall off a NULL ptr in iw_cm_init_qp_attr(). I'm working up a fix for this because I think the iw_cm_id _should_ be created at the time the addr and/or route is resolved. But it isn't create until rdma_connect() is issued. Stay tuned. Bug 335 opened to track this. ---- commit 1fd83b0bbbfc7fadba45390b98d5f9c944b42bdc Author: Sean Hefty Date: Fri Jan 26 10:21:17 2007 -0800 Allow unicast traffic over IPOIB port space. Adjust the RMDA_PS_IPOIB to allow unicast traffic. This requires changing how QPs are initialized in order to get the correct qkey to use. We need to call into the kernel to get the initial QP attributes. Update the udaddy unicast test program to test this capability. From mshefty at ichips.intel.com Wed Jan 31 09:13:29 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 09:13:29 -0800 Subject: [openib-general] regression in ofed 1.2 In-Reply-To: <1170261117.24454.4.camel@stevo-desktop> References: <1170261117.24454.4.camel@stevo-desktop> Message-ID: <45C0CE39.2010404@ichips.intel.com> > I think librdmacm commit 1fd83b0bbbfc7fadba45390b98d5f9c944b42bdc broke > iwarp usermode. I'm debugging now, but basically the change in > rdma_create_qp() to call into the kernel to setup the qp init attributes > doesn't work for iwarp because the iwcm hasn't been created at this > point. So we fall off a NULL ptr in iw_cm_init_qp_attr(). I'm working > up a fix for this because I think the iw_cm_id _should_ be created at > the time the addr and/or route is resolved. But it isn't create until > rdma_connect() is issued. Stay tuned. There's also an issue mixing the newer librdamcm with older kernels that leads to a kernel crash. This is due to a bug in the kernel code that's now exposed. 
I'm working on a work-around in the librdmacm to avoid causing this crash. - Sean From mshefty at ichips.intel.com Wed Jan 31 09:24:51 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 09:24:51 -0800 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <15ddcffd0701310533n3301de00g36c8015dccf6a6d1@mail.gmail.com> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> <45BE747A.4060900@ichips.intel.com> <15ddcffd0701310533n3301de00g36c8015dccf6a6d1@mail.gmail.com> Message-ID: <45C0D0E3.5050903@ichips.intel.com> > I understand that your approach relies on the uniqueness of the MGID > being generated. This means that to have different MPI jobs use > different MGIDs , the MGIDs must be generated --always-- on the same > NODE and be propagated to other nodes/ranks participating in that MPI > job - correct? MGID uniqueness is provided by the SA when the join request contains an MGID of 0. There is no requirement that the MGIDs be generated on the same node. - Sean From mshefty at ichips.intel.com Wed Jan 31 09:26:37 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 09:26:37 -0800 Subject: [openib-general] [RFC][PATCH] rdma_cm: allow joins to return a unique address In-Reply-To: <20070131074015.GI3405@mellanox.co.il> References: <000101c743e0$ff0dac90$8698070a@amr.corp.intel.com> <45BE6B04.2020602@open-mpi.org> <45BE747A.4060900@ichips.intel.com> <45BF63C4.9040205@open-mpi.org> <45BF8E91.2050500@ichips.intel.com> <20070131074015.GI3405@mellanox.co.il> Message-ID: <45C0D14D.4020509@ichips.intel.com> > Sean, when you put something in ofed_1_2 branch I assume this means that > you want it included in OFED 1.2? In this case, you should let Vlad know > (Cc list). Understood. I sent a message yesterday that I created new ofed_1_2 branches in my git trees. 
- Sean From halr at voltaire.com Wed Jan 31 09:02:35 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 12:02:35 -0500 Subject: [openib-general] Diags/ibtracert: Add switch-map option to ibtracert Message-ID: <1170262906.15660.187974.camel@hal.voltaire.com> Diags/ibtracert: Add switch-map option to ibtracert Signed-off-by: Ira K. Weiny Signed-off-by: Hal Rosenstock diff --git a/diags/man/ibtracert.8 b/diags/man/ibtracert.8 index c1632ac..28f18b6 100644 --- a/diags/man/ibtracert.8 +++ b/diags/man/ibtracert.8 @@ -1,11 +1,11 @@ -.TH IBTRACERT 8 "July 25, 2006" "OpenIB" "OpenIB Diagnostics" +.TH IBTRACERT 8 "January 31, 2007" "OpenIB" "OpenIB Diagnostics" .SH NAME ibtracert\- trace InfiniBand path .SH SYNOPSIS .B ibtracert -[\-d(ebug)] [-v(erbose)] [\-D(irect)] [\-G(uids)] [-n(o_info)] [-m mlid] [-s smlid] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] [\-h(elp)] [ [ []]] +[\-d(ebug)] [-v(erbose)] [\-D(irect)] [\-G(uids)] [-n(o_info)] [-m mlid] [-s smlid] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] [\-\-switch\-map ] [\-h(elp)] [ [ []]] .SH DESCRIPTION .PP @@ -23,6 +23,10 @@ simple format; don't show additional inf .TP \fB\-m\fR show the multicast trace of the specified mlid +.TP +\fB\-\-switch\-map\fR +Specify a switch map. The switch map file maps GUIDs to more user friendly +names. See ibnetdiscover for switch map file format. 
.SH COMMON OPTIONS @@ -101,3 +105,6 @@ ibtracert -m 0xc000 4 16 # show multi .TP Hal Rosenstock .RI < halr at voltaire.com > +.TP +Ira Weiny +.RI < weiny2 at llnl.gov > diff --git a/diags/src/ibtracert.c b/diags/src/ibtracert.c index c69ff4e..34da658 100644 --- a/diags/src/ibtracert.c +++ b/diags/src/ibtracert.c @@ -35,6 +35,7 @@ # include #endif /* HAVE_CONFIG_H */ +#define _GNU_SOURCE #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include #define __BUILD_VERSION_TAG__ 1.2 #include @@ -65,6 +67,8 @@ static int force; static FILE *f; static char *argv0 = "ibtracert"; +static char *switch_map = NULL; +static FILE *switch_map_fp = NULL; #undef DEBUG #define DEBUG if (ibdebug || verbose) IBWARN @@ -146,6 +150,68 @@ clean_nodedesc(char *nodedesc) return (nodedesc); } +/** ========================================================================= + */ +static void +open_switch_map(void) +{ + if (switch_map) { + switch_map_fp = fopen(switch_map, "r"); + if (switch_map_fp == NULL) { + fprintf(stderr, + "WARNING failed to open switch map \"%s\" (%s)\n" + " Switch names will default to node descriptions\n", + switch_map, strerror(errno)); + } + } +} + +static void +close_switch_map(void) +{ + if (switch_map_fp) + fclose(switch_map_fp); +} + +static char * +lookup_switch_name(Node *node) +{ +#define NAME_LEN (256) + char *line = NULL; + size_t len = 0; + uint64_t guid = 0; + char *rc = NULL; + int line_count = 0; + uint64_t target_guid = node->nodeguid; + + if (switch_map_fp == NULL) + goto done; + + rewind(switch_map_fp); + for (line_count = 1; + getline(&line, &len, switch_map_fp) != -1; + line_count++) { + line[len-1] = '\0'; + if (line[0] == '#') { goto next_one; } + char *guid_str = strtok(line, "\"#"); + char *name = strtok(NULL, "\"#"); + if (!guid_str || !name) { goto next_one; } + guid = strtoull(guid_str, NULL, 0); + if (target_guid == guid) { + rc = strdup(name); + free(line); + goto done; + } +next_one: + free (line); + line = NULL; + } 
+done: + if (rc == NULL) + rc = strdup(clean_nodedesc(node->nodedesc)); + return (rc); +} + static int get_node(Node *node, Port *port, ib_portid_t *portid) { @@ -234,13 +300,20 @@ dump_endnode(int dump, char *prompt, Nod return; } - nodename = clean_nodedesc(node->nodedesc); + if (node->type == IB_NODE_SWITCH) + nodename = lookup_switch_name(node); + else + nodename = clean_nodedesc(node->nodedesc); + fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", prompt, (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), node->nodeguid, node->type == IB_NODE_SWITCH ? 0 : port->portnum, port->lid, port->lid + (1 << port->lmc) - 1, nodename); + + if (nodename && (node->type == IB_NODE_SWITCH)) + free(nodename); } static void @@ -251,7 +324,11 @@ dump_route(int dump, Node *node, int out if (!dump && !verbose) return; - nodename = clean_nodedesc(node->nodedesc); + if (node->type == IB_NODE_SWITCH) + nodename = lookup_switch_name(node); + else + nodename = clean_nodedesc(node->nodedesc); + if (dump == 1) fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", outport, port->portguid, port->portnum); @@ -262,6 +339,9 @@ dump_route(int dump, Node *node, int out port->portguid, port->portnum, port->lid, port->lid + (1 << port->lmc) - 1, nodename); + + if (nodename && (node->type == IB_NODE_SWITCH)) + free(nodename); } static int @@ -660,14 +740,18 @@ dump_mcpath(Node *node, int dumplevel) if (node->upnode) dump_mcpath(node->upnode, dumplevel); - nodename = clean_nodedesc(node->nodedesc); + if (node->type == IB_NODE_SWITCH) + nodename = lookup_switch_name(node); + else + nodename = clean_nodedesc(node->nodedesc); + if (!node->dist) { printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", (node->type <= IB_NODE_MAX ? 
node_type_str[node->type] : "???"), node->nodeguid, node->ports->portnum, node->ports->lid, node->ports->lid + (1 << node->ports->lmc) - 1, nodename); - return; + goto free_name; } if (node->dist) { @@ -691,6 +775,10 @@ dump_mcpath(Node *node, int dumplevel) node->nodeguid, node->ports->portnum, node->ports->lid, node->ports->lid + (1 << node->ports->lmc) - 1, nodename); + +free_name: + if (nodename && (node->type == IB_NODE_SWITCH)) + free(nodename); } static void @@ -704,7 +792,7 @@ usage(void) basename++; fprintf(stderr, "Usage: %s [-d(ebug) -v(erbose) -D(irect) -G(uids) -n(o_info) -C ca_name -P ca_port " - "-s smlid -t(imeout) timeout_ms -m mlid] \n", + "-s smlid -t(imeout) timeout_ms -m mlid --switch-map switch-map ] \n", basename); fprintf(stderr, "\n\tUnicast examples:\n"); fprintf(stderr, "\t\t%s 4 16\t\t\t# show path between lids 4 and 16\n", basename); @@ -747,6 +835,7 @@ main(int argc, char **argv) { "Version", 0, 0, 'V'}, { "help", 0, 0, 'h'}, { "usage", 0, 0, 'u'}, + { "switch-map", 1, 0, 1}, { } }; @@ -759,6 +848,9 @@ main(int argc, char **argv) if ( ch == -1 ) break; switch(ch) { + case 1: + switch_map = strdup(optarg); + break; case 'C': ca = optarg; break; @@ -815,6 +907,7 @@ main(int argc, char **argv) usage(); madrpc_init(ca, ca_port, mgmt_classes, 3); + open_switch_map(); if (ib_resolve_portid_str(&src_portid, argv[0], dest_type, sm_id) < 0) IBERROR("can't resolve source port %s", argv[0]); @@ -852,5 +945,7 @@ main(int argc, char **argv) /* dump multicast path */ dump_mcpath(endnode, dumplevel); + + close_switch_map(); exit(0); } From ardavis at ichips.intel.com Wed Jan 31 09:35:20 2007 From: ardavis at ichips.intel.com (Arlin Davis) Date: Wed, 31 Jan 2007 09:35:20 -0800 Subject: [openib-general] OFED 1.2 release - to be reviewed in the meeting today In-Reply-To: <000001c7449e$2e5396a0$ff0da8c0@amr.corp.intel.com> References: <000001c7449e$2e5396a0$ff0da8c0@amr.corp.intel.com> Message-ID: <45C0D358.20406@ichips.intel.com> *Sources developed in 
OFA:* >>1. Each git owner will open a branch with the name ofed_1_2. This branch >>should be opened on 31-Jan (based on code readiness we will review today). >> >> > > > ofed_1_2 branch created for dapl.git -arlin From halr at voltaire.com Wed Jan 31 09:29:42 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 12:29:42 -0500 Subject: [openib-general] [PATCH 10/10] osm: QoS in OpenSM In-Reply-To: <45BF6548.80104@dev.mellanox.co.il> References: <45BF6548.80104@dev.mellanox.co.il> Message-ID: <1170264561.15660.189494.camel@hal.voltaire.com> Hi Yevgeny, On Tue, 2007-01-30 at 10:33, Yevgeny Kliteynik wrote: > Checking PathRecord query for QoS constraints > > The QoS-aware path selection logic is implemented in a > separate function that is called only when QoS in OpenSM > is on. It causes some code duplication, but the idea is > to minimize the changes in the existing logic in OSM. > Eventually, these two functions (the old path selection > and the new QoS-aware path selection) will be merged > into a single function. Yes, this would be nice to do in the future as there is much overlap. Whether qos is carried in the request could be handled internally by this combined routine rather than outside to determine which routine to call. This will make for a lot less code. Some comments embedded below. 
> Signed-off-by: Yevgeny Kliteynik > --- > osm/opensm/osm_sa_path_record.c | 822 ++++++++++++++++++++++++++++++++++++++- > 1 files changed, 816 insertions(+), 6 deletions(-) > > diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c > index a0dbb07..2ff7a42 100644 > --- a/osm/opensm/osm_sa_path_record.c > +++ b/osm/opensm/osm_sa_path_record.c > @@ -70,6 +70,7 @@ > #include > #include > #endif > +#include > > #define OSM_PR_RCV_POOL_MIN_SIZE 64 > #define OSM_PR_RCV_POOL_GROW_SIZE 64 > @@ -87,6 +88,7 @@ typedef struct _osm_path_parms > uint8_t rate; > uint8_t sl; > uint8_t pkt_life; > + uint16_t class; > boolean_t reversible; > } osm_path_parms_t; > > @@ -716,6 +718,799 @@ __osm_pr_rcv_get_path_parms( > > /********************************************************************** > **********************************************************************/ > + > +static ib_api_status_t > +__osm_pr_rcv_get_path_parms_qos( This is the similar function to the non QoS one: __osm_pr_rcv_get_path_parms > + IN osm_pr_rcv_t* const p_rcv, > + IN const ib_path_rec_t* const p_pr, > + IN const osm_port_t* const p_src_port, > + IN const osm_port_t* const p_dest_port, > + IN const uint16_t dest_lid_ho, > + IN const ib_net64_t comp_mask, > + OUT osm_path_parms_t* const p_parms ) > +{ > + const osm_node_t* p_node; > + const osm_physp_t* p_physp; > + const osm_physp_t* p_src_physp; > + const osm_physp_t* p_dest_physp; > + const osm_prtn_t* p_prtn; > + const ib_port_info_t* p_pi; > + ib_api_status_t status = IB_SUCCESS; > + ib_net16_t pkey = 0; > + ib_net16_t shared_pkey = 0; > + uint8_t mtu = 0; > + uint8_t rate = 0; > + uint8_t pkt_life = 0; > + uint8_t sl = 0; > + uint16_t class = 0; > + uint8_t required_mtu; > + uint8_t required_rate; > + uint8_t required_pkt_life; > + uint8_t in_port_num; > + uint8_t out_port_num; > + ib_net16_t dest_lid; > + uint8_t i; > + uint8_t vl; > + ib_slvl_table_t * p_slvl_tbl = NULL; > + boolean_t valid_sls[IB_MAX_NUM_VLS]; > + boolean_t 
sl2vl_valid_path = FALSE; > + uint8_t first_valid_sl; > + osm_qos_level_t * p_qos_level = NULL; > + > + OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_get_path_parms_qos ); > + > + memset(valid_sls,TRUE,sizeof(valid_sls)); > + dest_lid = cl_hton16( dest_lid_ho ); > + > + p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); > + p_physp = osm_port_get_default_phys_ptr( p_src_port ); > + p_src_physp = p_physp; > + p_pi = &p_physp->port_info; > + > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + rate = ib_port_info_compute_rate( p_pi ); > + > + /* > + * Mellanox Tavor device performance is better using 1K MTU. > + * If required MTU and MTU selector are such that 1K is OK > + * and at least one end of the path is Tavor we override the > + * port MTU with 1K. > + */ > + if ( p_rcv->p_subn->opt.enable_quirks && > + __osm_sa_path_rec_apply_tavor_mtu_limit( > + p_pr, p_src_port, p_dest_port, comp_mask) ) > + { > + if (mtu > IB_MTU_LEN_1024) > + { > + mtu = IB_MTU_LEN_1024; > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Optimized Path MTU to 1K for Mellanox Tavor device\n"); > + } > + } > + > + /* > + * Walk the subnet object from source to destination, > + * tracking the most restrictive rate and mtu values along the way... > + * > + * If source port node is a switch, then p_physp should > + * point to the port that routes the destination lid > + */ > + > + p_node = osm_physp_get_node_ptr( p_physp ); > + > + if( p_node->sw ) > + { > + /* source node is a switch */ > + in_port_num = osm_physp_get_port_num(p_physp); > + /* > + * If the dest_lid_ho is equal to the lid of the switch pointed by > + * p_sw then p_physp will be the physical port of the switch port zero, > + * and out_port_num will be 0. 
> + */ > + p_physp = osm_switch_get_route_by_lid(p_node->sw, cl_hton16( dest_lid_ho ) ); > + if ( p_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F02: " > + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); Nit: Error codes should be made unique and not overlap existing ones. > + status = IB_ERROR; > + goto Exit; > + } > + p_src_physp = p_physp; > + out_port_num = osm_physp_get_port_num(p_physp); > + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); > + } > + else > + { > + /* source node is CA or Router */ > + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); > + } > + > + for (i = 0; i < IB_MAX_NUM_VLS; i++) > + { > + if (valid_sls[i]) > + { > + vl = ib_slvl_table_get(p_slvl_tbl,i); > + if (vl == IB_DROP_VL) Does vl > Operational VLs need checking here or is it never set this way ? > + valid_sls[i] = FALSE; > + } > + } > + > + /* > + * now get pointer to the destination port (same as above) > + */ > + p_node = osm_physp_get_node_ptr( p_dest_physp ); > + > + if( p_node->sw ) > + { > + p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); > + if ( p_dest_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F03: " > + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); > + status = IB_ERROR; > + goto Exit; > + } > + } > + > + /* > + * Now go through the path step by step > + */ > + > + while( p_physp != p_dest_physp ) > + { > + p_physp = osm_physp_get_remote( p_physp ); > + if ( p_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F04: " > + "Cannot find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( 
p_node ) ) ); > + status = IB_ERROR; > + goto Exit; > + } > + > + in_port_num = osm_physp_get_port_num(p_physp); > + > + /* this is point to point case (no switch in between) */ > + if( p_physp == p_dest_physp ) > + break; Ordering of check for switch and point to point case are different here and original routine. Should they be the same ? If so, which should change ? (Any reason why this was moved in this routine ?) > + /* Check parameters for the ingress port in this switch */ > + p_pi = &p_physp->port_info; > + > + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) > + { > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest MTU = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + mtu, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + if( rate > ib_port_info_compute_rate( p_pi ) ) > + { > + rate = ib_port_info_compute_rate( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest rate = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + rate, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + p_node = osm_physp_get_node_ptr( p_physp ); > + if( !p_node->sw ) > + { > + /* > + There is some sort of problem in the subnet object! > + If this isn't a switch, we should have reached > + the destination by now! 
> + */ > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F05: " > + "Internal error, bad path\n" ); > + status = IB_ERROR; > + goto Exit; > + } > + > + /* Continue with the egress port on this switch */ > + p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid ); > + > + if ( p_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F06: " > + "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); > + status = IB_ERROR; > + goto Exit; > + } > + > + CL_ASSERT( p_physp ); > + CL_ASSERT( osm_physp_is_valid( p_physp ) ); > + > + p_pi = &p_physp->port_info; > + > + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) > + { > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest MTU = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + mtu, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + if( rate > ib_port_info_compute_rate( p_pi ) ) > + { > + rate = ib_port_info_compute_rate( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest rate = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + rate, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + out_port_num = osm_physp_get_port_num(p_physp); > + > + /* > + * Check SL2VL table of the switch > + */ > + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); > + for ( i = 0; i < IB_MAX_NUM_VLS; i++ ) > + { > + if (valid_sls[i]) > + { > + vl = ib_slvl_table_get(p_slvl_tbl,i); > + if (vl == IB_DROP_VL) > + valid_sls[i] = FALSE; > + } > + 
} > + > + /* go to the next step in the path */ > + } > + > + /* p_physp now points to the destination */ > + > + p_pi = &p_physp->port_info; > + > + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) > + { > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest MTU = %u at destination port 0x%016" PRIx64 "\n", > + mtu, > + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); > + } > + } > + > + if( rate > ib_port_info_compute_rate( p_pi ) ) > + { > + rate = ib_port_info_compute_rate( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest rate = %u at destination port 0x%016" PRIx64 "\n", > + rate, > + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); > + } > + } > + > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Path min MTU = %u, min rate = %u\n", mtu, rate ); > + } > + > + /* check whether there is some SL that won't lead to VL15 eventually */ > + for (i = 0; i < IB_MAX_NUM_VLS; i++) > + { > + if (valid_sls[i]) > + { > + sl2vl_valid_path = TRUE; > + first_valid_sl = i; > + break; > + } > + } > + > + if (!sl2vl_valid_path) > + { > + /* all the SLs will eventually lead to VL15 on this path */ > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + /* > + * According to spec definition IBA 1.2 Table 205 PacketLifeTime > + * description, for loopback paths, packetLifeTime shall be zero. 
> + */ > + if ( p_src_port == p_dest_port ) > + pkt_life = 0; /* loopback */ > + else > + pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; > + > + /* > + * Get QoS Level object according to the path request > + */ > + osm_qos_parser_get_qos_level_by_pr(p_rcv, > + p_pr, > + p_src_port, > + p_dest_port, > + dest_lid_ho, > + comp_mask, > + &p_qos_level); > + > + if (p_qos_level) > + { > + /* > + * Found QoS level that should be applied to this path record request > + */ > + > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "PathRecord request matches QoS Level %u (%s)\n", > + p_qos_level->sn, > + (p_qos_level->use)? p_qos_level->use : "no description" ) ; > + } > + > + /* adjust MTU limit according to QoS constraints */ > + if (p_qos_level->mtu_limit_set && (mtu > p_qos_level->mtu_limit) ) > + { > + mtu = p_qos_level->mtu_limit; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new smallest MTU = %u\n", > + mtu); > + } > + } > + > + /* adjust Rate limit according to QoS constraints */ > + if (p_qos_level->rate_limit_set && (rate > p_qos_level->rate_limit) ) > + { > + rate = p_qos_level->rate_limit; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new smallest Rate = %u\n", > + rate); > + } > + } > + > + /* adjust Packet Lifetime according to QoS constraints */ > + if (p_qos_level->pkt_life_set && (pkt_life > p_qos_level->pkt_life) ) > + { > + pkt_life = p_qos_level->pkt_life; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new smallest Packet Lifetime = %u\n", > + pkt_life); > + } > + } > + > + /* adjust SL according to 
QoS constraints */ > + if (p_qos_level->sl_set) > + { > + if (!valid_sls[p_qos_level->sl]) > + { > + status = IB_NOT_FOUND; > + goto Exit; > + } > + else > + { > + sl = p_qos_level->sl; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new SL = %u\n", > + sl); > + } > + } > + } > + > + /* adjust PKey according to QoS constraints */ > + if (p_qos_level->pkey_set) > + { > + pkey = p_qos_level->pkey; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new PKey = %u\n", > + pkey); > + } > + } > + > + /* adjust Class according to QoS constraints */ > + if (p_qos_level->class_set) > + { > + class = p_qos_level->class; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new Class = %u\n", > + class); > + } > + } > + > + } /*if (p_qos_level)*/ > + > + /* > + * Determine if these values meet the user criteria > + * and adjust appropriately > + */ > + > + /* we silently ignore cases where only the MTU selector is defined */ > + if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && > + ( comp_mask & IB_PR_COMPMASK_MTU ) ) > + { > + required_mtu = ib_path_rec_mtu( p_pr ); > + switch( ib_path_rec_mtu_sel( p_pr ) ) > + { > + case 0: /* must be greater than */ > + if( mtu <= required_mtu ) > + status = IB_NOT_FOUND; > + break; > + > + case 1: /* must be less than */ > + if( mtu >= required_mtu ) > + { > + /* adjust to use the highest mtu > + lower then the required one */ > + if( required_mtu > 1 ) > + mtu = required_mtu - 1; > + else > + status = IB_NOT_FOUND; > + } > + break; > + > + case 2: /* exact match */ > + if( mtu < required_mtu ) > + status = IB_NOT_FOUND; > + else > + mtu = required_mtu; > + break; > + > + case 3: /* largest 
available */ > + /* can't be disqualified by this one */ > + break; > + > + default: > + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ > + CL_ASSERT( FALSE ); > + status = IB_ERROR; > + break; > + } > + } > + if (status != IB_SUCCESS) > + goto Exit; > + > + /* we silently ignore cases where only the Rate selector is defined */ > + if ( ( comp_mask & IB_PR_COMPMASK_RATESELEC ) && > + ( comp_mask & IB_PR_COMPMASK_RATE ) ) > + { > + required_rate = ib_path_rec_rate( p_pr ); > + switch( ib_path_rec_rate_sel( p_pr ) ) > + { > + case 0: /* must be greater than */ > + if( rate <= required_rate ) > + status = IB_NOT_FOUND; > + break; > + > + case 1: /* must be less than */ > + if( rate >= required_rate ) > + { > + /* adjust the rate to use the highest rate > + lower then the required one */ > + if( required_rate > 2 ) > + rate = required_rate - 1; > + else > + status = IB_NOT_FOUND; > + } > + break; > + > + case 2: /* exact match */ > + if( rate < required_rate ) > + status = IB_NOT_FOUND; > + else > + rate = required_rate; > + break; > + > + case 3: /* largest available */ > + /* can't be disqualified by this one */ > + break; > + > + default: > + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ > + CL_ASSERT( FALSE ); > + status = IB_ERROR; > + break; > + } > + } > + if (status != IB_SUCCESS) > + goto Exit; > + > + /* we silently ignore cases where only the PktLife selector is defined */ > + if ( ( comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC ) && > + ( comp_mask & IB_PR_COMPMASK_PKTLIFETIME ) ) > + { > + required_pkt_life = ib_path_rec_pkt_life( p_pr ); > + switch( ib_path_rec_pkt_life_sel( p_pr ) ) > + { > + case 0: /* must be greater than */ > + if( pkt_life <= required_pkt_life ) > + status = IB_NOT_FOUND; > + break; > + > + case 1: /* must be less than */ > + if( pkt_life >= required_pkt_life ) > + { > + /* adjust the lifetime to use the highest possible > + lower then the required one */ > + if( required_pkt_life > 1 ) > + pkt_life = 
required_pkt_life - 1; > + else > + status = IB_NOT_FOUND; > + } > + break; > + > + case 2: /* exact match */ > + if( pkt_life < required_pkt_life ) > + status = IB_NOT_FOUND; > + else > + pkt_life = required_pkt_life; > + break; > + > + case 3: /* smallest available */ > + /* can't be disqualified by this one */ > + break; > + > + default: > + /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ > + CL_ASSERT( FALSE ); > + status = IB_ERROR; > + break; > + } > + } > + if (status != IB_SUCCESS) > + goto Exit; > + > + /* > + * set Pkey for this path record request > + */ > + > + shared_pkey = osm_physp_find_common_pkey( p_src_physp, p_dest_physp ); > + if ( !shared_pkey ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F07: " > + "Ports do not have any shared PKeys\n"); > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + if( (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && > + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) > + { > + if (p_qos_level && p_qos_level->pkey_set) > + pkey = p_qos_level->pkey; > + else > + pkey = shared_pkey; > + } > + else if (comp_mask & IB_PR_COMPMASK_PKEY) > + { > + /* PathRecord requires specific PKey */ > + if (p_qos_level && p_qos_level->pkey_set) > + { > + /* check that QoS pkey matches the required pkey */ > + if (p_qos_level->pkey != p_pr->pkey) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS PKey constraint (0x%04x) doesn't match required PKey (0x%04x)\n", > + cl_ntoh16(p_qos_level->pkey), cl_ntoh16(p_pr->pkey)); > + status = IB_NOT_FOUND; > + goto Exit; > + } > + pkey = p_qos_level->pkey; > + } > + else > + pkey = p_pr->pkey; > + } > + else > + { > + if (p_qos_level && p_qos_level->pkey_set) > + pkey = p_qos_level->pkey; > + else > + pkey = shared_pkey; > + } > + > + /* > + * PKey has been set. Now check that ports share this PKey. 
> + */ > + > + if ( (pkey != shared_pkey) && > + (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Ports do not share specified PKey 0x%04x\n", > + cl_ntoh16(pkey)); > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + /* > + * Done selecting PKey - Now select valid SL > + */ > + > + if (pkey) > + { > + p_prtn = (osm_prtn_t *)cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl, > + pkey & cl_ntoh16((uint16_t)~0x8000)); > + if ( p_prtn == (osm_prtn_t *)cl_qmap_end(&p_rcv->p_subn->prtn_pkey_tbl) ) > + { > + /* this may be possible when pkey tables are created somehow in > + previous runs or things are going wrong here */ > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F08: " > + "No partition found for PKey 0x%04x - using default SL %d\n", > + cl_ntoh16(pkey), sl); > + } > + else > + { > + if (!valid_sls[p_prtn->sl]) > + { > + /* partition forces to use SL that eventually leads to VL15 */ > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Partition constraints (pkey=%u, sl=%u) lead to VL15\n", > + pkey, p_prtn->sl); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + else > + { > + /* partition's SL is valid */ > + if ( p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != p_prtn->sl) ) > + { > + /* partition's SL doesn't match QoS Level SL */ > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Partition constraints (pkey=%u, sl=%u): SL doesn't match QoS SL(%u)\n", > + pkey, p_prtn->sl, p_qos_level->sl); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + else > + { > + /* set SL to be partition's SL */ > + sl = p_prtn->sl; > + } > + } > + } > + } > + else > + { > + /* > + * No pkey (no partition) > + */ > + if 
(p_qos_level && p_qos_level->sl_set) > + { > + /* sl has been already set */ > + } > + else > + { > + if (comp_mask & IB_PR_COMPMASK_SL) > + { > + /* specific SL was requested */ > + if (!valid_sls[ib_path_rec_sl(p_pr)]) > + { > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: required SL (%u) leads to VL15\n", > + ib_path_rec_sl(p_pr)); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + /* set sl to whatever requested */ > + sl = ib_path_rec_sl(p_pr); > + } > + else > + { > + /* set sl to the first valid sl that won't lead to VL15 */ > + sl = first_valid_sl; > + } > + } > + } > + > + if ( (comp_mask & IB_PR_COMPMASK_SL) && > + (ib_path_rec_sl( p_pr ) != sl) ) > + { > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: required SL (%u) doesn't match QoS constraints\n", > + ib_path_rec_sl(p_pr)); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + /* reset pkey when raw traffic */ > + if( (pkey) && > + (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && > + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) > + { > + pkey = 0; > + } > + > + p_parms->mtu = mtu; > + p_parms->rate = rate; > + p_parms->pkt_life = pkt_life; > + p_parms->pkey = pkey; > + p_parms->sl = sl; > + p_parms->class = class; > + > + Exit: > + OSM_LOG_EXIT( p_rcv->p_log ); > + return( status ); > +} > + > +/********************************************************************** > + **********************************************************************/ > static void > __osm_pr_rcv_build_pr( > IN osm_pr_rcv_t* const p_rcv, > @@ -774,7 +1569,8 @@ __osm_pr_rcv_build_pr( > #endif > > p_pr->pkey = p_parms->pkey; > - p_pr->sl = cl_hton16(p_parms->sl); > + ib_path_rec_set_qos_class(p_pr,p_parms->class); > + ib_path_rec_set_sl(p_pr,p_parms->sl); > p_pr->mtu = 
(uint8_t)(p_parms->mtu | 0x80); > p_pr->rate = (uint8_t)(p_parms->rate | 0x80); > > @@ -832,10 +1628,14 @@ __osm_pr_rcv_get_lid_pair_path( > goto Exit; > } > > - status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, > - p_dest_port, dest_lid_ho, > - comp_mask, &path_parms ); > - > + if (p_rcv->p_subn->opt.no_qos) Shouldn't this be based on p_rcv->p_subn.opt.qos_policy_file rather than no_qos ? I think there are cases where the QoS will be used without the QoS policy (higher level QoS support). -- Hal > + status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, > + p_dest_port, dest_lid_ho, > + comp_mask, &path_parms ); > + else > + status = __osm_pr_rcv_get_path_parms_qos( p_rcv, p_pr, p_src_port, > + p_dest_port, dest_lid_ho, > + comp_mask, &path_parms ); > if( status != IB_SUCCESS ) > { > cl_qlock_pool_put( &p_rcv->pr_pool, &p_pr_item->pool_item ); > @@ -849,6 +1649,16 @@ __osm_pr_rcv_get_lid_pair_path( > comp_mask, &rev_path_parms ); > path_parms.reversible = ( rev_path_status == IB_SUCCESS ); > > + /* > + * ToDo: > + * ToDo: The whole implementation of reversible path is wrong. > + * ToDo: It is not enough to know that the reversed path exist. > + * ToDo: We should also adjust limits (mtu, rate, etc...) by > + * ToDo: comparing path and reversed path's limits. > + * ToDo: Also, need to think about reversible path in QoS. > + * ToDo: > + */ > + > /* did we get a Reversible Path compmask ? 
*/ > /* > NOTE that if the reversible component = 0, it is a don't care > @@ -2053,7 +2863,7 @@ osm_pr_rcv_process( > /* SL, Hop Limit, and Flow Label */ > ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, > &sl, &flow_label, &hop_limit ); > - p_pr_item->path_rec.sl = cl_hton16( sl ); > + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); > #ifndef ROUTER_EXP > p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | > (flow_label << 8); From halr at voltaire.com Wed Jan 31 09:01:56 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 12:01:56 -0500 Subject: [openib-general] Diags/ibnetdiscover: Add switch-map option to ibnetdiscover Message-ID: <1170262905.15660.187973.camel@hal.voltaire.com> Diags/ibnetdiscover: Add switch-map option to ibnetdiscover Signed-off-by: Ira K. Weiny Signed-off-by: Hal Rosenstock diff --git a/diags/man/ibnetdiscover.8 b/diags/man/ibnetdiscover.8 index 4960a8f..000edb5 100644 --- a/diags/man/ibnetdiscover.8 +++ b/diags/man/ibnetdiscover.8 @@ -1,11 +1,11 @@ -.TH IBNETDISCOVER 8 "October 16, 2006" "OpenIB" "OpenIB Diagnostics" +.TH IBNETDISCOVER 8 "January 31, 2007" "OpenIB" "OpenIB Diagnostics" .SH NAME ibnetdiscover \- discover InfiniBand topology .SH SYNOPSIS .B ibnetdiscover -[\-d(ebug)] [\-e(rr_show)] [\-v(erbose)] [\-s(how)] [\-l(ist)] [\-g(rouping)] [\-H(ca_list)] [\-S(witch_list)] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] [\-h(elp)] [] +[\-d(ebug)] [\-e(rr_show)] [\-v(erbose)] [\-s(how)] [\-l(ist)] [\-g(rouping)] [\-H(ca_list)] [\-S(witch_list)] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] [\--switch-map ] [\-h(elp)] [] .SH DESCRIPTION .PP @@ -34,6 +34,10 @@ List of connected switches .TP \fB\-s\fR, \fB\-\-show\fR Show more information +.TP +\fB\-\-switch\-map\fR +Specify a switch map. The switch map file maps GUIDs to more user friendly +names. See file format below. 
.SH COMMON OPTIONS @@ -89,7 +93,63 @@ by the following criteria: If a port and/or CA name is specified, the user request is attempted to be fulfilled, and will fail if it is not possible. +.SH SWITCH MAP FILE FORMAT +The switch map is used to specify a user friendly name for switches in the +output. GUIDs are used to perform the lookup. + +.TP +\fBGenerically:\fR + +# comment +.br + "" + +.TP +\fBExample:\fR + +# IB1 +.br +# Line cards +.br +0x0008f104003f125c "IB1 (Rack 11 slot 1 ) ISR9288/ISR9096 Voltaire sLB-24D" +.br +0x0008f104003f125d "IB1 (Rack 11 slot 1 ) ISR9288/ISR9096 Voltaire sLB-24D" +.br +0x0008f104003f10d2 "IB1 (Rack 11 slot 2 ) ISR9288/ISR9096 Voltaire sLB-24D" +.br +0x0008f104003f10d3 "IB1 (Rack 11 slot 2 ) ISR9288/ISR9096 Voltaire sLB-24D" +.br +0x0008f104003f10bf "IB1 (Rack 11 slot 12 ) ISR9288/ISR9096 Voltaire sLB-24D" +.br +.br +# Spines +.br +0x0008f10400400e2d "IB1 (Rack 11 spine 1 ) ISR9288 Voltaire sFB-12D" +.br +0x0008f10400400e2e "IB1 (Rack 11 spine 1 ) ISR9288 Voltaire sFB-12D" +.br +0x0008f10400400e2f "IB1 (Rack 11 spine 1 ) ISR9288 Voltaire sFB-12D" +.br +0x0008f10400400e31 "IB1 (Rack 11 spine 2 ) ISR9288 Voltaire sFB-12D" +.br +0x0008f10400400e32 "IB1 (Rack 11 spine 2 ) ISR9288 Voltaire sFB-12D" +.br +.br +# GUID Switch Name +.br +0x0008f10400411a08 "SW1 (Rack 3) ISR9024 Voltaire 9024D" +.br +0x0008f10400411a28 "SW2 (Rack 3) ISR9024 Voltaire 9024D" +.br +0x0008f10400411a34 "SW3 (Rack 3) ISR9024 Voltaire 9024D" +.br +0x0008f104004119d0 "SW4 (Rack 3) ISR9024 Voltaire 9024D" +.br + .SH AUTHOR .TP Hal Rosenstock .RI < halr at voltaire.com > +.TP +Ira Weiny +.RI < weiny2 at llnl.gov > diff --git a/diags/src/ibnetdiscover.c b/diags/src/ibnetdiscover.c index ec47961..c0ed563 100644 --- a/diags/src/ibnetdiscover.c +++ b/diags/src/ibnetdiscover.c @@ -74,6 +74,9 @@ static FILE *f; static char *argv0 = "ibnetdiscover"; +static char *switch_map = NULL; +static FILE *switch_map_fp = NULL; + Node *nodesdist[MAXHOPS+1]; /* last is Ca list */ Node 
*mynode; int maxhops_discovered = 0; @@ -201,6 +204,68 @@ clean_nodedesc(char *nodedesc) return (nodedesc); } +/** ========================================================================= + */ +static void +open_switch_map(void) +{ + if (switch_map != NULL) { + switch_map_fp = fopen(switch_map, "r"); + if (switch_map_fp == NULL) { + fprintf(stderr, + "WARNING failed to open switch map \"%s\" (%s)\n", + switch_map, strerror(errno)); + } + } +} + +static void +close_switch_map(void) +{ + if (switch_map_fp) + fclose(switch_map_fp); +} + +static char * +lookup_switch_name(Node *node) +{ +#define NAME_LEN (256) + char *line = NULL; + size_t len = 0; + uint64_t guid = 0; + char *rc = NULL; + int line_count = 0; + uint64_t target_guid = node->nodeguid; + + if (switch_map_fp == NULL) + goto done; + + rewind(switch_map_fp); + for (line_count = 1; + getline(&line, &len, switch_map_fp) != -1; + line_count++) { + line[len-1] = '\0'; + if (line[0] == '#') { goto next_one; } + char *guid_str = strtok(line, "\"#"); + char *name = strtok(NULL, "\"#"); + if (!guid_str || !name) { goto next_one; } + guid = strtoull(guid_str, NULL, 0); + if (target_guid == guid) + { + rc = strdup(name); + free (line); + goto done; + } +next_one: + free (line); + line = NULL; + } +done: + if (rc == NULL) + rc = strdup(clean_nodedesc(node->nodedesc)); + return (rc); +} + static void dump_endnode(ib_portid_t *path, char *prompt, Node *node, Port *port) { @@ -454,7 +519,12 @@ void list_node(Node *node) { char *node_type; + char *nodename = NULL; + if (node->type == SWITCH_NODE) + nodename = lookup_switch_name(node); + else + nodename = clean_nodedesc(node->nodedesc); switch(node->type) { case SWITCH_NODE: node_type = "Switch"; @@ -472,7 +542,10 @@ list_node(Node *node) fprintf(f, "%s\t : 0x%016" PRIx64 " ports %d devid 0x%x vendid 0x%x \"%s\"\n", node_type, node->nodeguid, node->numports, node->devid, node->vendid, - clean_nodedesc(node->nodedesc)); + nodename); + + if (nodename && (node->type == 
SWITCH_NODE)) + free(nodename); } void @@ -499,6 +572,7 @@ void out_switch(Node *node, int group) { char *str; + char *nodename = NULL; out_ids(node); fprintf(f, "switchguid=0x%" PRIx64, node->nodeguid); @@ -520,11 +594,17 @@ out_switch(Node *node, int group) } } + if (node->type == SWITCH_NODE) + nodename = lookup_switch_name(node); + else + nodename = clean_nodedesc(node->nodedesc); fprintf(f, "\nSwitch\t%d %s\t\t# %s %s port 0 lid %d lmc %d\n", node->numports, node_name(node), - clean_nodedesc(node->nodedesc), + nodename, node->smaenhsp0 ? "enhanced" : "base", node->smalid, node->smalmc); + if (nodename && (node->type == SWITCH_NODE)) + free(nodename); } void @@ -573,25 +653,37 @@ void out_switch_port(Port *port, int group) { char *ext_port_str = NULL; + char *rem_nodename = NULL; DEBUG("port %p:%d remoteport %p", port, port->portnum, port->remoteport); fprintf(f, "[%d]", port->portnum); + ext_port_str = out_ext_port(port, group); if (ext_port_str) fprintf(f, "%s", ext_port_str); + + if (port->remoteport->node->type == SWITCH_NODE) + rem_nodename = lookup_switch_name(port->remoteport->node); + else + rem_nodename = clean_nodedesc(port->remoteport->node->nodedesc); + ext_port_str = out_ext_port(port->remoteport, group); fprintf(f, "\t%s[%d]%s\t\t# \"%s\" lid %d\n", node_name(port->remoteport->node), port->remoteport->portnum, ext_port_str ? ext_port_str : "", - clean_nodedesc(port->remoteport->node->nodedesc), + rem_nodename, port->remoteport->node->type == SWITCH_NODE ? 
port->remoteport->node->smalid : port->remoteport->lid); + + if (rem_nodename && (port->remoteport->node->type == SWITCH_NODE)) + free(rem_nodename); } void out_ca_port(Port *port, int group) { char *str = NULL; + char *rem_nodename = NULL; fprintf(f, "[%d]\t%s[%d]", port->portnum, node_name(port->remoteport->node), @@ -599,10 +691,16 @@ out_ca_port(Port *port, int group) str = out_ext_port(port->remoteport, group); if (str) fprintf(f, "%s", str); + + if (port->remoteport->node->type == SWITCH_NODE) + rem_nodename = lookup_switch_name(port->remoteport->node); + else + rem_nodename = clean_nodedesc(port->remoteport->node->nodedesc); fprintf(f, "\t\t# lid %d lmc %d \"%s\" lid %d\n", - port->lid, port->lmc, - clean_nodedesc(port->remoteport->node->nodedesc), + port->lid, port->lmc, rem_nodename, port->remoteport->node->type == SWITCH_NODE ? port->remoteport->node->smalid : port->remoteport->lid); + if (rem_nodename && (port->remoteport->node->type == SWITCH_NODE)) + free(rem_nodename); } int @@ -739,8 +837,9 @@ void usage(void) { fprintf(stderr, "Usage: %s [-d(ebug)] -e(rr_show) -v(erbose) -s(how) -l(ist) -g(rouping) -H(ca_list) -S(witch_list) -V(ersion) -C ca_name -P ca_port " - "-t(imeout) timeout_ms] []\n", + "-t(imeout) timeout_ms --switch-map switch-map] []\n", argv0); + fprintf(stderr, " --switch-map specify a switch-map file\n"); exit(-1); } @@ -768,6 +867,7 @@ main(int argc, char **argv) { "Hca_list", 0, 0, 'H'}, { "Switch_list", 0, 0, 'S'}, { "timeout", 1, 0, 't'}, + { "switch-map", 1, 0, 1}, { "Version", 0, 0, 'V'}, { "help", 0, 0, 'h'}, { "usage", 0, 0, 'u'}, @@ -783,6 +883,9 @@ main(int argc, char **argv) if ( ch == -1 ) break; switch(ch) { + case 1: + switch_map = strdup(optarg); + break; case 'C': ca = optarg; break; @@ -836,6 +939,7 @@ main(int argc, char **argv) IBERROR("can't open file %s for writing", argv[0]); madrpc_init(ca, ca_port, mgmt_classes, 2); + open_switch_map(); if (discover(&my_portid) < 0) IBERROR("discover"); @@ -845,5 +949,6 @@ 
main(int argc, char **argv) dump_topology(list, group); + close_switch_map(); exit(0); } From halr at voltaire.com Wed Jan 31 09:41:43 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 12:41:43 -0500 Subject: [openib-general] [PATCH 0/10] osm: QoS in OpenSM In-Reply-To: <45BF5B6C.30406@dev.mellanox.co.il> References: <45BF5B6C.30406@dev.mellanox.co.il> Message-ID: <1170265270.15660.190215.camel@hal.voltaire.com> Hi Yevgeny, On Tue, 2007-01-30 at 09:51, Yevgeny Kliteynik wrote: > Hi Hal. > > The following is a series of 10 patches: > 1. QoS policy file parser Yacc file > 2. QoS policy file parser Lex file > 3. QoS policy file parser Yacc & Lex generated files > 4. QoS policy file parser header file > 5. QoS policy file parser C file with auxiliary functions > 6. Compilation changes for QoS policy file parser: > Added new files to makefiles. > Introduced new configuration switch '--enable-maintainer-mode', > which will run Lex & Yacc instead of just using the generated > files. > 7. Renamed static function find_prtn_by_name() to non-static osm_prtn_find_by_name() > This function will be used later by the PathRecord logic. > 8. Added QoS class and service id fields to the path record. > 9. Added new command line option for OSM: '-Y' or '--qos_policy_file' > 10.Checking PathRecord query for QoS constraints. Is everyone on the list satisfied with an XML format or should there be a text version ? Is anyone concerned about the ease of configuring XML for QoS ? IMO, the XML syntax needs to be explained, discussed, and vetted on the list. I am hopping this can occur reasonably quickly. If we are doing XML for this, we need to get to a stable agreed syntax. A couple of missing minor things: SA ClassPortInfo and SA MultiPathRecord similar to PathRecord A major missing component is a QoS manager which supports the granular configuration of the SL2VL and VLArb tables. Based on our experience with the existing QoS manager, this effort is not to be minimized. 
If this is not part of this package, a fair portion of the QoS syntax is "dormant". I know this can be run on top of the existing QoS manager to get a more complete QoS solution than what already exists so this could be considered a stepping stone towards that. -- Hal > -- > Yevgeny > > Signed-off-by: Yevgeny Kliteynik > > > From jgunthorpe at obsidianresearch.com Wed Jan 31 10:19:34 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Wed, 31 Jan 2007 11:19:34 -0700 Subject: [openib-general] [PATCH 0/10] osm: QoS in OpenSM In-Reply-To: <1170265270.15660.190215.camel@hal.voltaire.com> References: <45BF5B6C.30406@dev.mellanox.co.il> <1170265270.15660.190215.camel@hal.voltaire.com> Message-ID: <20070131181934.GR12386@obsidianresearch.com> On Wed, Jan 31, 2007 at 12:41:43PM -0500, Hal Rosenstock wrote: > IMO, the XML syntax needs to be explained, discussed, and vetted on the > list. I am hoping this can occur reasonably quickly. If we are doing > XML for this, we need to get to a stable agreed syntax. I didn't see a DTD or schema float by for the XML.. IMHO a DTD is essential for a complex XML like this. Jason From rdreier at cisco.com Wed Jan 31 10:22:25 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 31 Jan 2007 10:22:25 -0800 Subject: [openib-general] [libibverbs] destroying an AH causes a seg fault (this failure appeared during the last night) In-Reply-To: <45C06B3E.80103@dev.mellanox.co.il> (Dotan Barak's message of "Wed, 31 Jan 2007 12:11:10 +0200") References: <45C06B3E.80103@dev.mellanox.co.il> Message-ID: ugh -- OK, see my reply in the thread with mst's diagnosis... From mst at mellanox.co.il Wed Jan 31 10:25:32 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 20:25:32 +0200 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. 
In-Reply-To: <1170252902.20525.7.camel@stevo-desktop> References: <20070119211659.18220.52930.stgit@dell3.ogc.int> <20070131102453.GA25270@mellanox.co.il> <1170252902.20525.7.camel@stevo-desktop> Message-ID: <20070131182532.GC14602@mellanox.co.il> > Quoting Steve Wise : > Subject: Re: [PATCH] The ibv_cmd_* create functions need to set the context. > > On Wed, 2007-01-31 at 12:24 +0200, Michael S. Tsirkin wrote: > > > Quoting Roland Dreier : > > > Subject: Re: [PATCH] The ibv_cmd_* create functions need to set the context. > > > > > > Thanks, applied to master and stable branches. > > > > Did you test it? > > This patch (8b3d225476c99ea29a68109a7d40e5ef353d4388) causes ibv_ud_pingpong > > to segfault on libmthca: libmthca never calls ibv_cmd_create_ah so context is now > > never set. > > > > > > I didn't test UD. Well, when you touch the AH functions, UD is really the only way to test them. > > > Starting program: /usr/local/ofed/bin/ibv_ud_pingpong sw069 > > [Thread debugging using libthread_db enabled] > > [New Thread 47299578320592 (LWP 5085)] > > local address: LID 0x0002, QPN 0x090406, PSN 0x71bffb > > remote address: LID 0x0001, QPN 0x040406, PSN 0x92316a > > 4096000 bytes in 0.02 seconds = 1893.99 Mbit/sec > > 1000 iters in 0.02 seconds = 17.30 usec/iter > > > > Program received signal SIGSEGV, Segmentation fault. > > [Switching to Thread 47299578320592 (LWP 5085)] > > 0x00002b04ca3b7263 in __ibv_destroy_ah (ah=0x5050b0) at src/verbs.c:475 > > 475 return ah->context->ops.destroy_ah(ah); > > (gdb) p ah->context > > $1 = (struct ibv_context *) 0x0 > > > > I actually think this approach is a wrong one: context should be > > set in common code like ibv_create_ah, not in ibv_cmd_ which is > > a library function low level driver might or might not call. > > And certainly this kind of change does not seem appropriate for stable branch. 
> > > > I think the proper thing is for low level driver not to assume that > > fields such as context are initialized until create functions have returned. > > Steve, pls fix your low level driver not to rely on this. > > > > The issue is that the provider lib calls ibv_cmd_create_blah to create > the object, then some failure happens (like a failure mmap()ing the > object's DMA area to the process). At this point the provider lib must > destroy this object that is created from the perspective of the ibv_cmd* > interface. The only way to do that is to call the ibv_cmd_destroy_blah > call, which needs the context field. For stable, in case of error, set the context in the provider lib then? > So I don't think solving this in the provider lib is the right thing to > do. At least for stable branch, this seems more sensible than the disruptive patch that was applied. Roland, what do you think? For master, maybe ibv_cmd destructors should get the context as a parameter? > > Roland, I have reverted this in OFED, please revert on master and stable. > > > > I think we should fix the bug introduced: set the context field in the > ibv_create_blah service if it's not set after calling the provider > method. This is ugly as well, but at least it would work. -- MST From rdreier at cisco.com Wed Jan 31 10:26:59 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 31 Jan 2007 10:26:59 -0800 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. In-Reply-To: <20070131102453.GA25270@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 31 Jan 2007 12:24:53 +0200") References: <20070119211659.18220.52930.stgit@dell3.ogc.int> <20070131102453.GA25270@mellanox.co.il> Message-ID: > I actually think this approach is a wrong one: context should be > set in common code like ibv_create_ah, not in ibv_cmd_ which is > a library function low level driver might or might not call. 
> And certainly this kind of change does not seem appropriate for stable branch. > > I think the proper thing is for low level driver not to assume that > fields such as contex are intialized until create functions have returned. > Steve, pls fix your low level driver not to rely on this. Hmm, there's not really any good solution to this. Really the problem is that the ibv_cmd_destroy_xxx functions assume the context is set in the object they are destroying. But I don't want to change the signature of those functions at this point in the release cycle. It's not really very pleasing for low-level drivers to have to know about the internal assumptions of ibv_cmd_destroy_xxx either. I think what I'll do is the following: - add the assignments to context back into ibv_create_ah() and so on. context will get assigned in two places but oh well. - early in the libibverbs 1.2 cycle, change the signature of ibv_cmd_destroy_xxx so that low-level drivers need to explicitly pass in the context to use. - R. From mst at mellanox.co.il Wed Jan 31 10:31:22 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 20:31:22 +0200 Subject: [openib-general] Bugzilla Bug 329: HCA_FATAL_EVENT cause to OpenSM to stop functioning In-Reply-To: <1170254523.15660.179944.camel@hal.voltaire.com> References: <45C06C81.4060500@dev.mellanox.co.il> <1170254523.15660.179944.camel@hal.voltaire.com> Message-ID: <20070131183122.GF14602@mellanox.co.il> > Quoting Hal Rosenstock : > Subject: Re: Bugzilla Bug 329: HCA_FATAL_EVENT cause to OpenSM to stop functioning > > Hi Yevgeny, > > On Wed, 2007-01-31 at 05:16, Yevgeny Kliteynik wrote: > > Hi Hal. 
> > > > I noticed the following bug in Bugzilla: > > > > Bugzilla Bug 329: HCA_FATAL_EVENT cause to opensm to stop functioning > > https://bugs.openfabrics.org/show_bug.cgi?id=329 > > > > When there is a HCA fatal event on the host that opensm is running on it, > > the opensm stop to function (After the event, the driver restart the device, > > and the port does not return to active state). > > > > If the opensm run in sweep mode , after the event you can see that the opensm > > stop sweeping. > > > > I remember that a couple of months ago I sent a patch that takes care of this problem: > > - in case of IBV_EVENT_DEVICE_FATAL, osm was forced to exit > > - in case of IBV_EVENT_PORT_ERROR, osm initiated heavy sweep > > > > The problem with my patch was that it made osm to depend on uverbs module. > > To resolve this problem, support should be added in umad, and then osm could > > use this support. > > > > Do you know if some work in this area was done in umad? > > This has been on the list but unfortunately there has been no time yet > to work on the local events support in libibumad. I do not think making libibmad depend on ib_uverbs module is a good idea either. More properly, the problem is in ib_umad which does not report hotplug events. If we just make ib_umad return an error code to user on hotplug, the problem will go away without userspace changes. -- MST From mst at mellanox.co.il Wed Jan 31 10:36:35 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Wed, 31 Jan 2007 20:36:35 +0200 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. In-Reply-To: References: <20070119211659.18220.52930.stgit@dell3.ogc.int> <20070131102453.GA25270@mellanox.co.il> Message-ID: <20070131183635.GG14602@mellanox.co.il> > Quoting Roland Dreier : > Subject: Re: [PATCH] The ibv_cmd_* create functions need to set the context. 
> > > I actually think this approach is a wrong one: context should be > > set in common code like ibv_create_ah, not in ibv_cmd_ which is > > a library function low level driver might or might not call. > > And certainly this kind of change does not seem appropriate for stable branch. > > > > I think the proper thing is for low level driver not to assume that > > fields such as contex are intialized until create functions have returned. > > Steve, pls fix your low level driver not to rely on this. > > Hmm, there's not really any good solution to this. Really the problem > is that the ibv_cmd_destroy_xxx functions assume the context is set in > the object they are destroying. But I don't want to change the > signature of those functions at this point in the release cycle. > > It's not really very pleasing for low-level drivers to have to know > about the internal assumptions of ibv_cmd_destroy_xxx either. > > I think what I'll do is the following: > - add the assignments to context back into ibv_create_ah() and so > on. context will get assigned in two places but oh well. > - early in the libibverbs 1.2 cycle, change the signature of > ibv_cmd_destroy_xxx so that low-level drivers need to explicitly > pass in the context to use. This might work. However, I wonder about stable branch - is it wise for a provider to depend on a specific libibverbs 1.0.x version? Surely just working atround this by setting up context field before destroy cmd makes more sense? And if the providers implement the work-around anyway, should we implement hacks to work-around this in libibverbs as well? What I am trying to propose is delaying the whole change till 1.2, and doing the work-around in provider lib for now. -- MST From rdreier at cisco.com Wed Jan 31 10:38:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 31 Jan 2007 10:38:30 -0800 Subject: [openib-general] [PATCH] The ibv_cmd_* create functions need to set the context. 
In-Reply-To: <20070131183635.GG14602@mellanox.co.il> (Michael S. Tsirkin's message of "Wed, 31 Jan 2007 20:36:35 +0200") References: <20070119211659.18220.52930.stgit@dell3.ogc.int> <20070131102453.GA25270@mellanox.co.il> <20070131183635.GG14602@mellanox.co.il> Message-ID: > However, I wonder about stable branch - is it wise for a provider > to depend on a specific libibverbs 1.0.x version? > Surely just working around this by setting up context field > before destroy cmd makes more sense? I think I'll just revert the change from libibverbs 1.0.x. libcxgb3 (the impetus for this change) will never work with libibverbs 1.0 anyway. From halr at voltaire.com Wed Jan 31 11:00:38 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 14:00:38 -0500 Subject: [openib-general] [PATCH RFC 0 of 5] QoS support In-Reply-To: <20070122144146.GB7261@mellanox.co.il> References: <20070122144146.GB7261@mellanox.co.il> Message-ID: <1170270030.15660.194852.camel@hal.voltaire.com> On Mon, 2007-01-22 at 09:41, Michael S. Tsirkin wrote: > The following (lightly tested) RFC patch series implement QoS support > according to the proposed LWG2 QoS Annex. This is *not* intended for > upstream merge, but I do intend to put it in OFED as a technology preview. > > Some notes: > - Module parameters (off by default) were added to control QoS support. > This is a safety measure, due to the fact QoS is not in spec yet. > These will have to go before upstream submission. Is SA ClassPortInfo queried to see if the SA supports QoS prior to making any SA requests ? -- Hal > - Once enabled, all CMA based ULPs will get Service ID set in their > path query requests. > - To set priority, a new rdma_set_priority call was added. > It should be possible to implement it on top of iWarp as well > (IB QoS is explicitly defined to match DSCP semantics), > so I think this belongs in the common header. > For now this value is ignored on iWarp HCA. 
From halr at voltaire.com Wed Jan 31 11:00:49 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 14:00:49 -0500 Subject: [openib-general] [PATCH RFC 4 of 5] IB/mthca: QoS support In-Reply-To: <20070122145021.GF7261@mellanox.co.il> References: <20070122145021.GF7261@mellanox.co.il> Message-ID: <1170270031.15660.194853.camel@hal.voltaire.com> On Mon, 2007-01-22 at 09:50, Michael S. Tsirkin wrote: > encode SL in sched_queue field to improve hardware QoS guarantees > for connected QPs. Is UD already handled properly in terms of mthca ? -- Hal > Signed-off-by: Michael S. Tsirkin > > --- > > Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_qp.c > =================================================================== > --- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_qp.c > +++ linux-2.6/drivers/infiniband/hw/mthca/mthca_qp.c > @@ -49,6 +49,10 @@ > #include "mthca_memfree.h" > #include "mthca_wqe.h" > > +static int mthca_qos_support = 0; > +module_param_named(qos_support, mthca_qos_support, int, 0644); > +MODULE_PARM_DESC(qos_support, "Enable QoS support if > 0"); > + > enum { > MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, > MTHCA_ACK_REQ_FREQ = 10, > @@ -694,6 +698,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, > goto out_mailbox; > > qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); > + if (mthca_qos_support) { > + u8 sl = attr->ah_attr.sl; > + u8 sched_queue = (sl & 0x8) | (sl & (~(sl >> 1)) & 0x4) | > + ((sl >> 1) & (sl >> 2) & 0x2) | ((sl >> 1) & 0x1); > + > + if (mthca_is_memfree(dev)) { > + qp_context->rlkey_arbel_sched_queue |= sched_queue; > + } else { > + qp_context->tavor_sched_queue |= sched_queue; > + } > + qp_param->opt_param_mask |= > + cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE); > + } > } > > if (attr_mask & IB_QP_TIMEOUT) { > From mst at mellanox.co.il Wed Jan 31 11:09:06 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 31 Jan 2007 21:09:06 +0200 Subject: [openib-general] [PATCH RFC 4 of 5] IB/mthca: QoS support In-Reply-To: <1170270031.15660.194853.camel@hal.voltaire.com> References: <20070122145021.GF7261@mellanox.co.il> <1170270031.15660.194853.camel@hal.voltaire.com> Message-ID: <20070131190906.GB21097@mellanox.co.il> > Quoting Hal Rosenstock : > Subject: Re: [PATCH RFC 4 of 5] IB/mthca: QoS support > > On Mon, 2007-01-22 at 09:50, Michael S. Tsirkin wrote: > > encode SL in sched_queue field to improve hardware QoS guarantees > > for connected QPs. > > Is UD already handled properly in terms of mthca ? It's not the question of "proper" handling - this patch is an enhancement, not really a bug fix. I think mthca already does the best it can with UD AVs. -- MST From halr at voltaire.com Wed Jan 31 11:12:19 2007 From: halr at voltaire.com (Hal Rosenstock) Date: 31 Jan 2007 14:12:19 -0500 Subject: [openib-general] [PATCH RFC 4 of 5] IB/mthca: QoS support In-Reply-To: <20070131190906.GB21097@mellanox.co.il> References: <20070122145021.GF7261@mellanox.co.il> <1170270031.15660.194853.camel@hal.voltaire.com> <20070131190906.GB21097@mellanox.co.il> Message-ID: <1170270736.15660.195569.camel@hal.voltaire.com> On Wed, 2007-01-31 at 14:09, Michael S. Tsirkin wrote: > > Quoting Hal Rosenstock : > > Subject: Re: [PATCH RFC 4 of 5] IB/mthca: QoS support > > > > On Mon, 2007-01-22 at 09:50, Michael S. Tsirkin wrote: > > > encode SL in sched_queue field to improve hardware QoS guarantees > > > for connected QPs. > > > > Is UD already handled properly in terms of mthca ? > > It's not the question of "proper" handling - this patch is an enhancement, > not really a bug fix. I think mthca already does the best it can with UD AVs. So there are no scheduling parameters or anything else that needs tweaking in mthca in terms of the SL for UD AVs ? Just want to be sure. -- Hal From mst at mellanox.co.il Wed Jan 31 11:17:53 2007 From: mst at mellanox.co.il (Michael S. 
Tsirkin) Date: Wed, 31 Jan 2007 21:17:53 +0200 Subject: [openib-general] [PATCH RFC 4 of 5] IB/mthca: QoS support In-Reply-To: <1170270736.15660.195569.camel@hal.voltaire.com> References: <1170270736.15660.195569.camel@hal.voltaire.com> Message-ID: <20070131191753.GC21097@mellanox.co.il> > Quoting Hal Rosenstock : > Subject: Re: [PATCH RFC 4 of 5] IB/mthca: QoS support > > On Wed, 2007-01-31 at 14:09, Michael S. Tsirkin wrote: > > > Quoting Hal Rosenstock : > > > Subject: Re: [PATCH RFC 4 of 5] IB/mthca: QoS support > > > > > > On Mon, 2007-01-22 at 09:50, Michael S. Tsirkin wrote: > > > > encode SL in sched_queue field to improve hardware QoS guarantees > > > > for connected QPs. > > > > > > Is UD already handled properly in terms of mthca ? > > > > It's not the question of "proper" handling - this patch is an enhancement, > > not really a bug fix. I think mthca already does the best it can with UD AVs. > > So there are no scheduling parameters or anything else that needs > tweaking in mthca in terms of the SL for UD AVs ? Just want to be sure. Not that I know. -- MST From sean.hefty at intel.com Wed Jan 31 11:19:03 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 11:19:03 -0800 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <45C0CE39.2010404@ichips.intel.com> Message-ID: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> Here's a first attempt at a patch to allow the latest librdmacm to work with kernel ABI version 3 without crashing the kernel. If you're trying to use a developmental kernel that has ABI 4, you'll have to update the kernel cma. Note that I didn't actually run this against an older kernel (I need to reload that on my system), but did test this fix by forcing the abi to version 3 with a newer kernel loaded. 
Signed-off-by: Sean Hefty --- diff --git a/src/cma.c b/src/cma.c index 2d2a587..c5f8cd9 100644 --- a/src/cma.c +++ b/src/cma.c @@ -653,11 +653,49 @@ static int ucma_modify_qp_err(struct rdma_cm_id *id) return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE); } +static int ucma_find_pkey(struct cma_device *cma_dev, uint8_t port_num, + uint16_t pkey, uint16_t *pkey_index) +{ + int ret, i; + uint16_t chk_pkey; + + for (i = 0, ret = 0; !ret; i++) { + ret = ibv_query_pkey(cma_dev->verbs, port_num, i, &chk_pkey); + if (!ret && pkey == chk_pkey) { + *pkey_index = (uint16_t) i; + return 0; + } + } + return -EINVAL; +} + +static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) +{ + struct ibv_qp_attr qp_attr; + int ret; + + ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num, + id_priv->id.route.addr.addr.ibaddr.pkey, + &qp_attr.pkey_index); + if (ret) + return ret; + + qp_attr.port_num = id_priv->id.port_num; + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.qp_access_flags = 0; + + return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS | + IBV_QP_PKEY_INDEX | IBV_QP_PORT); +} + static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) { struct ibv_qp_attr qp_attr; int qp_attr_mask, ret; + if (abi_ver == 3) + return ucma_init_conn_qp3(id_priv, qp); + qp_attr.qp_state = IBV_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) @@ -666,11 +704,44 @@ static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) return ibv_modify_qp(qp, &qp_attr, qp_attr_mask); } +static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) +{ + struct ibv_qp_attr qp_attr; + int ret; + + ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num, + id_priv->id.route.addr.addr.ibaddr.pkey, + &qp_attr.pkey_index); + if (ret) + return ret; + + qp_attr.port_num = id_priv->id.port_num; + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.qkey = RDMA_UDP_QKEY; + + ret = 
ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY | + IBV_QP_PKEY_INDEX | IBV_QP_PORT); + if (ret) + return ret; + + qp_attr.qp_state = IBV_QPS_RTR; + ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); + if (ret) + return ret; + + qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 0; + return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN); +} + static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) { struct ibv_qp_attr qp_attr; int qp_attr_mask, ret; + if (abi_ver == 3) + return ucma_init_ud_qp3(id_priv, qp); + qp_attr.qp_state = IBV_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) From swise at opengridcomputing.com Wed Jan 31 12:28:51 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 14:28:51 -0600 Subject: [openib-general] ip_ib_mc_map? Message-ID: <1170275331.14294.1.camel@stevo-desktop> where can I find this symbol? I can't load rdma_cm on rhel4u4... rdma_cm: Unknown symbol ip_ib_mc_map From swise at opengridcomputing.com Wed Jan 31 12:34:40 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 14:34:40 -0600 Subject: [openib-general] ip_ib_mc_map? In-Reply-To: <000101c74576$fedc81f0$8698070a@amr.corp.intel.com> References: <000101c74576$fedc81f0$8698070a@amr.corp.intel.com> Message-ID: <1170275680.14294.5.camel@stevo-desktop> Perhaps there's no backport for this to rhel4u4? On Wed, 2007-01-31 at 12:32 -0800, Sean Hefty wrote: > >where can I find this symbol? I can't load rdma_cm on rhel4u4... > > > >rdma_cm: Unknown symbol ip_ib_mc_map > > This is in include/net/ip.h for current systems. It is part of ipoib support. > > - Sean From sean.hefty at intel.com Wed Jan 31 12:32:58 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 12:32:58 -0800 Subject: [openib-general] ip_ib_mc_map? 
In-Reply-To: <1170275331.14294.1.camel@stevo-desktop> Message-ID: <000101c74576$fedc81f0$8698070a@amr.corp.intel.com> >where can I find this symbol? I can't load rdma_cm on rhel4u4... > >rdma_cm: Unknown symbol ip_ib_mc_map This is in include/net/ip.h for current systems. It is part of ipoib support. - Sean From swise at opengridcomputing.com Wed Jan 31 12:50:55 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 14:50:55 -0600 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> References: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> Message-ID: <1170276655.14294.11.camel@stevo-desktop> Should this be a problem for OFED 1.2? I would think the ABI for all backports should be the same, so it wouldn't be a problem. Is this true? I'm assuming all backported UCMA modules would have the same ABI. On Wed, 2007-01-31 at 11:19 -0800, Sean Hefty wrote: > Here's a first attempt at a patch to allow the latest librdmacm to work with kernel ABI > version 3 without crashing the kernel. If you're trying to use a developmental kernel > that has ABI 4, you'll have to update the kernel cma. > > Note that I didn't actually run this against an older kernel (I need to reload that on my > system), but did test this fix by forcing the abi to version 3 with a newer kernel loaded. 
> > Signed-off-by: Sean Hefty > --- > diff --git a/src/cma.c b/src/cma.c > index 2d2a587..c5f8cd9 100644 > --- a/src/cma.c > +++ b/src/cma.c > @@ -653,11 +653,49 @@ static int ucma_modify_qp_err(struct rdma_cm_id *id) > return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE); > } > > +static int ucma_find_pkey(struct cma_device *cma_dev, uint8_t port_num, > + uint16_t pkey, uint16_t *pkey_index) > +{ > + int ret, i; > + uint16_t chk_pkey; > + > + for (i = 0, ret = 0; !ret; i++) { > + ret = ibv_query_pkey(cma_dev->verbs, port_num, i, &chk_pkey); > + if (!ret && pkey == chk_pkey) { > + *pkey_index = (uint16_t) i; > + return 0; > + } > + } > + return -EINVAL; > +} > + > +static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) > +{ > + struct ibv_qp_attr qp_attr; > + int ret; > + > + ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num, > + id_priv->id.route.addr.addr.ibaddr.pkey, > + &qp_attr.pkey_index); > + if (ret) > + return ret; > + > + qp_attr.port_num = id_priv->id.port_num; > + qp_attr.qp_state = IBV_QPS_INIT; > + qp_attr.qp_access_flags = 0; > + > + return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS | > + IBV_QP_PKEY_INDEX | IBV_QP_PORT); > +} > + > static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) > { > struct ibv_qp_attr qp_attr; > int qp_attr_mask, ret; > > + if (abi_ver == 3) > + return ucma_init_conn_qp3(id_priv, qp); > + > qp_attr.qp_state = IBV_QPS_INIT; > ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); > if (ret) > @@ -666,11 +704,44 @@ static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct > ibv_qp *qp) > return ibv_modify_qp(qp, &qp_attr, qp_attr_mask); > } > > +static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) > +{ > + struct ibv_qp_attr qp_attr; > + int ret; > + > + ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num, > + id_priv->id.route.addr.addr.ibaddr.pkey, > + &qp_attr.pkey_index); > + if (ret) > + 
return ret; > + > + qp_attr.port_num = id_priv->id.port_num; > + qp_attr.qp_state = IBV_QPS_INIT; > + qp_attr.qkey = RDMA_UDP_QKEY; > + > + ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY | > + IBV_QP_PKEY_INDEX | IBV_QP_PORT); > + if (ret) > + return ret; > + > + qp_attr.qp_state = IBV_QPS_RTR; > + ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); > + if (ret) > + return ret; > + > + qp_attr.qp_state = IBV_QPS_RTS; > + qp_attr.sq_psn = 0; > + return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN); > +} > + > static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) > { > struct ibv_qp_attr qp_attr; > int qp_attr_mask, ret; > > + if (abi_ver == 3) > + return ucma_init_ud_qp3(id_priv, qp); > + > qp_attr.qp_state = IBV_QPS_INIT; > ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); > if (ret) > From sashak at voltaire.com Wed Jan 31 13:29:49 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 31 Jan 2007 23:29:49 +0200 Subject: [openib-general] [PATCH 10/10] osm: QoS in OpenSM In-Reply-To: <45BF6548.80104@dev.mellanox.co.il> References: <45BF6548.80104@dev.mellanox.co.il> Message-ID: <20070131212949.GV3837@sashak.voltaire.com> On 17:33 Tue 30 Jan , Yevgeny Kliteynik wrote: > Checking PathRecord query for QoS constraints > > The QoS-aware path selection logic is implemented in a > separate function that is called only when QoS in OpenSM > is on. It causes some code duplication, but the idea is > to minimize the changes in the existing logic in OSM. > Eventually, these two function (the old path selection > and the new QoS-aware path selection) will be merged > into a single function. Please merge __osm_pr_rcv_get_path_parms() and __osm_pr_rcv_get_path_parms_qos() functions into single one - as you stated most code is duplicated there. In fact __osm_pr_rcv_get_path_parms() is most "changeable" function in SA PR processor, and it is not good idea to make this twice. 
IMHO it creates more ground for future bugs comparing to the risk of possible impacts to existing functionality. This also will make your patch much more review friendly. Thanks, Sasha > > Signed-off-by: Yevgeny Kliteynik > --- > osm/opensm/osm_sa_path_record.c | 822 ++++++++++++++++++++++++++++++++++++++- > 1 files changed, 816 insertions(+), 6 deletions(-) > > diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c > index a0dbb07..2ff7a42 100644 > --- a/osm/opensm/osm_sa_path_record.c > +++ b/osm/opensm/osm_sa_path_record.c > @@ -70,6 +70,7 @@ > #include > #include > #endif > +#include > > #define OSM_PR_RCV_POOL_MIN_SIZE 64 > #define OSM_PR_RCV_POOL_GROW_SIZE 64 > @@ -87,6 +88,7 @@ typedef struct _osm_path_parms > uint8_t rate; > uint8_t sl; > uint8_t pkt_life; > + uint16_t class; > boolean_t reversible; > } osm_path_parms_t; > > @@ -716,6 +718,799 @@ __osm_pr_rcv_get_path_parms( > > /********************************************************************** > **********************************************************************/ > + > +static ib_api_status_t > +__osm_pr_rcv_get_path_parms_qos( > + IN osm_pr_rcv_t* const p_rcv, > + IN const ib_path_rec_t* const p_pr, > + IN const osm_port_t* const p_src_port, > + IN const osm_port_t* const p_dest_port, > + IN const uint16_t dest_lid_ho, > + IN const ib_net64_t comp_mask, > + OUT osm_path_parms_t* const p_parms ) > +{ > + const osm_node_t* p_node; > + const osm_physp_t* p_physp; > + const osm_physp_t* p_src_physp; > + const osm_physp_t* p_dest_physp; > + const osm_prtn_t* p_prtn; > + const ib_port_info_t* p_pi; > + ib_api_status_t status = IB_SUCCESS; > + ib_net16_t pkey = 0; > + ib_net16_t shared_pkey = 0; > + uint8_t mtu = 0; > + uint8_t rate = 0; > + uint8_t pkt_life = 0; > + uint8_t sl = 0; > + uint16_t class = 0; > + uint8_t required_mtu; > + uint8_t required_rate; > + uint8_t required_pkt_life; > + uint8_t in_port_num; > + uint8_t out_port_num; > + ib_net16_t dest_lid; > + uint8_t i; > 
+ uint8_t vl; > + ib_slvl_table_t * p_slvl_tbl = NULL; > + boolean_t valid_sls[IB_MAX_NUM_VLS]; > + boolean_t sl2vl_valid_path = FALSE; > + uint8_t first_valid_sl; > + osm_qos_level_t * p_qos_level = NULL; > + > + OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_get_path_parms_qos ); > + > + memset(valid_sls,TRUE,sizeof(valid_sls)); > + dest_lid = cl_hton16( dest_lid_ho ); > + > + p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); > + p_physp = osm_port_get_default_phys_ptr( p_src_port ); > + p_src_physp = p_physp; > + p_pi = &p_physp->port_info; > + > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + rate = ib_port_info_compute_rate( p_pi ); > + > + /* > + * Mellanox Tavor device performance is better using 1K MTU. > + * If required MTU and MTU selector are such that 1K is OK > + * and at least one end of the path is Tavor we override the > + * port MTU with 1K. > + */ > + if ( p_rcv->p_subn->opt.enable_quirks && > + __osm_sa_path_rec_apply_tavor_mtu_limit( > + p_pr, p_src_port, p_dest_port, comp_mask) ) > + { > + if (mtu > IB_MTU_LEN_1024) > + { > + mtu = IB_MTU_LEN_1024; > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Optimized Path MTU to 1K for Mellanox Tavor device\n"); > + } > + } > + > + /* > + * Walk the subnet object from source to destination, > + * tracking the most restrictive rate and mtu values along the way... > + * > + * If source port node is a switch, then p_physp should > + * point to the port that routes the destination lid > + */ > + > + p_node = osm_physp_get_node_ptr( p_physp ); > + > + if( p_node->sw ) > + { > + /* source node is a switch */ > + in_port_num = osm_physp_get_port_num(p_physp); > + /* > + * If the dest_lid_ho is equal to the lid of the switch pointed by > + * p_sw then p_physp will be the physical port of the switch port zero, > + * and out_port_num will be 0. 
> + */ > + p_physp = osm_switch_get_route_by_lid(p_node->sw, cl_hton16( dest_lid_ho ) ); > + if ( p_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F02: " > + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); > + status = IB_ERROR; > + goto Exit; > + } > + p_src_physp = p_physp; > + out_port_num = osm_physp_get_port_num(p_physp); > + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); > + } > + else > + { > + /* source node is CA or Router */ > + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); > + } > + > + for (i = 0; i < IB_MAX_NUM_VLS; i++) > + { > + if (valid_sls[i]) > + { > + vl = ib_slvl_table_get(p_slvl_tbl,i); > + if (vl == IB_DROP_VL) > + valid_sls[i] = FALSE; > + } > + } > + > + /* > + * now get pointer to the destination port (same as above) > + */ > + p_node = osm_physp_get_node_ptr( p_dest_physp ); > + > + if( p_node->sw ) > + { > + p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); > + if ( p_dest_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F03: " > + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); > + status = IB_ERROR; > + goto Exit; > + } > + } > + > + /* > + * Now go through the path step by step > + */ > + > + while( p_physp != p_dest_physp ) > + { > + p_physp = osm_physp_get_remote( p_physp ); > + if ( p_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F04: " > + "Cannot find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); > + status = IB_ERROR; > + goto Exit; > + } > + > + in_port_num = osm_physp_get_port_num(p_physp); > + > + /* this is point to 
point case (no switch in between) */ > + if( p_physp == p_dest_physp ) > + break; > + > + /* Check parameters for the ingress port in this switch */ > + p_pi = &p_physp->port_info; > + > + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) > + { > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest MTU = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + mtu, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + if( rate > ib_port_info_compute_rate( p_pi ) ) > + { > + rate = ib_port_info_compute_rate( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest rate = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + rate, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + p_node = osm_physp_get_node_ptr( p_physp ); > + if( !p_node->sw ) > + { > + /* > + There is some sort of problem in the subnet object! > + If this isn't a switch, we should have reached > + the destination by now! 
> + */ > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F05: " > + "Internal error, bad path\n" ); > + status = IB_ERROR; > + goto Exit; > + } > + > + /* Continue with the egress port on this switch */ > + p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid ); > + > + if ( p_physp == 0 ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F06: " > + "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", > + dest_lid_ho, > + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); > + status = IB_ERROR; > + goto Exit; > + } > + > + CL_ASSERT( p_physp ); > + CL_ASSERT( osm_physp_is_valid( p_physp ) ); > + > + p_pi = &p_physp->port_info; > + > + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) > + { > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest MTU = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + mtu, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + if( rate > ib_port_info_compute_rate( p_pi ) ) > + { > + rate = ib_port_info_compute_rate( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest rate = %u at intervening port 0x%016" PRIx64 > + " port num 0x%X\n", > + rate, > + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), > + osm_physp_get_port_num( p_physp ) ); > + } > + } > + > + out_port_num = osm_physp_get_port_num(p_physp); > + > + /* > + * Check SL2VL table of the switch > + */ > + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); > + for ( i = 0; i < IB_MAX_NUM_VLS; i++ ) > + { > + if (valid_sls[i]) > + { > + vl = ib_slvl_table_get(p_slvl_tbl,i); > + if (vl == IB_DROP_VL) > + valid_sls[i] = FALSE; > + } > + 
} > + > + /* go to the next step in the path */ > + } > + > + /* p_physp now points to the destination */ > + > + p_pi = &p_physp->port_info; > + > + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) > + { > + mtu = ib_port_info_get_mtu_cap( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest MTU = %u at destination port 0x%016" PRIx64 "\n", > + mtu, > + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); > + } > + } > + > + if( rate > ib_port_info_compute_rate( p_pi ) ) > + { > + rate = ib_port_info_compute_rate( p_pi ); > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "New smallest rate = %u at destination port 0x%016" PRIx64 "\n", > + rate, > + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); > + } > + } > + > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Path min MTU = %u, min rate = %u\n", mtu, rate ); > + } > + > + /* check whether there is some SL that won't lead to VL15 eventually */ > + for (i = 0; i < IB_MAX_NUM_VLS; i++) > + { > + if (valid_sls[i]) > + { > + sl2vl_valid_path = TRUE; > + first_valid_sl = i; > + break; > + } > + } > + > + if (!sl2vl_valid_path) > + { > + /* all the SLs will eventually lead to VL15 on this path */ > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + /* > + * According to spec definition IBA 1.2 Table 205 PacketLifeTime > + * description, for loopback paths, packetLifeTime shall be zero. 
> + */ > + if ( p_src_port == p_dest_port ) > + pkt_life = 0; /* loopback */ > + else > + pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; > + > + /* > + * Get QoS Level object according to the path request > + */ > + osm_qos_parser_get_qos_level_by_pr(p_rcv, > + p_pr, > + p_src_port, > + p_dest_port, > + dest_lid_ho, > + comp_mask, > + &p_qos_level); > + > + if (p_qos_level) > + { > + /* > + * Found QoS level that should be applied to this path record request > + */ > + > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "PathRecord request matches QoS Level %u (%s)\n", > + p_qos_level->sn, > + (p_qos_level->use)? p_qos_level->use : "no description" ) ; > + } > + > + /* adjust MTU limit according to QoS constraints */ > + if (p_qos_level->mtu_limit_set && (mtu > p_qos_level->mtu_limit) ) > + { > + mtu = p_qos_level->mtu_limit; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new smallest MTU = %u\n", > + mtu); > + } > + } > + > + /* adjust Rate limit according to QoS constraints */ > + if (p_qos_level->rate_limit_set && (rate > p_qos_level->rate_limit) ) > + { > + rate = p_qos_level->rate_limit; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new smallest Rate = %u\n", > + rate); > + } > + } > + > + /* adjust Packet Lifetime according to QoS constraints */ > + if (p_qos_level->pkt_life_set && (pkt_life > p_qos_level->pkt_life) ) > + { > + pkt_life = p_qos_level->pkt_life; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new smallest Packet Lifetime = %u\n", > + pkt_life); > + } > + } > + > + /* adjust SL according to 
QoS constraints */ > + if (p_qos_level->sl_set) > + { > + if (!valid_sls[p_qos_level->sl]) > + { > + status = IB_NOT_FOUND; > + goto Exit; > + } > + else > + { > + sl = p_qos_level->sl; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new SL = %u\n", > + sl); > + } > + } > + } > + > + /* adjust PKey according to QoS constraints */ > + if (p_qos_level->pkey_set) > + { > + pkey = p_qos_level->pkey; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new PKey = %u\n", > + pkey); > + } > + } > + > + /* adjust Class according to QoS constraints */ > + if (p_qos_level->class_set) > + { > + class = p_qos_level->class; > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: new Class = %u\n", > + class); > + } > + } > + > + } /*if (p_qos_level)*/ > + > + /* > + * Determine if these values meet the user criteria > + * and adjust appropriately > + */ > + > + /* we silently ignore cases where only the MTU selector is defined */ > + if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && > + ( comp_mask & IB_PR_COMPMASK_MTU ) ) > + { > + required_mtu = ib_path_rec_mtu( p_pr ); > + switch( ib_path_rec_mtu_sel( p_pr ) ) > + { > + case 0: /* must be greater than */ > + if( mtu <= required_mtu ) > + status = IB_NOT_FOUND; > + break; > + > + case 1: /* must be less than */ > + if( mtu >= required_mtu ) > + { > + /* adjust to use the highest mtu > + lower then the required one */ > + if( required_mtu > 1 ) > + mtu = required_mtu - 1; > + else > + status = IB_NOT_FOUND; > + } > + break; > + > + case 2: /* exact match */ > + if( mtu < required_mtu ) > + status = IB_NOT_FOUND; > + else > + mtu = required_mtu; > + break; > + > + case 3: /* largest 
available */ > + /* can't be disqualified by this one */ > + break; > + > + default: > + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ > + CL_ASSERT( FALSE ); > + status = IB_ERROR; > + break; > + } > + } > + if (status != IB_SUCCESS) > + goto Exit; > + > + /* we silently ignore cases where only the Rate selector is defined */ > + if ( ( comp_mask & IB_PR_COMPMASK_RATESELEC ) && > + ( comp_mask & IB_PR_COMPMASK_RATE ) ) > + { > + required_rate = ib_path_rec_rate( p_pr ); > + switch( ib_path_rec_rate_sel( p_pr ) ) > + { > + case 0: /* must be greater than */ > + if( rate <= required_rate ) > + status = IB_NOT_FOUND; > + break; > + > + case 1: /* must be less than */ > + if( rate >= required_rate ) > + { > + /* adjust the rate to use the highest rate > + lower then the required one */ > + if( required_rate > 2 ) > + rate = required_rate - 1; > + else > + status = IB_NOT_FOUND; > + } > + break; > + > + case 2: /* exact match */ > + if( rate < required_rate ) > + status = IB_NOT_FOUND; > + else > + rate = required_rate; > + break; > + > + case 3: /* largest available */ > + /* can't be disqualified by this one */ > + break; > + > + default: > + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ > + CL_ASSERT( FALSE ); > + status = IB_ERROR; > + break; > + } > + } > + if (status != IB_SUCCESS) > + goto Exit; > + > + /* we silently ignore cases where only the PktLife selector is defined */ > + if ( ( comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC ) && > + ( comp_mask & IB_PR_COMPMASK_PKTLIFETIME ) ) > + { > + required_pkt_life = ib_path_rec_pkt_life( p_pr ); > + switch( ib_path_rec_pkt_life_sel( p_pr ) ) > + { > + case 0: /* must be greater than */ > + if( pkt_life <= required_pkt_life ) > + status = IB_NOT_FOUND; > + break; > + > + case 1: /* must be less than */ > + if( pkt_life >= required_pkt_life ) > + { > + /* adjust the lifetime to use the highest possible > + lower then the required one */ > + if( required_pkt_life > 1 ) > + pkt_life = 
required_pkt_life - 1; > + else > + status = IB_NOT_FOUND; > + } > + break; > + > + case 2: /* exact match */ > + if( pkt_life < required_pkt_life ) > + status = IB_NOT_FOUND; > + else > + pkt_life = required_pkt_life; > + break; > + > + case 3: /* smallest available */ > + /* can't be disqualified by this one */ > + break; > + > + default: > + /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ > + CL_ASSERT( FALSE ); > + status = IB_ERROR; > + break; > + } > + } > + if (status != IB_SUCCESS) > + goto Exit; > + > + /* > + * set Pkey for this path record request > + */ > + > + shared_pkey = osm_physp_find_common_pkey( p_src_physp, p_dest_physp ); > + if ( !shared_pkey ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F07: " > + "Ports do not have any shared PKeys\n"); > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + if( (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && > + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) > + { > + if (p_qos_level && p_qos_level->pkey_set) > + pkey = p_qos_level->pkey; > + else > + pkey = shared_pkey; > + } > + else if (comp_mask & IB_PR_COMPMASK_PKEY) > + { > + /* PathRecord requires specific PKey */ > + if (p_qos_level && p_qos_level->pkey_set) > + { > + /* check that QoS pkey matches the required pkey */ > + if (p_qos_level->pkey != p_pr->pkey) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS PKey constraint (0x%04x) doesn't match required PKey (0x%04x)\n", > + cl_ntoh16(p_qos_level->pkey), cl_ntoh16(p_pr->pkey)); > + status = IB_NOT_FOUND; > + goto Exit; > + } > + pkey = p_qos_level->pkey; > + } > + else > + pkey = p_pr->pkey; > + } > + else > + { > + if (p_qos_level && p_qos_level->pkey_set) > + pkey = p_qos_level->pkey; > + else > + pkey = shared_pkey; > + } > + > + /* > + * PKey has been set. Now check that ports share this PKey. 
> + */ > + > + if ( (pkey != shared_pkey) && > + (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Ports do not share specified PKey 0x%04x\n", > + cl_ntoh16(pkey)); > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + /* > + * Done selecting PKey - Now select valid SL > + */ > + > + if (pkey) > + { > + p_prtn = (osm_prtn_t *)cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl, > + pkey & cl_ntoh16((uint16_t)~0x8000)); > + if ( p_prtn == (osm_prtn_t *)cl_qmap_end(&p_rcv->p_subn->prtn_pkey_tbl) ) > + { > + /* this may be possible when pkey tables are created somehow in > + previous runs or things are going wrong here */ > + osm_log( p_rcv->p_log, OSM_LOG_ERROR, > + "__osm_pr_rcv_get_path_parms_qos: ERR 1F08: " > + "No partition found for PKey 0x%04x - using default SL %d\n", > + cl_ntoh16(pkey), sl); > + } > + else > + { > + if (!valid_sls[p_prtn->sl]) > + { > + /* partition forces to use SL that eventually leads to VL15 */ > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Partition constraints (pkey=%u, sl=%u) lead to VL15\n", > + pkey, p_prtn->sl); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + else > + { > + /* partition's SL is valid */ > + if ( p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != p_prtn->sl) ) > + { > + /* partition's SL doesn't match QoS Level SL */ > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "Partition constraints (pkey=%u, sl=%u): SL doesn't match QoS SL(%u)\n", > + pkey, p_prtn->sl, p_qos_level->sl); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + else > + { > + /* set SL to be partition's SL */ > + sl = p_prtn->sl; > + } > + } > + } > + } > + else > + { > + /* > + * No pkey (no partition) > + */ > + if 
(p_qos_level && p_qos_level->sl_set) > + { > + /* sl has been already set */ > + } > + else > + { > + if (comp_mask & IB_PR_COMPMASK_SL) > + { > + /* specific SL was requested */ > + if (!valid_sls[ib_path_rec_sl(p_pr)]) > + { > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: required SL (%u) leads to VL15\n", > + ib_path_rec_sl(p_pr)); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + /* set sl to whatever requested */ > + sl = ib_path_rec_sl(p_pr); > + } > + else > + { > + /* set sl to the first valid sl that won't lead to VL15 */ > + sl = first_valid_sl; > + } > + } > + } > + > + if ( (comp_mask & IB_PR_COMPMASK_SL) && > + (ib_path_rec_sl( p_pr ) != sl) ) > + { > + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) > + { > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > + "__osm_pr_rcv_get_path_parms_qos: " > + "QoS constaraints: required SL (%u) doesn't match QoS constraints\n", > + ib_path_rec_sl(p_pr)); > + } > + status = IB_NOT_FOUND; > + goto Exit; > + } > + > + /* reset pkey when raw traffic */ > + if( (pkey) && > + (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && > + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) > + { > + pkey = 0; > + } > + > + p_parms->mtu = mtu; > + p_parms->rate = rate; > + p_parms->pkt_life = pkt_life; > + p_parms->pkey = pkey; > + p_parms->sl = sl; > + p_parms->class = class; > + > + Exit: > + OSM_LOG_EXIT( p_rcv->p_log ); > + return( status ); > +} > + > +/********************************************************************** > + **********************************************************************/ > static void > __osm_pr_rcv_build_pr( > IN osm_pr_rcv_t* const p_rcv, > @@ -774,7 +1569,8 @@ __osm_pr_rcv_build_pr( > #endif > > p_pr->pkey = p_parms->pkey; > - p_pr->sl = cl_hton16(p_parms->sl); > + ib_path_rec_set_qos_class(p_pr,p_parms->class); > + ib_path_rec_set_sl(p_pr,p_parms->sl); > p_pr->mtu = 
(uint8_t)(p_parms->mtu | 0x80); > p_pr->rate = (uint8_t)(p_parms->rate | 0x80); > > @@ -832,10 +1628,14 @@ __osm_pr_rcv_get_lid_pair_path( > goto Exit; > } > > - status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, > - p_dest_port, dest_lid_ho, > - comp_mask, &path_parms ); > - > + if (p_rcv->p_subn->opt.no_qos) > + status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, > + p_dest_port, dest_lid_ho, > + comp_mask, &path_parms ); > + else > + status = __osm_pr_rcv_get_path_parms_qos( p_rcv, p_pr, p_src_port, > + p_dest_port, dest_lid_ho, > + comp_mask, &path_parms ); > if( status != IB_SUCCESS ) > { > cl_qlock_pool_put( &p_rcv->pr_pool, &p_pr_item->pool_item ); > @@ -849,6 +1649,16 @@ __osm_pr_rcv_get_lid_pair_path( > comp_mask, &rev_path_parms ); > path_parms.reversible = ( rev_path_status == IB_SUCCESS ); > > + /* > + * ToDo: > + * ToDo: The whole implementation of reversible path is wrong. > + * ToDo: It is not enough to know that the reversed path exist. > + * ToDo: We should also adjust limits (mtu, rate, etc...) by > + * ToDo: comparing path and reversed path's limits. > + * ToDo: Also, need to think about reversible path in QoS. > + * ToDo: > + */ > + > /* did we get a Reversible Path compmask ? 
*/ > /* > NOTE that if the reversible component = 0, it is a don't care > @@ -2053,7 +2863,7 @@ osm_pr_rcv_process( > /* SL, Hop Limit, and Flow Label */ > ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, > &sl, &flow_label, &hop_limit ); > - p_pr_item->path_rec.sl = cl_hton16( sl ); > + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); > #ifndef ROUTER_EXP > p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | > (flow_label << 8); > -- > 1.4.4.1.GIT > > From mshefty at ichips.intel.com Wed Jan 31 13:55:08 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 13:55:08 -0800 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <1170276655.14294.11.camel@stevo-desktop> References: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> <1170276655.14294.11.camel@stevo-desktop> Message-ID: <45C1103C.3010301@ichips.intel.com> Steve Wise wrote: > Should this be a problem for OFED 1.2? I would think the ABI for all > backports should be the same, so it wouldn't be a problem. Is this > true? I'm assuming all backported UCMA modules would have the same > ABI. This is a problem for anyone that tries to use a newer version of the librdmacm (like an OFED 1.2 version) with an older kernel (e.g. 2.6.20). As you pointed out, the issue is that the kernel rdma_cm crashes if rdma_init_qp_attr() is called before the user calls rdma_connect(). The problem affects both IB and iWarp. The latest changes to the librdmacm exposed this bug, but the latest kernel multicast code also fixed it. As far as I know, only ABI 3 has been released anywhere. ABI 4 is only available from my git tree. This problem will occur on any code based on ABI 3 or older code snapshots of ABI 4. Hopefully this makes sense. 
- Sean From swise at opengridcomputing.com Wed Jan 31 13:59:10 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 15:59:10 -0600 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <45C1103C.3010301@ichips.intel.com> References: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> <1170276655.14294.11.camel@stevo-desktop> <45C1103C.3010301@ichips.intel.com> Message-ID: <1170280750.14294.19.camel@stevo-desktop> On Wed, 2007-01-31 at 13:55 -0800, Sean Hefty wrote: > Steve Wise wrote: > > Should this be a problem for OFED 1.2? I would think the ABI for all > > backports should be the same, so it wouldn't be a problem. Is this > > true? I'm assuming all backported UCMA modules would have the same > > ABI. > > This is a problem for anyone that tries to use a newer version of the librdmacm > (like an OFED 1.2 version) with an older kernel (e.g. 2.6.20). As you pointed > out, the issue is that the kernel rdma_cm crashes if rdma_init_qp_attr() is > called before the user calls rdma_connect(). The problem affects both IB and > iWarp. The latest changes to the librdmacm exposed this bug, but the latest > kernel multicast code also fixed it. > Fixed it for IB maybe, but not for iWarp, right? > As far as I know, only ABI 3 has been released anywhere. ABI 4 is only > available from my git tree. This problem will occur on any code based on ABI 3 > or older code snapshots of ABI 4. > > Hopefully this makes sense. So OFED 1.2 will be ABI 3, right? Sorry if I'm being dense... 
From mshefty at ichips.intel.com Wed Jan 31 14:04:39 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 14:04:39 -0800 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <1170280750.14294.19.camel@stevo-desktop> References: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> <1170276655.14294.11.camel@stevo-desktop> <45C1103C.3010301@ichips.intel.com> <1170280750.14294.19.camel@stevo-desktop> Message-ID: <45C11277.90403@ichips.intel.com> > Fixed it for IB maybe, but not for iWarp, right? It should be fixed for both. > So OFED 1.2 will be ABI 3, right? OFED will be ABI 4, since it will include multicast support (which is what causes the ABI to bump from 3 to 4). - Sean From sean.hefty at intel.com Wed Jan 31 14:17:42 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 14:17:42 -0800 Subject: [openib-general] new IB CM reject reason Message-ID: <000201c74585$a0bc7260$8698070a@amr.corp.intel.com> We've run into an issue with the IB CM reject reason codes. When a remote application crashes during connection establishment, the connection will be rejected by the kernel CM. Unfortunately, there's not a decent reject reason that maps to this event. Currently, the ib_cm issues the reject as consumer defined (code 28). I'd like to propose adding reject reason 0, which would mean other, unknown, or none given. This is a deviation from the spec, but does anyone know of any issues with such an approach? 
- Sean From kliteyn at dev.mellanox.co.il Wed Jan 31 14:19:04 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 01 Feb 2007 00:19:04 +0200 Subject: [openib-general] [PATCH 10/10] osm: QoS in OpenSM In-Reply-To: <1170264561.15660.189494.camel@hal.voltaire.com> References: <45BF6548.80104@dev.mellanox.co.il> <1170264561.15660.189494.camel@hal.voltaire.com> Message-ID: <45C115D8.6070504@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi Yevgeny, > > On Tue, 2007-01-30 at 10:33, Yevgeny Kliteynik wrote: >> Checking PathRecord query for QoS constraints >> >> The QoS-aware path selection logic is implemented in a >> separate function that is called only when QoS in OpenSM >> is on. It causes some code duplication, but the idea is >> to minimize the changes in the existing logic in OSM. >> Eventually, these two function (the old path selection >> and the new QoS-aware path selection) will be merged >> into a single function. > > Yes, this would be nice to do in the future as there is much overlap. > Whether qos is carried in the request could be handled internal to this > combined routine rather than outside to determine which routine to call. > This will make for a lot less code. Sure, that's the plan. The current implementation looks the way it does only to separate the new code completely from the usual flow, so that the old functionality won't be broken for sure. > Some comments embedded below. 
> >> Signed-off-by: Yevgeny Kliteynik >> --- >> osm/opensm/osm_sa_path_record.c | 822 ++++++++++++++++++++++++++++++++++++++- >> 1 files changed, 816 insertions(+), 6 deletions(-) >> >> diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c >> index a0dbb07..2ff7a42 100644 >> --- a/osm/opensm/osm_sa_path_record.c >> +++ b/osm/opensm/osm_sa_path_record.c >> @@ -70,6 +70,7 @@ >> #include >> #include >> #endif >> +#include >> >> #define OSM_PR_RCV_POOL_MIN_SIZE 64 >> #define OSM_PR_RCV_POOL_GROW_SIZE 64 >> @@ -87,6 +88,7 @@ typedef struct _osm_path_parms >> uint8_t rate; >> uint8_t sl; >> uint8_t pkt_life; >> + uint16_t class; >> boolean_t reversible; >> } osm_path_parms_t; >> >> @@ -716,6 +718,799 @@ __osm_pr_rcv_get_path_parms( >> >> /********************************************************************** >> **********************************************************************/ >> + >> +static ib_api_status_t >> +__osm_pr_rcv_get_path_parms_qos( > > This is the similar function to the non QoS one: > __osm_pr_rcv_get_path_parms Yes, the function with QoS has everything the function w/o QoS has, plus QoS constraints. Eventually, the function w/o QoS should be removed, and the function with QoS should ignore QoS constraints if QoS in osm is off. 
>> + IN osm_pr_rcv_t* const p_rcv, >> + IN const ib_path_rec_t* const p_pr, >> + IN const osm_port_t* const p_src_port, >> + IN const osm_port_t* const p_dest_port, >> + IN const uint16_t dest_lid_ho, >> + IN const ib_net64_t comp_mask, >> + OUT osm_path_parms_t* const p_parms ) >> +{ >> + const osm_node_t* p_node; >> + const osm_physp_t* p_physp; >> + const osm_physp_t* p_src_physp; >> + const osm_physp_t* p_dest_physp; >> + const osm_prtn_t* p_prtn; >> + const ib_port_info_t* p_pi; >> + ib_api_status_t status = IB_SUCCESS; >> + ib_net16_t pkey = 0; >> + ib_net16_t shared_pkey = 0; >> + uint8_t mtu = 0; >> + uint8_t rate = 0; >> + uint8_t pkt_life = 0; >> + uint8_t sl = 0; >> + uint16_t class = 0; >> + uint8_t required_mtu; >> + uint8_t required_rate; >> + uint8_t required_pkt_life; >> + uint8_t in_port_num; >> + uint8_t out_port_num; >> + ib_net16_t dest_lid; >> + uint8_t i; >> + uint8_t vl; >> + ib_slvl_table_t * p_slvl_tbl = NULL; >> + boolean_t valid_sls[IB_MAX_NUM_VLS]; >> + boolean_t sl2vl_valid_path = FALSE; >> + uint8_t first_valid_sl; >> + osm_qos_level_t * p_qos_level = NULL; >> + >> + OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_get_path_parms_qos ); >> + >> + memset(valid_sls,TRUE,sizeof(valid_sls)); >> + dest_lid = cl_hton16( dest_lid_ho ); >> + >> + p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); >> + p_physp = osm_port_get_default_phys_ptr( p_src_port ); >> + p_src_physp = p_physp; >> + p_pi = &p_physp->port_info; >> + >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + rate = ib_port_info_compute_rate( p_pi ); >> + >> + /* >> + * Mellanox Tavor device performance is better using 1K MTU. >> + * If required MTU and MTU selector are such that 1K is OK >> + * and at least one end of the path is Tavor we override the >> + * port MTU with 1K. 
>> + */ >> + if ( p_rcv->p_subn->opt.enable_quirks && >> + __osm_sa_path_rec_apply_tavor_mtu_limit( >> + p_pr, p_src_port, p_dest_port, comp_mask) ) >> + { >> + if (mtu > IB_MTU_LEN_1024) >> + { >> + mtu = IB_MTU_LEN_1024; >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Optimized Path MTU to 1K for Mellanox Tavor device\n"); >> + } >> + } >> + >> + /* >> + * Walk the subnet object from source to destination, >> + * tracking the most restrictive rate and mtu values along the way... >> + * >> + * If source port node is a switch, then p_physp should >> + * point to the port that routes the destination lid >> + */ >> + >> + p_node = osm_physp_get_node_ptr( p_physp ); >> + >> + if( p_node->sw ) >> + { >> + /* source node is a switch */ >> + in_port_num = osm_physp_get_port_num(p_physp); >> + /* >> + * If the dest_lid_ho is equal to the lid of the switch pointed by >> + * p_sw then p_physp will be the physical port of the switch port zero, >> + * and out_port_num will be 0. >> + */ >> + p_physp = osm_switch_get_route_by_lid(p_node->sw, cl_hton16( dest_lid_ho ) ); >> + if ( p_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F02: " >> + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); > > Nit: Error codes should be made unique and not overlap existing ones. Right. 
>> + status = IB_ERROR; >> + goto Exit; >> + } >> + p_src_physp = p_physp; >> + out_port_num = osm_physp_get_port_num(p_physp); >> + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); >> + } >> + else >> + { >> + /* source node is CA or Router */ >> + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); >> + } >> + >> + for (i = 0; i < IB_MAX_NUM_VLS; i++) >> + { >> + if (valid_sls[i]) >> + { >> + vl = ib_slvl_table_get(p_slvl_tbl,i); >> + if (vl == IB_DROP_VL) > > Does vl > Operational VLs need checking here or is it never set this way > ? I think that it would be better if the "setup" part would check it when configuring sl2vl tables, and when VL > Operational VL it should set some default value instead (VL15 looks as a good option). >> + valid_sls[i] = FALSE; >> + } >> + } >> + >> + /* >> + * now get pointer to the destination port (same as above) >> + */ >> + p_node = osm_physp_get_node_ptr( p_dest_physp ); >> + >> + if( p_node->sw ) >> + { >> + p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); >> + if ( p_dest_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F03: " >> + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + } >> + >> + /* >> + * Now go through the path step by step >> + */ >> + >> + while( p_physp != p_dest_physp ) >> + { >> + p_physp = osm_physp_get_remote( p_physp ); >> + if ( p_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F04: " >> + "Cannot find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + >> + in_port_num = osm_physp_get_port_num(p_physp); >> + >> + /* this is point to point case (no 
switch in between) */ >> + if( p_physp == p_dest_physp ) >> + break; > > > Ordering of check for switch and point to point case are different here > and original routine. Should they be the same ? If so, which should > change ? (Any reason why this was moved in this routine ?) Not sure I'm following. The order of check for switch and point to point case looks the same to me (am I missing something?). The difference that I see is that the mtu and rate in the original function are adjusted after the check for switch, and in the new function they are adjusted before the check, which I think is the same. >> + /* Check parameters for the ingress port in this switch */ >> + p_pi = &p_physp->port_info; >> + >> + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) >> + { >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest MTU = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + mtu, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + if( rate > ib_port_info_compute_rate( p_pi ) ) >> + { >> + rate = ib_port_info_compute_rate( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest rate = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + rate, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + p_node = osm_physp_get_node_ptr( p_physp ); >> + if( !p_node->sw ) >> + { >> + /* >> + There is some sort of problem in the subnet object! >> + If this isn't a switch, we should have reached >> + the destination by now! 
>> + */ >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F05: " >> + "Internal error, bad path\n" ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + >> + /* Continue with the egress port on this switch */ >> + p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid ); >> + >> + if ( p_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F06: " >> + "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + >> + CL_ASSERT( p_physp ); >> + CL_ASSERT( osm_physp_is_valid( p_physp ) ); >> + >> + p_pi = &p_physp->port_info; >> + >> + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) >> + { >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest MTU = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + mtu, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + if( rate > ib_port_info_compute_rate( p_pi ) ) >> + { >> + rate = ib_port_info_compute_rate( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest rate = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + rate, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + out_port_num = osm_physp_get_port_num(p_physp); >> + >> + /* >> + * Check SL2VL table of the switch >> + */ >> + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); >> + for ( i = 0; i < IB_MAX_NUM_VLS; i++ ) >> + { >> + if (valid_sls[i]) >> + { >> + vl = 
ib_slvl_table_get(p_slvl_tbl,i); >> + if (vl == IB_DROP_VL) >> + valid_sls[i] = FALSE; >> + } >> + } >> + >> + /* go to the next step in the path */ >> + } >> + >> + /* p_physp now points to the destination */ >> + >> + p_pi = &p_physp->port_info; >> + >> + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) >> + { >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest MTU = %u at destination port 0x%016" PRIx64 "\n", >> + mtu, >> + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); >> + } >> + } >> + >> + if( rate > ib_port_info_compute_rate( p_pi ) ) >> + { >> + rate = ib_port_info_compute_rate( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest rate = %u at destination port 0x%016" PRIx64 "\n", >> + rate, >> + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); >> + } >> + } >> + >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Path min MTU = %u, min rate = %u\n", mtu, rate ); >> + } >> + >> + /* check whether there is some SL that won't lead to VL15 eventually */ >> + for (i = 0; i < IB_MAX_NUM_VLS; i++) >> + { >> + if (valid_sls[i]) >> + { >> + sl2vl_valid_path = TRUE; >> + first_valid_sl = i; >> + break; >> + } >> + } >> + >> + if (!sl2vl_valid_path) >> + { >> + /* all the SLs will eventually lead to VL15 on this path */ >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + /* >> + * According to spec definition IBA 1.2 Table 205 PacketLifeTime >> + * description, for loopback paths, packetLifeTime shall be zero. 
>> + */ >> + if ( p_src_port == p_dest_port ) >> + pkt_life = 0; /* loopback */ >> + else >> + pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; >> + >> + /* >> + * Get QoS Level object according to the path request >> + */ >> + osm_qos_parser_get_qos_level_by_pr(p_rcv, >> + p_pr, >> + p_src_port, >> + p_dest_port, >> + dest_lid_ho, >> + comp_mask, >> + &p_qos_level); >> + >> + if (p_qos_level) >> + { >> + /* >> + * Found QoS level that should be applied to this path record request >> + */ >> + >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "PathRecord request matches QoS Level %u (%s)\n", >> + p_qos_level->sn, >> + (p_qos_level->use)? p_qos_level->use : "no description" ) ; >> + } >> + >> + /* adjust MTU limit according to QoS constraints */ >> + if (p_qos_level->mtu_limit_set && (mtu > p_qos_level->mtu_limit) ) >> + { >> + mtu = p_qos_level->mtu_limit; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new smallest MTU = %u\n", >> + mtu); >> + } >> + } >> + >> + /* adjust Rate limit according to QoS constraints */ >> + if (p_qos_level->rate_limit_set && (rate > p_qos_level->rate_limit) ) >> + { >> + rate = p_qos_level->rate_limit; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new smallest Rate = %u\n", >> + rate); >> + } >> + } >> + >> + /* adjust Packet Lifetime according to QoS constraints */ >> + if (p_qos_level->pkt_life_set && (pkt_life > p_qos_level->pkt_life) ) >> + { >> + pkt_life = p_qos_level->pkt_life; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new smallest Packet Lifetime = 
%u\n", >> + pkt_life); >> + } >> + } >> + >> + /* adjust SL according to QoS constraints */ >> + if (p_qos_level->sl_set) >> + { >> + if (!valid_sls[p_qos_level->sl]) >> + { >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + else >> + { >> + sl = p_qos_level->sl; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new SL = %u\n", >> + sl); >> + } >> + } >> + } >> + >> + /* adjust PKey according to QoS constraints */ >> + if (p_qos_level->pkey_set) >> + { >> + pkey = p_qos_level->pkey; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new PKey = %u\n", >> + pkey); >> + } >> + } >> + >> + /* adjust Class according to QoS constraints */ >> + if (p_qos_level->class_set) >> + { >> + class = p_qos_level->class; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new Class = %u\n", >> + class); >> + } >> + } >> + >> + } /*if (p_qos_level)*/ >> + >> + /* >> + * Determine if these values meet the user criteria >> + * and adjust appropriately >> + */ >> + >> + /* we silently ignore cases where only the MTU selector is defined */ >> + if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && >> + ( comp_mask & IB_PR_COMPMASK_MTU ) ) >> + { >> + required_mtu = ib_path_rec_mtu( p_pr ); >> + switch( ib_path_rec_mtu_sel( p_pr ) ) >> + { >> + case 0: /* must be greater than */ >> + if( mtu <= required_mtu ) >> + status = IB_NOT_FOUND; >> + break; >> + >> + case 1: /* must be less than */ >> + if( mtu >= required_mtu ) >> + { >> + /* adjust to use the highest mtu >> + lower then the required one */ >> + if( required_mtu > 1 ) >> + mtu = required_mtu - 1; >> + else >> + status = IB_NOT_FOUND; >> + } >> + break; >> + >> + 
case 2: /* exact match */ >> + if( mtu < required_mtu ) >> + status = IB_NOT_FOUND; >> + else >> + mtu = required_mtu; >> + break; >> + >> + case 3: /* largest available */ >> + /* can't be disqualified by this one */ >> + break; >> + >> + default: >> + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ >> + CL_ASSERT( FALSE ); >> + status = IB_ERROR; >> + break; >> + } >> + } >> + if (status != IB_SUCCESS) >> + goto Exit; >> + >> + /* we silently ignore cases where only the Rate selector is defined */ >> + if ( ( comp_mask & IB_PR_COMPMASK_RATESELEC ) && >> + ( comp_mask & IB_PR_COMPMASK_RATE ) ) >> + { >> + required_rate = ib_path_rec_rate( p_pr ); >> + switch( ib_path_rec_rate_sel( p_pr ) ) >> + { >> + case 0: /* must be greater than */ >> + if( rate <= required_rate ) >> + status = IB_NOT_FOUND; >> + break; >> + >> + case 1: /* must be less than */ >> + if( rate >= required_rate ) >> + { >> + /* adjust the rate to use the highest rate >> + lower then the required one */ >> + if( required_rate > 2 ) >> + rate = required_rate - 1; >> + else >> + status = IB_NOT_FOUND; >> + } >> + break; >> + >> + case 2: /* exact match */ >> + if( rate < required_rate ) >> + status = IB_NOT_FOUND; >> + else >> + rate = required_rate; >> + break; >> + >> + case 3: /* largest available */ >> + /* can't be disqualified by this one */ >> + break; >> + >> + default: >> + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ >> + CL_ASSERT( FALSE ); >> + status = IB_ERROR; >> + break; >> + } >> + } >> + if (status != IB_SUCCESS) >> + goto Exit; >> + >> + /* we silently ignore cases where only the PktLife selector is defined */ >> + if ( ( comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC ) && >> + ( comp_mask & IB_PR_COMPMASK_PKTLIFETIME ) ) >> + { >> + required_pkt_life = ib_path_rec_pkt_life( p_pr ); >> + switch( ib_path_rec_pkt_life_sel( p_pr ) ) >> + { >> + case 0: /* must be greater than */ >> + if( pkt_life <= required_pkt_life ) >> + status = IB_NOT_FOUND; >> + break; 
>> + >> + case 1: /* must be less than */ >> + if( pkt_life >= required_pkt_life ) >> + { >> + /* adjust the lifetime to use the highest possible >> + lower then the required one */ >> + if( required_pkt_life > 1 ) >> + pkt_life = required_pkt_life - 1; >> + else >> + status = IB_NOT_FOUND; >> + } >> + break; >> + >> + case 2: /* exact match */ >> + if( pkt_life < required_pkt_life ) >> + status = IB_NOT_FOUND; >> + else >> + pkt_life = required_pkt_life; >> + break; >> + >> + case 3: /* smallest available */ >> + /* can't be disqualified by this one */ >> + break; >> + >> + default: >> + /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ >> + CL_ASSERT( FALSE ); >> + status = IB_ERROR; >> + break; >> + } >> + } >> + if (status != IB_SUCCESS) >> + goto Exit; >> + >> + /* >> + * set Pkey for this path record request >> + */ >> + >> + shared_pkey = osm_physp_find_common_pkey( p_src_physp, p_dest_physp ); >> + if ( !shared_pkey ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F07: " >> + "Ports do not have any shared PKeys\n"); >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + if( (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && >> + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) >> + { >> + if (p_qos_level && p_qos_level->pkey_set) >> + pkey = p_qos_level->pkey; >> + else >> + pkey = shared_pkey; >> + } >> + else if (comp_mask & IB_PR_COMPMASK_PKEY) >> + { >> + /* PathRecord requires specific PKey */ >> + if (p_qos_level && p_qos_level->pkey_set) >> + { >> + /* check that QoS pkey matches the required pkey */ >> + if (p_qos_level->pkey != p_pr->pkey) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS PKey constraint (0x%04x) doesn't match required PKey (0x%04x)\n", >> + cl_ntoh16(p_qos_level->pkey), cl_ntoh16(p_pr->pkey)); >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + pkey = p_qos_level->pkey; >> + } >> + else >> + pkey = p_pr->pkey; >> + } >> 
+ else >> + { >> + if (p_qos_level && p_qos_level->pkey_set) >> + pkey = p_qos_level->pkey; >> + else >> + pkey = shared_pkey; >> + } >> + >> + /* >> + * PKey has been set. Now check that ports share this PKey. >> + */ >> + >> + if ( (pkey != shared_pkey) && >> + (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Ports do not share specified PKey 0x%04x\n", >> + cl_ntoh16(pkey)); >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + /* >> + * Done selecting PKey - Now select valid SL >> + */ >> + >> + if (pkey) >> + { >> + p_prtn = (osm_prtn_t *)cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl, >> + pkey & cl_ntoh16((uint16_t)~0x8000)); >> + if ( p_prtn == (osm_prtn_t *)cl_qmap_end(&p_rcv->p_subn->prtn_pkey_tbl) ) >> + { >> + /* this may be possible when pkey tables are created somehow in >> + previous runs or things are going wrong here */ >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F08: " >> + "No partition found for PKey 0x%04x - using default SL %d\n", >> + cl_ntoh16(pkey), sl); >> + } >> + else >> + { >> + if (!valid_sls[p_prtn->sl]) >> + { >> + /* partition forces to use SL that eventually leads to VL15 */ >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Partition constraints (pkey=%u, sl=%u) lead to VL15\n", >> + pkey, p_prtn->sl); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + else >> + { >> + /* partition's SL is valid */ >> + if ( p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != p_prtn->sl) ) >> + { >> + /* partition's SL doesn't match QoS Level SL */ >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Partition constraints (pkey=%u, sl=%u): SL doesn't match QoS 
SL(%u)\n", >> + pkey, p_prtn->sl, p_qos_level->sl); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + else >> + { >> + /* set SL to be partition's SL */ >> + sl = p_prtn->sl; >> + } >> + } >> + } >> + } >> + else >> + { >> + /* >> + * No pkey (no partition) >> + */ >> + if (p_qos_level && p_qos_level->sl_set) >> + { >> + /* sl has been already set */ >> + } >> + else >> + { >> + if (comp_mask & IB_PR_COMPMASK_SL) >> + { >> + /* specific SL was requested */ >> + if (!valid_sls[ib_path_rec_sl(p_pr)]) >> + { >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: required SL (%u) leads to VL15\n", >> + ib_path_rec_sl(p_pr)); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + /* set sl to whatever requested */ >> + sl = ib_path_rec_sl(p_pr); >> + } >> + else >> + { >> + /* set sl to the first valid sl that won't lead to VL15 */ >> + sl = first_valid_sl; >> + } >> + } >> + } >> + >> + if ( (comp_mask & IB_PR_COMPMASK_SL) && >> + (ib_path_rec_sl( p_pr ) != sl) ) >> + { >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: required SL (%u) doesn't match QoS constraints\n", >> + ib_path_rec_sl(p_pr)); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + /* reset pkey when raw traffic */ >> + if( (pkey) && >> + (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && >> + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) >> + { >> + pkey = 0; >> + } >> + >> + p_parms->mtu = mtu; >> + p_parms->rate = rate; >> + p_parms->pkt_life = pkt_life; >> + p_parms->pkey = pkey; >> + p_parms->sl = sl; >> + p_parms->class = class; >> + >> + Exit: >> + OSM_LOG_EXIT( p_rcv->p_log ); >> + return( status ); >> +} >> + >> +/********************************************************************** >> + 
**********************************************************************/ >> static void >> __osm_pr_rcv_build_pr( >> IN osm_pr_rcv_t* const p_rcv, >> @@ -774,7 +1569,8 @@ __osm_pr_rcv_build_pr( >> #endif >> >> p_pr->pkey = p_parms->pkey; >> - p_pr->sl = cl_hton16(p_parms->sl); >> + ib_path_rec_set_qos_class(p_pr,p_parms->class); >> + ib_path_rec_set_sl(p_pr,p_parms->sl); >> p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80); >> p_pr->rate = (uint8_t)(p_parms->rate | 0x80); >> >> @@ -832,10 +1628,14 @@ __osm_pr_rcv_get_lid_pair_path( >> goto Exit; >> } >> >> - status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, >> - p_dest_port, dest_lid_ho, >> - comp_mask, &path_parms ); >> - >> + if (p_rcv->p_subn->opt.no_qos) > > Shouldn't this be based on p_rcv->p_subn.opt.qos_policy_file rather than > no_qos ? I think there are cases where the QoS will be used without the > QoS policy (higher level QoS support). By totally ignoring sl2vl tables the original function may return path that isn't a "real" path - it may lead to VL15 at some point. So the new function takes care of this problem. When there's no policy file, the policy parse tree is empty, and then the ports would not have any qos-level to be applied on the examined path. In that case the new function does whatever the old one did, plus checking the path for sl2vl "consistency". -- Yevgeny > -- Hal > >> + status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, >> + p_dest_port, dest_lid_ho, >> + comp_mask, &path_parms ); >> + else >> + status = __osm_pr_rcv_get_path_parms_qos( p_rcv, p_pr, p_src_port, >> + p_dest_port, dest_lid_ho, >> + comp_mask, &path_parms ); >> if( status != IB_SUCCESS ) >> { >> cl_qlock_pool_put( &p_rcv->pr_pool, &p_pr_item->pool_item ); >> @@ -849,6 +1649,16 @@ __osm_pr_rcv_get_lid_pair_path( >> comp_mask, &rev_path_parms ); >> path_parms.reversible = ( rev_path_status == IB_SUCCESS ); >> >> + /* >> + * ToDo: >> + * ToDo: The whole implementation of reversible path is wrong. 
>> + * ToDo: It is not enough to know that the reversed path exist. >> + * ToDo: We should also adjust limits (mtu, rate, etc...) by >> + * ToDo: comparing path and reversed path's limits. >> + * ToDo: Also, need to think about reversible path in QoS. >> + * ToDo: >> + */ >> + >> /* did we get a Reversible Path compmask ? */ >> /* >> NOTE that if the reversible component = 0, it is a don't care >> @@ -2053,7 +2863,7 @@ osm_pr_rcv_process( >> /* SL, Hop Limit, and Flow Label */ >> ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, >> &sl, &flow_label, &hop_limit ); >> - p_pr_item->path_rec.sl = cl_hton16( sl ); >> + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); >> #ifndef ROUTER_EXP >> p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | >> (flow_label << 8); > From swise at opengridcomputing.com Wed Jan 31 14:26:19 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 16:26:19 -0600 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <45C11277.90403@ichips.intel.com> References: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> <1170276655.14294.11.camel@stevo-desktop> <45C1103C.3010301@ichips.intel.com> <1170280750.14294.19.camel@stevo-desktop> <45C11277.90403@ichips.intel.com> Message-ID: <1170282379.14294.27.camel@stevo-desktop> On Wed, 2007-01-31 at 14:04 -0800, Sean Hefty wrote: > > Fixed it for IB maybe, but not for iWarp, right? > > It should be fixed for both. > Ok. But there still exists an iwarp issue that I need to fix because librdmacm (the one shipped in OFED) now calls the kernel rdma_init_qp_attr() function via ucma before the library calls kernel rdma_connect() via ucma... 
From kliteyn at dev.mellanox.co.il Wed Jan 31 14:26:37 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 01 Feb 2007 00:26:37 +0200 Subject: [openib-general] [PATCH 0/10] osm: QoS in OpenSM In-Reply-To: <1170265270.15660.190215.camel@hal.voltaire.com> References: <45BF5B6C.30406@dev.mellanox.co.il> <1170265270.15660.190215.camel@hal.voltaire.com> Message-ID: <45C1179D.1080003@dev.mellanox.co.il> Hi Hal, Hal Rosenstock wrote: > Hi Yevgeny, > > On Tue, 2007-01-30 at 09:51, Yevgeny Kliteynik wrote: >> Hi Hal. >> >> The following is a series of 10 patches: >> 1. QoS policy file parser Yacc file >> 2. QoS policy file parser Lex file >> 3. QoS policy file parser Yacc & Lex generated files >> 4. QoS policy file parser header file >> 5. QoS policy file parser C file with auxiliary functions >> 6. Compilation changes for QoS policy file parser: >> Added new files to makefiles. >> Introduced new configuration switch '--enable-maintainer-mode', >> which will run Lex & Yacc instead of just using the generated >> files. >> 7. Renamed static function find_prtn_by_name() to non-static osm_prtn_find_by_name() >> This function will be used later by the PathRecord logic. >> 8. Added QoS class and service id fields to the path record. >> 9. Added new command line option for OSM: '-Y' or '--qos_policy_file' >> 10.Checking PathRecord query for QoS constraints. > > Is everyone on the list satisfied with an XML format or should there be > a text version ? Is anyone concerned about the ease of configuring XML > for QoS ? > > IMO, the XML syntax needs to be explained, discussed, and vetted on the > list. I am hoping this can occur reasonably quickly. If we are doing > XML for this, we need to get to a stable agreed syntax. > > A couple of missing minor things: > SA ClassPortInfo and SA MultiPathRecord similar to PathRecord > > A major missing component is a QoS manager which supports the granular > configuration of the SL2VL and VLArb tables. 
Based on our experience > with the existing QoS manager, this effort is not to be minimized. If > this is not part of this package, a fair portion of the QoS syntax is > "dormant". I know this can be run on top of the existing QoS manager to > get a more complete QoS solution than what already exists so this could > be considered a stepping stone towards that. > I already started working on multipath, and the next item on my list is QoS manager (or QoS setup), but I seriously doubt that I will manage to finish it by the feature freeze, which is today :) Anyway, the QoS policy file parser (whatever the format is) and the PathRecord are definitely a stepping stone towards full QoS support in OpenSM. -- Yevgeny > -- Hal > >> -- >> Yevgeny >> >> Signed-off-by: Yevgeny Kliteynik >> >> >> > From sean.hefty at intel.com Wed Jan 31 14:35:24 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 14:35:24 -0800 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <1170282379.14294.27.camel@stevo-desktop> Message-ID: <000301c74588$195c80f0$8698070a@amr.corp.intel.com> >But there still exists an iwarp issue that I need to fix because >librdmacm (the one shipped in OFED) now calls the kernel >rdma_init_qp_attr() function via ucma before the library calls kernel >rdma_connect() via ucma... Can you clarify which versions of the librdmacm and kernel you are using? The librdmacm shipped with OFED 1.1 shouldn't hit this issue. And neither should the upcoming OFED 1.2 version of the librdmacm (with the previously posted patch applied), when paired with either the OFED 1.2 kernel code, what was requested to go into 2.6.21, or older kernels. I just think that this problem is only exposed by developmental librdmacm code paired with older developmental rdma_cm multicast code. 
- Sean From kliteyn at dev.mellanox.co.il Wed Jan 31 14:36:36 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 01 Feb 2007 00:36:36 +0200 Subject: [openib-general] [PATCH 10/10] osm: QoS in OpenSM In-Reply-To: <20070131212949.GV3837@sashak.voltaire.com> References: <45BF6548.80104@dev.mellanox.co.il> <20070131212949.GV3837@sashak.voltaire.com> Message-ID: <45C119F4.9000209@dev.mellanox.co.il> Hi Sasha, Sasha Khapyorsky wrote: > On 17:33 Tue 30 Jan , Yevgeny Kliteynik wrote: >> Checking PathRecord query for QoS constraints >> >> The QoS-aware path selection logic is implemented in a >> separate function that is called only when QoS in OpenSM >> is on. It causes some code duplication, but the idea is >> to minimize the changes in the existing logic in OSM. >> Eventually, these two function (the old path selection >> and the new QoS-aware path selection) will be merged >> into a single function. > > Please merge __osm_pr_rcv_get_path_parms() and > __osm_pr_rcv_get_path_parms_qos() functions into single one - as you > stated most code is duplicated there. > > In fact __osm_pr_rcv_get_path_parms() is most "changeable" function in > SA PR processor, and it is not good idea to make this twice. IMHO it > creates more ground for future bugs comparing to the risk of possible > impacts to existing functionality. As I said, this actually won't be a "merge" - the original function will be removed, and the new function will have a few if() statements for cases when QoS in osm is down. However, this will be a bunch of new code that is running as part of the usual flow, and since this code didn't have enough time to be tested before feature freeze, and because we discussed the necessity of implementing QoS-aware PathRecord the way that it won't change the usual path (again, until it will be tested), I think that it would be better right now to leave it in two separate functions. 
Trust me, I want to get rid of this code duplication much more than you do :) And I'll do it as soon as I get to test the new code properly. -- Yevgeny > This also will make your patch much more review friendly. > > Thanks, > Sasha > >> Signed-off-by: Yevgeny Kliteynik >> --- >> osm/opensm/osm_sa_path_record.c | 822 ++++++++++++++++++++++++++++++++++++++- >> 1 files changed, 816 insertions(+), 6 deletions(-) >> >> diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c >> index a0dbb07..2ff7a42 100644 >> --- a/osm/opensm/osm_sa_path_record.c >> +++ b/osm/opensm/osm_sa_path_record.c >> @@ -70,6 +70,7 @@ >> #include >> #include >> #endif >> +#include >> >> #define OSM_PR_RCV_POOL_MIN_SIZE 64 >> #define OSM_PR_RCV_POOL_GROW_SIZE 64 >> @@ -87,6 +88,7 @@ typedef struct _osm_path_parms >> uint8_t rate; >> uint8_t sl; >> uint8_t pkt_life; >> + uint16_t class; >> boolean_t reversible; >> } osm_path_parms_t; >> >> @@ -716,6 +718,799 @@ __osm_pr_rcv_get_path_parms( >> >> /********************************************************************** >> **********************************************************************/ >> + >> +static ib_api_status_t >> +__osm_pr_rcv_get_path_parms_qos( >> + IN osm_pr_rcv_t* const p_rcv, >> + IN const ib_path_rec_t* const p_pr, >> + IN const osm_port_t* const p_src_port, >> + IN const osm_port_t* const p_dest_port, >> + IN const uint16_t dest_lid_ho, >> + IN const ib_net64_t comp_mask, >> + OUT osm_path_parms_t* const p_parms ) >> +{ >> + const osm_node_t* p_node; >> + const osm_physp_t* p_physp; >> + const osm_physp_t* p_src_physp; >> + const osm_physp_t* p_dest_physp; >> + const osm_prtn_t* p_prtn; >> + const ib_port_info_t* p_pi; >> + ib_api_status_t status = IB_SUCCESS; >> + ib_net16_t pkey = 0; >> + ib_net16_t shared_pkey = 0; >> + uint8_t mtu = 0; >> + uint8_t rate = 0; >> + uint8_t pkt_life = 0; >> + uint8_t sl = 0; >> + uint16_t class = 0; >> + uint8_t required_mtu; >> + uint8_t required_rate; >> + uint8_t 
required_pkt_life; >> + uint8_t in_port_num; >> + uint8_t out_port_num; >> + ib_net16_t dest_lid; >> + uint8_t i; >> + uint8_t vl; >> + ib_slvl_table_t * p_slvl_tbl = NULL; >> + boolean_t valid_sls[IB_MAX_NUM_VLS]; >> + boolean_t sl2vl_valid_path = FALSE; >> + uint8_t first_valid_sl; >> + osm_qos_level_t * p_qos_level = NULL; >> + >> + OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_get_path_parms_qos ); >> + >> + memset(valid_sls,TRUE,sizeof(valid_sls)); >> + dest_lid = cl_hton16( dest_lid_ho ); >> + >> + p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port ); >> + p_physp = osm_port_get_default_phys_ptr( p_src_port ); >> + p_src_physp = p_physp; >> + p_pi = &p_physp->port_info; >> + >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + rate = ib_port_info_compute_rate( p_pi ); >> + >> + /* >> + * Mellanox Tavor device performance is better using 1K MTU. >> + * If required MTU and MTU selector are such that 1K is OK >> + * and at least one end of the path is Tavor we override the >> + * port MTU with 1K. >> + */ >> + if ( p_rcv->p_subn->opt.enable_quirks && >> + __osm_sa_path_rec_apply_tavor_mtu_limit( >> + p_pr, p_src_port, p_dest_port, comp_mask) ) >> + { >> + if (mtu > IB_MTU_LEN_1024) >> + { >> + mtu = IB_MTU_LEN_1024; >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Optimized Path MTU to 1K for Mellanox Tavor device\n"); >> + } >> + } >> + >> + /* >> + * Walk the subnet object from source to destination, >> + * tracking the most restrictive rate and mtu values along the way... 
>> + * >> + * If source port node is a switch, then p_physp should >> + * point to the port that routes the destination lid >> + */ >> + >> + p_node = osm_physp_get_node_ptr( p_physp ); >> + >> + if( p_node->sw ) >> + { >> + /* source node is a switch */ >> + in_port_num = osm_physp_get_port_num(p_physp); >> + /* >> + * If the dest_lid_ho is equal to the lid of the switch pointed by >> + * p_sw then p_physp will be the physical port of the switch port zero, >> + * and out_port_num will be 0. >> + */ >> + p_physp = osm_switch_get_route_by_lid(p_node->sw, cl_hton16( dest_lid_ho ) ); >> + if ( p_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F02: " >> + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + p_src_physp = p_physp; >> + out_port_num = osm_physp_get_port_num(p_physp); >> + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); >> + } >> + else >> + { >> + /* source node is CA or Router */ >> + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); >> + } >> + >> + for (i = 0; i < IB_MAX_NUM_VLS; i++) >> + { >> + if (valid_sls[i]) >> + { >> + vl = ib_slvl_table_get(p_slvl_tbl,i); >> + if (vl == IB_DROP_VL) >> + valid_sls[i] = FALSE; >> + } >> + } >> + >> + /* >> + * now get pointer to the destination port (same as above) >> + */ >> + p_node = osm_physp_get_node_ptr( p_dest_physp ); >> + >> + if( p_node->sw ) >> + { >> + p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) ); >> + if ( p_dest_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F03: " >> + "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + } >> + >> + /* >> + * Now 
go through the path step by step >> + */ >> + >> + while( p_physp != p_dest_physp ) >> + { >> + p_physp = osm_physp_get_remote( p_physp ); >> + if ( p_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F04: " >> + "Cannot find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + >> + in_port_num = osm_physp_get_port_num(p_physp); >> + >> + /* this is point to point case (no switch in between) */ >> + if( p_physp == p_dest_physp ) >> + break; >> + >> + /* Check parameters for the ingress port in this switch */ >> + p_pi = &p_physp->port_info; >> + >> + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) >> + { >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest MTU = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + mtu, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + if( rate > ib_port_info_compute_rate( p_pi ) ) >> + { >> + rate = ib_port_info_compute_rate( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest rate = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + rate, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + p_node = osm_physp_get_node_ptr( p_physp ); >> + if( !p_node->sw ) >> + { >> + /* >> + There is some sort of problem in the subnet object! >> + If this isn't a switch, we should have reached >> + the destination by now! 
>> + */ >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F05: " >> + "Internal error, bad path\n" ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + >> + /* Continue with the egress port on this switch */ >> + p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid ); >> + >> + if ( p_physp == 0 ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F06: " >> + "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", >> + dest_lid_ho, >> + cl_ntoh64( osm_node_get_node_guid( p_node ) ) ); >> + status = IB_ERROR; >> + goto Exit; >> + } >> + >> + CL_ASSERT( p_physp ); >> + CL_ASSERT( osm_physp_is_valid( p_physp ) ); >> + >> + p_pi = &p_physp->port_info; >> + >> + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) >> + { >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest MTU = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + mtu, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + if( rate > ib_port_info_compute_rate( p_pi ) ) >> + { >> + rate = ib_port_info_compute_rate( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest rate = %u at intervening port 0x%016" PRIx64 >> + " port num 0x%X\n", >> + rate, >> + cl_ntoh64( osm_physp_get_port_guid( p_physp ) ), >> + osm_physp_get_port_num( p_physp ) ); >> + } >> + } >> + >> + out_port_num = osm_physp_get_port_num(p_physp); >> + >> + /* >> + * Check SL2VL table of the switch >> + */ >> + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); >> + for ( i = 0; i < IB_MAX_NUM_VLS; i++ ) >> + { >> + if (valid_sls[i]) >> + { >> + vl = 
ib_slvl_table_get(p_slvl_tbl,i); >> + if (vl == IB_DROP_VL) >> + valid_sls[i] = FALSE; >> + } >> + } >> + >> + /* go to the next step in the path */ >> + } >> + >> + /* p_physp now points to the destination */ >> + >> + p_pi = &p_physp->port_info; >> + >> + if( mtu > ib_port_info_get_mtu_cap( p_pi ) ) >> + { >> + mtu = ib_port_info_get_mtu_cap( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest MTU = %u at destination port 0x%016" PRIx64 "\n", >> + mtu, >> + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); >> + } >> + } >> + >> + if( rate > ib_port_info_compute_rate( p_pi ) ) >> + { >> + rate = ib_port_info_compute_rate( p_pi ); >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "New smallest rate = %u at destination port 0x%016" PRIx64 "\n", >> + rate, >> + cl_ntoh64(osm_physp_get_port_guid( p_physp )) ); >> + } >> + } >> + >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Path min MTU = %u, min rate = %u\n", mtu, rate ); >> + } >> + >> + /* check whether there is some SL that won't lead to VL15 eventually */ >> + for (i = 0; i < IB_MAX_NUM_VLS; i++) >> + { >> + if (valid_sls[i]) >> + { >> + sl2vl_valid_path = TRUE; >> + first_valid_sl = i; >> + break; >> + } >> + } >> + >> + if (!sl2vl_valid_path) >> + { >> + /* all the SLs will eventually lead to VL15 on this path */ >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + /* >> + * According to spec definition IBA 1.2 Table 205 PacketLifeTime >> + * description, for loopback paths, packetLifeTime shall be zero. 
>> + */ >> + if ( p_src_port == p_dest_port ) >> + pkt_life = 0; /* loopback */ >> + else >> + pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; >> + >> + /* >> + * Get QoS Level object according to the path request >> + */ >> + osm_qos_parser_get_qos_level_by_pr(p_rcv, >> + p_pr, >> + p_src_port, >> + p_dest_port, >> + dest_lid_ho, >> + comp_mask, >> + &p_qos_level); >> + >> + if (p_qos_level) >> + { >> + /* >> + * Found QoS level that should be applied to this path record request >> + */ >> + >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "PathRecord request matches QoS Level %u (%s)\n", >> + p_qos_level->sn, >> + (p_qos_level->use)? p_qos_level->use : "no description" ) ; >> + } >> + >> + /* adjust MTU limit according to QoS constraints */ >> + if (p_qos_level->mtu_limit_set && (mtu > p_qos_level->mtu_limit) ) >> + { >> + mtu = p_qos_level->mtu_limit; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new smallest MTU = %u\n", >> + mtu); >> + } >> + } >> + >> + /* adjust Rate limit according to QoS constraints */ >> + if (p_qos_level->rate_limit_set && (rate > p_qos_level->rate_limit) ) >> + { >> + rate = p_qos_level->rate_limit; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new smallest Rate = %u\n", >> + rate); >> + } >> + } >> + >> + /* adjust Packet Lifetime according to QoS constraints */ >> + if (p_qos_level->pkt_life_set && (pkt_life > p_qos_level->pkt_life) ) >> + { >> + pkt_life = p_qos_level->pkt_life; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new smallest Packet Lifetime = 
%u\n", >> + pkt_life); >> + } >> + } >> + >> + /* adjust SL according to QoS constraints */ >> + if (p_qos_level->sl_set) >> + { >> + if (!valid_sls[p_qos_level->sl]) >> + { >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + else >> + { >> + sl = p_qos_level->sl; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new SL = %u\n", >> + sl); >> + } >> + } >> + } >> + >> + /* adjust PKey according to QoS constraints */ >> + if (p_qos_level->pkey_set) >> + { >> + pkey = p_qos_level->pkey; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new PKey = %u\n", >> + pkey); >> + } >> + } >> + >> + /* adjust Class according to QoS constraints */ >> + if (p_qos_level->class_set) >> + { >> + class = p_qos_level->class; >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: new Class = %u\n", >> + class); >> + } >> + } >> + >> + } /*if (p_qos_level)*/ >> + >> + /* >> + * Determine if these values meet the user criteria >> + * and adjust appropriately >> + */ >> + >> + /* we silently ignore cases where only the MTU selector is defined */ >> + if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && >> + ( comp_mask & IB_PR_COMPMASK_MTU ) ) >> + { >> + required_mtu = ib_path_rec_mtu( p_pr ); >> + switch( ib_path_rec_mtu_sel( p_pr ) ) >> + { >> + case 0: /* must be greater than */ >> + if( mtu <= required_mtu ) >> + status = IB_NOT_FOUND; >> + break; >> + >> + case 1: /* must be less than */ >> + if( mtu >= required_mtu ) >> + { >> + /* adjust to use the highest mtu >> + lower then the required one */ >> + if( required_mtu > 1 ) >> + mtu = required_mtu - 1; >> + else >> + status = IB_NOT_FOUND; >> + } >> + break; >> + >> + 
case 2: /* exact match */ >> + if( mtu < required_mtu ) >> + status = IB_NOT_FOUND; >> + else >> + mtu = required_mtu; >> + break; >> + >> + case 3: /* largest available */ >> + /* can't be disqualified by this one */ >> + break; >> + >> + default: >> + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ >> + CL_ASSERT( FALSE ); >> + status = IB_ERROR; >> + break; >> + } >> + } >> + if (status != IB_SUCCESS) >> + goto Exit; >> + >> + /* we silently ignore cases where only the Rate selector is defined */ >> + if ( ( comp_mask & IB_PR_COMPMASK_RATESELEC ) && >> + ( comp_mask & IB_PR_COMPMASK_RATE ) ) >> + { >> + required_rate = ib_path_rec_rate( p_pr ); >> + switch( ib_path_rec_rate_sel( p_pr ) ) >> + { >> + case 0: /* must be greater than */ >> + if( rate <= required_rate ) >> + status = IB_NOT_FOUND; >> + break; >> + >> + case 1: /* must be less than */ >> + if( rate >= required_rate ) >> + { >> + /* adjust the rate to use the highest rate >> + lower then the required one */ >> + if( required_rate > 2 ) >> + rate = required_rate - 1; >> + else >> + status = IB_NOT_FOUND; >> + } >> + break; >> + >> + case 2: /* exact match */ >> + if( rate < required_rate ) >> + status = IB_NOT_FOUND; >> + else >> + rate = required_rate; >> + break; >> + >> + case 3: /* largest available */ >> + /* can't be disqualified by this one */ >> + break; >> + >> + default: >> + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ >> + CL_ASSERT( FALSE ); >> + status = IB_ERROR; >> + break; >> + } >> + } >> + if (status != IB_SUCCESS) >> + goto Exit; >> + >> + /* we silently ignore cases where only the PktLife selector is defined */ >> + if ( ( comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC ) && >> + ( comp_mask & IB_PR_COMPMASK_PKTLIFETIME ) ) >> + { >> + required_pkt_life = ib_path_rec_pkt_life( p_pr ); >> + switch( ib_path_rec_pkt_life_sel( p_pr ) ) >> + { >> + case 0: /* must be greater than */ >> + if( pkt_life <= required_pkt_life ) >> + status = IB_NOT_FOUND; >> + break; 
>> + >> + case 1: /* must be less than */ >> + if( pkt_life >= required_pkt_life ) >> + { >> + /* adjust the lifetime to use the highest possible >> + lower then the required one */ >> + if( required_pkt_life > 1 ) >> + pkt_life = required_pkt_life - 1; >> + else >> + status = IB_NOT_FOUND; >> + } >> + break; >> + >> + case 2: /* exact match */ >> + if( pkt_life < required_pkt_life ) >> + status = IB_NOT_FOUND; >> + else >> + pkt_life = required_pkt_life; >> + break; >> + >> + case 3: /* smallest available */ >> + /* can't be disqualified by this one */ >> + break; >> + >> + default: >> + /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ >> + CL_ASSERT( FALSE ); >> + status = IB_ERROR; >> + break; >> + } >> + } >> + if (status != IB_SUCCESS) >> + goto Exit; >> + >> + /* >> + * set Pkey for this path record request >> + */ >> + >> + shared_pkey = osm_physp_find_common_pkey( p_src_physp, p_dest_physp ); >> + if ( !shared_pkey ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F07: " >> + "Ports do not have any shared PKeys\n"); >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + if( (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && >> + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) >> + { >> + if (p_qos_level && p_qos_level->pkey_set) >> + pkey = p_qos_level->pkey; >> + else >> + pkey = shared_pkey; >> + } >> + else if (comp_mask & IB_PR_COMPMASK_PKEY) >> + { >> + /* PathRecord requires specific PKey */ >> + if (p_qos_level && p_qos_level->pkey_set) >> + { >> + /* check that QoS pkey matches the required pkey */ >> + if (p_qos_level->pkey != p_pr->pkey) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS PKey constraint (0x%04x) doesn't match required PKey (0x%04x)\n", >> + cl_ntoh16(p_qos_level->pkey), cl_ntoh16(p_pr->pkey)); >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + pkey = p_qos_level->pkey; >> + } >> + else >> + pkey = p_pr->pkey; >> + } >> 
+ else >> + { >> + if (p_qos_level && p_qos_level->pkey_set) >> + pkey = p_qos_level->pkey; >> + else >> + pkey = shared_pkey; >> + } >> + >> + /* >> + * PKey has been set. Now check that ports share this PKey. >> + */ >> + >> + if ( (pkey != shared_pkey) && >> + (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Ports do not share specified PKey 0x%04x\n", >> + cl_ntoh16(pkey)); >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + /* >> + * Done selecting PKey - Now select valid SL >> + */ >> + >> + if (pkey) >> + { >> + p_prtn = (osm_prtn_t *)cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl, >> + pkey & cl_ntoh16((uint16_t)~0x8000)); >> + if ( p_prtn == (osm_prtn_t *)cl_qmap_end(&p_rcv->p_subn->prtn_pkey_tbl) ) >> + { >> + /* this may be possible when pkey tables are created somehow in >> + previous runs or things are going wrong here */ >> + osm_log( p_rcv->p_log, OSM_LOG_ERROR, >> + "__osm_pr_rcv_get_path_parms_qos: ERR 1F08: " >> + "No partition found for PKey 0x%04x - using default SL %d\n", >> + cl_ntoh16(pkey), sl); >> + } >> + else >> + { >> + if (!valid_sls[p_prtn->sl]) >> + { >> + /* partition forces to use SL that eventually leads to VL15 */ >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Partition constraints (pkey=%u, sl=%u) lead to VL15\n", >> + pkey, p_prtn->sl); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + else >> + { >> + /* partition's SL is valid */ >> + if ( p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != p_prtn->sl) ) >> + { >> + /* partition's SL doesn't match QoS Level SL */ >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "Partition constraints (pkey=%u, sl=%u): SL doesn't match QoS 
SL(%u)\n", >> + pkey, p_prtn->sl, p_qos_level->sl); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + else >> + { >> + /* set SL to be partition's SL */ >> + sl = p_prtn->sl; >> + } >> + } >> + } >> + } >> + else >> + { >> + /* >> + * No pkey (no partition) >> + */ >> + if (p_qos_level && p_qos_level->sl_set) >> + { >> + /* sl has been already set */ >> + } >> + else >> + { >> + if (comp_mask & IB_PR_COMPMASK_SL) >> + { >> + /* specific SL was requested */ >> + if (!valid_sls[ib_path_rec_sl(p_pr)]) >> + { >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: required SL (%u) leads to VL15\n", >> + ib_path_rec_sl(p_pr)); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + /* set sl to whatever requested */ >> + sl = ib_path_rec_sl(p_pr); >> + } >> + else >> + { >> + /* set sl to the first valid sl that won't lead to VL15 */ >> + sl = first_valid_sl; >> + } >> + } >> + } >> + >> + if ( (comp_mask & IB_PR_COMPMASK_SL) && >> + (ib_path_rec_sl( p_pr ) != sl) ) >> + { >> + if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) ) >> + { >> + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, >> + "__osm_pr_rcv_get_path_parms_qos: " >> + "QoS constaraints: required SL (%u) doesn't match QoS constraints\n", >> + ib_path_rec_sl(p_pr)); >> + } >> + status = IB_NOT_FOUND; >> + goto Exit; >> + } >> + >> + /* reset pkey when raw traffic */ >> + if( (pkey) && >> + (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && >> + (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) ) >> + { >> + pkey = 0; >> + } >> + >> + p_parms->mtu = mtu; >> + p_parms->rate = rate; >> + p_parms->pkt_life = pkt_life; >> + p_parms->pkey = pkey; >> + p_parms->sl = sl; >> + p_parms->class = class; >> + >> + Exit: >> + OSM_LOG_EXIT( p_rcv->p_log ); >> + return( status ); >> +} >> + >> +/********************************************************************** >> + 
**********************************************************************/ >> static void >> __osm_pr_rcv_build_pr( >> IN osm_pr_rcv_t* const p_rcv, >> @@ -774,7 +1569,8 @@ __osm_pr_rcv_build_pr( >> #endif >> >> p_pr->pkey = p_parms->pkey; >> - p_pr->sl = cl_hton16(p_parms->sl); >> + ib_path_rec_set_qos_class(p_pr,p_parms->class); >> + ib_path_rec_set_sl(p_pr,p_parms->sl); >> p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80); >> p_pr->rate = (uint8_t)(p_parms->rate | 0x80); >> >> @@ -832,10 +1628,14 @@ __osm_pr_rcv_get_lid_pair_path( >> goto Exit; >> } >> >> - status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, >> - p_dest_port, dest_lid_ho, >> - comp_mask, &path_parms ); >> - >> + if (p_rcv->p_subn->opt.no_qos) >> + status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port, >> + p_dest_port, dest_lid_ho, >> + comp_mask, &path_parms ); >> + else >> + status = __osm_pr_rcv_get_path_parms_qos( p_rcv, p_pr, p_src_port, >> + p_dest_port, dest_lid_ho, >> + comp_mask, &path_parms ); >> if( status != IB_SUCCESS ) >> { >> cl_qlock_pool_put( &p_rcv->pr_pool, &p_pr_item->pool_item ); >> @@ -849,6 +1649,16 @@ __osm_pr_rcv_get_lid_pair_path( >> comp_mask, &rev_path_parms ); >> path_parms.reversible = ( rev_path_status == IB_SUCCESS ); >> >> + /* >> + * ToDo: >> + * ToDo: The whole implementation of reversible path is wrong. >> + * ToDo: It is not enough to know that the reversed path exist. >> + * ToDo: We should also adjust limits (mtu, rate, etc...) by >> + * ToDo: comparing path and reversed path's limits. >> + * ToDo: Also, need to think about reversible path in QoS. >> + * ToDo: >> + */ >> + >> /* did we get a Reversible Path compmask ? 
*/ >> /* >> NOTE that if the reversible component = 0, it is a don't care >> @@ -2053,7 +2863,7 @@ osm_pr_rcv_process( >> /* SL, Hop Limit, and Flow Label */ >> ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop, >> &sl, &flow_label, &hop_limit ); >> - p_pr_item->path_rec.sl = cl_hton16( sl ); >> + ib_path_rec_set_sl(&(p_pr_item->path_rec), sl); >> #ifndef ROUTER_EXP >> p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) | >> (flow_label << 8); >> -- >> 1.4.4.1.GIT >> >> > From swise at opengridcomputing.com Wed Jan 31 14:43:51 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 16:43:51 -0600 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <000301c74588$195c80f0$8698070a@amr.corp.intel.com> References: <000301c74588$195c80f0$8698070a@amr.corp.intel.com> Message-ID: <1170283431.14294.38.camel@stevo-desktop> On Wed, 2007-01-31 at 14:35 -0800, Sean Hefty wrote: > >But there still exists an iwarp issue that I need to fix because > >librdmacm (the one shipped in OFED) now calls the kernel > >rdma_init_qp_attr() function via ucma before the library calls kernel > >rdma_connect() via ucma... > > Can you clarify which versions of the librdmacm and kernel you are using? > The 0130-0200 OFED 1.2 daily kernel and user builds applied to any kernel. But I'm using 2.6.20-rc6. > The librdmacm shipped with OFED 1.1 shouldn't hit this issue. And neither > should the upcoming OFED 1.2 version of the librdmacm (with the previously > posted patch applied), when paired with either the OFED 1.2 kernel code, what > was requested to go into 2.6.21, or older kernels. > What patch? > I just think that this problem is only exposed by developmental librdmacm code > paired with older developmental rdma_cm multicast code. Well the OFED 1.2 builds are busted now for iWARP. I guess I missed whatever patch you submitted that will fix this. Steve. 
From sean.hefty at intel.com Wed Jan 31 15:00:32 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 15:00:32 -0800 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <1170283431.14294.38.camel@stevo-desktop> Message-ID: <000401c7458b$9bff77d0$8698070a@amr.corp.intel.com> >> The librdmacm shipped with OFED 1.1 shouldn't hit this issue. And neither >> should the upcoming OFED 1.2 version of the librdmacm (with the previously >> posted patch applied), when paired with either the OFED 1.2 kernel code, what >> was requested to go into 2.6.21, or older kernels. >> > >What patch? I was referring to the patch to the librdmacm that I posted earlier today. I just committed this patch to my librdmacm.git tree. >Well the OFED 1.2 builds are busted now for iWARP. I guess I missed >whatever patch you submitted that will fix this. Okay - I _think_ the problem is that OFED 1.2 pulled code from my git tree before I created an ofed_1_2 branch (which contains the fix), and didn't update to match my ofed_1_2 branch. The crash that you reported occurring over iWarp should also happen over IB for the same reason, so both are likely broken atm... Vlad, can you please update the ofed build by pulling from the ofed_1_2 branches of my rdma-dev.git and librdmacm.git trees? 
- Sean From sashak at voltaire.com Wed Jan 31 15:20:01 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Feb 2007 01:20:01 +0200 Subject: [openib-general] [PATCH 10/10] osm: QoS in OpenSM In-Reply-To: <45C119F4.9000209@dev.mellanox.co.il> References: <45BF6548.80104@dev.mellanox.co.il> <20070131212949.GV3837@sashak.voltaire.com> <45C119F4.9000209@dev.mellanox.co.il> Message-ID: <20070131232001.GZ3837@sashak.voltaire.com> On 00:36 Thu 01 Feb , Yevgeny Kliteynik wrote: > Hi Sasha, > > Sasha Khapyorsky wrote: > > On 17:33 Tue 30 Jan , Yevgeny Kliteynik wrote: > >> Checking PathRecord query for QoS constraints > >> > >> The QoS-aware path selection logic is implemented in a > >> separate function that is called only when QoS in OpenSM > >> is on. It causes some code duplication, but the idea is > >> to minimize the changes in the existing logic in OSM. > >> Eventually, these two function (the old path selection > >> and the new QoS-aware path selection) will be merged > >> into a single function. > > > > Please merge __osm_pr_rcv_get_path_parms() and > > __osm_pr_rcv_get_path_parms_qos() functions into single one - as you > > stated most code is duplicated there. > > > > In fact __osm_pr_rcv_get_path_parms() is most "changeable" function in > > SA PR processor, and it is not good idea to make this twice. IMHO it > > creates more ground for future bugs comparing to the risk of possible > > impacts to existing functionality. > > As I said, this actually won't be a "merge" - the original function > will be removed, and the new function will have a few if() statements > for cases when QoS in osm is down. I call this merge, but doesn't matter... Those functions are similar (after removing some extra indentation characters) and have identical meaning and we don't want to track two versions of the same. 
> However, this will be a bunch of new code that is running as part of > the usual flow, and since this code didn't have enough time to be tested > before feature freeze, and because we discussed the necessity of implementing > QoS-aware PathRecord the way that it won't change the usual path (again, > until it will be tested), I think that it would be better right now to > leave it in two separate functions. For version control we are using git instead of keeping old and new function versions in the source files. > Trust me, I want to get rid of this code duplication much more than you do :) > And I'll do it as soon as I get to test the new code properly. Great, so please just do it in this way. Then we will see the patch with the actual changes instead of untested code which is good just because it is "off". Sasha From caitlinb at broadcom.com Wed Jan 31 15:35:23 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Wed, 31 Jan 2007 15:35:23 -0800 Subject: [openib-general] new IB CM reject reason In-Reply-To: <000201c74585$a0bc7260$8698070a@amr.corp.intel.com> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1FBCDE6@NT-SJCA-0751.brcm.ad.broadcom.com> openib-general-bounces at openib.org wrote: > We've hit into an issue with the IB CM reject reason codes. > When a remote application crashes during connection > establishment, the connection will be rejected by the kernel > CM. Unfortunately, there's not a decent reject reason that > maps to this event. Currently, the ib_cm issues the reject > as consumer defined (code 28). > > I'd like to propose adding reject reason 0, which would mean > other/unknown/or none given. This is a deviation from the > spec, but does anyone know of any issues with such an approach? > > - Sean > Is there a reason to distinguish between a connection that is being rejected because the listener crashed and a connection that is being rejected because the listener does not exist? 
From sean.hefty at intel.com Wed Jan 31 15:39:33 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 15:39:33 -0800 Subject: [openib-general] new IB CM reject reason In-Reply-To: <54AD0F12E08D1541B826BE97C98F99F1FBCDE6@NT-SJCA-0751.brcm.ad.broadcom.com> Message-ID: <000501c74591$0f4f53e0$8698070a@amr.corp.intel.com> >Is there a reason to distinguish between a connection that >is being rejected because the listener crashed and a connection >that is being rejected because the listener does not exist? This only covers the case for the REQ received state, and could work for that state. But the problem can also occur after sending/receiving an MRA, REQ, or REP. - Sean From swise at opengridcomputing.com Wed Jan 31 15:43:54 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 31 Jan 2007 17:43:54 -0600 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <45C11277.90403@ichips.intel.com> References: <000001c7456c$abbe2be0$8698070a@amr.corp.intel.com> <1170276655.14294.11.camel@stevo-desktop> <45C1103C.3010301@ichips.intel.com> <1170280750.14294.19.camel@stevo-desktop> <45C11277.90403@ichips.intel.com> Message-ID: <1170287034.14294.58.camel@stevo-desktop> On Wed, 2007-01-31 at 14:04 -0800, Sean Hefty wrote: > > Fixed it for IB maybe, but not for iWarp, right? > > It should be fixed for both. > > > So OFED 1.2 will be ABI 3, right? > > OFED will be ABI 4, since it will include multicast support (which is what > causes the ABI to bump from 3 to 4). > Has the ofed tree been updated to ABI 4 yet? 
From caitlinb at broadcom.com Wed Jan 31 15:45:26 2007 From: caitlinb at broadcom.com (Caitlin Bestler) Date: Wed, 31 Jan 2007 15:45:26 -0800 Subject: [openib-general] new IB CM reject reason In-Reply-To: <000501c74591$0f4f53e0$8698070a@amr.corp.intel.com> Message-ID: <54AD0F12E08D1541B826BE97C98F99F1FBCDE7@NT-SJCA-0751.brcm.ad.broadcom.com> Sean Hefty wrote: >> Is there a reason to distinguish between a connection that is being >> rejected because the listener crashed and a connection that is being >> rejected because the listener does not exist? > > This only covers the case for the REQ received state, and > could work for that state. But the problem can also occur > after sending/receiving an MRA, REQ, or REP. > > - Sean So would that mean that only an InfiniBand specific wire-protocol code was needed, and that no API enhancement was required? Trying to describe failures in a transport neutral fashion is a real pain. From sean.hefty at intel.com Wed Jan 31 15:49:34 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 15:49:34 -0800 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <1170287034.14294.58.camel@stevo-desktop> Message-ID: <000701c74592$757174e0$8698070a@amr.corp.intel.com> >> OFED will be ABI 4, since it will include multicast support (which is what >> causes the ABI to bump from 3 to 4). >> > >Has the ofed tree been updated to ABI 4 yet? I just looked in vlad's git tree a while ago, and his ofed_1_2 branch had ABI 3. His ofed_1_2_multicast didn't have an rdma_user_cm.h file, so I'm not sure about that branch. 
- Sean From sean.hefty at intel.com Wed Jan 31 15:53:56 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 15:53:56 -0800 Subject: [openib-general] new IB CM reject reason In-Reply-To: <54AD0F12E08D1541B826BE97C98F99F1FBCDE7@NT-SJCA-0751.brcm.ad.broadcom.com> Message-ID: <000801c74593$11f22760$8698070a@amr.corp.intel.com> >So would that mean that only an InfiniBand specific wire-protocol >code was needed, and that no API enhancement was required? Yes - I'm talking about the IB CM wire-protocol specifically. Actual implementation changes would likewise be limited to the ib_cm. >Trying to describe failures in a transport neutral fashion >is a real pain. The rdma_cm exports the underlying transport reject code as a status value that is left up to the user to interpret. The event code is transport neutral, and likely all that most users care about, but the transport specific code is useful for debugging. - Sean From jgunthorpe at obsidianresearch.com Wed Jan 31 16:22:02 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Wed, 31 Jan 2007 17:22:02 -0700 Subject: [openib-general] ipath and current git woes Message-ID: <20070201002202.GA12386@obsidianresearch.com> Has anyone been able to use ipath with the current latest git everything? 
The libipathverbs git repository seems to be missing a patch from Roland to make it work with libibverbs.2 in an email titled: [openib-general] [PATCH 3/7] libipathverbs: Update libipathverbs for new libibverbs driver handling After applying that patch the user space consumers load but we got a kernel oops when we tried to run a test here :< Unable to handle kernel NULL pointer dereference at 0000000000000918 RIP: [] :ib_ipath:ipath_mmap+0x37/0x95 PGD 3ad46067 PUD 3ad4f067 PMD 0 Oops: 0000 [1] CPU 0 Modules linked in: usb_storage skge bitrev crc32 ib_ipath k8temp hwmon forcedeth ehci_hcd ohci_hcd usbcore i2c_nforce2 i2c_core ib_uverbs ib_umad ib_mad ib_core Pid: 4009, comm: ib_rdma_lat Not tainted 2.6.20-rc4-gf3a2c3ee-dirty #6 RIP: 0010:[] [] :ib_ipath:ipath_mmap+0x37/0x95 RSP: 0000:ffff81003aaa3e88 EFLAGS: 00010002 RAX: ffff81003c434000 RBX: 0000000000000910 RCX: 0000000000001000 RDX: 00000000002b3000 RSI: ffff81003bcab440 RDI: ffff81003bc2d840 RBP: ffff81003b7af918 R08: ffff81003aaa3f08 R09: ffff81003aa38c98 R10: ffff81003aa38c90 R11: ffffffff88074c3f R12: 00000000ffffffea R13: ffff81003ac496c0 R14: 00000000f7ee3000 R15: 0000000000001000 FS: 0000000000000000(0000) GS:ffffffff80530000(0063) knlGS:00000000f7d8c6c0 CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b CR2: 0000000000000918 CR3: 000000003aa2f000 CR4: 00000000000006e0 Process ib_rdma_lat (pid: 4009, threadinfo ffff81003aaa2000, task ffff81003c0495e0) Stack: ffff81003b7af918 00000000001000fb 00000000ffffffea ffffffff80250151 ffff81003be9f440 ffff81003bc2d140 0000000000000028 00000000ff99df20 0000000000000000 0000000000000000 ffff81003b87d818 ffff81003dc04840 Call Trace: [] do_mmap_pgoff+0x4d5/0x739 [] sys32_mmap2+0x76/0x9e [] ia32_sysret+0x0/0xa Code: 48 3b 7b 08 75 46 48 3b 53 10 75 40 8b 43 1c 48 39 c1 77 40 RIP [] :ib_ipath:ipath_mmap+0x37/0x95 RSP CR2: 0000000000000918 This is with a PCI-E qlogic card: 0000:03:00.0 InfiniBand: Unknown device 1fc1:0010 (rev 01) Anyone have any clues? 
One notable thing is that I have a 32 bit user space and a 64 bit kernel. I'll try a 64 bit user space tomorrow in case there is something wrong with 32bit compatibility... The last time we had these cards working was with OFED 1.1 on 64 bit FC4 using a linus kernel (2.6.18 I think).. Thanks, Jason From robert.walsh at qlogic.com Wed Jan 31 16:42:25 2007 From: robert.walsh at qlogic.com (Robert Walsh) Date: Wed, 31 Jan 2007 16:42:25 -0800 Subject: [openib-general] ipath and current git woes In-Reply-To: <20070201002202.GA12386@obsidianresearch.com> References: <20070201002202.GA12386@obsidianresearch.com> Message-ID: <45C13771.2070406@qlogic.com> Jason Gunthorpe wrote: > Has anyone been able to use ipath with the current latest git > everything? We're working on getting this up to date right now. Give us a couple of days and we'll have some new patches ready. Regards, Robert. From rowland at cse.ohio-state.edu Wed Jan 31 17:32:52 2007 From: rowland at cse.ohio-state.edu (Shaun Rowland) Date: Wed, 31 Jan 2007 20:32:52 -0500 Subject: [openib-general] MVAPICH2 SRPM and install file patches Message-ID: <45C14344.9010602@cse.ohio-state.edu> I've placed the MVAPICH2 SRPM on the OFA server in ~rowland/ofed_1_2, and it is linked to here: http://www.openfabrics.org/~rowland/ofed_1_2/ Additionally, I am including a patch in this email that updates the ofed_1_2_scripts files from the GIT repository we were given to handle the MVAPICH2 SRPM file. Basically, installing MVAPICH2 is similar to the other MPI packages, except that I have added a choice option to build with iWARP support or not. The default is IB only. If the user has selected the librdmacm packages and the mvapich2 package, this choice is presented. This is also saved in the ofed.conf file using an MVAPICH2_IMPL variable, and the librdmacm packages are added as dependencies if the iWARP version of MVAPICH2 is desired and they are not already in the ofed.conf file, which seems like standard behavior in the scripts. 
The resulting binary RPM uses the name convention mvapich2_ as normal in either case. There are various ways this could be implemented, perhaps in a better manner. This is what I was able to come up with by today. Since the installation scripts given were very similar to the original OFED 1.1 scripts, I was able to test the installation procedure using OFED 1.1 files. Everything worked for me, including building the mpitests package against the mvapich2 package. There are some comments about this in what I have done. I hope that it is helpful in getting our SRPM integrated into the installation scripts. Additionally, I put a README file in my ofed_1_2 directory that contains information about the macros that can be used with our SRPM file. The SRPM can be used to install against an existing OFED installation, and those macros control various aspects of the result. There is one special macro I use for when the SRPM is being built along with the OFED source, and its use should be clear in the patched build.sh script and associated comment. -- Shaun Rowland rowland at cse.ohio-state.edu http://www.cse.ohio-state.edu/~rowland/ -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: ofed_1_2_scripts.patch URL: From mshefty at ichips.intel.com Wed Jan 31 17:53:16 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 31 Jan 2007 17:53:16 -0800 Subject: [openib-general] ip_ib_mc_map? In-Reply-To: <1170275680.14294.5.camel@stevo-desktop> References: <000101c74576$fedc81f0$8698070a@amr.corp.intel.com> <1170275680.14294.5.camel@stevo-desktop> Message-ID: <45C1480C.1020600@ichips.intel.com> Steve Wise wrote: > Perhaps there's no backport for this to rhel4u4? I would have thought so, but I really don't know. The function is called from net/ipv4/arp.c, and not directly by ipoib. So, I don't know how the backport patches typically handle this. 
- Sean From eitan at sw053.yok.mtl.com Wed Jan 31 21:21:22 2007 From: eitan at sw053.yok.mtl.com (Eitan Zahavi) Date: Thu, 1 Feb 2007 07:21:22 +0200 Subject: [openib-general] nightly osm_sim report 2007-02-01:normal completion Message-ID: <200702010521.l115LMdT008341@sw053.yok.mtl.com> OSM Simulation Regression Summary OpenSM rev = Wed_Jan_31_12:00:12_2007 2095ee ibutils rev = Wed_Jan_3_11:42:12_2007 913448 Total=410 Pass=410 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree part-4-ary-3-tree.topo 10 FatTree merge-roots-reorder-4-ary-2-tree.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree merge-root-12-ary-2-tree.topo 10 FatTree merge-2-ary-4-tree.topo 10 FatTree half-4-ary-3-tree.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree 4-ary-4-tree.topo 10 FatTree 4-ary-3-tree.topo 10 FatTree 32nodes-3lvl-is1.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FatTree 12-ary-2-tree.topo Failures: From mst at mellanox.co.il Wed Jan 31 22:24:31 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 1 Feb 2007 08:24:31 +0200 Subject: [openib-general] new IB CM reject reason In-Reply-To: <000201c74585$a0bc7260$8698070a@amr.corp.intel.com> References: <000201c74585$a0bc7260$8698070a@amr.corp.intel.com> Message-ID: <20070201062431.GB4499@mellanox.co.il> > Quoting Sean Hefty : > Subject: new IB CM reject reason > > We've hit into an issue with the IB CM reject reason codes. When a remote > application crashes during connection establishment, the connection will be > rejected by the kernel CM. Unfortunately, there's not a decent reject reason > that maps to this event. 
Currently, the ib_cm issues the reject as consumer > defined (code 28). > > I'd like to propose adding reject reason 0, which would mean other/unknown/or > none given. This is a deviation from the spec, but does anyone know of any > issues with such an approach? No, I don't think "application crashed" makes sense as an element of wire protocol. I think an optional logging of errors in kernel CM would be a much better solution. I know I had to add some printks each time I was debugging SDP. A couple of reasons that come to mind: 1. Should the remote side care whether remote implementation is kernel or userspace? Userspace consumers are not the only ones of interest. What about various error codes and failure values returned from callback on passive side? If you are trying to develop a debug aid, these need to be covered as well. 2. Another objection is that this feature seems to invite misuse where applications will use REJ reason as a hint on whether remote side crashed. But REJ could be lost. Wouldn't this confuse the remote side? -- MST From mst at mellanox.co.il Wed Jan 31 22:27:56 2007 From: mst at mellanox.co.il (Michael S. Tsirkin) Date: Thu, 1 Feb 2007 08:27:56 +0200 Subject: [openib-general] [PATCH] RE: regression in ofed 1.2 In-Reply-To: <000701c74592$757174e0$8698070a@amr.corp.intel.com> References: <1170287034.14294.58.camel@stevo-desktop> <000701c74592$757174e0$8698070a@amr.corp.intel.com> Message-ID: <20070201062756.GC4499@mellanox.co.il> > His ofed_1_2_multicast didn't have an rdma_user_cm.h file, so I'm not sure about > that branch. That one should be removed. It was created as a debugging aid to help people debug crashes observed by Dotan in the multicast module. 
-- MST From bugzilla-daemon at lists.openfabrics.org Wed Jan 31 23:19:44 2007 From: bugzilla-daemon at lists.openfabrics.org (bugzilla-daemon at lists.openfabrics.org) Date: Wed, 31 Jan 2007 23:19:44 -0800 (PST) Subject: [openib-general] [Bug 334] Problems with build OFED-1.1.1-ib_local_sa In-Reply-To: Message-ID: <20070201071944.77F9CE607F7@openfabrics.org> https://bugs.openfabrics.org/show_bug.cgi?id=334 dmitry.yulov at intel.com changed: What |Removed |Added ---------------------------------------------------------------------------- Priority|P1 |P2 ------- Comment #1 from dmitry.yulov at intel.com 2007-01-31 23:19 ------- I resolve the problem with build cma.c file (I added rdma/ib_local_sa.h file in include), but I found a new problem: gcc -Wp,-MD,/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/.iser_verbs.o.d -nostdinc -isystem /usr/lib64/gcc/x86_64-suse-linux/4.1.0/include -D__KERNEL__ -I/var/tmp/OFEDRPM/BUILD/openib-1.1/include -I/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/include -Iinclude -Iinclude2 -I/usr/src/linux-2.6.16.21-0.8/include -include include/linux/autoconf.h -include /var/tmp/OFEDRPM/BUILD/openib-1.1/include/linux/autoconf.h -I/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -Werror-implicit-function-declaration -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -mtune=generic -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -Wdeclaration-after-statement -Wno-pointer-sign -I/var/tmp/OFEDRPM/BUILD/openib-1.1/include -I/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/include -I/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/ipoib -I/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/debug -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(iser_verbs)" -D"KBUILD_MODNAME=KBUILD_STR(ib_iser)" -c -o 
/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/.tmp_iser_verbs.o /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c In file included from /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c:42: /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iscsi_iser.h:47:27: error: scsi/libiscsi.h: No such file or directory In file included from /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iscsi_iser.h:48, from /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c:42: /usr/src/linux-2.6.16.21-0.8/include/scsi/scsi_transport_iscsi.h:213: error: field 'mutex' has incomplete type /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c: In function 'iser_create_device_ib_res': /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c:80: error: 'ISCSI_XMIT_CMDS_MAX' undeclared (first use in this function) /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c:80: error: (Each undeclared identifier is reported only once /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c:80: error: for each function it appears in.) 
/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c: In function 'iser_create_ib_conn_res': /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c:159: error: 'ISCSI_XMIT_CMDS_MAX' undeclared (first use in this function) /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c: In function 'iser_disconnected_handler': /var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.c:418: error: implicit declaration of function 'iscsi_conn_failure' make[5]: *** [/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser/iser_verbs.o] Error 1 make[4]: *** [/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband/ulp/iser] Error 2 make[3]: *** [_module_/var/tmp/OFEDRPM/BUILD/openib-1.1/drivers/infiniband] Error 2 make[2]: *** [modules] Error 2 make[1]: *** [modules] Error 2 Could you please help me resolve this problem? -- Configure bugmail: https://bugs.openfabrics.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee.