[ofa-general] ***SPAM*** Re: mlx4_core 0000:c3:00.0: SW2HW_MPT failed (-16) (dmesg)
Phillip Wilson
phillipwils at gmail.com
Sun Mar 8 00:14:48 PST 2009
I updated the HCA "InfiniBand: Mellanox Technologies: Unknown device 634a
(rev a0)" to the latest firmware and the issue remains. "fw_ver" is now
2.6.000.
Any ideas on why the timeout is occurring in the function?
203<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l203>static
int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
204<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l204>
int out_is_imm, u32 in_modifier, u8
op_modifier,
205<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l205>
u16 op, unsigned long timeout)
206<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l206>{
207<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l207>
struct mlx4_priv *priv = mlx4_priv(dev);
208<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l208>
void __iomem *hcr = priv->cmd.hcr;
209<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l209>
int err = 0;
210<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l210>
unsigned long end;
211<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l211>
212<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l212>
down(&priv->cmd.poll_sem);
213<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l213>
214<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l214>
err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
215<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l215>
in_modifier, op_modifier, op,
CMD_POLL_TOKEN, 0);
216<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l216>
if (err)
217<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l217>
goto out;
218<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l218>
219<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l219>
end = msecs_to_jiffies(timeout) + jiffies;
220<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l220>
while (cmd_pending(dev) && time_before(jiffies, end))
221<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l221>
cond_resched();
222<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l222>
223<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l223>
if (cmd_pending(dev)) {
224<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l224>
err = -
ETIMEDOUT;
225<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l225>
goto out;
226<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l226>
}
227<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l227>
228<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l228>
if (out_is_imm)
229<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l229>
*out_param =
230<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l230>
(u64) be32_to_cpu((__force __be32)
231<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l231>
__raw_readl(hcr +
HCR_OUT_PARAM_OFFSET)) << 32 |
232<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l232>
(u64) be32_to_cpu((__force __be32)
233<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l233>
__raw_readl(hcr +
HCR_OUT_PARAM_OFFSET + 4));
234<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l234>
235<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l235>
err = mlx4_status_to_errno(be32_to_cpu((__force __be32)
236<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l236>
__raw_readl(hcr +
HCR_STATUS_OFFSET)) >> 24);
237<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l237>
238<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l238>out:
239<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l239>
up(&priv->cmd.poll_sem);
240<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l240>
return err;
241<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l241>}
On Sat, Mar 7, 2009 at 12:55 AM, Dotan Barak <dotanba at gmail.com> wrote:
> Please make sure that you have the latest firmware for the HCA that has the
> failure.
>
> Dotan
>
>
> Phillip Wilson wrote:
>
>> This is related to the thread I started on Feb 24
>>
>> Re: [ofa-general] ***SPAM*** Mellanox ibv_reg_mr (memory region) function
>> call fails under load when using the mlx4 driver
>> So far I have modified the "num_mtt" to 1 << 21, as Dotan suggested, but
>> I will try some more values this weekend.
>> I think the code for this is in ../drivers/net/mlx4/cmd.c mlx4_cmd( ...
>> )
>> -ETIMEDOUT
>>
>> > System Information:
>> >
>> >
>> >
>> > The system has 4GB of memory.
>> >
>> >
>> >
>> > uname -a
>> >
>> > Linux (none) 2.6.24.02.02.08 #21 SMP Thu Feb 19 11:04:35 PST 2009 ia64
>> > unknown
>> >
>> >
>> >
>> > OFED 1.2.5
>> >
>> >
>> >
>> > lspci -d 15b3:
>> >
>> >
>> >
>> > 0000:10:00.0 InfiniBand: Mellanox Technologies MT25208 InfiniHost III Ex
>> > (Tavor compatibility mode) (rev 20)
>> >
>> > 0000:c3:00.0 InfiniBand: Mellanox Technologies: Unknown device 634a (rev
>> a0)
>> >
>> >
>> >
>> > lspci -d 15b3: -n
>> >
>> > 0000:10:00.0 0c06: 15b3:6278 (rev 20)
>> >
>> > 0000:c3:00.0 0c06: 15b3:634a (rev a0)
>> >
>> >
>> >
>> > ibv_devinfo -v
>> >
>> > hca_id: mlx4_0
>> >
>> > fw_ver: 2.5.000
>> >
>> >
>> >
>> > hca_id: mthca0
>> >
>> > fw_ver: 4.8.930
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20090308/0826ec70/attachment.html>
More information about the general
mailing list