[ofa-general] ***SPAM*** Re: mlx4_core 0000:c3:00.0: SW2HW_MPT failed (-16) (dmesg)

Phillip Wilson phillipwils at gmail.com
Sun Mar 8 00:14:48 PST 2009


I updated the HCA "InfiniBand: Mellanox Technologies: Unknown device 634a
(rev a0)" to the latest firmware and the issue remains.  "fw_ver" is now
2.6.000.

Any ideas on why the timeout is occurring in the function?


203<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l203>static
int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
204<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l204>
                        int out_is_imm, u32 in_modifier, u8
op_modifier,
205<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l205>
                        u16 op, unsigned long timeout)
206<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l206>{
207<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l207>
       struct mlx4_priv *priv = mlx4_priv(dev);
208<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l208>
       void __iomem *hcr = priv->cmd.hcr;
209<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l209>
       int err = 0;
210<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l210>
       unsigned long end;
211<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l211>
212<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l212>
       down(&priv->cmd.poll_sem);
213<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l213>
214<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l214>
       err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
215<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l215>
                           in_modifier, op_modifier, op,
CMD_POLL_TOKEN, 0);
216<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l216>
       if (err)
217<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l217>
               goto out;
218<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l218>
219<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l219>
       end = msecs_to_jiffies(timeout) + jiffies;
220<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l220>
       while (cmd_pending(dev) && time_before(jiffies, end))
221<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l221>
               cond_resched();
222<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l222>
223<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l223>
       if (cmd_pending(dev)) {
224<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l224>
               err = -
ETIMEDOUT;
225<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l225>
               goto out;
226<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l226>
       }
227<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l227>
228<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l228>
       if (out_is_imm)
229<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l229>
               *out_param =
230<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l230>
                       (u64) be32_to_cpu((__force __be32)
231<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l231>
                                         __raw_readl(hcr +
HCR_OUT_PARAM_OFFSET)) << 32 |
232<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l232>
                       (u64) be32_to_cpu((__force __be32)
233<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l233>
                                         __raw_readl(hcr +
HCR_OUT_PARAM_OFFSET + 4));
234<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l234>
235<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l235>
       err = mlx4_status_to_errno(be32_to_cpu((__force __be32)
236<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l236>
                                              __raw_readl(hcr +
HCR_STATUS_OFFSET)) >> 24);
237<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l237>
238<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l238>out:
239<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l239>
       up(&priv->cmd.poll_sem);
240<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l240>
       return err;
241<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l241>}


On Sat, Mar 7, 2009 at 12:55 AM, Dotan Barak <dotanba at gmail.com> wrote:

> Please make sure that you have the latest firmware for the HCA that has the
> failure.
>
> Dotan
>
>
> Phillip Wilson wrote:
>
>> This is related to the thread I started on Feb 24
>>
>> Re: [ofa-general] ***SPAM*** Mellanox ibv_reg_mr (memory region) function
>> call fails under load when using the mlx4 driver
>>  So far I have modified the "num_mtt" to 1 << 21, as Dotan suggested, but
>> I will try some more values this weekend.
>>  I think the code for this is in ../drivers/net/mlx4/cmd.c  mlx4_cmd(  ...
>>  )
>>   -ETIMEOUT   > System Information:
>> >
>> >
>> >
>> > The system has 4GB of memory.
>> >
>> >
>> >
>> > uname -a
>> >
>> > Linux (none) 2.6.24.02.02.08 #21 SMP Thu Feb 19 11:04:35 PST 2009 ia64
>> > unknown
>> >
>> >
>> >
>> > OFED 1.2.5
>> >
>> >
>> >
>> > lspci -d 15b3:
>> >
>> >
>> >
>> > 0000:10:00.0 InfiniBand: Mellanox Technologies MT25208 InfiniHost III Ex
>> > (Tavor compatibility mode) (rev 20)
>> >
>> > 0000:c3:00.0 InfiniBand: Mellanox Technologies: Unknown device 634a (rev
>> a0)
>> >
>> >
>> >
>> > lspci -d 15b3: -n
>> >
>> > 0000:10:00.0 0c06: 15b3:6278 (rev 20)
>> >
>> > 0000:c3:00.0 0c06: 15b3:634a (rev a0)
>> >
>> >
>> >
>> > ibv_devinfo -v
>> >
>> > hca_id: mlx4_0
>> >
>> >         fw_ver:                         2.5.000
>> >
>> >
>> >
>> > hca_id: mthca0
>> >
>> >         fw_ver:                         4.8.930
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20090308/0826ec70/attachment.html>


More information about the general mailing list