[ewg] IPoIB crash SA notice handler: NULL pointer dereference and possible fix

Amar Mudrankit amar.mudrankit at qlogic.com
Fri Oct 31 06:49:28 PDT 2008


I encountered a NULL pointer dereference while testing IPoIB code.
The trace is as follows:

Unable to handle kernel NULL pointer dereference at 0000000000000008 RIP:
<ffffffff801f6cdb>{kref_get+4}
PGD 428f98067 PUD 428f99067 PMD 0
Oops: 0000 [1] SMP
last sysfs file: /class/infiniband/mlx4_0/ports/2/pkeys/0
CPU 0
Modules linked in: iscsi_tcp qlgc_srp qlgc_vnic rdma_ucm ib_iser libiscsi
scsi_transport_iscsi ib_srp ib_sdp rdma_cm iw_cm ib_addr ib_ipoib ib_cm ib_usa
ib_sa ib_uverbs ib_umad iw_cxgb3 ipv6 cxgb3 firmware_class ib_ipath snd_pcm_oss
snd_mixer_oss snd_seq snd_seq_device mlx4_ib mlx4_core ib_mthca ib_mad dock
ib_core button battery ac apparmor loop dm_mod shpchp ide_cd cdrom pci_hotplug
snd_hda_intel ohci1394 ehci_hcd i2c_nforce2 ohci_hcd ieee1394 i2c_core snd_pcm
usbcore snd_timer snd soundcore snd_page_alloc forcedeth parport_pc lp parport
reiserfs edd fan thermal processor sg sata_nv libata amd74xx sd_mod scsi_mod
ide_disk ide_core
Pid: 3464, comm: ib_mad1 Tainted: GF    U 2.6.16.60-0.21-smp #1
RIP: 0010:[<ffffffff801f6cdb>] <ffffffff801f6cdb>{kref_get+4}
RSP: 0018:ffff810237b91d18  EFLAGS: 00010002
RAX: ffff810249b25c00 RBX: 0000000000000008 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff810218eaa540 RDI: 0000000000000008
RBP: ffff810249b25d30 R08: ffff81023d7b1770 R09: ffff81023ec04c00
R10: ffffffff88386631 R11: 0000000000000292 R12: ffff81023ec03428
R13: ffff810218eaa398 R14: ffff810237b91dd8 R15: ffff81042ee99500
FS:  0000000042804940(0000) GS:ffffffff803d3000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000008 CR3: 0000000428f97000 CR4: 00000000000006e0
Process ib_mad1 (pid: 3464, threadinfo ffff810237b90000, task ffff81043f2527a0)
Stack: ffff810249b25c00 ffffffff884dd6ca 0400000000000401 000002003d884000
       000081044043a500 000080fe00000000 0075110000000000 00000000d7010000
       0000000000000000 0000000000000000
Call Trace: <ffffffff884dd6ca>{:ib_sa:notice_handler+175}
       <ffffffff883360d2>{:ib_mad:ib_mad_completion_handler+1119}
       <ffffffff8012d6bc>{__wake_up+56}
<ffffffff88335c73>{:ib_mad:ib_mad_completion_handler+0}
       <ffffffff80144386>{run_workqueue+139}
<ffffffff80144a94>{worker_thread+0}
       <ffffffff80147b34>{keventd_create_kthread+0}
<ffffffff80144b88>{worker_thread+244}
       <ffffffff8012c668>{default_wake_function+0}
<ffffffff80147dfc>{kthread+236}
       <ffffffff8010bed2>{child_rip+8}
<ffffffff80147b34>{keventd_create_kthread+0}
       <ffffffff882f0a43>{:apparmor:apparmor_inode_permission+0}
       <ffffffff80147d10>{kthread+0} <ffffffff8010beca>{child_rip+0}

Code: 8b 07 85 c0 75 24 b9 20 00 00 00 48 c7 c2 37 86 31 80 48 c7
RIP <ffffffff801f6cdb>{kref_get+4} RSP <ffff810237b91d18>
CR2: 0000000000000008

When I debugged the ib_sa code,  I could find some discrepancy in it
which I thought could be the potential reason for the NULL pointer
dereference oops.

In the file drivers/infiniband/core/sa_query.c, the function
ib_sa_event is setting SM address handle of the given IB port to NULL
in case of port error, lid change, sm change etc events.
Concurrently, there could another thread, which is in function
notice_handler()->ib_sa_notice_resp() [brought in by
kernel_patches/fixes/sean_local_sa_1_notifications.patch], scheduled
to run after the event handling, referencing SM address handle of the
same IB port. In such a scenario, there is NULL pointer dereference
expected in notice_handler().

<<snippet>>
static void ib_sa_event(struct ib_event_handler *handler, struct
ib_event *event)
{
        if (event->event == IB_EVENT_PORT_ERR    || {
                spin_lock_irqsave(&port->ah_lock, flags);
                if (port->sm_ah)
                        kref_put(&port->sm_ah->ref, free_sm_ah);
                port->sm_ah = NULL;
                spin_unlock_irqrestore(&port->ah_lock, flags);

                schedule_work(&sa_dev->port[event->element.port_num -
                                            sa_dev->start_port].update_task);
        }
}

notice_handler code comes from
kernel_patches/fixes/sean_local_sa_1_notifications.patch

+static void ib_sa_notice_resp(struct ib_sa_port *port,
+                             struct ib_mad_recv_wc *mad_recv_wc)
+{
+       spin_lock_irq(&port->ah_lock);
+       kref_get(&port->sm_ah->ref);
+       mad_buf->context[0] = &port->sm_ah->ref;
+       mad_buf->ah = port->sm_ah->ah;
+       spin_unlock_irq(&port->ah_lock);
+err:
+       kref_put(mad_buf->context[0], free_sm_ah);
+       ib_free_send_mad(mad_buf);
+}
+

Following is the proposed patch for the NULL pointer dereference.
This patch applies over
kernel_patches/fixes/sean_local_sa_1_notifications.patch

diff -urp ofa_kernel-1.4_old/drivers/infiniband/core/sa_query.c
ofa_kernel-1.4_new/drivers/infiniband/core/sa_query.c
--- ofa_kernel-1.4_old/drivers/infiniband/core/sa_query.c
2008-10-30 18:40:16.000000000 +0530
+++ ofa_kernel-1.4_new/drivers/infiniband/core/sa_query.c
2008-10-30 18:41:00.000000000 +0530
@@ -1186,6 +1186,7 @@ static void ib_sa_notice_resp(struct ib_
        struct ib_mad_send_buf *mad_buf;
        struct ib_sa_mad *mad;
        int ret;
+       unsigned long flags;

        mad_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0,
                                     IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
@@ -1197,11 +1198,17 @@ static void ib_sa_notice_resp(struct ib_
        memcpy(mad, mad_recv_wc->recv_buf.mad, sizeof *mad);
        mad->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP;

-       spin_lock_irq(&port->ah_lock);
-       kref_get(&port->sm_ah->ref);
-       mad_buf->context[0] = &port->sm_ah->ref;
-       mad_buf->ah = port->sm_ah->ah;
-       spin_unlock_irq(&port->ah_lock);
+       spin_lock_irqsave(&port->ah_lock, flags);
+       if (port->sm_ah) {
+               kref_get(&port->sm_ah->ref);
+               mad_buf->context[0] = &port->sm_ah->ref;
+               mad_buf->ah = port->sm_ah->ah;
+               spin_unlock_irqrestore(&port->ah_lock, flags);
+       } else {
+               spin_unlock_irqrestore(&port->ah_lock, flags);
+               ib_free_send_mad(mad_buf);
+               return;
+       }

        ret = ib_post_send_mad(mad_buf, NULL);

I used spin_lock_irqsave in the patch.  Any specific reason why
spin_lock_irq() was used originally?

Thanks and Regards,
Amar



More information about the ewg mailing list