LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] drm: assure aux_dev is nonzero before using it
@ 2019-05-23 11:09 tcamuso
2019-05-24 8:36 ` Jani Nikula
0 siblings, 1 reply; 19+ messages in thread
From: tcamuso @ 2019-05-23 11:09 UTC (permalink / raw)
To: dri-devel, linux-kernel; +Cc: airlied, daniel, tcamuso, dkwon
From Daniel Kwon <dkwon@redhat.com>
The system crashed due to an invalid memory access while trying to access
an auxiliary device.
crash> bt
PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
#0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
#1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
#2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
#3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
#4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
#5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
#6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
#7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
#8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
#9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
[exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
----------------------------------------------------------------------------
It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr,
'idr_find()' returned NULL. drm_dp_aux_dev_get_by_minor() should have checked
for this, but failed to do so.
----------------------------------------------------------------------------
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
114 struct idr_layer *hint = rcu_dereference_raw(idr->hint);
0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>: mov 0x8a41(%rip),%rax # 0xffffffffc0a613e0 <aux_idr>
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
116 if (hint && (id & ~IDR_MASK) == hint->prefix)
117 return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>: test %rax,%rax
0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>: je 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>: mov %ebx,%edx
0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>: xor %dl,%dl
0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>: cmp (%rax),%edx
0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>: je 0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
119 return idr_find_slowpath(idr, id);
0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>: mov %ebx,%esi
0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>: mov $0xffffffffc0a613e0,%rdi
0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>: callq 0xffffffffb09771b0 <idr_find_slowpath>
0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>: mov %rax,%rbx
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
25 return ACCESS_ONCE((v)->counter);
0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>: mov 0x18(%rbx),%edx
crash> struct file.f_path 0xffff89d18fadfa00
f_path = {
mnt = 0xffff89f23feaa620,
dentry = 0xffff89f1be7a1cc0
}
crash> files -d 0xffff89f1be7a1cc0
DENTRY INODE SUPERBLK TYPE PATH
ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR /dev/ipmi0
crash> struct inode.i_rdev ffff89f1b32a2830
i_rdev = 0xf200000
crash> eval (0xfffff & 0xf200000)
hexadecimal: 0
decimal: 0
octal: 0
binary: 0000000000000000000000000000000000000000000000000000000000000000
----------------------------------------------------------------------------
As the index value was 0 and all fields of aux_idr were 0, idr_find() can
return NULL, but the function below doesn't check for that and just
tries to use the result.
----------------------------------------------------------------------------
crash> aux_idr
aux_idr = $8 = {
hint = 0x0,
top = 0x0,
id_free = 0x0,
layers = 0x0,
id_free_cnt = 0x0,
cur = 0x0,
lock = {
{
rlock = {
raw_lock = {
val = {
counter = 0x0
}
}
}
}
}
}
crash> edis -f drm_dp_aux_dev_get_by_minor
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
57 {
58 struct drm_dp_aux_dev *aux_dev = NULL;
59
60 mutex_lock(&aux_idr_mutex);
61 aux_dev = idr_find(&aux_idr, index);
62 if (!kref_get_unless_zero(&aux_dev->refcount))
63 aux_dev = NULL;
64 mutex_unlock(&aux_idr_mutex);
65
66 return aux_dev;
67 }
----------------------------------------------------------------------------
To avoid this kind of situation, we should add a safeguard for the returned
value. Replacing line 62 with the line below would do.
62 if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
^^^^^^^^^^
From Tony Camuso <tcamuso@redhat.com>
I built a patched kernel for several architectures.
Booted the kernel, and ran the following for 100 iterations.
rmmod ipmi kmods to remove /dev/ipmi0.
Invoked ipmitool
insmod ipmi kmods
Did not see any crashes or call traces.
Suggested-by: Daniel Kwon <dkwon@redhat.com>
Signed-off-by: Tony Camuso <tcamuso@redhat.com>
---
drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 0e4f25d63fd2d..0b11210c882ee 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
mutex_lock(&aux_idr_mutex);
aux_dev = idr_find(&aux_idr, index);
- if (!kref_get_unless_zero(&aux_dev->refcount))
+ if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
aux_dev = NULL;
mutex_unlock(&aux_idr_mutex);
--
2.20.1
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-05-23 11:09 [PATCH] drm: assure aux_dev is nonzero before using it tcamuso
@ 2019-05-24 8:36 ` Jani Nikula
2019-05-24 10:48 ` tony camuso
2019-07-10 13:47 ` Tony Camuso
0 siblings, 2 replies; 19+ messages in thread
From: Jani Nikula @ 2019-05-24 8:36 UTC (permalink / raw)
To: tcamuso, dri-devel, linux-kernel; +Cc: airlied, dkwon, tcamuso
On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> From Daniel Kwon <dkwon@redhat.com>
>
> The system was crashed due to invalid memory access while trying to access
> auxiliary device.
>
> crash> bt
> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
>
> ----------------------------------------------------------------------------
>
> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> check on this, but had failed to do it.
I think the better question is, *why* does the idr_find() return NULL? I
don't think it should, under any circumstances. I fear adding the check
here papers over some other problem, taking us further away from the
root cause.
Also, can you reproduce this on a recent upstream kernel? The aux device
nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
is pretty much irrelevant for upstream.
BR,
Jani.
>
> ----------------------------------------------------------------------------
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
> 114 struct idr_layer *hint = rcu_dereference_raw(idr->hint);
> 0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>: mov 0x8a41(%rip),%rax # 0xffffffffc0a613e0 <aux_idr>
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
> 116 if (hint && (id & ~IDR_MASK) == hint->prefix)
> 117 return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
> 0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>: test %rax,%rax
> 0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>: je 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
> 0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>: mov %ebx,%edx
> 0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>: xor %dl,%dl
> 0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>: cmp (%rax),%edx
> 0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>: je 0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
> 119 return idr_find_slowpath(idr, id);
> 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>: mov %ebx,%esi
> 0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>: mov $0xffffffffc0a613e0,%rdi
> 0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>: callq 0xffffffffb09771b0 <idr_find_slowpath>
> 0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>: mov %rax,%rbx
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
> 25 return ACCESS_ONCE((v)->counter);
> 0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>: mov 0x18(%rbx),%edx
>
> crash> struct file.f_path 0xffff89d18fadfa00
> f_path = {
> mnt = 0xffff89f23feaa620,
> dentry = 0xffff89f1be7a1cc0
> }
> crash> files -d 0xffff89f1be7a1cc0
> DENTRY INODE SUPERBLK TYPE PATH
> ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR /dev/ipmi0
>
> crash> struct inode.i_rdev ffff89f1b32a2830
> i_rdev = 0xf200000
> crash> eval (0xfffff & 0xf200000)
> hexadecimal: 0
> decimal: 0
> octal: 0
> binary: 0000000000000000000000000000000000000000000000000000000000000000
> ----------------------------------------------------------------------------
>
> As the index value was 0 and aux_idr had value 0 for all, it can have value
> NULL from idr_find() function, but the below function doesn't check and just
> tries to use it.
>
> ----------------------------------------------------------------------------
> crash> aux_idr
> aux_idr = $8 = {
> hint = 0x0,
> top = 0x0,
> id_free = 0x0,
> layers = 0x0,
> id_free_cnt = 0x0,
> cur = 0x0,
> lock = {
> {
> rlock = {
> raw_lock = {
> val = {
> counter = 0x0
> }
> }
> }
> }
> }
> }
>
> crash> edis -f drm_dp_aux_dev_get_by_minor
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
>
> 56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> 57 {
> 58 struct drm_dp_aux_dev *aux_dev = NULL;
> 59
> 60 mutex_lock(&aux_idr_mutex);
> 61 aux_dev = idr_find(&aux_idr, index);
> 62 if (!kref_get_unless_zero(&aux_dev->refcount))
> 63 aux_dev = NULL;
> 64 mutex_unlock(&aux_idr_mutex);
> 65
> 66 return aux_dev;
> 67 }
> ----------------------------------------------------------------------------
>
> To avoid this kinds of situation, we should make a safeguard for the returned
> value. Changing the line 62 with the below would do.
>
> 62 if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
> ^^^^^^^^^^
> From Tony Camuso <tcamuso@redhat.com>
> I built a patched kernel for several architectures.
> Booted the kernel, and ran the following for 100 iterations.
> rmmod ipmi kmods to remove /dev/ipmi0.
> Invoked ipmitool
> insmod ipmi kmods
> Did not see any crashes or call traces.
>
> Suggested-by: Daniel Kwon <dkwon@redhat.com>
> Signed-off-by: Tony Camuso <tcamuso@redhat.com>
> ---
> drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
> index 0e4f25d63fd2d..0b11210c882ee 100644
> --- a/drivers/gpu/drm/drm_dp_aux_dev.c
> +++ b/drivers/gpu/drm/drm_dp_aux_dev.c
> @@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>
> mutex_lock(&aux_idr_mutex);
> aux_dev = idr_find(&aux_idr, index);
> - if (!kref_get_unless_zero(&aux_dev->refcount))
> + if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
> aux_dev = NULL;
> mutex_unlock(&aux_idr_mutex);
--
Jani Nikula, Intel Open Source Graphics Center
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-05-24 8:36 ` Jani Nikula
@ 2019-05-24 10:48 ` tony camuso
2019-05-24 11:58 ` Ville Syrjälä
2019-07-10 13:47 ` Tony Camuso
1 sibling, 1 reply; 19+ messages in thread
From: tony camuso @ 2019-05-24 10:48 UTC (permalink / raw)
To: Jani Nikula, dri-devel, linux-kernel; +Cc: airlied, dkwon
On 5/24/19 4:36 AM, Jani Nikula wrote:
> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>> From Daniel Kwon <dkwon@redhat.com>
>>
>> The system was crashed due to invalid memory access while trying to access
>> auxiliary device.
>>
>> crash> bt
>> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
>> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
>> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
>> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
>> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
>> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
>> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
>> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
>> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
>> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
>> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
>> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
>> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
>>
>> ----------------------------------------------------------------------------
>>
>> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>> check on this, but had failed to do it.
>
> I think the better question is, *why* does the idr_find() return NULL? I
> don't think it should, under any circumstances. I fear adding the check
> here papers over some other problem, taking us further away from the
> root cause.
That's a very good question.
> Also, can you reproduce this on a recent upstream kernel? The aux device
> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> is pretty much irrelevant for upstream.
I will look into this deeper, using the upstream kernel.
>
>
> BR,
> Jani.
-- snip --
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-05-24 10:48 ` tony camuso
@ 2019-05-24 11:58 ` Ville Syrjälä
0 siblings, 0 replies; 19+ messages in thread
From: Ville Syrjälä @ 2019-05-24 11:58 UTC (permalink / raw)
To: tony camuso; +Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon
On Fri, May 24, 2019 at 06:48:32AM -0400, tony camuso wrote:
> On 5/24/19 4:36 AM, Jani Nikula wrote:
> > On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >> From Daniel Kwon <dkwon@redhat.com>
> >>
> >> The system was crashed due to invalid memory access while trying to access
> >> auxiliary device.
> >>
> >> crash> bt
> >> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
> >> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
> >> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
> >> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
> >> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
> >> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
> >> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
> >> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
> >> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
> >> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
> >> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
> >> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
> >> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
> >> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
> >> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
> >>
> >> ----------------------------------------------------------------------------
> >>
> >> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
> >> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >> check on this, but had failed to do it.
> >
> > I think the better question is, *why* does the idr_find() return NULL? I
> > don't think it should, under any circumstances. I fear adding the check
> > here papers over some other problem, taking us further away from the
> > root cause.
>
> That's a very good question.
>
> > Also, can you reproduce this on a recent upstream kernel? The aux device
> > nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> > is pretty much irrelevant for upstream.
>
> I will look into this deeper, using the upstream kernel.
Should be trivial to reproduce with mknod. I wonder if we should stick a
test like that into igt actually. Not sure how happy people would be if
igt creates new device nodes...
--
Ville Syrjälä
Intel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-05-24 8:36 ` Jani Nikula
2019-05-24 10:48 ` tony camuso
@ 2019-07-10 13:47 ` Tony Camuso
2019-07-10 13:56 ` Ville Syrjälä
1 sibling, 1 reply; 19+ messages in thread
From: Tony Camuso @ 2019-07-10 13:47 UTC (permalink / raw)
To: Jani Nikula, dri-devel, linux-kernel; +Cc: airlied, dkwon
On 5/24/19 4:36 AM, Jani Nikula wrote:
> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>> From Daniel Kwon <dkwon@redhat.com>
>>
>> The system was crashed due to invalid memory access while trying to access
>> auxiliary device.
>>
>> crash> bt
>> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
>> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
>> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
>> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
>> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
>> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
>> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
>> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
>> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
>> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
>> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
>> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
>> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
>>
>> ----------------------------------------------------------------------------
>>
>> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>> check on this, but had failed to do it.
>
> I think the better question is, *why* does the idr_find() return NULL? I
> don't think it should, under any circumstances. I fear adding the check
> here papers over some other problem, taking us further away from the
> root cause.
>
> Also, can you reproduce this on a recent upstream kernel? The aux device
> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> is pretty much irrelevant for upstream.
>
>
> BR,
> Jani.
I have not been able to reproduce this problem.
However, whatever the reason idr_find() returns NULL, isn't it good form to
check it before using it? What would be the software engineering reason not
to do this?
>
>
>
>
>>
>> ----------------------------------------------------------------------------
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
>> 114 struct idr_layer *hint = rcu_dereference_raw(idr->hint);
>> 0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>: mov 0x8a41(%rip),%rax # 0xffffffffc0a613e0 <aux_idr>
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
>> 116 if (hint && (id & ~IDR_MASK) == hint->prefix)
>> 117 return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
>> 0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>: test %rax,%rax
>> 0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>: je 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
>> 0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>: mov %ebx,%edx
>> 0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>: xor %dl,%dl
>> 0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>: cmp (%rax),%edx
>> 0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>: je 0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
>> 119 return idr_find_slowpath(idr, id);
>> 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>: mov %ebx,%esi
>> 0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>: mov $0xffffffffc0a613e0,%rdi
>> 0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>: callq 0xffffffffb09771b0 <idr_find_slowpath>
>> 0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>: mov %rax,%rbx
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
>> 25 return ACCESS_ONCE((v)->counter);
>> 0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>: mov 0x18(%rbx),%edx
>>
>> crash> struct file.f_path 0xffff89d18fadfa00
>> f_path = {
>> mnt = 0xffff89f23feaa620,
>> dentry = 0xffff89f1be7a1cc0
>> }
>> crash> files -d 0xffff89f1be7a1cc0
>> DENTRY INODE SUPERBLK TYPE PATH
>> ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR /dev/ipmi0
>>
>> crash> struct inode.i_rdev ffff89f1b32a2830
>> i_rdev = 0xf200000
>> crash> eval (0xfffff & 0xf200000)
>> hexadecimal: 0
>> decimal: 0
>> octal: 0
>> binary: 0000000000000000000000000000000000000000000000000000000000000000
>> ----------------------------------------------------------------------------
>>
>> As the index value was 0 and aux_idr had value 0 for all, it can have value
>> NULL from idr_find() function, but the below function doesn't check and just
>> tries to use it.
>>
>> ----------------------------------------------------------------------------
>> crash> aux_idr
>> aux_idr = $8 = {
>> hint = 0x0,
>> top = 0x0,
>> id_free = 0x0,
>> layers = 0x0,
>> id_free_cnt = 0x0,
>> cur = 0x0,
>> lock = {
>> {
>> rlock = {
>> raw_lock = {
>> val = {
>> counter = 0x0
>> }
>> }
>> }
>> }
>> }
>> }
>>
>> crash> edis -f drm_dp_aux_dev_get_by_minor
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
>>
>> 56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>> 57 {
>> 58 struct drm_dp_aux_dev *aux_dev = NULL;
>> 59
>> 60 mutex_lock(&aux_idr_mutex);
>> 61 aux_dev = idr_find(&aux_idr, index);
>> 62 if (!kref_get_unless_zero(&aux_dev->refcount))
>> 63 aux_dev = NULL;
>> 64 mutex_unlock(&aux_idr_mutex);
>> 65
>> 66 return aux_dev;
>> 67 }
>> ----------------------------------------------------------------------------
>>
>> To avoid this kinds of situation, we should make a safeguard for the returned
>> value. Changing the line 62 with the below would do.
>>
>> 62 if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>> ^^^^^^^^^^
>> From Tony Camuso <tcamuso@redhat.com>
>> I built a patched kernel for several architectures.
>> Booted the kernel, and ran the following for 100 iterations.
>> rmmod ipmi kmods to remove /dev/ipmi0.
>> Invoked ipmitool
>> insmod ipmi kmods
>> Did not see any crashes or call traces.
>>
>> Suggested-by: Daniel Kwon <dkwon@redhat.com>
>> Signed-off-by: Tony Camuso <tcamuso@redhat.com>
>> ---
>> drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
>> index 0e4f25d63fd2d..0b11210c882ee 100644
>> --- a/drivers/gpu/drm/drm_dp_aux_dev.c
>> +++ b/drivers/gpu/drm/drm_dp_aux_dev.c
>> @@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>>
>> mutex_lock(&aux_idr_mutex);
>> aux_dev = idr_find(&aux_idr, index);
>> - if (!kref_get_unless_zero(&aux_dev->refcount))
>> + if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>> aux_dev = NULL;
>> mutex_unlock(&aux_idr_mutex);
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-07-10 13:47 ` Tony Camuso
@ 2019-07-10 13:56 ` Ville Syrjälä
2019-07-12 16:07 ` Tony Camuso
0 siblings, 1 reply; 19+ messages in thread
From: Ville Syrjälä @ 2019-07-10 13:56 UTC (permalink / raw)
To: Tony Camuso; +Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon
On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
> On 5/24/19 4:36 AM, Jani Nikula wrote:
> > On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >> From Daniel Kwon <dkwon@redhat.com>
> >>
> >> The system was crashed due to invalid memory access while trying to access
> >> auxiliary device.
> >>
> >> crash> bt
> >> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
> >> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
> >> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
> >> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
> >> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
> >> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
> >> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
> >> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
> >> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
> >> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
> >> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
> >> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
> >> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
> >> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
> >> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
> >>
> >> ----------------------------------------------------------------------------
> >>
> >> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
> >> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >> check on this, but had failed to do it.
> >
> > I think the better question is, *why* does the idr_find() return NULL? I
> > don't think it should, under any circumstances. I fear adding the check
> > here papers over some other problem, taking us further away from the
> > root cause.
> >
> > Also, can you reproduce this on a recent upstream kernel? The aux device
> > nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> > is pretty much irrelevant for upstream.
> >
> >
> > BR,
> > Jani.
>
> I have not been able to reproduce this problem.
mknod /dev/foo c <drm_dp_aux major> 255
cat /dev/foo
should do it.
>
> However, whatever the reason idr_find() returns NULL, isn't it good form to
> check it before using it? What would be the software engineering reason not
> to do this?
>
> >
> >
> >
> >
> >>
> >> ----------------------------------------------------------------------------
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
> >> 114 struct idr_layer *hint = rcu_dereference_raw(idr->hint);
> >> 0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>: mov 0x8a41(%rip),%rax # 0xffffffffc0a613e0 <aux_idr>
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
> >> 116 if (hint && (id & ~IDR_MASK) == hint->prefix)
> >> 117 return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
> >> 0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>: test %rax,%rax
> >> 0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>: je 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
> >> 0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>: mov %ebx,%edx
> >> 0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>: xor %dl,%dl
> >> 0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>: cmp (%rax),%edx
> >> 0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>: je 0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
> >> 119 return idr_find_slowpath(idr, id);
> >> 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>: mov %ebx,%esi
> >> 0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>: mov $0xffffffffc0a613e0,%rdi
> >> 0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>: callq 0xffffffffb09771b0 <idr_find_slowpath>
> >> 0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>: mov %rax,%rbx
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
> >> 25 return ACCESS_ONCE((v)->counter);
> >> 0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>: mov 0x18(%rbx),%edx
> >>
> >> crash> struct file.f_path 0xffff89d18fadfa00
> >> f_path = {
> >> mnt = 0xffff89f23feaa620,
> >> dentry = 0xffff89f1be7a1cc0
> >> }
> >> crash> files -d 0xffff89f1be7a1cc0
> >> DENTRY INODE SUPERBLK TYPE PATH
> >> ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR /dev/ipmi0
> >>
> >> crash> struct inode.i_rdev ffff89f1b32a2830
> >> i_rdev = 0xf200000
> >> crash> eval (0xfffff & 0xf200000)
> >> hexadecimal: 0
> >> decimal: 0
> >> octal: 0
> >> binary: 0000000000000000000000000000000000000000000000000000000000000000
> >> ----------------------------------------------------------------------------
> >>
> >> As the index value was 0 and aux_idr had value 0 for all, it can have value
> >> NULL from idr_find() function, but the below function doesn't check and just
> >> tries to use it.
> >>
> >> ----------------------------------------------------------------------------
> >> crash> aux_idr
> >> aux_idr = $8 = {
> >> hint = 0x0,
> >> top = 0x0,
> >> id_free = 0x0,
> >> layers = 0x0,
> >> id_free_cnt = 0x0,
> >> cur = 0x0,
> >> lock = {
> >> {
> >> rlock = {
> >> raw_lock = {
> >> val = {
> >> counter = 0x0
> >> }
> >> }
> >> }
> >> }
> >> }
> >> }
> >>
> >> crash> edis -f drm_dp_aux_dev_get_by_minor
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
> >>
> >> 56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> >> 57 {
> >> 58 struct drm_dp_aux_dev *aux_dev = NULL;
> >> 59
> >> 60 mutex_lock(&aux_idr_mutex);
> >> 61 aux_dev = idr_find(&aux_idr, index);
> >> 62 if (!kref_get_unless_zero(&aux_dev->refcount))
> >> 63 aux_dev = NULL;
> >> 64 mutex_unlock(&aux_idr_mutex);
> >> 65
> >> 66 return aux_dev;
> >> 67 }
> >> ----------------------------------------------------------------------------
> >>
> >> To avoid this kind of situation, we should make a safeguard for the returned
> >> value. Changing the line 62 with the below would do.
> >>
> >> 62 if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
> >> ^^^^^^^^^^
> >> From Tony Camuso <tcamuso@redhat.com>
> >> I built a patched kernel for several architectures.
> >> Booted the kernel, and ran the following for 100 iterations.
> >> rmmod ipmi kmods to remove /dev/ipmi0.
> >> Invoked ipmitool
> >> insmod ipmi kmods
> >> Did not see any crashes or call traces.
> >>
> >> Suggested-by: Daniel Kwon <dkwon@redhat.com>
> >> Signed-off-by: Tony Camuso <tcamuso@redhat.com>
> >> ---
> >> drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
> >> 1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
> >> index 0e4f25d63fd2d..0b11210c882ee 100644
> >> --- a/drivers/gpu/drm/drm_dp_aux_dev.c
> >> +++ b/drivers/gpu/drm/drm_dp_aux_dev.c
> >> @@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> >>
> >> mutex_lock(&aux_idr_mutex);
> >> aux_dev = idr_find(&aux_idr, index);
> >> - if (!kref_get_unless_zero(&aux_dev->refcount))
> >> + if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
> >> aux_dev = NULL;
> >> mutex_unlock(&aux_idr_mutex);
> >
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Ville Syrjälä
Intel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-07-10 13:56 ` Ville Syrjälä
@ 2019-07-12 16:07 ` Tony Camuso
2019-07-12 17:06 ` Ville Syrjälä
0 siblings, 1 reply; 19+ messages in thread
From: Tony Camuso @ 2019-07-12 16:07 UTC (permalink / raw)
To: Ville Syrjälä
Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon
On 7/10/19 9:56 AM, Ville Syrjälä wrote:
> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
>> On 5/24/19 4:36 AM, Jani Nikula wrote:
>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>>> From Daniel Kwon <dkwon@redhat.com>
>>>>
>>>> The system was crashed due to invalid memory access while trying to access
>>>> auxiliary device.
>>>>
>>>> crash> bt
>>>> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
>>>> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>>> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>>> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>>> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>>> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>>> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>>> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>>> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>>> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>>> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>>> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>>> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
>>>> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
>>>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
>>>> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
>>>> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
>>>> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
>>>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
>>>> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
>>>> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
>>>> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
>>>> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
>>>> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
>>>> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
>>>> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
>>>>
>>>> ----------------------------------------------------------------------------
>>>>
>>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>>>> check on this, but had failed to do it.
>>>
>>> I think the better question is, *why* does the idr_find() return NULL? I
>>> don't think it should, under any circumstances. I fear adding the check
>>> here papers over some other problem, taking us further away from the
>>> root cause.
>>>
>>> Also, can you reproduce this on a recent upstream kernel? The aux device
>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
>>> is pretty much irrelevant for upstream.
>>>
>>>
>>> BR,
>>> Jani.
>>
>> I have not been able to reproduce this problem.
>
> mknod /dev/foo c <drm_dp_aux major> 255
> cat /dev/foo
>
> should do it.
How do I determine <drm_dp_aux major>?
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-07-12 16:07 ` Tony Camuso
@ 2019-07-12 17:06 ` Ville Syrjälä
2019-07-12 17:35 ` Tony Camuso
2019-09-23 15:03 ` Tony Camuso
0 siblings, 2 replies; 19+ messages in thread
From: Ville Syrjälä @ 2019-07-12 17:06 UTC (permalink / raw)
To: Tony Camuso; +Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon
On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
> > On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
> >> On 5/24/19 4:36 AM, Jani Nikula wrote:
> >>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >>>> From Daniel Kwon <dkwon@redhat.com>
> >>>>
> >>>> The system was crashed due to invalid memory access while trying to access
> >>>> auxiliary device.
> >>>>
> >>>> crash> bt
> >>>> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
> >>>> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >>>> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >>>> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >>>> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >>>> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >>>> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >>>> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >>>> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >>>> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >>>> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >>>> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >>>> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
> >>>> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
> >>>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
> >>>> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
> >>>> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
> >>>> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
> >>>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
> >>>> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
> >>>> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
> >>>> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
> >>>> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
> >>>> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
> >>>> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
> >>>> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
> >>>>
> >>>> ----------------------------------------------------------------------------
> >>>>
> >>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
> >>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >>>> check on this, but had failed to do it.
> >>>
> >>> I think the better question is, *why* does the idr_find() return NULL? I
> >>> don't think it should, under any circumstances. I fear adding the check
> >>> here papers over some other problem, taking us further away from the
> >>> root cause.
> >>>
> >>> Also, can you reproduce this on a recent upstream kernel? The aux device
> >>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> >>> is pretty much irrelevant for upstream.
> >>>
> >>>
> >>> BR,
> >>> Jani.
> >>
> >> I have not been able to reproduce this problem.
> >
> > mknod /dev/foo c <drm_dp_aux major> 255
> > cat /dev/foo
> >
> > should do it.
>
> How do I determine <drm_dp_aux major>?
ls,file,stat. Take your pick.
--
Ville Syrjälä
Intel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-07-12 17:06 ` Ville Syrjälä
@ 2019-07-12 17:35 ` Tony Camuso
2019-09-23 15:03 ` Tony Camuso
1 sibling, 0 replies; 19+ messages in thread
From: Tony Camuso @ 2019-07-12 17:35 UTC (permalink / raw)
To: Ville Syrjälä
Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon
On 7/12/19 1:06 PM, Ville Syrjälä wrote:
> On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
>> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
>>> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
>>>> On 5/24/19 4:36 AM, Jani Nikula wrote:
>>>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>>>>> From Daniel Kwon <dkwon@redhat.com>
>>>>>>
>>>>>> The system was crashed due to invalid memory access while trying to access
>>>>>> auxiliary device.
>>>>>>
>>>>>> crash> bt
>>>>>> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
>>>>>> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>>>>> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>>>>> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>>>>> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>>>>> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>>>>> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>>>>> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>>>>> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>>>>> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>>>>> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>>>>> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>>>>> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
>>>>>> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
>>>>>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
>>>>>> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
>>>>>> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
>>>>>> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
>>>>>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
>>>>>> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
>>>>>> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
>>>>>> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
>>>>>> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
>>>>>> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
>>>>>> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
>>>>>> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
>>>>>>
>>>>>> ----------------------------------------------------------------------------
>>>>>>
>>>>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
>>>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>>>>>> check on this, but had failed to do it.
>>>>>
>>>>> I think the better question is, *why* does the idr_find() return NULL? I
>>>>> don't think it should, under any circumstances. I fear adding the check
>>>>> here papers over some other problem, taking us further away from the
>>>>> root cause.
>>>>>
>>>>> Also, can you reproduce this on a recent upstream kernel? The aux device
>>>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
>>>>> is pretty much irrelevant for upstream.
>>>>>
>>>>>
>>>>> BR,
>>>>> Jani.
>>>>
>>>> I have not been able to reproduce this problem.
>>>
>>> mknod /dev/foo c <drm_dp_aux major> 255
>>> cat /dev/foo
>>>
>>> should do it.
>>
>> How do I determine <drm_dp_aux major>?
>
> ls,file,stat. Take your pick.
>
Doh. Thanks!!
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-07-12 17:06 ` Ville Syrjälä
2019-07-12 17:35 ` Tony Camuso
@ 2019-09-23 15:03 ` Tony Camuso
2019-09-23 15:22 ` Ville Syrjälä
1 sibling, 1 reply; 19+ messages in thread
From: Tony Camuso @ 2019-09-23 15:03 UTC (permalink / raw)
To: Ville Syrjälä
Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon,
Joe Donahue, John Feeney
On 7/12/19 1:06 PM, Ville Syrjälä wrote:
> On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
>> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
>>> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
>>>> On 5/24/19 4:36 AM, Jani Nikula wrote:
>>>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>>>>> From Daniel Kwon <dkwon@redhat.com>
>>>>>>
>>>>>> The system was crashed due to invalid memory access while trying to access
>>>>>> auxiliary device.
>>>>>>
>>>>>> crash> bt
>>>>>> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
>>>>>> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>>>>> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>>>>> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>>>>> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>>>>> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>>>>> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>>>>> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>>>>> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>>>>> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>>>>> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>>>>> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>>>>> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
>>>>>> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
>>>>>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
>>>>>> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
>>>>>> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
>>>>>> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
>>>>>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
>>>>>> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
>>>>>> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
>>>>>> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
>>>>>> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
>>>>>> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
>>>>>> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
>>>>>> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
>>>>>>
>>>>>> ----------------------------------------------------------------------------
>>>>>>
>>>>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
>>>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>>>>>> check on this, but had failed to do it.
>>>>>
>>>>> I think the better question is, *why* does the idr_find() return NULL? I
>>>>> don't think it should, under any circumstances. I fear adding the check
>>>>> here papers over some other problem, taking us further away from the
>>>>> root cause.
>>>>>
>>>>> Also, can you reproduce this on a recent upstream kernel? The aux device
>>>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
>>>>> is pretty much irrelevant for upstream.
>>>>>
>>>>>
>>>>> BR,
>>>>> Jani.
>>>>
>>>> I have not been able to reproduce this problem.
>>>
>>> mknod /dev/foo c <drm_dp_aux major> 255
>>> cat /dev/foo
>>>
>>> should do it.
>>
>> How do I determine <drm_dp_aux major>?
>
> ls,file,stat. Take your pick.
>
Problem here is I can't ls,file,stat /dev/foo until after it's created,
but I need to know the drm_dp_aux major number before I can use mknod.
What am I missing here?
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2019-09-23 15:03 ` Tony Camuso
@ 2019-09-23 15:22 ` Ville Syrjälä
0 siblings, 0 replies; 19+ messages in thread
From: Ville Syrjälä @ 2019-09-23 15:22 UTC (permalink / raw)
To: Tony Camuso
Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon,
Joe Donahue, John Feeney
On Mon, Sep 23, 2019 at 11:03:35AM -0400, Tony Camuso wrote:
> On 7/12/19 1:06 PM, Ville Syrjälä wrote:
> > On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
> >> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
> >>> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
> >>>> On 5/24/19 4:36 AM, Jani Nikula wrote:
> >>>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >>>>>> From Daniel Kwon <dkwon@redhat.com>
> >>>>>>
> >>>>>> The system was crashed due to invalid memory access while trying to access
> >>>>>> auxiliary device.
> >>>>>>
> >>>>>> crash> bt
> >>>>>> PID: 9863 TASK: ffff89d1bdf11040 CPU: 1 COMMAND: "ipmitool"
> >>>>>> #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >>>>>> #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >>>>>> #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >>>>>> #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >>>>>> #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >>>>>> #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >>>>>> #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >>>>>> #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >>>>>> #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >>>>>> #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >>>>>> [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >>>>>> RIP: ffffffffc0a589bd RSP: ffff89cedd7f3bf0 RFLAGS: 00010246
> >>>>>> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff89cedd7f3fd8
> >>>>>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc0a613e0
> >>>>>> RBP: ffff89cedd7f3bf8 R8: ffff89f1bcbabbd0 R9: 0000000000000000
> >>>>>> R10: ffff89f1be7a1cc0 R11: 0000000000000000 R12: 0000000000000000
> >>>>>> R13: ffff89f1b32a2830 R14: ffff89d18fadfa00 R15: 0000000000000000
> >>>>>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
> >>>>>> RIP: 00002b45f0d80d30 RSP: 00007ffc416066a0 RFLAGS: 00010246
> >>>>>> RAX: 0000000000000002 RBX: 000056062e212d80 RCX: 00007ffc41606810
> >>>>>> RDX: 0000000000000000 RSI: 0000000000000002 RDI: 00007ffc41606ec0
> >>>>>> RBP: 0000000000000000 R8: 000056062dfed229 R9: 00002b45f0cdf14d
> >>>>>> R10: 0000000000000002 R11: 0000000000000246 R12: 00007ffc41606ec0
> >>>>>> R13: 00007ffc41606ed0 R14: 00007ffc41606ee0 R15: 0000000000000000
> >>>>>> ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b
> >>>>>>
> >>>>>> ----------------------------------------------------------------------------
> >>>>>>
> >>>>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
> >>>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >>>>>> check on this, but had failed to do it.
> >>>>>
> >>>>> I think the better question is, *why* does the idr_find() return NULL? I
> >>>>> don't think it should, under any circumstances. I fear adding the check
> >>>>> here papers over some other problem, taking us further away from the
> >>>>> root cause.
> >>>>>
> >>>>> Also, can you reproduce this on a recent upstream kernel? The aux device
> >>>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> >>>>> is pretty much irrelevant for upstream.
> >>>>>
> >>>>>
> >>>>> BR,
> >>>>> Jani.
> >>>>
> >>>> I have not been able to reproduce this problem.
> >>>
> >>> mknod /dev/foo c <drm_dp_aux major> 255
> >>> cat /dev/foo
> >>>
> >>> should do it.
> >>
> >> How do I determine <drm_dp_aux major>?
> >
> > ls,file,stat. Take your pick.
> >
>
> Problem here is I can't ls,file,stat /dev/foo until after it's created,
> but I need to know the drm_dp_aux major number before I can use mknod.
>
> What am I missing here?
udev/whatever should create a bunch of these for you so you can check
from them. If not, then dig around in /sys/class/drm_dp_aux_dev.
--
Ville Syrjälä
Intel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2020-08-18 17:58 ` Zwane Mwaikambo
@ 2020-09-08 18:41 ` Lyude Paul
0 siblings, 0 replies; 19+ messages in thread
From: Lyude Paul @ 2020-09-08 18:41 UTC (permalink / raw)
To: Zwane Mwaikambo; +Cc: Daniel Vetter, tcamuso, dkwon, Linux Kernel, dri-devel
On Tue, 2020-08-18 at 10:58 -0700, Zwane Mwaikambo wrote:
> On Wed, 12 Aug 2020, Lyude Paul wrote:
>
> > On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com>
> > > wrote:
> > > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > > >
> > > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > > Hi Folks,
> > > > > > I know this thread eventually dropped off due to not
> > > > > > identifying
> > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > > happened because the udev device nodes for the DP aux devices were
> > > > > > not
> > > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > > reproduce the bug just by creating a device node for a non-
> > > > > > existent
> > > > > > minor
> > > > > > device and calling open().
> > > > >
> > > > > Hm I don't have that thread anymore, but generally these bugs are
> > > > > solved
> > > > > by not registering the device before it's ready for use. We do have
> > > > > drm_connector->late_register for that stuff. Just a guess since I'm
> > > > > not
> > > > > seeing full details here.
> > > >
> > > > In this particular case, the physical device disappeared before the
> > > > nodes
> > > > were cleaned up. It involves putting a computer to sleep with a
> > > > monitor
> > > > plugged in and then waking it up with the monitor unplugged.
> > >
> > > We also have early_unregister for the reverse, but yes this sounds
> > > more tricky ... Adding Lyude who's been working on way too much
> > > lifetime fun around dp recently.
> > > -Daniel
> > >
> > Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> > fix, although I am curious as to how exactly the aux dev's parent is
> > getting
> > destroyed before its child, which I would have thought would be the only
> > way
> > you could hit this?
>
> Hi, If this is acceptable, would you consider an updated patch against
> 5.8?
Sure-although the process for getting this into stable is to get the patch into
drm-next first, then it can get cherry-picked into the stable kernel branches.
See https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
>
> Thanks,
> Zwane
>
--
Cheers,
Lyude Paul (she/her)
Software Engineer at Red Hat
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2020-08-12 15:44 ` Lyude Paul
2020-08-12 20:21 ` Zwane Mwaikambo
@ 2020-08-18 17:58 ` Zwane Mwaikambo
2020-09-08 18:41 ` Lyude Paul
1 sibling, 1 reply; 19+ messages in thread
From: Zwane Mwaikambo @ 2020-08-18 17:58 UTC (permalink / raw)
To: Lyude Paul; +Cc: Daniel Vetter, tcamuso, dkwon, Linux Kernel, dri-devel
On Wed, 12 Aug 2020, Lyude Paul wrote:
> On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > >
> > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > Hi Folks,
> > > > > I know this thread eventually dropped off due to not identifying
> > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > happened because the udev device nodes for the DP aux devices were not
> > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > reproduce the bug just by creating a device node for a non-existent
> > > > > minor
> > > > > device and calling open().
> > > >
> > > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > > by not registering the device before it's ready for use. We do have
> > > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > > seeing full details here.
> > >
> > > In this particular case, the physical device disappeared before the nodes
> > > were cleaned up. It involves putting a computer to sleep with a monitor
> > > plugged in and then waking it up with the monitor unplugged.
> >
> > We also have early_unregister for the reverse, but yes this sounds
> > more tricky ... Adding Lyude who's been working on way too much
> > lifetime fun around dp recently.
> > -Daniel
> >
> Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> fix, although I am curious as to how exactly the aux dev's parent is getting
> destroyed before its child, which I would have thought would be the only way
> you could hit this?
Hi, If this is acceptable, would you consider an updated patch against
5.8?
Thanks,
Zwane
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2020-08-12 15:44 ` Lyude Paul
@ 2020-08-12 20:21 ` Zwane Mwaikambo
2020-08-18 17:58 ` Zwane Mwaikambo
1 sibling, 0 replies; 19+ messages in thread
From: Zwane Mwaikambo @ 2020-08-12 20:21 UTC (permalink / raw)
To: Lyude Paul; +Cc: Daniel Vetter, tcamuso, dkwon, Linux Kernel, dri-devel
On Wed, 12 Aug 2020, Lyude Paul wrote:
> On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > >
> > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > Hi Folks,
> > > > > I know this thread eventually dropped off due to not identifying
> > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > happened because the udev device nodes for the DP aux devices were not
> > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > reproduce the bug just by creating a device node for a non-existent
> > > > > minor
> > > > > device and calling open().
> > > >
> > > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > > by not registering the device before it's ready for use. We do have
> > > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > > seeing full details here.
> > >
> > > In this particular case, the physical device disappeared before the nodes
> > > were cleaned up. It involves putting a computer to sleep with a monitor
> > > plugged in and then waking it up with the monitor unplugged.
> >
> > We also have early_unregister for the reverse, but yes this sounds
> > more tricky ... Adding Lyude who's been working on way too much
> > lifetime fun around dp recently.
> > -Daniel
> >
> Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> fix, although I am curious as to how exactly the aux dev's parent is getting
> destroyed before its child, which I would have thought would be the only way
> you could hit this?
Here is what it looks like without (1) and with (2) a monitor connected. In
the case where the monitor disappears during suspend, the device nodes
aux3,4 are still around.
1) No monitor connected
ls -l /dev/drm*
crw------- 1 root root 238, 0 Aug 6 22:32 /dev/drm_dp_aux0
crw------- 1 root root 238, 1 Aug 6 22:32 /dev/drm_dp_aux1
2) Monitor connected
crw------- 1 root root 238, 0 Aug 6 22:32 /dev/drm_dp_aux0
crw------- 1 root root 238, 1 Aug 6 22:32 /dev/drm_dp_aux1
crw------- 1 root root 238, 2 Aug 11 14:51 /dev/drm_dp_aux2
crw------- 1 root root 238, 3 Aug 11 14:51 /dev/drm_dp_aux3
crw------- 1 root root 238, 4 Aug 11 14:51 /dev/drm_dp_aux4
>
> > >
> > > > > To me it still makes sense to just check aux_dev because the chardev
> > > > > has
> > > > > no way to check before calling.
> > > > >
> > > > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > > > 0x17b39 is in drm_dp_aux_dev_get_by_minor
> > > > > (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > > > 60 static struct drm_dp_aux_dev
> > > > > *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > > > 61 {
> > > > > 62 struct drm_dp_aux_dev *aux_dev = NULL;
> > > > > 63
> > > > > 64 mutex_lock(&aux_idr_mutex);
> > > > > 65 aux_dev = idr_find(&aux_idr, index);
> > > > > 66 if (!kref_get_unless_zero(&aux_dev->refcount))
> > > > > 67 aux_dev = NULL;
> > > > > 68 mutex_unlock(&aux_idr_mutex);
> > > > > 69
> > > > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > > > $8 = 0x18
> > > > >
> > > > > static int auxdev_open(struct inode *inode, struct file *file)
> > > > > {
> > > > > unsigned int minor = iminor(inode);
> > > > > struct drm_dp_aux_dev *aux_dev;
> > > > >
> > > > > aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > > > > if (!aux_dev)
> > > > > return -ENODEV;
> > > > >
> > > > > file->private_data = aux_dev;
> > > > > return 0;
> > > > > }
> > > > >
> > > > >
> > > > > _______________________________________________
> > > > > dri-devel mailing list
> > > > > dri-devel@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >
> >
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2020-08-12 14:10 ` Daniel Vetter
@ 2020-08-12 15:44 ` Lyude Paul
2020-08-12 20:21 ` Zwane Mwaikambo
2020-08-18 17:58 ` Zwane Mwaikambo
0 siblings, 2 replies; 19+ messages in thread
From: Lyude Paul @ 2020-08-12 15:44 UTC (permalink / raw)
To: Daniel Vetter, Zwane Mwaikambo; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel
On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> >
> > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > Hi Folks,
> > > > I know this thread eventually dropped off due to not identifying
> > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > happened because the udev device nodes for the DP aux devices were not
> > > > cleaned up whereas the kernel had no association with them. I can
> > > > reproduce the bug just by creating a device node for a non-existent
> > > > minor
> > > > device and calling open().
> > >
> > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > by not registering the device before it's ready for use. We do have
> > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > seeing full details here.
> >
> > In this particular case, the physical device disappeared before the nodes
> > were cleaned up. It involves putting a computer to sleep with a monitor
> > plugged in and then waking it up with the monitor unplugged.
>
> We also have early_unregister for the reverse, but yes this sounds
> more tricky ... Adding Lyude who's been working on way too much
> lifetime fun around dp recently.
> -Daniel
>
Hi-I think just checking whether the auxdev is NULL or not is a reasonable
fix, although I am curious as to how exactly the aux dev's parent is getting
destroyed before its child, which I would have thought would be the only way
you could hit this?
> >
> > > > To me it still makes sense to just check aux_dev because the chardev
> > > > has
> > > > no way to check before calling.
> > > >
> > > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > > 0x17b39 is in drm_dp_aux_dev_get_by_minor
> > > > (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > > 60 static struct drm_dp_aux_dev
> > > > *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > > 61 {
> > > > 62 struct drm_dp_aux_dev *aux_dev = NULL;
> > > > 63
> > > > 64 mutex_lock(&aux_idr_mutex);
> > > > 65 aux_dev = idr_find(&aux_idr, index);
> > > > 66 if (!kref_get_unless_zero(&aux_dev->refcount))
> > > > 67 aux_dev = NULL;
> > > > 68 mutex_unlock(&aux_idr_mutex);
> > > > 69
> > > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > > $8 = 0x18
> > > >
> > > > static int auxdev_open(struct inode *inode, struct file *file)
> > > > {
> > > > unsigned int minor = iminor(inode);
> > > > struct drm_dp_aux_dev *aux_dev;
> > > >
> > > > aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > > > if (!aux_dev)
> > > > return -ENODEV;
> > > >
> > > > file->private_data = aux_dev;
> > > > return 0;
> > > > }
> > > >
> > > >
> > > > _______________________________________________
> > > > dri-devel mailing list
> > > > dri-devel@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
--
Cheers,
Lyude Paul (she/her)
Software Engineer at Red Hat
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2020-08-11 22:16 ` Zwane Mwaikambo
@ 2020-08-12 14:10 ` Daniel Vetter
2020-08-12 15:44 ` Lyude Paul
0 siblings, 1 reply; 19+ messages in thread
From: Daniel Vetter @ 2020-08-12 14:10 UTC (permalink / raw)
To: Zwane Mwaikambo, Lyude; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel
On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
>
> On Tue, 11 Aug 2020, Daniel Vetter wrote:
>
> > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > Hi Folks,
> > > I know this thread eventually dropped off due to not identifying
> > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > happened because the udev device nodes for the DP aux devices were not
> > > cleaned up whereas the kernel had no association with them. I can
> > > reproduce the bug just by creating a device node for a non-existent minor
> > > device and calling open().
> >
> > Hm I don't have that thread anymore, but generally these bugs are solved
> > by not registering the device before it's ready for use. We do have
> > drm_connector->late_register for that stuff. Just a guess since I'm not
> > seeing full details here.
>
> In this particular case, the physical device disappeared before the nodes
> were cleaned up. It involves putting a computer to sleep with a monitor
> plugged in and then waking it up with the monitor unplugged.
We also have early_unregister for the reverse, but yes this sounds
more tricky ... Adding Lyude who's been working on way too much
lifetime fun around dp recently.
-Daniel
>
>
> > >
> > > To me it still makes sense to just check aux_dev because the chardev has
> > > no way to check before calling.
> > >
> > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > 60 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > 61 {
> > > 62 struct drm_dp_aux_dev *aux_dev = NULL;
> > > 63
> > > 64 mutex_lock(&aux_idr_mutex);
> > > 65 aux_dev = idr_find(&aux_idr, index);
> > > 66 if (!kref_get_unless_zero(&aux_dev->refcount))
> > > 67 aux_dev = NULL;
> > > 68 mutex_unlock(&aux_idr_mutex);
> > > 69
> > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > $8 = 0x18
> > >
> > > static int auxdev_open(struct inode *inode, struct file *file)
> > > {
> > > unsigned int minor = iminor(inode);
> > > struct drm_dp_aux_dev *aux_dev;
> > >
> > > aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > > if (!aux_dev)
> > > return -ENODEV;
> > >
> > > file->private_data = aux_dev;
> > > return 0;
> > > }
> > >
> > >
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >
> >
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2020-08-11 8:58 ` Daniel Vetter
@ 2020-08-11 22:16 ` Zwane Mwaikambo
2020-08-12 14:10 ` Daniel Vetter
0 siblings, 1 reply; 19+ messages in thread
From: Zwane Mwaikambo @ 2020-08-11 22:16 UTC (permalink / raw)
To: Daniel Vetter; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel
On Tue, 11 Aug 2020, Daniel Vetter wrote:
> On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > Hi Folks,
> > I know this thread eventually dropped off due to not identifying
> > > the underlying issue. It's still occurring on 5.8 and in my case it
> > happened because the udev device nodes for the DP aux devices were not
> > cleaned up whereas the kernel had no association with them. I can
> > reproduce the bug just by creating a device node for a non-existent minor
> > device and calling open().
>
> Hm I don't have that thread anymore, but generally these bugs are solved
> by not registering the device before it's ready for use. We do have
> drm_connector->late_register for that stuff. Just a guess since I'm not
> seeing full details here.
In this particular case, the physical device disappeared before the nodes
were cleaned up. It involves putting a computer to sleep with a monitor
plugged in and then waking it up with the monitor unplugged.
> >
> > To me it still makes sense to just check aux_dev because the chardev has
> > no way to check before calling.
> >
> > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > 60 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> > 61 {
> > 62 struct drm_dp_aux_dev *aux_dev = NULL;
> > 63
> > 64 mutex_lock(&aux_idr_mutex);
> > 65 aux_dev = idr_find(&aux_idr, index);
> > 66 if (!kref_get_unless_zero(&aux_dev->refcount))
> > 67 aux_dev = NULL;
> > 68 mutex_unlock(&aux_idr_mutex);
> > 69
> > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > $8 = 0x18
> >
> > static int auxdev_open(struct inode *inode, struct file *file)
> > {
> > unsigned int minor = iminor(inode);
> > struct drm_dp_aux_dev *aux_dev;
> >
> > aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > if (!aux_dev)
> > return -ENODEV;
> >
> > file->private_data = aux_dev;
> > return 0;
> > }
> >
> >
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
2020-08-10 17:11 Zwane Mwaikambo
@ 2020-08-11 8:58 ` Daniel Vetter
2020-08-11 22:16 ` Zwane Mwaikambo
0 siblings, 1 reply; 19+ messages in thread
From: Daniel Vetter @ 2020-08-11 8:58 UTC (permalink / raw)
To: Zwane Mwaikambo; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel
On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> Hi Folks,
> I know this thread eventually dropped off due to not identifying
> > the underlying issue. It's still occurring on 5.8 and in my case it
> happened because the udev device nodes for the DP aux devices were not
> cleaned up whereas the kernel had no association with them. I can
> reproduce the bug just by creating a device node for a non-existent minor
> device and calling open().
Hm I don't have that thread anymore, but generally these bugs are solved
by not registering the device before it's ready for use. We do have
drm_connector->late_register for that stuff. Just a guess since I'm not
seeing full details here.
-Daniel
>
> To me it still makes sense to just check aux_dev because the chardev has
> no way to check before calling.
>
> (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> 60 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> 61 {
> 62 struct drm_dp_aux_dev *aux_dev = NULL;
> 63
> 64 mutex_lock(&aux_idr_mutex);
> 65 aux_dev = idr_find(&aux_idr, index);
> 66 if (!kref_get_unless_zero(&aux_dev->refcount))
> 67 aux_dev = NULL;
> 68 mutex_unlock(&aux_idr_mutex);
> 69
> (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> $8 = 0x18
>
> static int auxdev_open(struct inode *inode, struct file *file)
> {
> unsigned int minor = iminor(inode);
> struct drm_dp_aux_dev *aux_dev;
>
> aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> if (!aux_dev)
> return -ENODEV;
>
> file->private_data = aux_dev;
> return 0;
> }
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-10 17:11 Zwane Mwaikambo
2020-08-11 8:58 ` Daniel Vetter
0 siblings, 1 reply; 19+ messages in thread
From: Zwane Mwaikambo @ 2020-08-10 17:11 UTC (permalink / raw)
To: tcamuso; +Cc: Linux Kernel, dri-devel, dkwon
Hi Folks,
I know this thread eventually dropped off due to not identifying
the underlying issue. It's still occurring on 5.8 and in my case it
happened because the udev device nodes for the DP aux devices were not
cleaned up whereas the kernel had no association with them. I can
reproduce the bug just by creating a device node for a non-existent minor
device and calling open().
To me it still makes sense to just check aux_dev because the chardev has
no way to check before calling.
(gdb) list *drm_dp_aux_dev_get_by_minor+0x29
0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
60 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
61 {
62 struct drm_dp_aux_dev *aux_dev = NULL;
63
64 mutex_lock(&aux_idr_mutex);
65 aux_dev = idr_find(&aux_idr, index);
66 if (!kref_get_unless_zero(&aux_dev->refcount))
67 aux_dev = NULL;
68 mutex_unlock(&aux_idr_mutex);
69
(gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
$8 = 0x18
static int auxdev_open(struct inode *inode, struct file *file)
{
unsigned int minor = iminor(inode);
struct drm_dp_aux_dev *aux_dev;
aux_dev = drm_dp_aux_dev_get_by_minor(minor);
if (!aux_dev)
return -ENODEV;
file->private_data = aux_dev;
return 0;
}
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2020-09-08 18:45 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-23 11:09 [PATCH] drm: assure aux_dev is nonzero before using it tcamuso
2019-05-24 8:36 ` Jani Nikula
2019-05-24 10:48 ` tony camuso
2019-05-24 11:58 ` Ville Syrjälä
2019-07-10 13:47 ` Tony Camuso
2019-07-10 13:56 ` Ville Syrjälä
2019-07-12 16:07 ` Tony Camuso
2019-07-12 17:06 ` Ville Syrjälä
2019-07-12 17:35 ` Tony Camuso
2019-09-23 15:03 ` Tony Camuso
2019-09-23 15:22 ` Ville Syrjälä
2020-08-10 17:11 Zwane Mwaikambo
2020-08-11 8:58 ` Daniel Vetter
2020-08-11 22:16 ` Zwane Mwaikambo
2020-08-12 14:10 ` Daniel Vetter
2020-08-12 15:44 ` Lyude Paul
2020-08-12 20:21 ` Zwane Mwaikambo
2020-08-18 17:58 ` Zwane Mwaikambo
2020-09-08 18:41 ` Lyude Paul
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).