From 3e1a12754d4df5804bfca5dedf09d2ba291bdc2a Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Wed, 15 Aug 2018 15:00:15 -0400
Subject: drm/nouveau: Fix deadlocks in nouveau_connector_detect()

From: Lyude Paul <lyude@redhat.com>

commit 3e1a12754d4df5804bfca5dedf09d2ba291bdc2a upstream.

When we disable hotplugging on the GPU, we need to be able to
synchronize with each connector's hotplug interrupt handler before the
interrupt is finally disabled. This can be a problem however, since
nouveau_connector_detect() currently grabs a runtime power reference
when handling connector probing. This will deadlock the runtime suspend
handler like so:

[ 861.480896] INFO: task kworker/0:2:61 blocked for more than 120 seconds.
[ 861.483290] Tainted: G O 4.18.0-rc6Lyude-Test+ #1
[ 861.485158] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 861.486332] kworker/0:2 D 0 61 2 0x80000000
[ 861.487044] Workqueue: events nouveau_display_hpd_work [nouveau]
[ 861.487737] Call Trace:
[ 861.488394] __schedule+0x322/0xaf0
[ 861.489070] schedule+0x33/0x90
[ 861.489744] rpm_resume+0x19c/0x850
[ 861.490392] ? finish_wait+0x90/0x90
[ 861.491068] __pm_runtime_resume+0x4e/0x90
[ 861.491753] nouveau_display_hpd_work+0x22/0x60 [nouveau]
[ 861.492416] process_one_work+0x231/0x620
[ 861.493068] worker_thread+0x44/0x3a0
[ 861.493722] kthread+0x12b/0x150
[ 861.494342] ? wq_pool_ids_show+0x140/0x140
[ 861.494991] ? kthread_create_worker_on_cpu+0x70/0x70
[ 861.495648] ret_from_fork+0x3a/0x50
[ 861.496304] INFO: task kworker/6:2:320 blocked for more than 120 seconds.
[ 861.496968] Tainted: G O 4.18.0-rc6Lyude-Test+ #1
[ 861.497654] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 861.498341] kworker/6:2 D 0 320 2 0x80000080
[ 861.499045] Workqueue: pm pm_runtime_work
[ 861.499739] Call Trace:
[ 861.500428] __schedule+0x322/0xaf0
[ 861.501134] ? wait_for_completion+0x104/0x190
[ 861.501851] schedule+0x33/0x90
[ 861.502564] schedule_timeout+0x3a5/0x590
[ 861.503284] ? mark_held_locks+0x58/0x80
[ 861.503988] ? _raw_spin_unlock_irq+0x2c/0x40
[ 861.504710] ? wait_for_completion+0x104/0x190
[ 861.505417] ? trace_hardirqs_on_caller+0xf4/0x190
[ 861.506136] ? wait_for_completion+0x104/0x190
[ 861.506845] wait_for_completion+0x12c/0x190
[ 861.507555] ? wake_up_q+0x80/0x80
[ 861.508268] flush_work+0x1c9/0x280
[ 861.508990] ? flush_workqueue_prep_pwqs+0x1b0/0x1b0
[ 861.509735] nvif_notify_put+0xb1/0xc0 [nouveau]
[ 861.510482] nouveau_display_fini+0xbd/0x170 [nouveau]
[ 861.511241] nouveau_display_suspend+0x67/0x120 [nouveau]
[ 861.511969] nouveau_do_suspend+0x5e/0x2d0 [nouveau]
[ 861.512715] nouveau_pmops_runtime_suspend+0x47/0xb0 [nouveau]
[ 861.513435] pci_pm_runtime_suspend+0x6b/0x180
[ 861.514165] ? pci_has_legacy_pm_support+0x70/0x70
[ 861.514897] __rpm_callback+0x7a/0x1d0
[ 861.515618] ? pci_has_legacy_pm_support+0x70/0x70
[ 861.516313] rpm_callback+0x24/0x80
[ 861.517027] ? pci_has_legacy_pm_support+0x70/0x70
[ 861.517741] rpm_suspend+0x142/0x6b0
[ 861.518449] pm_runtime_work+0x97/0xc0
[ 861.519144] process_one_work+0x231/0x620
[ 861.519831] worker_thread+0x44/0x3a0
[ 861.520522] kthread+0x12b/0x150
[ 861.521220] ? wq_pool_ids_show+0x140/0x140
[ 861.521925] ? kthread_create_worker_on_cpu+0x70/0x70
[ 861.522622] ret_from_fork+0x3a/0x50
[ 861.523299] INFO: task kworker/6:0:1329 blocked for more than 120 seconds.
[ 861.523977] Tainted: G O 4.18.0-rc6Lyude-Test+ #1
[ 861.524644] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 861.525349] kworker/6:0 D 0 1329 2 0x80000000
[ 861.526073] Workqueue: events nvif_notify_work [nouveau]
[ 861.526751] Call Trace:
[ 861.527411] __schedule+0x322/0xaf0
[ 861.528089] schedule+0x33/0x90
[ 861.528758] rpm_resume+0x19c/0x850
[ 861.529399] ? finish_wait+0x90/0x90
[ 861.530073] __pm_runtime_resume+0x4e/0x90
[ 861.530798] nouveau_connector_detect+0x7e/0x510 [nouveau]
[ 861.531459] ? ww_mutex_lock+0x47/0x80
[ 861.532097] ? ww_mutex_lock+0x47/0x80
[ 861.532819] ? drm_modeset_lock+0x88/0x130 [drm]
[ 861.533481] drm_helper_probe_detect_ctx+0xa0/0x100 [drm_kms_helper]
[ 861.534127] drm_helper_hpd_irq_event+0xa4/0x120 [drm_kms_helper]
[ 861.534940] nouveau_connector_hotplug+0x98/0x120 [nouveau]
[ 861.535556] nvif_notify_work+0x2d/0xb0 [nouveau]
[ 861.536221] process_one_work+0x231/0x620
[ 861.536994] worker_thread+0x44/0x3a0
[ 861.537757] kthread+0x12b/0x150
[ 861.538463] ? wq_pool_ids_show+0x140/0x140
[ 861.539102] ? kthread_create_worker_on_cpu+0x70/0x70
[ 861.539815] ret_from_fork+0x3a/0x50
[ 861.540521]
Showing all locks held in the system:
[ 861.541696] 2 locks held by kworker/0:2/61:
[ 861.542406] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620
[ 861.543071] #1: 0000000076868126 ((work_completion)(&drm->hpd_work)){+.+.}, at: process_one_work+0x1b3/0x620
[ 861.543814] 1 lock held by khungtaskd/64:
[ 861.544535] #0: 0000000059db4b53 (rcu_read_lock){....}, at: debug_show_all_locks+0x23/0x185
[ 861.545160] 3 locks held by kworker/6:2/320:
[ 861.545896] #0: 00000000d9e1bc59 ((wq_completion)"pm"){+.+.}, at: process_one_work+0x1b3/0x620
[ 861.546702] #1: 00000000c9f92d84 ((work_completion)(&dev->power.work)){+.+.}, at: process_one_work+0x1b3/0x620
[ 861.547443] #2: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: nouveau_display_fini+0x96/0x170 [nouveau]
[ 861.548146] 1 lock held by dmesg/983:
[ 861.548889] 2 locks held by zsh/1250:
[ 861.549605] #0: 00000000348e3cf6 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x37/0x40
[ 861.550393] #1: 000000007009a7a8 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0xc1/0x870
[ 861.551122] 6 locks held by kworker/6:0/1329:
[ 861.551957] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620
[ 861.552765] #1: 00000000ddb499ad ((work_completion)(&notify->work)#2){+.+.}, at: process_one_work+0x1b3/0x620
[ 861.553582] #2: 000000006e013cbe (&dev->mode_config.mutex){+.+.}, at: drm_helper_hpd_irq_event+0x6c/0x120 [drm_kms_helper]
[ 861.554357] #3: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: drm_helper_hpd_irq_event+0x78/0x120 [drm_kms_helper]
[ 861.555227] #4: 0000000044f294d9 (crtc_ww_class_acquire){+.+.}, at: drm_helper_probe_detect_ctx+0x3d/0x100 [drm_kms_helper]
[ 861.556133] #5: 00000000db193642 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_lock+0x4b/0x130 [drm]

[ 861.557864] =============================================

[ 861.559507] NMI backtrace for cpu 2
[ 861.560363] CPU: 2 PID: 64 Comm: khungtaskd Tainted: G O 4.18.0-rc6Lyude-Test+ #1
[ 861.561197] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET78W (1.51 ) 05/18/2018
[ 861.561948] Call Trace:
[ 861.562757] dump_stack+0x8e/0xd3
[ 861.563516] nmi_cpu_backtrace.cold.3+0x14/0x5a
[ 861.564269] ? lapic_can_unplug_cpu.cold.27+0x42/0x42
[ 861.565029] nmi_trigger_cpumask_backtrace+0xa1/0xae
[ 861.565789] arch_trigger_cpumask_backtrace+0x19/0x20
[ 861.566558] watchdog+0x316/0x580
[ 861.567355] kthread+0x12b/0x150
[ 861.568114] ? reset_hung_task_detector+0x20/0x20
[ 861.568863] ? kthread_create_worker_on_cpu+0x70/0x70
[ 861.569598] ret_from_fork+0x3a/0x50
[ 861.570370] Sending NMI from CPU 2 to CPUs 0-1,3-7:
[ 861.571426] NMI backtrace for cpu 6 skipped: idling at intel_idle+0x7f/0x120
[ 861.571429] NMI backtrace for cpu 7 skipped: idling at intel_idle+0x7f/0x120
[ 861.571432] NMI backtrace for cpu 3 skipped: idling at intel_idle+0x7f/0x120
[ 861.571464] NMI backtrace for cpu 5 skipped: idling at intel_idle+0x7f/0x120
[ 861.571467] NMI backtrace for cpu 0 skipped: idling at intel_idle+0x7f/0x120
[ 861.571469] NMI backtrace for cpu 4 skipped: idling at intel_idle+0x7f/0x120
[ 861.571472] NMI backtrace for cpu 1 skipped: idling at intel_idle+0x7f/0x120
[ 861.572428] Kernel panic - not syncing: hung_task: blocked tasks

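In short, the two stuck workers above are waiting on each other: the
runtime-suspend worker is flushing the hotplug notifier's work item, while
that work item is blocked in a synchronous runtime-PM resume that cannot
proceed until the suspend finishes. Condensed call paths (a paraphrase of
the backtraces above, not literal source):

  /* Path 1 - "pm" workqueue, runtime suspend in progress */
  nouveau_pmops_runtime_suspend()
    nouveau_display_fini()
      nvif_notify_put()            /* flush_work() on the hotplug notify work;
                                    * waits for path 2 to return */

  /* Path 2 - "events" workqueue, the hotplug notify work */
  nvif_notify_work()
    nouveau_connector_hotplug()
      drm_helper_hpd_irq_event()
        nouveau_connector_detect()
          pm_runtime_get_sync()    /* blocks until the suspend in path 1
                                    * completes: neither side can finish */
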
So: fix this by making it so that normal hotplug handling /only/ happens
so long as the GPU is currently awake without any pending runtime PM
requests. In the event that a hotplug occurs while the device is
suspending or resuming, we can simply defer our response until the GPU
is fully runtime resumed again.

Changes since v4:
- Use a new trick I came up with using pm_runtime_get() instead of the
  hackish junk we had before

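The fix below relies on the return value of pm_runtime_get(), the
non-blocking reference-taking helper: roughly, it returns 1 when the device
is already runtime-active, 0 when the device is not active yet and the
resume will only happen asynchronously, and a negative error otherwise
(e.g. -EACCES when runtime PM is disabled for the device). A minimal sketch
of that dispatch, with handle_hpd()/defer_hpd() as hypothetical placeholders
rather than real nouveau functions:

  ret = pm_runtime_get(dev);                /* takes a usage reference, does
                                             * not wait for the resume */
  if (ret == 1) {
          handle_hpd();                     /* already powered: safe to touch hw */
          pm_runtime_put_autosuspend(dev);
  } else if (ret == 0) {
          defer_hpd();                      /* resume only queued: don't block here */
          pm_runtime_put_noidle(dev);       /* drop the ref without an idle check */
  } else if (ret == -EACCES) {
          handle_hpd();                     /* runtime PM disabled: device stays on */
          pm_runtime_put_autosuspend(dev);
  } else {
          pm_runtime_put_noidle(dev);       /* genuine error: drop the event */
  }
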
Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Daniel Vetter <daniel@ffwll.ch>
Cc: stable@vger.kernel.org
Cc: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/gpu/drm/nouveau/nouveau_connector.c |   22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -1120,6 +1120,26 @@ nouveau_connector_hotplug(struct nvif_no
 	const struct nvif_notify_conn_rep_v0 *rep = notify->data;
 	const char *name = connector->name;
 	struct nouveau_encoder *nv_encoder;
+	int ret;
+
+	ret = pm_runtime_get(drm->dev->dev);
+	if (ret == 0) {
+		/* We can't block here if there's a pending PM request
+		 * running, as we'll deadlock nouveau_display_fini() when it
+		 * calls nvif_put() on our nvif_notify struct. So, simply
+		 * defer the hotplug event until the device finishes resuming
+		 */
+		NV_DEBUG(drm, "Deferring HPD on %s until runtime resume\n",
+			 name);
+		schedule_work(&drm->hpd_work);
+
+		pm_runtime_put_noidle(drm->dev->dev);
+		return NVIF_NOTIFY_KEEP;
+	} else if (ret != 1 && ret != -EACCES) {
+		NV_WARN(drm, "HPD on %s dropped due to RPM failure: %d\n",
+			name, ret);
+		return NVIF_NOTIFY_DROP;
+	}
 
 	if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) {
 		NV_DEBUG(drm, "service %s\n", name);
@@ -1137,6 +1157,8 @@ nouveau_connector_hotplug(struct nvif_no
 		drm_helper_hpd_irq_event(connector->dev);
 	}
 
+	pm_runtime_mark_last_busy(drm->dev->dev);
+	pm_runtime_put_autosuspend(drm->dev->dev);
 	return NVIF_NOTIFY_KEEP;
 }
 
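For completeness: the deferral works because schedule_work(&drm->hpd_work)
re-queues nouveau's existing hotplug worker, the same nouveau_display_hpd_work
seen in the first backtrace, which takes a synchronous runtime-PM reference
before re-running connector probing. Roughly (paraphrased, not the literal
nouveau_display.c source):

  static void nouveau_display_hpd_work(struct work_struct *work)
  {
          struct nouveau_drm *drm = container_of(work, typeof(*drm), hpd_work);

          pm_runtime_get_sync(drm->dev->dev);     /* waits for the resume to finish */
          drm_helper_hpd_irq_event(drm->dev);     /* re-probe connectors */
          pm_runtime_mark_last_busy(drm->dev->dev);
          pm_runtime_put_autosuspend(drm->dev->dev);
  }

So a hotplug event that arrives mid-transition is simply replayed once the
GPU is fully awake again.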