git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-6.6/ice-fix-memory-corruption-bug-with-suspend-and-rebui.patch
Linux 6.1.85
[thirdparty/kernel/stable-queue.git] / queue-6.6 / ice-fix-memory-corruption-bug-with-suspend-and-rebui.patch
1 From ddace4ce3fff0e042cdb7a7b701b3a51d0832dad Mon Sep 17 00:00:00 2001
2 From: Sasha Levin <sashal@kernel.org>
3 Date: Tue, 5 Mar 2024 15:02:03 -0800
4 Subject: ice: fix memory corruption bug with suspend and rebuild
5
6 From: Jesse Brandeburg <jesse.brandeburg@intel.com>
7
8 [ Upstream commit 1cb7fdb1dfde1aab66780b4ba44dba6402172111 ]
9
10 The ice driver would previously panic after suspend. This is caused
11 by the driver calling *only* the ice_vsi_free_q_vectors() function
12 when it is suspending. Since commit b3e7b3a6ee92 ("ice: prevent
13 NULL pointer deref during reload") the driver has zeroed out
14 num_q_vectors, and only restored it in ice_vsi_cfg_def().
15
16 This further causes the ice_rebuild() function to allocate a zero length
17 buffer, after which num_q_vectors is updated, and then the new value of
18 num_q_vectors is used to index into the zero length buffer, which
19 corrupts memory.
20
21 The fix entails making sure all the code referencing num_q_vectors only
22 does so after it has been reset via ice_vsi_cfg_def().
23
24 I didn't perform a full bisect, but I was able to test against the 6.1.77
25 kernel, and that ice driver works fine for suspend/resume with no panic,
26 so this problem was introduced sometime since then.
27
28 Also clean up an un-needed init of a local variable in the function
29 being modified.
30
31 PANIC from 6.8.0-rc1:
32
33 [1026674.915596] PM: suspend exit
34 [1026675.664697] ice 0000:17:00.1: PTP reset successful
35 [1026675.664707] ice 0000:17:00.1: 2755 msecs passed between update to cached PHC time
36 [1026675.667660] ice 0000:b1:00.0: PTP reset successful
37 [1026675.675944] ice 0000:b1:00.0: 2832 msecs passed between update to cached PHC time
38 [1026677.137733] ixgbe 0000:31:00.0 ens787: NIC Link is Up 1 Gbps, Flow Control: None
39 [1026677.190201] BUG: kernel NULL pointer dereference, address: 0000000000000010
40 [1026677.192753] ice 0000:17:00.0: PTP reset successful
41 [1026677.192764] ice 0000:17:00.0: 4548 msecs passed between update to cached PHC time
42 [1026677.197928] #PF: supervisor read access in kernel mode
43 [1026677.197933] #PF: error_code(0x0000) - not-present page
44 [1026677.197937] PGD 1557a7067 P4D 0
45 [1026677.212133] ice 0000:b1:00.1: PTP reset successful
46 [1026677.212143] ice 0000:b1:00.1: 4344 msecs passed between update to cached PHC time
47 [1026677.212575]
48 [1026677.243142] Oops: 0000 [#1] PREEMPT SMP NOPTI
49 [1026677.247918] CPU: 23 PID: 42790 Comm: kworker/23:0 Kdump: loaded Tainted: G W 6.8.0-rc1+ #1
50 [1026677.257989] Hardware name: Intel Corporation M50CYP2SBSTD/M50CYP2SBSTD, BIOS SE5C620.86B.01.01.0005.2202160810 02/16/2022
51 [1026677.269367] Workqueue: ice ice_service_task [ice]
52 [1026677.274592] RIP: 0010:ice_vsi_rebuild_set_coalesce+0x130/0x1e0 [ice]
53 [1026677.281421] Code: 0f 84 3a ff ff ff 41 0f b7 74 ec 02 66 89 b0 22 02 00 00 81 e6 ff 1f 00 00 e8 ec fd ff ff e9 35 ff ff ff 48 8b 43 30 49 63 ed <41> 0f b7 34 24 41 83 c5 01 48 8b 3c e8 66 89 b7 aa 02 00 00 81 e6
54 [1026677.300877] RSP: 0018:ff3be62a6399bcc0 EFLAGS: 00010202
55 [1026677.306556] RAX: ff28691e28980828 RBX: ff28691e41099828 RCX: 0000000000188000
56 [1026677.314148] RDX: 0000000000000000 RSI: 0000000000000010 RDI: ff28691e41099828
57 [1026677.321730] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
58 [1026677.329311] R10: 0000000000000007 R11: ffffffffffffffc0 R12: 0000000000000010
59 [1026677.336896] R13: 0000000000000000 R14: 0000000000000000 R15: ff28691e0eaa81a0
60 [1026677.344472] FS: 0000000000000000(0000) GS:ff28693cbffc0000(0000) knlGS:0000000000000000
61 [1026677.353000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
62 [1026677.359195] CR2: 0000000000000010 CR3: 0000000128df4001 CR4: 0000000000771ef0
63 [1026677.366779] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
64 [1026677.374369] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
65 [1026677.381952] PKRU: 55555554
66 [1026677.385116] Call Trace:
67 [1026677.388023] <TASK>
68 [1026677.390589] ? __die+0x20/0x70
69 [1026677.394105] ? page_fault_oops+0x82/0x160
70 [1026677.398576] ? do_user_addr_fault+0x65/0x6a0
71 [1026677.403307] ? exc_page_fault+0x6a/0x150
72 [1026677.407694] ? asm_exc_page_fault+0x22/0x30
73 [1026677.412349] ? ice_vsi_rebuild_set_coalesce+0x130/0x1e0 [ice]
74 [1026677.418614] ice_vsi_rebuild+0x34b/0x3c0 [ice]
75 [1026677.423583] ice_vsi_rebuild_by_type+0x76/0x180 [ice]
76 [1026677.429147] ice_rebuild+0x18b/0x520 [ice]
77 [1026677.433746] ? delay_tsc+0x8f/0xc0
78 [1026677.437630] ice_do_reset+0xa3/0x190 [ice]
79 [1026677.442231] ice_service_task+0x26/0x440 [ice]
80 [1026677.447180] process_one_work+0x174/0x340
81 [1026677.451669] worker_thread+0x27e/0x390
82 [1026677.455890] ? __pfx_worker_thread+0x10/0x10
83 [1026677.460627] kthread+0xee/0x120
84 [1026677.464235] ? __pfx_kthread+0x10/0x10
85 [1026677.468445] ret_from_fork+0x2d/0x50
86 [1026677.472476] ? __pfx_kthread+0x10/0x10
87 [1026677.476671] ret_from_fork_asm+0x1b/0x30
88 [1026677.481050] </TASK>
89
90 Fixes: b3e7b3a6ee92 ("ice: prevent NULL pointer deref during reload")
91 Reported-by: Robert Elliott <elliott@hpe.com>
92 Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
93 Reviewed-by: Simon Horman <horms@kernel.org>
94 Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
95 Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
96 Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
97 Signed-off-by: Sasha Levin <sashal@kernel.org>
98 ---
99 drivers/net/ethernet/intel/ice/ice_lib.c | 18 +++++++++---------
100 1 file changed, 9 insertions(+), 9 deletions(-)
101
102 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
103 index 47298ab675a55..0b7132a42e359 100644
104 --- a/drivers/net/ethernet/intel/ice/ice_lib.c
105 +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
106 @@ -3157,7 +3157,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
107 {
108 struct ice_vsi_cfg_params params = {};
109 struct ice_coalesce_stored *coalesce;
110 - int prev_num_q_vectors = 0;
111 + int prev_num_q_vectors;
112 struct ice_pf *pf;
113 int ret;
114
115 @@ -3171,13 +3171,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
116 if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
117 return -EINVAL;
118
119 - coalesce = kcalloc(vsi->num_q_vectors,
120 - sizeof(struct ice_coalesce_stored), GFP_KERNEL);
121 - if (!coalesce)
122 - return -ENOMEM;
123 -
124 - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
125 -
126 ret = ice_vsi_realloc_stat_arrays(vsi);
127 if (ret)
128 goto err_vsi_cfg;
129 @@ -3187,6 +3180,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
130 if (ret)
131 goto err_vsi_cfg;
132
133 + coalesce = kcalloc(vsi->num_q_vectors,
134 + sizeof(struct ice_coalesce_stored), GFP_KERNEL);
135 + if (!coalesce)
136 + return -ENOMEM;
137 +
138 + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
139 +
140 ret = ice_vsi_cfg_tc_lan(pf, vsi);
141 if (ret) {
142 if (vsi_flags & ICE_VSI_FLAG_INIT) {
143 @@ -3205,8 +3205,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
144
145 err_vsi_cfg_tc_lan:
146 ice_vsi_decfg(vsi);
147 -err_vsi_cfg:
148 kfree(coalesce);
149 +err_vsi_cfg:
150 return ret;
151 }
152
153 --
154 2.43.0
155