/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"

#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "smuio/smuio_11_0_0_offset.h"
#include "smuio/smuio_11_0_0_sh_mask.h"
#include "navi10_enum.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"

#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
#include "nbio_v2_3.h"
/*
 * Navi10 has two graphic rings to share each graphic pipe.
 */
#define GFX10_NUM_GFX_RINGS_NV1X	1
#define GFX10_MEC_HPD_SIZE		2048

#define F32_CE_PROGRAM_RAM_SIZE		65536
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L

#define mmCGTT_GS_NGG_CLK_CTRL		0x5087
#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX	1
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");

MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi14_me.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");

MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi12_me.bin");
MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
};
static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = {
	/* Pending on emulation bring up */
};
static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
};
static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
};
static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
	static void *scratch_reg0;
	static void *scratch_reg1;
	static void *scratch_reg2;
	static void *scratch_reg3;
	static void *spare_int;
	static uint32_t grbm_cntl;
	static uint32_t grbm_idx;
	uint32_t i = 0;
	uint32_t retries = 50000;

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	if (amdgpu_sriov_runtime(adev)) {
		pr_err("shouldn't call rlcg write register during runtime\n");
		return;
	}

	writel(v, scratch_reg0);
	writel(offset | 0x80000000, scratch_reg1);
	writel(1, spare_int);
	for (i = 0; i < retries; i++) {
		u32 tmp;

		tmp = readl(scratch_reg1);
		if (!(tmp & 0x80000000))
			break;

		udelay(10);
	}

	if (i >= retries)
		pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
}
static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = {
	/* Pending on emulation bring up */
};

static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = {
	/* Pending on emulation bring up */
};
#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}
static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}
static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx10_kiq_set_resources,
	.kiq_map_queues = gfx10_kiq_map_queues,
	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
	.kiq_query_status = gfx10_kiq_query_status,
	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
}
static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_0_nv10,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
		break;
	case CHIP_NAVI14:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_nv14,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
		break;
	case CHIP_NAVI12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_2,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_2_nv12,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
		break;
	default:
		break;
	}
}
static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned int i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}

	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.cp_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_NAVI14:
		if ((adev->gfx.me_fw_version >= 0x00000046) &&
		    (adev->gfx.me_feature_version >= 27) &&
		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
		    (adev->gfx.pfp_feature_version >= 27) &&
		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
		    (adev->gfx.mec_feature_version >= 27))
			adev->gfx.cp_fw_write_wait = true;
		break;
	default:
		break;
	}

	if (adev->gfx.cp_fw_write_wait == false)
		DRM_WARN_ONCE("CP firmware version too old, please update!");
}
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
	bool ret = false;

	switch (adev->pdev->revision) {
	default:
		break;
	}

	return ret;
}

static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[40];
	char wks[10];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;

	memset(wks, 0, sizeof(wks));
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		if (!(adev->pdev->device == 0x7340 &&
		      adev->pdev->revision != 0x00))
			snprintf(wks, sizeof(wks), "_wks");
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (!amdgpu_sriov_vf(adev)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		if (version_major == 2 && version_minor == 1)
			adev->gfx.rlc.is_rlc_v2_1 = true;

		adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
		adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
		adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
		adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
		adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
		adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
		adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
		adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
		adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
		adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
		adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
		adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
		if (!adev->gfx.rlc.register_list_format) {
			err = -ENOMEM;
			goto out;
		}

		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
				       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
		for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
			adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

		adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
				       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
		for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
			adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

		if (adev->gfx.rlc.is_rlc_v2_1)
			gfx_v10_0_init_rlc_ext_microcode(adev);
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) -
			      le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) -
				      le32_to_cpu(cp_hdr->jt_size) * 4,
				      PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
				      PAGE_SIZE);
		}
	}

	gfx_v10_0_check_fw_write_wait(adev);

out:
	if (err) {
		dev_err(adev->dev,
			"gfx10: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}

	gfx_v10_0_check_gfxoff_flag(adev);

	return err;
}
static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
				     volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}
static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx10_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}
static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
static int gfx_v10_0_me_init(struct amdgpu_device *adev)
{
	int r;

	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);

	r = gfx_v10_0_init_microcode(adev);
	if (r)
		DRM_ERROR("Failed to load gfx firmware!\n");

	return r;
}
static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data = NULL;
	unsigned int fw_size;
	u32 *fw = NULL;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v10_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

		r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.mec_fw_obj,
					      &adev->gfx.mec.mec_fw_gpu_addr,
					      (void **)&fw);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
			gfx_v10_0_mec_fini(adev);
			return r;
		}

		memcpy(fw, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	}

	return 0;
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx10 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 2 wave data */
	dst[(*no_fields)++] = 2;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
}
static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}
static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm)
{
	nv_grbm_select(adev, me, pipe, q, vm);
}
static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v10_0_select_se_sh,
	.read_wave_data = &gfx_v10_0_read_wave_data,
	.read_wave_sgprs = &gfx_v10_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v10_0_select_me_pipe_q,
};
static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	adev->gfx.funcs = &gfx_v10_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
}
static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	int r;
	struct amdgpu_ring *ring;
	unsigned int irq_type;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;
	return 0;
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned int irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX10_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
static int gfx_v10_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id = 0;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 2;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	default:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	}

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
			      &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v10_0_scratch_init(adev);

	r = gfx_v10_0_me_init(adev);
	if (r)
		return r;

	r = gfx_v10_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v10_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v10_0_gfx_ring_init(adev, ring_id,
							    i, k, j);
				if (r)
					return r;
				ring_id++;
			}
		}
	}

	ring_id = 0;
	/* set up the compute queues - allocate horizontally across pipes */
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
								     j))
					continue;

				r = gfx_v10_0_compute_ring_init(adev, ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
	if (r)
		return r;

	/* allocate visible FB for rlc auto-loading fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
		if (r)
			return r;
	}

	adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;

	gfx_v10_0_gpu_early_init(adev);

	return 0;
}
static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
			      &adev->gfx.pfp.pfp_fw_gpu_addr,
			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
}

static void gfx_v10_0_ce_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj,
			      &adev->gfx.ce.ce_fw_gpu_addr,
			      (void **)&adev->gfx.ce.ce_fw_ptr);
}

static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
			      &adev->gfx.me.me_fw_gpu_addr,
			      (void **)&adev->gfx.me.me_fw_ptr);
}
static int gfx_v10_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v10_0_pfp_fini(adev);
	gfx_v10_0_ce_fini(adev);
	gfx_v10_0_me_fini(adev);
	gfx_v10_0_rlc_fini(adev);
	gfx_v10_0_mec_fini(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);

	gfx_v10_0_free_microcode(adev);

	return 0;
}
static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}

static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
				     instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
}
static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v10_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev)
{
	uint32_t num_sc;
	uint32_t enabled_rb_per_sh;
	uint32_t active_rb_bitmap;
	uint32_t num_rb_per_sc;
	uint32_t num_packer_per_sc;
	uint32_t pa_sc_tile_steering_override;

	/* init num_sc */
	num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
		 adev->gfx.config.num_sc_per_sh;
	/* init num_rb_per_sc */
	active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev);
	enabled_rb_per_sh = hweight32(active_rb_bitmap);
	num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh;
	/* init num_packer_per_sc */
	num_packer_per_sc = adev->gfx.config.num_packer_per_sc;

	pa_sc_tile_steering_override = 0;
	pa_sc_tile_steering_override |=
		(order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK;
	pa_sc_tile_steering_override |=
		(order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK;
	pa_sc_tile_steering_override |=
		(order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK;

	return pa_sc_tile_steering_override;
}
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)

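/*
 * Set up the LDS/scratch/GPUVM apertures for the compute VMIDs and clear
 * their GDS/GWS/OA allocations; firmware enables those for target VMIDs.
 */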
static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		nv_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
	}
}

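/*
 * Disable the TCP/SQC clients that belong to harvested (inactive) WGPs so
 * that GCRD and UTCL1/UTCL0 stop issuing requests to them.
 */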
static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
{
	int i, j, k;
	int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
	u32 tmp, wgp_active_bitmap = 0;
	u32 gcrd_targets_disable_tcp = 0;
	u32 utcl_invreq_disable = 0;
	/*
	 * GCRD_TARGETS_DISABLE field contains
	 * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
	 * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
	 */
	u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		max_wgp_per_sh + /* SQC */
		4); /* GL1C */
	/*
	 * UTCL1_UTCL0_INVREQ_DISABLE field contains
	 * for Navi10/Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
	 * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
	 */
	u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		2 * max_wgp_per_sh + /* SQC */
		4 + /* RMI */
		1); /* SQG */

	if (adev->asic_type == CHIP_NAVI10 ||
	    adev->asic_type == CHIP_NAVI14 ||
	    adev->asic_type == CHIP_NAVI12) {
		mutex_lock(&adev->grbm_idx_mutex);
		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
				wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
				/*
				 * Set corresponding TCP bits for the inactive WGPs in
				 * GCRD_SA_TARGETS_DISABLE
				 */
				gcrd_targets_disable_tcp = 0;
				/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
				utcl_invreq_disable = 0;

				for (k = 0; k < max_wgp_per_sh; k++) {
					if (!(wgp_active_bitmap & (1 << k))) {
						gcrd_targets_disable_tcp |= 3 << (2 * k);
						utcl_invreq_disable |= (3 << (2 * k)) |
							(3 << (2 * (max_wgp_per_sh + k)));
					}
				}

				tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
				/* only override TCP & SQC bits */
				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
				tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
				WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);

				tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
				/* only override TCP bits */
				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
				tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
				WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
			}
		}

		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		mutex_unlock(&adev->grbm_idx_mutex);
	}
}

static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
{
	/* TCCs are global (not instanced). */
	uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) |
			       RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE);

	adev->gfx.config.tcc_disabled_mask =
		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
}

static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v10_0_tiling_mode_table_init(adev);

	gfx_v10_0_setup_rb(adev);
	gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
	gfx_v10_0_get_tcc_info(adev);
	adev->gfx.config.pa_sc_tile_steering_override =
		gfx_v10_0_init_pa_sc_tile_steering_override(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
		nv_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		if (i != 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
	nv_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v10_0_init_compute_vmid(adev);
	gfx_v10_0_init_gds_vmid(adev);
}

static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						bool enable)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
			    enable ? 1 : 0);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}

static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);

	/* csib */
	WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
			 adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
			 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);

	return 0;
}

void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
}

static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t rlc_pg_cntl;

	rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);

	if (!enable) {
		/* RLC_PG_CNTL[23] = 0 (default)
		 * RLC will wait for handshake acks with SMU
		 * GFXOFF will be enabled
		 * RLC_PG_CNTL[23] = 1
		 * RLC will not issue any message to SMU
		 * hence no handshake between SMU & RLC
		 * GFXOFF will be disabled
		 */
		rlc_pg_cntl |= 0x800000;
	} else
		rlc_pg_cntl &= ~0x800000;
	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
}

static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
{
	/* TODO: enable rlc & smu handshake until smu
	 * and gfxoff feature works as expected */
	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
		gfx_v10_0_rlc_smu_handshake_cntl(adev, false);

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);
}

static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
{
	uint32_t tmp;

	/* enable Save Restore Machine */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
}

static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
				   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
		     RLCG_UCODE_LOADING_START_ADDRESS);

	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA,
			     le32_to_cpup(fw_data++));

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {

		r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
		if (r)
			return r;

		gfx_v10_0_init_csb(adev);

		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
			gfx_v10_0_rlc_enable_srm(adev);
	} else {
		if (amdgpu_sriov_vf(adev)) {
			gfx_v10_0_init_csb(adev);
			return 0;
		}

		adev->gfx.rlc.funcs->stop(adev);

		/* disable CG */
		WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);

		/* disable PG */
		WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			/* legacy rlc firmware loading */
			r = gfx_v10_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
			/* rlc backdoor autoload firmware */
			r = gfx_v10_0_rlc_backdoor_autoload_enable(adev);
			if (r)
				return r;
		}

		gfx_v10_0_init_csb(adev);

		adev->gfx.rlc.funcs->start(adev);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
			r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
			if (r)
				return r;
		}
	}

	return 0;
}

static struct {
	FIRMWARE_ID	id;
	unsigned int	offset;
	unsigned int	size;
} rlc_autoload_info[FIRMWARE_ID_MAX];

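/*
 * Parse the table of contents provided by the PSP firmware and fill
 * rlc_autoload_info[] with the offset and size of each ucode image.
 */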
static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
{
	int ret;
	RLC_TABLE_OF_CONTENT *rlc_toc;

	ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_GTT,
					&adev->gfx.rlc.rlc_toc_bo,
					&adev->gfx.rlc.rlc_toc_gpu_addr,
					(void **)&adev->gfx.rlc.rlc_toc_buf);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret);
		return ret;
	}

	/* Copy toc from psp sos fw to rlc toc buffer */
	memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size);

	rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
	while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) &&
		(rlc_toc->id < FIRMWARE_ID_MAX)) {
		if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) &&
		    (rlc_toc->id <= FIRMWARE_ID_CP_MES)) {
			/* Offset needs 4KB alignment */
			rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE);
		}

		rlc_autoload_info[rlc_toc->id].id = rlc_toc->id;
		rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4;
		rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;

		rlc_toc++;
	}

	return 0;
}

static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	FIRMWARE_ID id;
	int ret;

	ret = gfx_v10_0_parse_rlc_toc(adev);
	if (ret) {
		dev_err(adev->dev, "failed to parse rlc toc\n");
		return 0;
	}

	for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offset in rlc toc ucode is aligned */
	if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset)
		total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset +
			     rlc_autoload_info[FIRMWARE_ID_MAX-1].size;

	return total_size;
}

static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v10_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo,
			      &adev->gfx.rlc.rlc_toc_gpu_addr,
			      (void **)&adev->gfx.rlc.rlc_toc_buf);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
			      &adev->gfx.rlc.rlc_autoload_gpu_addr,
			      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
}

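/*
 * Copy one firmware image into the RLC autoload buffer at the offset the
 * TOC assigned to it, zero-padding up to the size reserved in the TOC.
 */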
static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
}

static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
{
	void *data;
	uint32_t size;

	data = adev->gfx.rlc.rlc_toc_buf;
	size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size;

	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_RLC_TOC,
						   data, size);
}

static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;

	/* pfp ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_PFP,
						   fw_data, fw_size);

	/* ce ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_CE,
						   fw_data, fw_size);

	/* me ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_ME,
						   fw_data, fw_size);

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
		le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_RLC_G_UCODE,
						   fw_data, fw_size);

	/* mec1 ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.mec_fw->data;
	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
		cp_hdr->jt_size * 4;
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_MEC,
						   fw_data, fw_size);
	/* mec2 ucode is not necessary if mec2 ucode is same as mec1 */
}

/* Temporarily put sdma part here */
static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v1_0 *sdma_hdr;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		sdma_hdr = (const struct sdma_firmware_header_v1_0 *)
			adev->sdma.instance[i].fw->data;
		fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes);

		if (i == 0) {
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size);
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA0_JT,
				(uint32_t *)fw_data +
				sdma_hdr->jt_offset,
				sdma_hdr->jt_size * 4);
		} else if (i == 1) {
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size);
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA1_JT,
				(uint32_t *)fw_data +
				sdma_hdr->jt_offset,
				sdma_hdr->jt_size * 4);
		}
	}
}

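/*
 * Stage all ucode images in the autoload buffer and program the RLC
 * bootload registers with the location and size of the RLC_G image.
 */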
static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size, tmp;
	uint64_t gpu_addr;

	gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
	gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
	gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);

	rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size);

	tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR);
	if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK |
		     RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) {
		DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n");
		return -EINVAL;
	}

	tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) {
		DRM_ERROR("RLC ROM should halt itself\n");
		return -EINVAL;
	}

	return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;
	uint64_t addr;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Program me ucode address into instruction cache address register */
	addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
	       rlc_autoload_info[FIRMWARE_ID_CP_ME].offset;
	WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
		     lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
		     upper_32_bits(addr));

	return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;
	uint64_t addr;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Program ce ucode address into instruction cache address register */
	addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
	       rlc_autoload_info[FIRMWARE_ID_CP_CE].offset;
	WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
		     lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
		     upper_32_bits(addr));

	return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;
	uint64_t addr;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Program pfp ucode address into instruction cache address register */
	addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
	       rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset;
	WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
		     lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
		     upper_32_bits(addr));

	return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;
	uint64_t addr;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Program mec1 ucode address into instruction cache address register */
	addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
	       rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset;
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
		     lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
		     upper_32_bits(addr));

	return 0;
}

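/*
 * Wait for the RLC to report that bootloading finished, then point the CP
 * ME/CE/PFP/MEC instruction caches at their images in the autoload buffer
 * when the backdoor autoload path is in use.
 */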
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
{
	uint32_t cp_status;
	uint32_t bootload_status;
	int i, r;

	for (i = 0; i < adev->usec_timeout; i++) {
		cp_status = RREG32_SOC15(GC, 0, mmCP_STAT);
		bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS);
		if ((cp_status == 0) &&
		    (REG_GET_FIELD(bootload_status,
				   RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
			break;
		}
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
		return -ETIMEDOUT;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev);
		if (r)
			return r;

		r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev);
		if (r)
			return r;

		r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev);
		if (r)
			return r;

		r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev);
		if (r)
			return r;
	}

	return 0;
}

static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);

	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
	if (!enable) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].sched.ready = false;
	}
	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");

	return 0;
}

static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	uint32_t tmp;
	uint32_t usec_timeout = 50000;  /* wait for 50ms */

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);

	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.pfp.pfp_fw_obj,
				      &adev->gfx.pfp.pfp_fw_gpu_addr,
				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
		gfx_v10_0_pfp_fini(adev);
		return r;
	}

	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		adev->nbio.funcs->hdp_flush(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp);
	WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
		     adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
		     upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));

	return 0;
}

static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	uint32_t tmp;
	uint32_t usec_timeout = 50000;  /* wait for 50ms */

	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;

	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);

	fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
		le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.ce.ce_fw_obj,
				      &adev->gfx.ce.ce_fw_gpu_addr,
				      (void **)&adev->gfx.ce.ce_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create ce fw bo\n", r);
		gfx_v10_0_ce_fini(adev);
		return r;
	}

	memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj);

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		adev->nbio.funcs->hdp_flush(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
		     adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
		     upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr));

	return 0;
}

static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	uint32_t tmp;
	uint32_t usec_timeout = 50000;  /* wait for 50ms */

	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
		gfx_v10_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		adev->nbio.funcs->hdp_flush(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
		     adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
		     upper_32_bits(adev->gfx.me.me_fw_gpu_addr));

	return 0;
}

static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	gfx_v10_0_cp_gfx_enable(adev, false);

	r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
		return r;
	}

	r = gfx_v10_0_cp_gfx_load_ce_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load ce fw\n", r);
		return r;
	}

	r = gfx_v10_0_cp_gfx_load_me_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
		return r;
	}

	return 0;
}

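/*
 * Bring up the gfx CP: enable the ME/PFP/CE, emit the clear-state buffer on
 * gfx ring 0 and, when a second gfx ring exists, a CLEAR_STATE on ring 1.
 */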
static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;
	int ctx_reg_offset;

	/* init the CP */
	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
		     adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);

	gfx_v10_0_cp_gfx_enable(adev, true);

	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
						  PACKET3(PACKET3_SET_CONTEXT_REG,
							  ext->reg_count));
				amdgpu_ring_write(ring, ext->reg_index -
						  PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	amdgpu_ring_write(ring, ctx_reg_offset);
	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	/* submit cs packet to copy state 0 to next available state */
	if (adev->gfx.num_gfx_rings > 1) {
		/* maximum supported gfx ring is 2 */
		ring = &adev->gfx.gfx_ring[1];
		r = amdgpu_ring_alloc(ring, 2);
		if (r) {
			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
			return r;
		}

		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
		amdgpu_ring_write(ring, 0);

		amdgpu_ring_commit(ring);
	}

	return 0;
}

static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
					 CP_PIPE_ID pipe)
{
	u32 tmp;

	tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp);
}

static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
					  struct amdgpu_ring *ring)
{
	u32 tmp;

	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	u32 i;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Init gfx ring 0 for pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
		     lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
		     upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);

	gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
	mutex_unlock(&adev->srbm_mutex);

	/* Init gfx ring 1 for pipe 1 */
	if (adev->gfx.num_gfx_rings > 1) {
		mutex_lock(&adev->srbm_mutex);
		gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
		/* maximum supported gfx ring is 2 */
		ring = &adev->gfx.gfx_ring[1];
		rb_bufsz = order_base_2(ring->ring_size / 8);
		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
		WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
		/* Initialize the ring buffer's write pointers */
		ring->wptr = 0;
		WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
		/* Set the wb address whether it's enabled or not */
		rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
		WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
		wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
			     lower_32_bits(wptr_gpu_addr));
		WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
			     upper_32_bits(wptr_gpu_addr));

		mdelay(1);
		WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);

		rb_addr = ring->gpu_addr >> 8;
		WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
		WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
		WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);

		gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
		mutex_unlock(&adev->srbm_mutex);
	}
	/* Switch to pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
	mutex_unlock(&adev->srbm_mutex);

	/* start the ring */
	gfx_v10_0_cp_gfx_start(adev);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->sched.ready = true;
	}

	return 0;
}

static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
	} else {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
			     (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
			      CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].sched.ready = false;
		adev->gfx.kiq.ring.sched.ready = false;
	}
	udelay(50);
}

static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i;
	u32 tmp;
	u32 usec_timeout = 50000; /* Wait for 50 ms */

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v10_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		adev->nbio.funcs->hdp_flush(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);

	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr &
		     0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));

	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0);

	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/*
	 * TODO: Loading MEC2 firmware is only necessary if MEC2 should run
	 * different microcode than MEC1.
	 */

	return 0;
}

static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}

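/*
 * Fill the gfx MQD (memory queue descriptor) with the ring's base address,
 * rptr/wptr write-back addresses, doorbell setup and HQD control values.
 */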
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_gfx_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr;
	uint32_t tmp;
	uint32_t rb_bufsz;

	/* set up gfx hqd wptr */
	mqd->cp_gfx_hqd_wptr = 0;
	mqd->cp_gfx_hqd_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set up mqd control */
	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
	mqd->cp_gfx_mqd_control = tmp;

	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
	mqd->cp_gfx_hqd_vmid = 0;

	/* set up default queue priority level
	 * 0x0 = low priority, 0x1 = high priority */
	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
	mqd->cp_gfx_hqd_queue_priority = tmp;

	/* set up time quantum */
	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
	mqd->cp_gfx_hqd_quantum = tmp;

	/* set up gfx hqd base. this is similar as CP_RB_BASE */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
	mqd->cp_gfx_hqd_rptr_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up rb_wptr_poll addr */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
	rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
	mqd->cp_gfx_hqd_cntl = tmp;

	/* set up cp_doorbell_control */
	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	mqd->cp_rb_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);

	/* activate the queue */
	mqd->cp_gfx_hqd_active = 1;

	return 0;
}

#ifdef BRING_UP_DEBUG
static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_gfx_mqd *mqd = ring->mqd_ptr;

	/* set mmCP_GFX_HQD_WPTR/_HI to 0 */
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);

	/* set GFX_MQD_BASE */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set GFX_MQD_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);

	/* set GFX_HQD_VMID to 0 */
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);

	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY,
		     mqd->cp_gfx_hqd_queue_priority);
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);

	/* set GFX_HQD_BASE, similar as CP_RB_BASE */
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);

	/* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);

	/* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);

	/* set RB_WPTR_POLL_ADDR */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);

	/* set RB_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);

	/* activate the queue */
	WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);

	return 0;
}
#endif

static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_gfx_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.gfx_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v10_0_gfx_mqd_init(ring);
#ifdef BRING_UP_DEBUG
		gfx_v10_0_gfx_queue_init_register(ring);
#endif
		nv_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else if (adev->in_gpu_reset) {
		/* reset mqd with the backup copy */
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset the ring */
		ring->wptr = 0;
		adev->wb.wb[ring->wptr_offs] = 0;
		amdgpu_ring_clear_ring(ring);
#ifdef BRING_UP_DEBUG
		mutex_lock(&adev->srbm_mutex);
		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v10_0_gfx_queue_init_register(ring);
		nv_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
#endif
	} else {
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

#ifndef BRING_UP_DEBUG
static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	int r, i;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
					adev->gfx.num_gfx_rings);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);

	return amdgpu_ring_test_helper(kiq_ring);
}
#endif

static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;

		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v10_0_gfx_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}
#ifndef BRING_UP_DEBUG
	r = gfx_v10_0_kiq_enable_kgq(adev);
	if (r)
		goto done;
#endif
	r = gfx_v10_0_cp_gfx_start(adev);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->sched.ready = true;
	}
done:
	return r;
}

static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
			ring->has_high_prio = true;
			mqd->cp_hqd_queue_priority =
				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
		} else {
			ring->has_high_prio = false;
		}
	}
}

static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* set static priority for a compute queue/ring */
	gfx_v10_0_compute_mqd_set_priority(ring, mqd);

	/* map_queues packet doesn't need to activate the queue,
	 * so only the kiq needs to set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}

static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* write the EOP addr */
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
		     mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
		     mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
		     mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
			     mqd->cp_hqd_dequeue_request);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
			     mqd->cp_hqd_pq_rptr);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
			     mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
			     mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
		     mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
		     mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
		     mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
		     mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
		     mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
		     mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
			     (adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
			     (adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
		     mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
		     mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
		     mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
		     mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

3523 static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring
*ring
)
3525 struct amdgpu_device
*adev
= ring
->adev
;
3526 struct v10_compute_mqd
*mqd
= ring
->mqd_ptr
;
3527 int mqd_idx
= AMDGPU_MAX_COMPUTE_RINGS
;
3529 gfx_v10_0_kiq_setting(ring
);
3531 if (adev
->in_gpu_reset
) { /* for GPU_RESET case */
3532 /* reset MQD to a clean status */
3533 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3534 memcpy(mqd
, adev
->gfx
.mec
.mqd_backup
[mqd_idx
], sizeof(*mqd
));
3536 /* reset ring buffer */
3538 amdgpu_ring_clear_ring(ring
);
3540 mutex_lock(&adev
->srbm_mutex
);
3541 nv_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3542 gfx_v10_0_kiq_init_register(ring
);
3543 nv_grbm_select(adev
, 0, 0, 0, 0);
3544 mutex_unlock(&adev
->srbm_mutex
);
3546 memset((void *)mqd
, 0, sizeof(*mqd
));
3547 mutex_lock(&adev
->srbm_mutex
);
3548 nv_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3549 gfx_v10_0_compute_mqd_init(ring
);
3550 gfx_v10_0_kiq_init_register(ring
);
3551 nv_grbm_select(adev
, 0, 0, 0, 0);
3552 mutex_unlock(&adev
->srbm_mutex
);
3554 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3555 memcpy(adev
->gfx
.mec
.mqd_backup
[mqd_idx
], mqd
, sizeof(*mqd
));
3561 static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring
*ring
)
3563 struct amdgpu_device
*adev
= ring
->adev
;
3564 struct v10_compute_mqd
*mqd
= ring
->mqd_ptr
;
3565 int mqd_idx
= ring
- &adev
->gfx
.compute_ring
[0];
3567 if (!adev
->in_gpu_reset
&& !adev
->in_suspend
) {
3568 memset((void *)mqd
, 0, sizeof(*mqd
));
3569 mutex_lock(&adev
->srbm_mutex
);
3570 nv_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3571 gfx_v10_0_compute_mqd_init(ring
);
3572 nv_grbm_select(adev
, 0, 0, 0, 0);
3573 mutex_unlock(&adev
->srbm_mutex
);
3575 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3576 memcpy(adev
->gfx
.mec
.mqd_backup
[mqd_idx
], mqd
, sizeof(*mqd
));
3577 } else if (adev
->in_gpu_reset
) { /* for GPU_RESET case */
3578 /* reset MQD to a clean status */
3579 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3580 memcpy(mqd
, adev
->gfx
.mec
.mqd_backup
[mqd_idx
], sizeof(*mqd
));
3582 /* reset ring buffer */
3584 atomic64_set((atomic64_t
*)&adev
->wb
.wb
[ring
->wptr_offs
], 0);
3585 amdgpu_ring_clear_ring(ring
);
3587 amdgpu_ring_clear_ring(ring
);
3593 static int gfx_v10_0_kiq_resume(struct amdgpu_device
*adev
)
3595 struct amdgpu_ring
*ring
;
3598 ring
= &adev
->gfx
.kiq
.ring
;
3600 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3601 if (unlikely(r
!= 0))
3604 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&ring
->mqd_ptr
);
3605 if (unlikely(r
!= 0))
3608 gfx_v10_0_kiq_init_queue(ring
);
3609 amdgpu_bo_kunmap(ring
->mqd_obj
);
3610 ring
->mqd_ptr
= NULL
;
3611 amdgpu_bo_unreserve(ring
->mqd_obj
);
3612 ring
->sched
.ready
= true;
3616 static int gfx_v10_0_kcq_resume(struct amdgpu_device
*adev
)
3618 struct amdgpu_ring
*ring
= NULL
;
3621 gfx_v10_0_cp_compute_enable(adev
, true);
3623 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3624 ring
= &adev
->gfx
.compute_ring
[i
];
3626 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3627 if (unlikely(r
!= 0))
3629 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&ring
->mqd_ptr
);
3631 r
= gfx_v10_0_kcq_init_queue(ring
);
3632 amdgpu_bo_kunmap(ring
->mqd_obj
);
3633 ring
->mqd_ptr
= NULL
;
3635 amdgpu_bo_unreserve(ring
->mqd_obj
);
3640 r
= amdgpu_gfx_enable_kcq(adev
);
3645 static int gfx_v10_0_cp_resume(struct amdgpu_device
*adev
)
3648 struct amdgpu_ring
*ring
;
3650 if (!(adev
->flags
& AMD_IS_APU
))
3651 gfx_v10_0_enable_gui_idle_interrupt(adev
, false);
3653 if (adev
->firmware
.load_type
== AMDGPU_FW_LOAD_DIRECT
) {
3654 /* legacy firmware loading */
3655 r
= gfx_v10_0_cp_gfx_load_microcode(adev
);
3659 r
= gfx_v10_0_cp_compute_load_microcode(adev
);
3664 r
= gfx_v10_0_kiq_resume(adev
);
3668 r
= gfx_v10_0_kcq_resume(adev
);
3672 if (!amdgpu_async_gfx_ring
) {
3673 r
= gfx_v10_0_cp_gfx_resume(adev
);
3677 r
= gfx_v10_0_cp_async_gfx_ring_resume(adev
);
3682 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++) {
3683 ring
= &adev
->gfx
.gfx_ring
[i
];
3684 r
= amdgpu_ring_test_helper(ring
);
3689 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3690 ring
= &adev
->gfx
.compute_ring
[i
];
3691 r
= amdgpu_ring_test_helper(ring
);
3699 static void gfx_v10_0_cp_enable(struct amdgpu_device
*adev
, bool enable
)
3701 gfx_v10_0_cp_gfx_enable(adev
, enable
);
3702 gfx_v10_0_cp_compute_enable(adev
, enable
);
3705 static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device
*adev
)
3707 uint32_t data
, pattern
= 0xDEADBEEF;
3709 /* check if mmVGT_ESGS_RING_SIZE_UMD
3710 * has been remapped to mmVGT_ESGS_RING_SIZE */
3711 data
= RREG32_SOC15(GC
, 0, mmVGT_ESGS_RING_SIZE
);
3713 WREG32_SOC15(GC
, 0, mmVGT_ESGS_RING_SIZE
, 0);
3715 WREG32_SOC15(GC
, 0, mmVGT_ESGS_RING_SIZE_UMD
, pattern
);
3717 if (RREG32_SOC15(GC
, 0, mmVGT_ESGS_RING_SIZE
) == pattern
) {
3718 WREG32_SOC15(GC
, 0, mmVGT_ESGS_RING_SIZE_UMD
, data
);
3721 WREG32_SOC15(GC
, 0, mmVGT_ESGS_RING_SIZE
, data
);
3726 static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device
*adev
)
3730 /* initialize cam_index to 0
3731 * index will auto-inc after each data writting */
3732 WREG32_SOC15(GC
, 0, mmGRBM_CAM_INDEX
, 0);
3734 /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
3735 data
= (SOC15_REG_OFFSET(GC
, 0, mmVGT_TF_RING_SIZE_UMD
) <<
3736 GRBM_CAM_DATA__CAM_ADDR__SHIFT
) |
3737 (SOC15_REG_OFFSET(GC
, 0, mmVGT_TF_RING_SIZE
) <<
3738 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT
);
3739 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA_UPPER
, 0);
3740 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA
, data
);
3742 /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
3743 data
= (SOC15_REG_OFFSET(GC
, 0, mmVGT_TF_MEMORY_BASE_UMD
) <<
3744 GRBM_CAM_DATA__CAM_ADDR__SHIFT
) |
3745 (SOC15_REG_OFFSET(GC
, 0, mmVGT_TF_MEMORY_BASE
) <<
3746 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT
);
3747 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA_UPPER
, 0);
3748 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA
, data
);
3750 /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
3751 data
= (SOC15_REG_OFFSET(GC
, 0, mmVGT_TF_MEMORY_BASE_HI_UMD
) <<
3752 GRBM_CAM_DATA__CAM_ADDR__SHIFT
) |
3753 (SOC15_REG_OFFSET(GC
, 0, mmVGT_TF_MEMORY_BASE_HI
) <<
3754 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT
);
3755 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA_UPPER
, 0);
3756 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA
, data
);
3758 /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
3759 data
= (SOC15_REG_OFFSET(GC
, 0, mmVGT_HS_OFFCHIP_PARAM_UMD
) <<
3760 GRBM_CAM_DATA__CAM_ADDR__SHIFT
) |
3761 (SOC15_REG_OFFSET(GC
, 0, mmVGT_HS_OFFCHIP_PARAM
) <<
3762 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT
);
3763 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA_UPPER
, 0);
3764 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA
, data
);
3766 /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
3767 data
= (SOC15_REG_OFFSET(GC
, 0, mmVGT_ESGS_RING_SIZE_UMD
) <<
3768 GRBM_CAM_DATA__CAM_ADDR__SHIFT
) |
3769 (SOC15_REG_OFFSET(GC
, 0, mmVGT_ESGS_RING_SIZE
) <<
3770 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT
);
3771 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA_UPPER
, 0);
3772 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA
, data
);
3774 /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
3775 data
= (SOC15_REG_OFFSET(GC
, 0, mmVGT_GSVS_RING_SIZE_UMD
) <<
3776 GRBM_CAM_DATA__CAM_ADDR__SHIFT
) |
3777 (SOC15_REG_OFFSET(GC
, 0, mmVGT_GSVS_RING_SIZE
) <<
3778 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT
);
3779 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA_UPPER
, 0);
3780 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA
, data
);
3782 /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
3783 data
= (SOC15_REG_OFFSET(GC
, 0, mmSPI_CONFIG_CNTL_REMAP
) <<
3784 GRBM_CAM_DATA__CAM_ADDR__SHIFT
) |
3785 (SOC15_REG_OFFSET(GC
, 0, mmSPI_CONFIG_CNTL
) <<
3786 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT
);
3787 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA_UPPER
, 0);
3788 WREG32_SOC15(GC
, 0, mmGRBM_CAM_DATA
, data
);
3791 static int gfx_v10_0_hw_init(void *handle
)
3794 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3796 if (!amdgpu_emu_mode
)
3797 gfx_v10_0_init_golden_registers(adev
);
3799 if (adev
->firmware
.load_type
== AMDGPU_FW_LOAD_DIRECT
) {
3801 * For gfx 10, rlc firmware loading relies on smu firmware is
3802 * loaded firstly, so in direct type, it has to load smc ucode
3805 r
= smu_load_microcode(&adev
->smu
);
3809 r
= smu_check_fw_status(&adev
->smu
);
3811 pr_err("SMC firmware status is not correct\n");
3816 /* if GRBM CAM not remapped, set up the remapping */
3817 if (!gfx_v10_0_check_grbm_cam_remapping(adev
))
3818 gfx_v10_0_setup_grbm_cam_remapping(adev
);
3820 gfx_v10_0_constants_init(adev
);
3822 r
= gfx_v10_0_rlc_resume(adev
);
3827 * init golden registers and rlc resume may override some registers,
3828 * reconfig them here
3830 gfx_v10_0_tcp_harvest(adev
);
3832 r
= gfx_v10_0_cp_resume(adev
);
3839 #ifndef BRING_UP_DEBUG
3840 static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device
*adev
)
3842 struct amdgpu_kiq
*kiq
= &adev
->gfx
.kiq
;
3843 struct amdgpu_ring
*kiq_ring
= &kiq
->ring
;
3846 if (!kiq
->pmf
|| !kiq
->pmf
->kiq_unmap_queues
)
3849 if (amdgpu_ring_alloc(kiq_ring
, kiq
->pmf
->unmap_queues_size
*
3850 adev
->gfx
.num_gfx_rings
))
3853 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
3854 kiq
->pmf
->kiq_unmap_queues(kiq_ring
, &adev
->gfx
.gfx_ring
[i
],
3855 PREEMPT_QUEUES
, 0, 0);
3857 return amdgpu_ring_test_helper(kiq_ring
);
3861 static int gfx_v10_0_hw_fini(void *handle
)
3863 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3866 amdgpu_irq_put(adev
, &adev
->gfx
.priv_reg_irq
, 0);
3867 amdgpu_irq_put(adev
, &adev
->gfx
.priv_inst_irq
, 0);
3868 #ifndef BRING_UP_DEBUG
3869 if (amdgpu_async_gfx_ring
) {
3870 r
= gfx_v10_0_kiq_disable_kgq(adev
);
3872 DRM_ERROR("KGQ disable failed\n");
3875 if (amdgpu_gfx_disable_kcq(adev
))
3876 DRM_ERROR("KCQ disable failed\n");
3877 if (amdgpu_sriov_vf(adev
)) {
3878 gfx_v10_0_cp_gfx_enable(adev
, false);
3881 gfx_v10_0_cp_enable(adev
, false);
3882 gfx_v10_0_enable_gui_idle_interrupt(adev
, false);
3887 static int gfx_v10_0_suspend(void *handle
)
3889 return gfx_v10_0_hw_fini(handle
);
3892 static int gfx_v10_0_resume(void *handle
)
3894 return gfx_v10_0_hw_init(handle
);
3897 static bool gfx_v10_0_is_idle(void *handle
)
3899 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3901 if (REG_GET_FIELD(RREG32_SOC15(GC
, 0, mmGRBM_STATUS
),
3902 GRBM_STATUS
, GUI_ACTIVE
))
3908 static int gfx_v10_0_wait_for_idle(void *handle
)
3912 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3914 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
3915 /* read MC_STATUS */
3916 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_STATUS
) &
3917 GRBM_STATUS__GUI_ACTIVE_MASK
;
3919 if (!REG_GET_FIELD(tmp
, GRBM_STATUS
, GUI_ACTIVE
))
3926 static int gfx_v10_0_soft_reset(void *handle
)
3928 u32 grbm_soft_reset
= 0;
3930 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3933 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_STATUS
);
3934 if (tmp
& (GRBM_STATUS__PA_BUSY_MASK
| GRBM_STATUS__SC_BUSY_MASK
|
3935 GRBM_STATUS__BCI_BUSY_MASK
| GRBM_STATUS__SX_BUSY_MASK
|
3936 GRBM_STATUS__TA_BUSY_MASK
| GRBM_STATUS__DB_BUSY_MASK
|
3937 GRBM_STATUS__CB_BUSY_MASK
| GRBM_STATUS__GDS_BUSY_MASK
|
3938 GRBM_STATUS__SPI_BUSY_MASK
| GRBM_STATUS__GE_BUSY_NO_DMA_MASK
3939 | GRBM_STATUS__BCI_BUSY_MASK
)) {
3940 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3941 GRBM_SOFT_RESET
, SOFT_RESET_CP
,
3943 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3944 GRBM_SOFT_RESET
, SOFT_RESET_GFX
,
3948 if (tmp
& (GRBM_STATUS__CP_BUSY_MASK
| GRBM_STATUS__CP_COHERENCY_BUSY_MASK
)) {
3949 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3950 GRBM_SOFT_RESET
, SOFT_RESET_CP
,
3955 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_STATUS2
);
3956 if (REG_GET_FIELD(tmp
, GRBM_STATUS2
, RLC_BUSY
))
3957 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3958 GRBM_SOFT_RESET
, SOFT_RESET_RLC
,
3961 if (grbm_soft_reset
) {
3963 gfx_v10_0_rlc_stop(adev
);
3965 /* Disable GFX parsing/prefetching */
3966 gfx_v10_0_cp_gfx_enable(adev
, false);
3968 /* Disable MEC parsing/prefetching */
3969 gfx_v10_0_cp_compute_enable(adev
, false);
3971 if (grbm_soft_reset
) {
3972 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3973 tmp
|= grbm_soft_reset
;
3974 dev_info(adev
->dev
, "GRBM_SOFT_RESET=0x%08X\n", tmp
);
3975 WREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
, tmp
);
3976 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3980 tmp
&= ~grbm_soft_reset
;
3981 WREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
, tmp
);
3982 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3985 /* Wait a little for things to settle down */
3991 static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device
*adev
)
3995 amdgpu_gfx_off_ctrl(adev
, false);
3996 mutex_lock(&adev
->gfx
.gpu_clock_mutex
);
3997 clock
= (uint64_t)RREG32_SOC15(SMUIO
, 0, mmGOLDEN_TSC_COUNT_LOWER
) |
3998 ((uint64_t)RREG32_SOC15(SMUIO
, 0, mmGOLDEN_TSC_COUNT_UPPER
) << 32ULL);
3999 mutex_unlock(&adev
->gfx
.gpu_clock_mutex
);
4000 amdgpu_gfx_off_ctrl(adev
, true);
4004 static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring
*ring
,
4006 uint32_t gds_base
, uint32_t gds_size
,
4007 uint32_t gws_base
, uint32_t gws_size
,
4008 uint32_t oa_base
, uint32_t oa_size
)
4010 struct amdgpu_device
*adev
= ring
->adev
;
4013 gfx_v10_0_write_data_to_reg(ring
, 0, false,
4014 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_BASE
) + 2 * vmid
,
4018 gfx_v10_0_write_data_to_reg(ring
, 0, false,
4019 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_SIZE
) + 2 * vmid
,
4023 gfx_v10_0_write_data_to_reg(ring
, 0, false,
4024 SOC15_REG_OFFSET(GC
, 0, mmGDS_GWS_VMID0
) + vmid
,
4025 gws_size
<< GDS_GWS_VMID0__SIZE__SHIFT
| gws_base
);
4028 gfx_v10_0_write_data_to_reg(ring
, 0, false,
4029 SOC15_REG_OFFSET(GC
, 0, mmGDS_OA_VMID0
) + vmid
,
4030 (1 << (oa_size
+ oa_base
)) - (1 << oa_base
));
4033 static int gfx_v10_0_early_init(void *handle
)
4035 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
4037 adev
->gfx
.num_gfx_rings
= GFX10_NUM_GFX_RINGS_NV1X
;
4039 adev
->gfx
.num_compute_rings
= AMDGPU_MAX_COMPUTE_RINGS
;
4041 gfx_v10_0_set_kiq_pm4_funcs(adev
);
4042 gfx_v10_0_set_ring_funcs(adev
);
4043 gfx_v10_0_set_irq_funcs(adev
);
4044 gfx_v10_0_set_gds_init(adev
);
4045 gfx_v10_0_set_rlc_funcs(adev
);
4050 static int gfx_v10_0_late_init(void *handle
)
4052 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
4055 r
= amdgpu_irq_get(adev
, &adev
->gfx
.priv_reg_irq
, 0);
4059 r
= amdgpu_irq_get(adev
, &adev
->gfx
.priv_inst_irq
, 0);
4066 static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device
*adev
)
4070 /* if RLC is not enabled, do nothing */
4071 rlc_cntl
= RREG32_SOC15(GC
, 0, mmRLC_CNTL
);
4072 return (REG_GET_FIELD(rlc_cntl
, RLC_CNTL
, RLC_ENABLE_F32
)) ? true : false;
4075 static void gfx_v10_0_set_safe_mode(struct amdgpu_device
*adev
)
4080 data
= RLC_SAFE_MODE__CMD_MASK
;
4081 data
|= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT
);
4082 WREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
, data
);
4084 /* wait for RLC_SAFE_MODE */
4085 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
4086 if (!REG_GET_FIELD(RREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
), RLC_SAFE_MODE
, CMD
))
4092 static void gfx_v10_0_unset_safe_mode(struct amdgpu_device
*adev
)
4096 data
= RLC_SAFE_MODE__CMD_MASK
;
4097 WREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
, data
);
4100 static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device
*adev
,
4105 /* It is disabled by HW by default */
4106 if (enable
&& (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_MGCG
)) {
4107 /* 0 - Disable some blocks' MGCG */
4108 WREG32_SOC15(GC
, 0, mmGRBM_GFX_INDEX
, 0xe0000000);
4109 WREG32_SOC15(GC
, 0, mmCGTT_WD_CLK_CTRL
, 0xff000000);
4110 WREG32_SOC15(GC
, 0, mmCGTT_VGT_CLK_CTRL
, 0xff000000);
4111 WREG32_SOC15(GC
, 0, mmCGTT_IA_CLK_CTRL
, 0xff000000);
4113 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4114 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
4115 data
&= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK
|
4116 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK
|
4117 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK
);
4119 /* only for Vega10 & Raven1 */
4120 data
|= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK
;
4123 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
4125 /* MGLS is a global flag to control all MGLS in GFX */
4126 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_MGLS
) {
4127 /* 2 - RLC memory Light sleep */
4128 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_RLC_LS
) {
4129 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
);
4130 data
|= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
;
4132 WREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
, data
);
4134 /* 3 - CP memory Light sleep */
4135 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CP_LS
) {
4136 def
= data
= RREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
);
4137 data
|= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
;
4139 WREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
, data
);
4143 /* 1 - MGCG_OVERRIDE */
4144 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
4145 data
|= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK
|
4146 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK
|
4147 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK
|
4148 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK
);
4150 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
4152 /* 2 - disable MGLS in CP */
4153 data
= RREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
);
4154 if (data
& CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
) {
4155 data
&= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
;
4156 WREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
, data
);
4159 /* 3 - disable MGLS in RLC */
4160 data
= RREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
);
4161 if (data
& RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
) {
4162 data
&= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
;
4163 WREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
, data
);
4169 static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device
*adev
,
4174 /* Enable 3D CGCG/CGLS */
4175 if (enable
&& (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_3D_CGCG
)) {
4176 /* write cmd to clear cgcg/cgls ov */
4177 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
4178 /* unset CGCG override */
4179 data
&= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK
;
4180 /* update CGCG and CGLS override bits */
4182 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
4183 /* enable 3Dcgcg FSM(0x0000363f) */
4184 def
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
);
4185 data
= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT
) |
4186 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK
;
4187 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_3D_CGLS
)
4188 data
|= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT
) |
4189 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK
;
4191 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
, data
);
4193 /* set IDLE_POLL_COUNT(0x00900100) */
4194 def
= RREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
);
4195 data
= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT
) |
4196 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT
);
4198 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
, data
);
4200 /* Disable CGCG/CGLS */
4201 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
);
4202 /* disable cgcg, cgls should be disabled */
4203 data
&= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK
|
4204 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK
);
4205 /* disable cgcg and cgls in FSM */
4207 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
, data
);
4211 static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device
*adev
,
4216 if (enable
&& (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CGCG
)) {
4217 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
4218 /* unset CGCG override */
4219 data
&= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK
;
4220 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CGLS
)
4221 data
&= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK
;
4223 data
|= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK
;
4224 /* update CGCG and CGLS override bits */
4226 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
4228 /* enable cgcg FSM(0x0000363F) */
4229 def
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
);
4230 data
= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT
) |
4231 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK
;
4232 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CGLS
)
4233 data
|= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT
) |
4234 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK
;
4236 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
, data
);
4238 /* set IDLE_POLL_COUNT(0x00900100) */
4239 def
= RREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
);
4240 data
= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT
) |
4241 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT
);
4243 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
, data
);
4245 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
);
4246 /* reset CGCG/CGLS bits */
4247 data
&= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK
| RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK
);
4248 /* disable cgcg and cgls in FSM */
4250 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
, data
);
4254 static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device
*adev
,
4257 amdgpu_gfx_rlc_enter_safe_mode(adev
);
4260 /* CGCG/CGLS should be enabled after MGCG/MGLS
4261 * === MGCG + MGLS ===
4263 gfx_v10_0_update_medium_grain_clock_gating(adev
, enable
);
4264 /* === CGCG /CGLS for GFX 3D Only === */
4265 gfx_v10_0_update_3d_clock_gating(adev
, enable
);
4266 /* === CGCG + CGLS === */
4267 gfx_v10_0_update_coarse_grain_clock_gating(adev
, enable
);
4269 /* CGCG/CGLS should be disabled before MGCG/MGLS
4270 * === CGCG + CGLS ===
4272 gfx_v10_0_update_coarse_grain_clock_gating(adev
, enable
);
4273 /* === CGCG /CGLS for GFX 3D Only === */
4274 gfx_v10_0_update_3d_clock_gating(adev
, enable
);
4275 /* === MGCG + MGLS === */
4276 /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */
4279 if (adev
->cg_flags
&
4280 (AMD_CG_SUPPORT_GFX_MGCG
|
4281 AMD_CG_SUPPORT_GFX_CGLS
|
4282 AMD_CG_SUPPORT_GFX_CGCG
|
4283 AMD_CG_SUPPORT_GFX_CGLS
|
4284 AMD_CG_SUPPORT_GFX_3D_CGCG
|
4285 AMD_CG_SUPPORT_GFX_3D_CGLS
))
4286 gfx_v10_0_enable_gui_idle_interrupt(adev
, enable
);
4288 amdgpu_gfx_rlc_exit_safe_mode(adev
);
4293 static void gfx_v10_0_update_spm_vmid(struct amdgpu_device
*adev
, unsigned vmid
)
4297 data
= RREG32_SOC15(GC
, 0, mmRLC_SPM_MC_CNTL
);
4299 data
&= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK
;
4300 data
|= (vmid
& RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK
) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT
;
4302 WREG32_SOC15(GC
, 0, mmRLC_SPM_MC_CNTL
, data
);
4305 static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device
*adev
,
4307 struct soc15_reg_rlcg
*entries
, int arr_size
)
4315 for (i
= 0; i
< arr_size
; i
++) {
4316 const struct soc15_reg_rlcg
*entry
;
4318 entry
= &entries
[i
];
4319 reg
= adev
->reg_offset
[entry
->hwip
][entry
->instance
][entry
->segment
] + entry
->reg
;
4327 static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device
*adev
, u32 offset
)
4329 return gfx_v10_0_check_rlcg_range(adev
, offset
, NULL
, 0);
4332 static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs
= {
4333 .is_rlc_enabled
= gfx_v10_0_is_rlc_enabled
,
4334 .set_safe_mode
= gfx_v10_0_set_safe_mode
,
4335 .unset_safe_mode
= gfx_v10_0_unset_safe_mode
,
4336 .init
= gfx_v10_0_rlc_init
,
4337 .get_csb_size
= gfx_v10_0_get_csb_size
,
4338 .get_csb_buffer
= gfx_v10_0_get_csb_buffer
,
4339 .resume
= gfx_v10_0_rlc_resume
,
4340 .stop
= gfx_v10_0_rlc_stop
,
4341 .reset
= gfx_v10_0_rlc_reset
,
4342 .start
= gfx_v10_0_rlc_start
,
4343 .update_spm_vmid
= gfx_v10_0_update_spm_vmid
,
4344 .rlcg_wreg
= gfx_v10_rlcg_wreg
,
4345 .is_rlcg_access_range
= gfx_v10_0_is_rlcg_access_range
,
4348 static int gfx_v10_0_set_powergating_state(void *handle
,
4349 enum amd_powergating_state state
)
4351 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
4352 bool enable
= (state
== AMD_PG_STATE_GATE
);
4353 switch (adev
->asic_type
) {
4357 amdgpu_gfx_off_ctrl(adev
, false);
4358 cancel_delayed_work_sync(&adev
->gfx
.gfx_off_delay_work
);
4360 amdgpu_gfx_off_ctrl(adev
, true);
4368 static int gfx_v10_0_set_clockgating_state(void *handle
,
4369 enum amd_clockgating_state state
)
4371 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
4373 switch (adev
->asic_type
) {
4377 gfx_v10_0_update_gfx_clock_gating(adev
,
4378 state
== AMD_CG_STATE_GATE
);
4386 static void gfx_v10_0_get_clockgating_state(void *handle
, u32
*flags
)
4388 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
4391 /* AMD_CG_SUPPORT_GFX_MGCG */
4392 data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
4393 if (!(data
& RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK
))
4394 *flags
|= AMD_CG_SUPPORT_GFX_MGCG
;
4396 /* AMD_CG_SUPPORT_GFX_CGCG */
4397 data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
);
4398 if (data
& RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK
)
4399 *flags
|= AMD_CG_SUPPORT_GFX_CGCG
;
4401 /* AMD_CG_SUPPORT_GFX_CGLS */
4402 if (data
& RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK
)
4403 *flags
|= AMD_CG_SUPPORT_GFX_CGLS
;
4405 /* AMD_CG_SUPPORT_GFX_RLC_LS */
4406 data
= RREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
);
4407 if (data
& RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
)
4408 *flags
|= AMD_CG_SUPPORT_GFX_RLC_LS
| AMD_CG_SUPPORT_GFX_MGLS
;
4410 /* AMD_CG_SUPPORT_GFX_CP_LS */
4411 data
= RREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
);
4412 if (data
& CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
)
4413 *flags
|= AMD_CG_SUPPORT_GFX_CP_LS
| AMD_CG_SUPPORT_GFX_MGLS
;
4415 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4416 data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
);
4417 if (data
& RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK
)
4418 *flags
|= AMD_CG_SUPPORT_GFX_3D_CGCG
;
4420 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4421 if (data
& RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK
)
4422 *flags
|= AMD_CG_SUPPORT_GFX_3D_CGLS
;
4425 static u64
gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring
*ring
)
4427 return ring
->adev
->wb
.wb
[ring
->rptr_offs
]; /* gfx10 is 32bit rptr*/
4430 static u64
gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring
*ring
)
4432 struct amdgpu_device
*adev
= ring
->adev
;
4435 /* XXX check if swapping is necessary on BE */
4436 if (ring
->use_doorbell
) {
4437 wptr
= atomic64_read((atomic64_t
*)&adev
->wb
.wb
[ring
->wptr_offs
]);
4439 wptr
= RREG32_SOC15(GC
, 0, mmCP_RB0_WPTR
);
4440 wptr
+= (u64
)RREG32_SOC15(GC
, 0, mmCP_RB0_WPTR_HI
) << 32;
4446 static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring
*ring
)
4448 struct amdgpu_device
*adev
= ring
->adev
;
4450 if (ring
->use_doorbell
) {
4451 /* XXX check if swapping is necessary on BE */
4452 atomic64_set((atomic64_t
*)&adev
->wb
.wb
[ring
->wptr_offs
], ring
->wptr
);
4453 WDOORBELL64(ring
->doorbell_index
, ring
->wptr
);
4455 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR
, lower_32_bits(ring
->wptr
));
4456 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR_HI
, upper_32_bits(ring
->wptr
));
4460 static u64
gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring
*ring
)
4462 return ring
->adev
->wb
.wb
[ring
->rptr_offs
]; /* gfx10 hardware is 32bit rptr */
4465 static u64
gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring
*ring
)
4469 /* XXX check if swapping is necessary on BE */
4470 if (ring
->use_doorbell
)
4471 wptr
= atomic64_read((atomic64_t
*)&ring
->adev
->wb
.wb
[ring
->wptr_offs
]);
4477 static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring
*ring
)
4479 struct amdgpu_device
*adev
= ring
->adev
;
4481 /* XXX check if swapping is necessary on BE */
4482 if (ring
->use_doorbell
) {
4483 atomic64_set((atomic64_t
*)&adev
->wb
.wb
[ring
->wptr_offs
], ring
->wptr
);
4484 WDOORBELL64(ring
->doorbell_index
, ring
->wptr
);
4486 BUG(); /* only DOORBELL method supported on gfx10 now */
4490 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring
*ring
)
4492 struct amdgpu_device
*adev
= ring
->adev
;
4493 u32 ref_and_mask
, reg_mem_engine
;
4494 const struct nbio_hdp_flush_reg
*nbio_hf_reg
= adev
->nbio
.hdp_flush_reg
;
4496 if (ring
->funcs
->type
== AMDGPU_RING_TYPE_COMPUTE
) {
4499 ref_and_mask
= nbio_hf_reg
->ref_and_mask_cp2
<< ring
->pipe
;
4502 ref_and_mask
= nbio_hf_reg
->ref_and_mask_cp6
<< ring
->pipe
;
4509 ref_and_mask
= nbio_hf_reg
->ref_and_mask_cp0
;
4510 reg_mem_engine
= 1; /* pfp */
4513 gfx_v10_0_wait_reg_mem(ring
, reg_mem_engine
, 0, 1,
4514 adev
->nbio
.funcs
->get_hdp_flush_req_offset(adev
),
4515 adev
->nbio
.funcs
->get_hdp_flush_done_offset(adev
),
4516 ref_and_mask
, ref_and_mask
, 0x20);
4519 static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring
*ring
,
4520 struct amdgpu_job
*job
,
4521 struct amdgpu_ib
*ib
,
4524 unsigned vmid
= AMDGPU_JOB_GET_VMID(job
);
4525 u32 header
, control
= 0;
4527 if (ib
->flags
& AMDGPU_IB_FLAG_CE
)
4528 header
= PACKET3(PACKET3_INDIRECT_BUFFER_CNST
, 2);
4530 header
= PACKET3(PACKET3_INDIRECT_BUFFER
, 2);
4532 control
|= ib
->length_dw
| (vmid
<< 24);
4534 if ((amdgpu_sriov_vf(ring
->adev
) || amdgpu_mcbp
) && (ib
->flags
& AMDGPU_IB_FLAG_PREEMPT
)) {
4535 control
|= INDIRECT_BUFFER_PRE_ENB(1);
4537 if (flags
& AMDGPU_IB_PREEMPTED
)
4538 control
|= INDIRECT_BUFFER_PRE_RESUME(1);
4540 if (!(ib
->flags
& AMDGPU_IB_FLAG_CE
) && vmid
)
4541 gfx_v10_0_ring_emit_de_meta(ring
,
4542 (!amdgpu_sriov_vf(ring
->adev
) && flags
& AMDGPU_IB_PREEMPTED
) ? true : false);
4545 amdgpu_ring_write(ring
, header
);
4546 BUG_ON(ib
->gpu_addr
& 0x3); /* Dword align */
4547 amdgpu_ring_write(ring
,
4551 lower_32_bits(ib
->gpu_addr
));
4552 amdgpu_ring_write(ring
, upper_32_bits(ib
->gpu_addr
));
4553 amdgpu_ring_write(ring
, control
);
4556 static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring
*ring
,
4557 struct amdgpu_job
*job
,
4558 struct amdgpu_ib
*ib
,
4561 unsigned vmid
= AMDGPU_JOB_GET_VMID(job
);
4562 u32 control
= INDIRECT_BUFFER_VALID
| ib
->length_dw
| (vmid
<< 24);
4564 /* Currently, there is a high possibility to get wave ID mismatch
4565 * between ME and GDS, leading to a hw deadlock, because ME generates
4566 * different wave IDs than the GDS expects. This situation happens
4567 * randomly when at least 5 compute pipes use GDS ordered append.
4568 * The wave IDs generated by ME are also wrong after suspend/resume.
4569 * Those are probably bugs somewhere else in the kernel driver.
4571 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4572 * GDS to 0 for this ring (me/pipe).
4574 if (ib
->flags
& AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
) {
4575 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_CONFIG_REG
, 1));
4576 amdgpu_ring_write(ring
, mmGDS_COMPUTE_MAX_WAVE_ID
);
4577 amdgpu_ring_write(ring
, ring
->adev
->gds
.gds_compute_max_wave_id
);
4580 amdgpu_ring_write(ring
, PACKET3(PACKET3_INDIRECT_BUFFER
, 2));
4581 BUG_ON(ib
->gpu_addr
& 0x3); /* Dword align */
4582 amdgpu_ring_write(ring
,
4586 lower_32_bits(ib
->gpu_addr
));
4587 amdgpu_ring_write(ring
, upper_32_bits(ib
->gpu_addr
));
4588 amdgpu_ring_write(ring
, control
);
4591 static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring
*ring
, u64 addr
,
4592 u64 seq
, unsigned flags
)
4594 struct amdgpu_device
*adev
= ring
->adev
;
4595 bool write64bit
= flags
& AMDGPU_FENCE_FLAG_64BIT
;
4596 bool int_sel
= flags
& AMDGPU_FENCE_FLAG_INT
;
4598 /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */
4599 if (adev
->pdev
->device
== 0x50)
4602 /* RELEASE_MEM - flush caches, send int */
4603 amdgpu_ring_write(ring
, PACKET3(PACKET3_RELEASE_MEM
, 6));
4604 amdgpu_ring_write(ring
, (PACKET3_RELEASE_MEM_GCR_SEQ
|
4605 PACKET3_RELEASE_MEM_GCR_GL2_WB
|
4606 PACKET3_RELEASE_MEM_GCR_GLM_INV
| /* must be set with GLM_WB */
4607 PACKET3_RELEASE_MEM_GCR_GLM_WB
|
4608 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
4609 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT
) |
4610 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
4611 amdgpu_ring_write(ring
, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit
? 2 : 1) |
4612 PACKET3_RELEASE_MEM_INT_SEL(int_sel
? 2 : 0)));
4615 * the address should be Qword aligned if 64bit write, Dword
4616 * aligned if only send 32bit data low (discard data high)
4622 amdgpu_ring_write(ring
, lower_32_bits(addr
));
4623 amdgpu_ring_write(ring
, upper_32_bits(addr
));
4624 amdgpu_ring_write(ring
, lower_32_bits(seq
));
4625 amdgpu_ring_write(ring
, upper_32_bits(seq
));
4626 amdgpu_ring_write(ring
, 0);
4629 static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring
*ring
)
4631 int usepfp
= (ring
->funcs
->type
== AMDGPU_RING_TYPE_GFX
);
4632 uint32_t seq
= ring
->fence_drv
.sync_seq
;
4633 uint64_t addr
= ring
->fence_drv
.gpu_addr
;
4635 gfx_v10_0_wait_reg_mem(ring
, usepfp
, 1, 0, lower_32_bits(addr
),
4636 upper_32_bits(addr
), seq
, 0xffffffff, 4);
4639 static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring
*ring
,
4640 unsigned vmid
, uint64_t pd_addr
)
4642 amdgpu_gmc_emit_flush_gpu_tlb(ring
, vmid
, pd_addr
);
4644 /* compute doesn't have PFP */
4645 if (ring
->funcs
->type
== AMDGPU_RING_TYPE_GFX
) {
4646 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4647 amdgpu_ring_write(ring
, PACKET3(PACKET3_PFP_SYNC_ME
, 0));
4648 amdgpu_ring_write(ring
, 0x0);
4652 static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring
*ring
, u64 addr
,
4653 u64 seq
, unsigned int flags
)
4655 struct amdgpu_device
*adev
= ring
->adev
;
4657 /* we only allocate 32bit for each seq wb address */
4658 BUG_ON(flags
& AMDGPU_FENCE_FLAG_64BIT
);
4660 /* write fence seq to the "addr" */
4661 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4662 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4663 WRITE_DATA_DST_SEL(5) | WR_CONFIRM
));
4664 amdgpu_ring_write(ring
, lower_32_bits(addr
));
4665 amdgpu_ring_write(ring
, upper_32_bits(addr
));
4666 amdgpu_ring_write(ring
, lower_32_bits(seq
));
4668 if (flags
& AMDGPU_FENCE_FLAG_INT
) {
4669 /* set register to trigger INT */
4670 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4671 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4672 WRITE_DATA_DST_SEL(0) | WR_CONFIRM
));
4673 amdgpu_ring_write(ring
, SOC15_REG_OFFSET(GC
, 0, mmCPC_INT_STATUS
));
4674 amdgpu_ring_write(ring
, 0);
4675 amdgpu_ring_write(ring
, 0x20000000); /* src_id is 178 */
4679 static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring
*ring
)
4681 amdgpu_ring_write(ring
, PACKET3(PACKET3_SWITCH_BUFFER
, 0));
4682 amdgpu_ring_write(ring
, 0);
4685 static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring
*ring
, uint32_t flags
)
4689 if (amdgpu_mcbp
|| amdgpu_sriov_vf(ring
->adev
))
4690 gfx_v10_0_ring_emit_ce_meta(ring
,
4691 (!amdgpu_sriov_vf(ring
->adev
) && flags
& AMDGPU_IB_PREEMPTED
) ? true : false);
4693 gfx_v10_0_ring_emit_tmz(ring
, true);
4695 dw2
|= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4696 if (flags
& AMDGPU_HAVE_CTX_SWITCH
) {
4697 /* set load_global_config & load_global_uconfig */
4699 /* set load_cs_sh_regs */
4701 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4704 /* set load_ce_ram if preamble presented */
4705 if (AMDGPU_PREAMBLE_IB_PRESENT
& flags
)
4708 /* still load_ce_ram if this is the first time preamble presented
4709 * although there is no context switch happens.
4711 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST
& flags
)
4715 amdgpu_ring_write(ring
, PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
4716 amdgpu_ring_write(ring
, dw2
);
4717 amdgpu_ring_write(ring
, 0);
4720 static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring
*ring
)
4724 amdgpu_ring_write(ring
, PACKET3(PACKET3_COND_EXEC
, 3));
4725 amdgpu_ring_write(ring
, lower_32_bits(ring
->cond_exe_gpu_addr
));
4726 amdgpu_ring_write(ring
, upper_32_bits(ring
->cond_exe_gpu_addr
));
4727 amdgpu_ring_write(ring
, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4728 ret
= ring
->wptr
& ring
->buf_mask
;
4729 amdgpu_ring_write(ring
, 0x55aa55aa); /* patch dummy value later */
4734 static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring
*ring
, unsigned offset
)
4737 BUG_ON(offset
> ring
->buf_mask
);
4738 BUG_ON(ring
->ring
[offset
] != 0x55aa55aa);
4740 cur
= (ring
->wptr
- 1) & ring
->buf_mask
;
4741 if (likely(cur
> offset
))
4742 ring
->ring
[offset
] = cur
- offset
;
4744 ring
->ring
[offset
] = (ring
->buf_mask
+ 1) - offset
+ cur
;
4747 static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring
*ring
)
4750 struct amdgpu_device
*adev
= ring
->adev
;
4751 struct amdgpu_kiq
*kiq
= &adev
->gfx
.kiq
;
4752 struct amdgpu_ring
*kiq_ring
= &kiq
->ring
;
4754 if (!kiq
->pmf
|| !kiq
->pmf
->kiq_unmap_queues
)
4757 if (amdgpu_ring_alloc(kiq_ring
, kiq
->pmf
->unmap_queues_size
))
4760 /* assert preemption condition */
4761 amdgpu_ring_set_preempt_cond_exec(ring
, false);
4763 /* assert IB preemption, emit the trailing fence */
4764 kiq
->pmf
->kiq_unmap_queues(kiq_ring
, ring
, PREEMPT_QUEUES_NO_UNMAP
,
4765 ring
->trail_fence_gpu_addr
,
4767 amdgpu_ring_commit(kiq_ring
);
4769 /* poll the trailing fence */
4770 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
4771 if (ring
->trail_seq
==
4772 le32_to_cpu(*(ring
->trail_fence_cpu_addr
)))
4777 if (i
>= adev
->usec_timeout
) {
4779 DRM_ERROR("ring %d failed to preempt ib\n", ring
->idx
);
4782 /* deassert preemption condition */
4783 amdgpu_ring_set_preempt_cond_exec(ring
, true);
4787 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring
*ring
, bool resume
)
4789 struct amdgpu_device
*adev
= ring
->adev
;
4790 struct v10_ce_ib_state ce_payload
= {0};
4794 cnt
= (sizeof(ce_payload
) >> 2) + 4 - 2;
4795 csa_addr
= amdgpu_csa_vaddr(ring
->adev
);
4797 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, cnt
));
4798 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(2) |
4799 WRITE_DATA_DST_SEL(8) |
4801 WRITE_DATA_CACHE_POLICY(0));
4802 amdgpu_ring_write(ring
, lower_32_bits(csa_addr
+
4803 offsetof(struct v10_gfx_meta_data
, ce_payload
)));
4804 amdgpu_ring_write(ring
, upper_32_bits(csa_addr
+
4805 offsetof(struct v10_gfx_meta_data
, ce_payload
)));
4808 amdgpu_ring_write_multiple(ring
, adev
->virt
.csa_cpu_addr
+
4809 offsetof(struct v10_gfx_meta_data
,
4811 sizeof(ce_payload
) >> 2);
4813 amdgpu_ring_write_multiple(ring
, (void *)&ce_payload
,
4814 sizeof(ce_payload
) >> 2);
4817 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring
*ring
, bool resume
)
4819 struct amdgpu_device
*adev
= ring
->adev
;
4820 struct v10_de_ib_state de_payload
= {0};
4821 uint64_t csa_addr
, gds_addr
;
4824 csa_addr
= amdgpu_csa_vaddr(ring
->adev
);
4825 gds_addr
= ALIGN(csa_addr
+ AMDGPU_CSA_SIZE
- adev
->gds
.gds_size
,
4827 de_payload
.gds_backup_addrlo
= lower_32_bits(gds_addr
);
4828 de_payload
.gds_backup_addrhi
= upper_32_bits(gds_addr
);
4830 cnt
= (sizeof(de_payload
) >> 2) + 4 - 2;
4831 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, cnt
));
4832 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(1) |
4833 WRITE_DATA_DST_SEL(8) |
4835 WRITE_DATA_CACHE_POLICY(0));
4836 amdgpu_ring_write(ring
, lower_32_bits(csa_addr
+
4837 offsetof(struct v10_gfx_meta_data
, de_payload
)));
4838 amdgpu_ring_write(ring
, upper_32_bits(csa_addr
+
4839 offsetof(struct v10_gfx_meta_data
, de_payload
)));
4842 amdgpu_ring_write_multiple(ring
, adev
->virt
.csa_cpu_addr
+
4843 offsetof(struct v10_gfx_meta_data
,
4845 sizeof(de_payload
) >> 2);
4847 amdgpu_ring_write_multiple(ring
, (void *)&de_payload
,
4848 sizeof(de_payload
) >> 2);
4851 static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring
*ring
, bool start
)
4853 amdgpu_ring_write(ring
, PACKET3(PACKET3_FRAME_CONTROL
, 0));
4854 amdgpu_ring_write(ring
, FRAME_CMD(start
? 0 : 1)); /* frame_end */
4857 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring
*ring
, uint32_t reg
)
4859 struct amdgpu_device
*adev
= ring
->adev
;
4860 struct amdgpu_kiq
*kiq
= &adev
->gfx
.kiq
;
4862 amdgpu_ring_write(ring
, PACKET3(PACKET3_COPY_DATA
, 4));
4863 amdgpu_ring_write(ring
, 0 | /* src: register*/
4864 (5 << 8) | /* dst: memory */
4865 (1 << 20)); /* write confirm */
4866 amdgpu_ring_write(ring
, reg
);
4867 amdgpu_ring_write(ring
, 0);
4868 amdgpu_ring_write(ring
, lower_32_bits(adev
->wb
.gpu_addr
+
4869 kiq
->reg_val_offs
* 4));
4870 amdgpu_ring_write(ring
, upper_32_bits(adev
->wb
.gpu_addr
+
4871 kiq
->reg_val_offs
* 4));
4874 static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring
*ring
, uint32_t reg
,
4879 switch (ring
->funcs
->type
) {
4880 case AMDGPU_RING_TYPE_GFX
:
4881 cmd
= WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM
;
4883 case AMDGPU_RING_TYPE_KIQ
:
4884 cmd
= (1 << 16); /* no inc addr */
4890 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4891 amdgpu_ring_write(ring
, cmd
);
4892 amdgpu_ring_write(ring
, reg
);
4893 amdgpu_ring_write(ring
, 0);
4894 amdgpu_ring_write(ring
, val
);
4897 static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring
*ring
, uint32_t reg
,
4898 uint32_t val
, uint32_t mask
)
4900 gfx_v10_0_wait_reg_mem(ring
, 0, 0, 0, reg
, 0, val
, mask
, 0x20);
4903 static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring
*ring
,
4904 uint32_t reg0
, uint32_t reg1
,
4905 uint32_t ref
, uint32_t mask
)
4907 int usepfp
= (ring
->funcs
->type
== AMDGPU_RING_TYPE_GFX
);
4908 struct amdgpu_device
*adev
= ring
->adev
;
4909 bool fw_version_ok
= false;
4911 fw_version_ok
= adev
->gfx
.cp_fw_write_wait
;
4914 gfx_v10_0_wait_reg_mem(ring
, usepfp
, 0, 1, reg0
, reg1
,
4917 amdgpu_ring_emit_reg_write_reg_wait_helper(ring
, reg0
, reg1
,
4922 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device
*adev
,
4923 uint32_t me
, uint32_t pipe
,
4924 enum amdgpu_interrupt_state state
)
4926 uint32_t cp_int_cntl
, cp_int_cntl_reg
;
4931 cp_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_INT_CNTL_RING0
);
4934 cp_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_INT_CNTL_RING1
);
4937 DRM_DEBUG("invalid pipe %d\n", pipe
);
4941 DRM_DEBUG("invalid me %d\n", me
);
4946 case AMDGPU_IRQ_STATE_DISABLE
:
4947 cp_int_cntl
= RREG32(cp_int_cntl_reg
);
4948 cp_int_cntl
= REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
4949 TIME_STAMP_INT_ENABLE
, 0);
4950 WREG32(cp_int_cntl_reg
, cp_int_cntl
);
4952 case AMDGPU_IRQ_STATE_ENABLE
:
4953 cp_int_cntl
= RREG32(cp_int_cntl_reg
);
4954 cp_int_cntl
= REG_SET_FIELD(cp_int_cntl
, CP_INT_CNTL_RING0
,
4955 TIME_STAMP_INT_ENABLE
, 1);
4956 WREG32(cp_int_cntl_reg
, cp_int_cntl
);
4963 static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device
*adev
,
4965 enum amdgpu_interrupt_state state
)
4967 u32 mec_int_cntl
, mec_int_cntl_reg
;
4970 * amdgpu controls only the first MEC. That's why this function only
4971 * handles the setting of interrupts for this specific MEC. All other
4972 * pipes' interrupts are set by amdkfd.
4978 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE0_INT_CNTL
);
4981 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE1_INT_CNTL
);
4984 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE2_INT_CNTL
);
4987 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE3_INT_CNTL
);
4990 DRM_DEBUG("invalid pipe %d\n", pipe
);
4994 DRM_DEBUG("invalid me %d\n", me
);
4999 case AMDGPU_IRQ_STATE_DISABLE
:
5000 mec_int_cntl
= RREG32(mec_int_cntl_reg
);
5001 mec_int_cntl
= REG_SET_FIELD(mec_int_cntl
, CP_ME1_PIPE0_INT_CNTL
,
5002 TIME_STAMP_INT_ENABLE
, 0);
5003 WREG32(mec_int_cntl_reg
, mec_int_cntl
);
5005 case AMDGPU_IRQ_STATE_ENABLE
:
5006 mec_int_cntl
= RREG32(mec_int_cntl_reg
);
5007 mec_int_cntl
= REG_SET_FIELD(mec_int_cntl
, CP_ME1_PIPE0_INT_CNTL
,
5008 TIME_STAMP_INT_ENABLE
, 1);
5009 WREG32(mec_int_cntl_reg
, mec_int_cntl
);
5016 static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device
*adev
,
5017 struct amdgpu_irq_src
*src
,
5019 enum amdgpu_interrupt_state state
)
5022 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP
:
5023 gfx_v10_0_set_gfx_eop_interrupt_state(adev
, 0, 0, state
);
5025 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP
:
5026 gfx_v10_0_set_gfx_eop_interrupt_state(adev
, 0, 1, state
);
5028 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
:
5029 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 1, 0, state
);
5031 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP
:
5032 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 1, 1, state
);
5034 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP
:
5035 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 1, 2, state
);
5037 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP
:
5038 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 1, 3, state
);
5040 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP
:
5041 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 2, 0, state
);
5043 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP
:
5044 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 2, 1, state
);
5046 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP
:
5047 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 2, 2, state
);
5049 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP
:
5050 gfx_v10_0_set_compute_eop_interrupt_state(adev
, 2, 3, state
);
5058 static int gfx_v10_0_eop_irq(struct amdgpu_device
*adev
,
5059 struct amdgpu_irq_src
*source
,
5060 struct amdgpu_iv_entry
*entry
)
5063 u8 me_id
, pipe_id
, queue_id
;
5064 struct amdgpu_ring
*ring
;
5066 DRM_DEBUG("IH: CP EOP\n");
5067 me_id
= (entry
->ring_id
& 0x0c) >> 2;
5068 pipe_id
= (entry
->ring_id
& 0x03) >> 0;
5069 queue_id
= (entry
->ring_id
& 0x70) >> 4;
5074 amdgpu_fence_process(&adev
->gfx
.gfx_ring
[0]);
5076 amdgpu_fence_process(&adev
->gfx
.gfx_ring
[1]);
5080 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
5081 ring
= &adev
->gfx
.compute_ring
[i
];
5082 /* Per-queue interrupt is supported for MEC starting from VI.
5083 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5085 if ((ring
->me
== me_id
) && (ring
->pipe
== pipe_id
) && (ring
->queue
== queue_id
))
5086 amdgpu_fence_process(ring
);
5093 static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device
*adev
,
5094 struct amdgpu_irq_src
*source
,
5096 enum amdgpu_interrupt_state state
)
5099 case AMDGPU_IRQ_STATE_DISABLE
:
5100 case AMDGPU_IRQ_STATE_ENABLE
:
5101 WREG32_FIELD15(GC
, 0, CP_INT_CNTL_RING0
,
5102 PRIV_REG_INT_ENABLE
,
5103 state
== AMDGPU_IRQ_STATE_ENABLE
? 1 : 0);
5112 static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device
*adev
,
5113 struct amdgpu_irq_src
*source
,
5115 enum amdgpu_interrupt_state state
)
5118 case AMDGPU_IRQ_STATE_DISABLE
:
5119 case AMDGPU_IRQ_STATE_ENABLE
:
5120 WREG32_FIELD15(GC
, 0, CP_INT_CNTL_RING0
,
5121 PRIV_INSTR_INT_ENABLE
,
5122 state
== AMDGPU_IRQ_STATE_ENABLE
? 1 : 0);
5130 static void gfx_v10_0_handle_priv_fault(struct amdgpu_device
*adev
,
5131 struct amdgpu_iv_entry
*entry
)
5133 u8 me_id
, pipe_id
, queue_id
;
5134 struct amdgpu_ring
*ring
;
5137 me_id
= (entry
->ring_id
& 0x0c) >> 2;
5138 pipe_id
= (entry
->ring_id
& 0x03) >> 0;
5139 queue_id
= (entry
->ring_id
& 0x70) >> 4;
5143 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++) {
5144 ring
= &adev
->gfx
.gfx_ring
[i
];
5145 /* we only enabled 1 gfx queue per pipe for now */
5146 if (ring
->me
== me_id
&& ring
->pipe
== pipe_id
)
5147 drm_sched_fault(&ring
->sched
);
5152 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
5153 ring
= &adev
->gfx
.compute_ring
[i
];
5154 if (ring
->me
== me_id
&& ring
->pipe
== pipe_id
&&
5155 ring
->queue
== queue_id
)
5156 drm_sched_fault(&ring
->sched
);
5164 static int gfx_v10_0_priv_reg_irq(struct amdgpu_device
*adev
,
5165 struct amdgpu_irq_src
*source
,
5166 struct amdgpu_iv_entry
*entry
)
5168 DRM_ERROR("Illegal register access in command stream\n");
5169 gfx_v10_0_handle_priv_fault(adev
, entry
);
5173 static int gfx_v10_0_priv_inst_irq(struct amdgpu_device
*adev
,
5174 struct amdgpu_irq_src
*source
,
5175 struct amdgpu_iv_entry
*entry
)
5177 DRM_ERROR("Illegal instruction in command stream\n");
5178 gfx_v10_0_handle_priv_fault(adev
, entry
);
5182 static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device
*adev
,
5183 struct amdgpu_irq_src
*src
,
5185 enum amdgpu_interrupt_state state
)
5187 uint32_t tmp
, target
;
5188 struct amdgpu_ring
*ring
= &(adev
->gfx
.kiq
.ring
);
5191 target
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE0_INT_CNTL
);
5193 target
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME2_PIPE0_INT_CNTL
);
5194 target
+= ring
->pipe
;
5197 case AMDGPU_CP_KIQ_IRQ_DRIVER0
:
5198 if (state
== AMDGPU_IRQ_STATE_DISABLE
) {
5199 tmp
= RREG32_SOC15(GC
, 0, mmCPC_INT_CNTL
);
5200 tmp
= REG_SET_FIELD(tmp
, CPC_INT_CNTL
,
5201 GENERIC2_INT_ENABLE
, 0);
5202 WREG32_SOC15(GC
, 0, mmCPC_INT_CNTL
, tmp
);
5204 tmp
= RREG32(target
);
5205 tmp
= REG_SET_FIELD(tmp
, CP_ME2_PIPE0_INT_CNTL
,
5206 GENERIC2_INT_ENABLE
, 0);
5207 WREG32(target
, tmp
);
5209 tmp
= RREG32_SOC15(GC
, 0, mmCPC_INT_CNTL
);
5210 tmp
= REG_SET_FIELD(tmp
, CPC_INT_CNTL
,
5211 GENERIC2_INT_ENABLE
, 1);
5212 WREG32_SOC15(GC
, 0, mmCPC_INT_CNTL
, tmp
);
5214 tmp
= RREG32(target
);
5215 tmp
= REG_SET_FIELD(tmp
, CP_ME2_PIPE0_INT_CNTL
,
5216 GENERIC2_INT_ENABLE
, 1);
5217 WREG32(target
, tmp
);
5221 BUG(); /* kiq only support GENERIC2_INT now */
5227 static int gfx_v10_0_kiq_irq(struct amdgpu_device
*adev
,
5228 struct amdgpu_irq_src
*source
,
5229 struct amdgpu_iv_entry
*entry
)
5231 u8 me_id
, pipe_id
, queue_id
;
5232 struct amdgpu_ring
*ring
= &(adev
->gfx
.kiq
.ring
);
5234 me_id
= (entry
->ring_id
& 0x0c) >> 2;
5235 pipe_id
= (entry
->ring_id
& 0x03) >> 0;
5236 queue_id
= (entry
->ring_id
& 0x70) >> 4;
5237 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
5238 me_id
, pipe_id
, queue_id
);
5240 amdgpu_fence_process(ring
);
5244 static const struct amd_ip_funcs gfx_v10_0_ip_funcs
= {
5245 .name
= "gfx_v10_0",
5246 .early_init
= gfx_v10_0_early_init
,
5247 .late_init
= gfx_v10_0_late_init
,
5248 .sw_init
= gfx_v10_0_sw_init
,
5249 .sw_fini
= gfx_v10_0_sw_fini
,
5250 .hw_init
= gfx_v10_0_hw_init
,
5251 .hw_fini
= gfx_v10_0_hw_fini
,
5252 .suspend
= gfx_v10_0_suspend
,
5253 .resume
= gfx_v10_0_resume
,
5254 .is_idle
= gfx_v10_0_is_idle
,
5255 .wait_for_idle
= gfx_v10_0_wait_for_idle
,
5256 .soft_reset
= gfx_v10_0_soft_reset
,
5257 .set_clockgating_state
= gfx_v10_0_set_clockgating_state
,
5258 .set_powergating_state
= gfx_v10_0_set_powergating_state
,
5259 .get_clockgating_state
= gfx_v10_0_get_clockgating_state
,
5262 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx
= {
5263 .type
= AMDGPU_RING_TYPE_GFX
,
5265 .nop
= PACKET3(PACKET3_NOP
, 0x3FFF),
5266 .support_64bit_ptrs
= true,
5267 .vmhub
= AMDGPU_GFXHUB_0
,
5268 .get_rptr
= gfx_v10_0_ring_get_rptr_gfx
,
5269 .get_wptr
= gfx_v10_0_ring_get_wptr_gfx
,
5270 .set_wptr
= gfx_v10_0_ring_set_wptr_gfx
,
5271 .emit_frame_size
= /* totally 242 maximum if 16 IBs */
5273 7 + /* PIPELINE_SYNC */
5274 SOC15_FLUSH_GPU_TLB_NUM_WREG
* 5 +
5275 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT
* 7 +
5277 8 + /* FENCE for VM_FLUSH */
5278 20 + /* GDS switch */
5279 4 + /* double SWITCH_BUFFER,
5280 * the first COND_EXEC jump to the place
5281 * just prior to this double SWITCH_BUFFER
5290 8 + 8 + /* FENCE x2 */
5291 2, /* SWITCH_BUFFER */
5292 .emit_ib_size
= 4, /* gfx_v10_0_ring_emit_ib_gfx */
5293 .emit_ib
= gfx_v10_0_ring_emit_ib_gfx
,
5294 .emit_fence
= gfx_v10_0_ring_emit_fence
,
5295 .emit_pipeline_sync
= gfx_v10_0_ring_emit_pipeline_sync
,
5296 .emit_vm_flush
= gfx_v10_0_ring_emit_vm_flush
,
5297 .emit_gds_switch
= gfx_v10_0_ring_emit_gds_switch
,
5298 .emit_hdp_flush
= gfx_v10_0_ring_emit_hdp_flush
,
5299 .test_ring
= gfx_v10_0_ring_test_ring
,
5300 .test_ib
= gfx_v10_0_ring_test_ib
,
5301 .insert_nop
= amdgpu_ring_insert_nop
,
5302 .pad_ib
= amdgpu_ring_generic_pad_ib
,
5303 .emit_switch_buffer
= gfx_v10_0_ring_emit_sb
,
5304 .emit_cntxcntl
= gfx_v10_0_ring_emit_cntxcntl
,
5305 .init_cond_exec
= gfx_v10_0_ring_emit_init_cond_exec
,
5306 .patch_cond_exec
= gfx_v10_0_ring_emit_patch_cond_exec
,
5307 .preempt_ib
= gfx_v10_0_ring_preempt_ib
,
5308 .emit_tmz
= gfx_v10_0_ring_emit_tmz
,
5309 .emit_wreg
= gfx_v10_0_ring_emit_wreg
,
5310 .emit_reg_wait
= gfx_v10_0_ring_emit_reg_wait
,
5311 .emit_reg_write_reg_wait
= gfx_v10_0_ring_emit_reg_write_reg_wait
,
5314 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute
= {
5315 .type
= AMDGPU_RING_TYPE_COMPUTE
,
5317 .nop
= PACKET3(PACKET3_NOP
, 0x3FFF),
5318 .support_64bit_ptrs
= true,
5319 .vmhub
= AMDGPU_GFXHUB_0
,
5320 .get_rptr
= gfx_v10_0_ring_get_rptr_compute
,
5321 .get_wptr
= gfx_v10_0_ring_get_wptr_compute
,
5322 .set_wptr
= gfx_v10_0_ring_set_wptr_compute
,
5324 20 + /* gfx_v10_0_ring_emit_gds_switch */
5325 7 + /* gfx_v10_0_ring_emit_hdp_flush */
5326 5 + /* hdp invalidate */
5327 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
5328 SOC15_FLUSH_GPU_TLB_NUM_WREG
* 5 +
5329 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT
* 7 +
5330 2 + /* gfx_v10_0_ring_emit_vm_flush */
5331 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
5332 .emit_ib_size
= 7, /* gfx_v10_0_ring_emit_ib_compute */
5333 .emit_ib
= gfx_v10_0_ring_emit_ib_compute
,
5334 .emit_fence
= gfx_v10_0_ring_emit_fence
,
5335 .emit_pipeline_sync
= gfx_v10_0_ring_emit_pipeline_sync
,
5336 .emit_vm_flush
= gfx_v10_0_ring_emit_vm_flush
,
5337 .emit_gds_switch
= gfx_v10_0_ring_emit_gds_switch
,
5338 .emit_hdp_flush
= gfx_v10_0_ring_emit_hdp_flush
,
5339 .test_ring
= gfx_v10_0_ring_test_ring
,
5340 .test_ib
= gfx_v10_0_ring_test_ib
,
5341 .insert_nop
= amdgpu_ring_insert_nop
,
5342 .pad_ib
= amdgpu_ring_generic_pad_ib
,
5343 .emit_wreg
= gfx_v10_0_ring_emit_wreg
,
5344 .emit_reg_wait
= gfx_v10_0_ring_emit_reg_wait
,
5345 .emit_reg_write_reg_wait
= gfx_v10_0_ring_emit_reg_write_reg_wait
,
5348 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq
= {
5349 .type
= AMDGPU_RING_TYPE_KIQ
,
5351 .nop
= PACKET3(PACKET3_NOP
, 0x3FFF),
5352 .support_64bit_ptrs
= true,
5353 .vmhub
= AMDGPU_GFXHUB_0
,
5354 .get_rptr
= gfx_v10_0_ring_get_rptr_compute
,
5355 .get_wptr
= gfx_v10_0_ring_get_wptr_compute
,
5356 .set_wptr
= gfx_v10_0_ring_set_wptr_compute
,
5358 20 + /* gfx_v10_0_ring_emit_gds_switch */
5359 7 + /* gfx_v10_0_ring_emit_hdp_flush */
5360 5 + /*hdp invalidate */
5361 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
5362 SOC15_FLUSH_GPU_TLB_NUM_WREG
* 5 +
5363 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT
* 7 +
5364 2 + /* gfx_v10_0_ring_emit_vm_flush */
5365 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5366 .emit_ib_size
= 7, /* gfx_v10_0_ring_emit_ib_compute */
5367 .emit_ib
= gfx_v10_0_ring_emit_ib_compute
,
5368 .emit_fence
= gfx_v10_0_ring_emit_fence_kiq
,
5369 .test_ring
= gfx_v10_0_ring_test_ring
,
5370 .test_ib
= gfx_v10_0_ring_test_ib
,
5371 .insert_nop
= amdgpu_ring_insert_nop
,
5372 .pad_ib
= amdgpu_ring_generic_pad_ib
,
5373 .emit_rreg
= gfx_v10_0_ring_emit_rreg
,
5374 .emit_wreg
= gfx_v10_0_ring_emit_wreg
,
5375 .emit_reg_wait
= gfx_v10_0_ring_emit_reg_wait
,
5376 .emit_reg_write_reg_wait
= gfx_v10_0_ring_emit_reg_write_reg_wait
,
5379 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device
*adev
)
5383 adev
->gfx
.kiq
.ring
.funcs
= &gfx_v10_0_ring_funcs_kiq
;
5385 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
5386 adev
->gfx
.gfx_ring
[i
].funcs
= &gfx_v10_0_ring_funcs_gfx
;
5388 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
5389 adev
->gfx
.compute_ring
[i
].funcs
= &gfx_v10_0_ring_funcs_compute
;
static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = {
	.set = gfx_v10_0_set_eop_interrupt_state,
	.process = gfx_v10_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
	.set = gfx_v10_0_set_priv_reg_fault_state,
	.process = gfx_v10_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
	.set = gfx_v10_0_set_priv_inst_fault_state,
	.process = gfx_v10_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {
	.set = gfx_v10_0_kiq_set_interrupt_state,
	.process = gfx_v10_0_kiq_irq,
};

static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}

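/* Select the RLC function table for the detected ASIC. */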
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
		break;
	default:
		break;
	}
}

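/*
 * GDS defaults for gfx10: 64 KiB of GDS, 64 GWS and 16 OA entries; the
 * compute max wave id is derived from the total CU count (32 waves per CU).
 */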
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x10000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

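/*
 * Mark user-disabled WGPs as inactive in GC_USER_SHADER_ARRAY_CONFIG for the
 * currently selected SE/SH.
 */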
static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							   u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

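/*
 * Combine the hardware (CC) and user (GC_USER) inactive WGP masks and invert
 * the result, limited to the max_cu_per_sh / 2 WGPs present in one SH.
 */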
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;
	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

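/* Expand the active WGP bitmap into a CU bitmap (two CUs per WGP). */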
static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

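/*
 * Walk every SE/SH, apply the user CU disable masks and accumulate the active
 * CU count, per-SH bitmaps and always-on CU mask into cu_info.
 */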
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 10,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v10_0_ip_funcs,
};