From 6733d8276ac02a8790e571d2af4a69a9039d0522 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 3 Jul 2025 10:51:19 -0700 Subject: [PATCH] drm/msm: Update register xml Sync register xml from mesa commit eb3e0b7164a3 ("freedreno/a6xx: Split descriptors out into their own file"). Signed-off-by: Rob Clark Acked-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/662470/ --- drivers/gpu/drm/msm/Makefile | 5 + drivers/gpu/drm/msm/adreno/a6xx_catalog.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 4 + drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 2 +- drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 2 +- .../drm/msm/adreno/adreno_gen7_9_0_snapshot.h | 4 +- drivers/gpu/drm/msm/registers/adreno/a6xx.xml | 3582 +++-------------- .../msm/registers/adreno/a6xx_descriptors.xml | 198 + .../drm/msm/registers/adreno/a6xx_enums.xml | 383 ++ .../msm/registers/adreno/a6xx_perfcntrs.xml | 600 +++ .../drm/msm/registers/adreno/a7xx_enums.xml | 223 + .../msm/registers/adreno/a7xx_perfcntrs.xml | 1030 +++++ .../drm/msm/registers/adreno/adreno_pm4.xml | 302 +- 14 files changed, 3312 insertions(+), 3027 deletions(-) create mode 100644 drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml create mode 100644 drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml create mode 100644 drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.xml create mode 100644 drivers/gpu/drm/msm/registers/adreno/a7xx_enums.xml create mode 100644 drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.xml diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 7229451350a91..514bacd5e4998 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -195,6 +195,11 @@ ADRENO_HEADERS = \ generated/a4xx.xml.h \ generated/a5xx.xml.h \ generated/a6xx.xml.h \ + generated/a6xx_descriptors.xml.h \ + generated/a6xx_enums.xml.h \ + generated/a6xx_perfcntrs.xml.h \ + generated/a7xx_enums.xml.h \ + 
generated/a7xx_perfcntrs.xml.h \ generated/a6xx_gmu.xml.h \ generated/adreno_common.xml.h \ generated/adreno_pm4.xml.h \ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 2fdaaf4372d38..00e1afd46b815 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -1335,7 +1335,7 @@ static const uint32_t a7xx_pwrup_reglist_regs[] = { REG_A6XX_RB_NC_MODE_CNTL, REG_A6XX_RB_CMP_DBG_ECO_CNTL, REG_A7XX_GRAS_NC_MODE_CNTL, - REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, REG_A6XX_UCHE_GBIF_GX_CONFIG, REG_A6XX_UCHE_CLIENT_PF, REG_A6XX_TPL1_DBG_ECO_CNTL1, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 9201a53dd341b..6e71f617fc3d0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -6,6 +6,10 @@ #include "adreno_gpu.h" +#include "a6xx_enums.xml.h" +#include "a7xx_enums.xml.h" +#include "a6xx_perfcntrs.xml.h" +#include "a7xx_perfcntrs.xml.h" #include "a6xx.xml.h" #include "a6xx_gmu.h" diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index ff06bb75b76db..faca2a0243ab9 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -158,7 +158,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu, /* Make sure all pending memory writes are posted */ wmb(); - gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova); + gpu_write64(gpu, REG_A6XX_CP_CRASH_DUMP_SCRIPT_BASE, dumper->iova); gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h index e545106c70be7..95d93ac6812a4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -212,7 +212,7 @@ static const struct a6xx_shader_block { 
SHADER(A6XX_SP_LB_5_DATA, 0x200), SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x800), SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280), - SHADER(A6XX_SP_UAV_DATA, 0x80), + SHADER(A6XX_SP_GFX_UAV_BASE_DATA, 0x80), SHADER(A6XX_SP_INST_TAG, 0x80), SHADER(A6XX_SP_CB_BINDLESS_TAG, 0x80), SHADER(A6XX_SP_TMO_UMO_TAG, 0x80), diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c index 9e7f2e5fb2b99..6a12a35dabff1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c @@ -210,7 +210,7 @@ void a6xx_preempt_hw_init(struct msm_gpu *gpu) gpu_write64(gpu, REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0); /* Enable the GMEM save/restore feature for preemption */ - gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, 0x1); + gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 0x1); /* Reset the preemption state */ set_preempt_state(a6xx_gpu, PREEMPT_NONE); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h index 9a327d543f27d..e02cabb39f194 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h @@ -1311,8 +1311,8 @@ static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { REG_A7XX_CP_BV_SQE_UCODE_DBG_DATA, 0x08000}, { "CP_BV_SQE_STAT_ADDR", REG_A7XX_CP_BV_SQE_STAT_ADDR, REG_A7XX_CP_BV_SQE_STAT_DATA, 0x00040}, - { "CP_RESOURCE_TBL", REG_A7XX_CP_RESOURCE_TBL_DBG_ADDR, - REG_A7XX_CP_RESOURCE_TBL_DBG_DATA, 0x04100}, + { "CP_RESOURCE_TBL", REG_A7XX_CP_RESOURCE_TABLE_DBG_ADDR, + REG_A7XX_CP_RESOURCE_TABLE_DBG_DATA, 0x04100}, { "CP_LPAC_DRAW_STATE_ADDR", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, REG_A7XX_CP_LPAC_DRAW_STATE_DATA, 0x00200}, { "CP_LPAC_ROQ", REG_A7XX_CP_LPAC_ROQ_DBG_ADDR, diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml index 2db425abf0f3c..d860fd94feae8 100644 --- 
a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml @@ -5,6 +5,11 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsdllow early z-test and early-lrz (if applicable) - - Disable early z-test and early-lrz test (if applicable) - - - A special mode that allows early-lrz test but disables - early-z test. Which might sound a bit funny, since - lrz-test happens before z-test. But as long as a couple - conditions are maintained this allows using lrz-test in - cases where fragment shader has kill/discard: - - 1) Disable lrz-write in cases where it is uncertain during - binning pass that a fragment will pass. Ie. if frag - shader has-kill, writes-z, or alpha/stencil test is - enabled. (For correctness, lrz-write must be disabled - when blend is enabled.) This is analogous to how a - z-prepass works. - - 2) Disable lrz-write and test if a depth-test direction - reversal is detected. Due to condition (1), the contents - of the lrz buffer are a conservative estimation of the - depth buffer during the draw pass. Meaning that geometry - that we know for certain will not be visible will not pass - lrz-test. But geometry which may be (or contributes to - blend) will pass the lrz-test. - - This allows us to keep early-lrz-test in cases where the frag - shader does not write-z (ie. we know the z-value before FS) - and does not have side-effects (image/ssbo writes, etc), but - does have kill/discard. Which turns out to be a common - enough case that it is useful to keep early-lrz test against - the conservative lrz buffer to discard fragments that we - know will definitely not be visible. 
- - - Not a real hw value, used internally by mesa - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2371,7 +177,7 @@ to upconvert to 32b float internally? - + @@ -2400,22 +206,22 @@ to upconvert to 32b float internally? --> - + - - - - - - - - - - - - + + + + + + + + + + + + number of remaining dwords incl current dword being consumed? @@ -2451,6 +257,7 @@ to upconvert to 32b float internally? + @@ -2468,8 +275,8 @@ to upconvert to 32b float internally? - - + + @@ -2619,28 +426,17 @@ to upconvert to 32b float internally? vertices in, number of primnitives assembled etc. --> - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + @@ -2779,7 +575,7 @@ to upconvert to 32b float internally? - + @@ -2840,7 +636,7 @@ to upconvert to 32b float internally? - + Set to true when binning, isn't changed afterwards @@ -2936,8 +732,8 @@ to upconvert to 32b float internally? - - + + @@ -2967,14 +763,14 @@ to upconvert to 32b float internally? LIMIT is set to PITCH - 64, to make room for a bit of overflow --> - - - - - - - - + + + + + + + + Seems to be a bitmap of which tiles mapped to the VSC pipe contain geometry. @@ -2985,7 +781,7 @@ to upconvert to 32b float internally? - + Has the size of data written to corresponding VSC_PRIM_STRM buffer. @@ -2993,10 +789,10 @@ to upconvert to 32b float internally? - + Has the size of data written to corresponding VSC pipe, ie. - same thing that is written out to VSC_DRAW_STRM_SIZE_ADDRESS_LO/HI + same thing that is written out to VSC_SIZE_BASE @@ -3028,17 +824,17 @@ to upconvert to 32b float internally? - + - - - - + + + + - - + + @@ -3067,7 +863,7 @@ to upconvert to 32b float internally? - + @@ -3075,7 +871,7 @@ to upconvert to 32b float internally? - + @@ -3124,7 +920,12 @@ to upconvert to 32b float internally? - + + + + + + @@ -3133,13 +934,13 @@ to upconvert to 32b float internally? - + - - - + + + @@ -3213,13 +1014,13 @@ to upconvert to 32b float internally? 
- + - + - + @@ -3235,22 +1036,22 @@ to upconvert to 32b float internally? - + - + - + - + @@ -3261,9 +1062,9 @@ to upconvert to 32b float internally? - - - + + + @@ -3286,13 +1087,36 @@ to upconvert to 32b float internally? - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -3313,7 +1137,7 @@ to upconvert to 32b float internally? - + @@ -3339,14 +1163,13 @@ to upconvert to 32b float internally? - + - - + - + - + @@ -3408,7 +1231,7 @@ to upconvert to 32b float internally? - + @@ -3430,7 +1253,7 @@ to upconvert to 32b float internally? - + @@ -3447,22 +1270,22 @@ to upconvert to 32b float internally? - + - - - - - - + + + + + + - - + + @@ -3481,7 +1304,7 @@ to upconvert to 32b float internally? --> - + @@ -3490,7 +1313,7 @@ to upconvert to 32b float internally? - + @@ -3501,8 +1324,7 @@ to upconvert to 32b float internally? - - + @@ -3515,15 +1337,14 @@ to upconvert to 32b float internally? - - + - + @@ -3536,16 +1357,16 @@ to upconvert to 32b float internally? - - - + + + - - + + @@ -3555,7 +1376,7 @@ to upconvert to 32b float internally? - + @@ -3567,16 +1388,16 @@ to upconvert to 32b float internally? - + - + - + @@ -3608,7 +1429,7 @@ to upconvert to 32b float internally? - + @@ -3672,18 +1493,18 @@ to upconvert to 32b float internally? - - - - - + + + + + - + @@ -3726,12 +1547,12 @@ to upconvert to 32b float internally? - + - - + + - + - + @@ -3791,27 +1612,27 @@ to upconvert to 32b float internally? - - + + - + - - + + - + - - + + - + - + @@ -3820,25 +1641,31 @@ to upconvert to 32b float internally? - - + + - - - + + + - - - - + + + + + + + + + + + - - - + + @@ -3853,16 +1680,20 @@ to upconvert to 32b float internally? - - - + + + + + + + @@ -3871,7 +1702,7 @@ to upconvert to 32b float internally? - + @@ -3895,7 +1726,13 @@ to upconvert to 32b float internally? - + + + + + + + @@ -3906,7 +1743,7 @@ to upconvert to 32b float internally? - + @@ -3915,10 +1752,10 @@ to upconvert to 32b float internally? 
- RB_SAMPLE_COUNT_ADDR register is used up to (and including) a730. After that + RB_SAMPLE_COUNTER_BASE register is used up to (and including) a730. After that the address is specified through CP_EVENT_WRITE7::WRITE_SAMPLE_COUNT. - + @@ -3932,10 +1769,10 @@ to upconvert to 32b float internally? - - + + - + @@ -3954,7 +1791,7 @@ to upconvert to 32b float internally? - + @@ -3965,26 +1802,26 @@ to upconvert to 32b float internally? - - - + + + - - - + + + - - + + - - + + - - - - + + + + @@ -3996,7 +1833,7 @@ to upconvert to 32b float internally? - + @@ -4017,10 +1854,21 @@ to upconvert to 32b float internally? + + + + + + + + + + - - + + + @@ -4046,9 +1894,9 @@ to upconvert to 32b float internally? - + - + @@ -4069,38 +1917,38 @@ to upconvert to 32b float internally? - - - + + + - - - + + + - + - - - + + + - - - + + + - + - + - + @@ -4113,7 +1961,7 @@ to upconvert to 32b float internally? - + geometry shader @@ -4125,7 +1973,7 @@ to upconvert to 32b float internally? - + @@ -4139,10 +1987,10 @@ to upconvert to 32b float internally? - - - - + + + + @@ -4159,11 +2007,11 @@ to upconvert to 32b float internally? - + Packed array of a6xx_varying_interp_mode - + Packed array of a6xx_varying_ps_repl_mode @@ -4172,12 +2020,12 @@ to upconvert to 32b float internally? - + - + @@ -4206,7 +2054,7 @@ to upconvert to 32b float internally? - + @@ -4215,7 +2063,7 @@ to upconvert to 32b float internally? - + @@ -4225,14 +2073,14 @@ to upconvert to 32b float internally? - + - + num of varyings plus four for gl_Position (plus one if gl_PointSize) plus # of transform-feedback (streamout) varyings if using the @@ -4249,11 +2097,11 @@ to upconvert to 32b float internally? - - - + + + - + @@ -4272,7 +2120,7 @@ to upconvert to 32b float internally? - + @@ -4282,19 +2130,19 @@ to upconvert to 32b float internally? - + - + - + - + - + @@ -4311,15 +2159,15 @@ to upconvert to 32b float internally? - + - + - + @@ -4334,7 +2182,7 @@ to upconvert to 32b float internally? 
- + @@ -4344,15 +2192,15 @@ to upconvert to 32b float internally? - + - + - + @@ -4367,27 +2215,27 @@ to upconvert to 32b float internally? - + - + - + - - + + - + @@ -4397,17 +2245,17 @@ to upconvert to 32b float internally? - + - + - + - + num of varyings plus four for gl_Position (plus one if gl_PointSize) plus # of transform-feedback (streamout) varyings if using the @@ -4417,19 +2265,19 @@ to upconvert to 32b float internally? - + - - + + - - + + - + @@ -4438,9 +2286,9 @@ to upconvert to 32b float internally? - + - + @@ -4451,31 +2299,31 @@ to upconvert to 32b float internally? - - - - - + + + + + - + Possibly not really "initiating" the draw but the layout is similar to VGT_DRAW_INITIATOR on older gens - - + + - + - - + + - + Written by CP_SET_VISIBILITY_OVERRIDE handler @@ -4488,18 +2336,18 @@ to upconvert to 32b float internally? - + - + - + This is the ID of the current patch within the @@ -4512,32 +2360,32 @@ to upconvert to 32b float internally? - + - + - + - + - + - - + + @@ -4546,14 +2394,14 @@ to upconvert to 32b float internally? - + - + - + @@ -4573,7 +2421,7 @@ to upconvert to 32b float internally? - + @@ -4588,7 +2436,7 @@ to upconvert to 32b float internally? - + - + @@ -4630,17 +2478,17 @@ to upconvert to 32b float internally? --> - + - + - + - + - - + + @@ -4678,12 +2526,12 @@ to upconvert to 32b float internally? - + @@ -4752,7 +2600,7 @@ to upconvert to 32b float internally? - + This seems to be be the equivalent of HWSTACKOFFSET in a3xx. The ldp/stp offset formula above isn't affected by @@ -4763,18 +2611,18 @@ to upconvert to 32b float internally? - - + + - + - + - - - + + + - + @@ -4782,32 +2630,32 @@ to upconvert to 32b float internally? Total size of local storage in dwords divided by the wave size. The maximum value is 64. 
With the wave size being always 64 for HS, the maximum size of local storage should be: - 64 (wavesize) * 64 (SP_HS_WAVE_INPUT_SIZE) * 4 = 16k + 64 (wavesize) * 64 (SP_HS_CNTL_1) * 4 = 16k --> - - + + - - + + - + - + - - - + + + - + - + - - + + @@ -4815,7 +2663,7 @@ to upconvert to 32b float internally? - + @@ -4825,22 +2673,22 @@ to upconvert to 32b float internally? - - + + - + - + - - - + + + - + - + Normally the size of the output of the last stage in dwords. It should be programmed as follows: @@ -4854,11 +2702,11 @@ to upconvert to 32b float internally? doesn't matter in practice. - + - - + + @@ -4867,7 +2715,7 @@ to upconvert to 32b float internally? - + @@ -4877,29 +2725,29 @@ to upconvert to 32b float internally? - - + + - + - + - - - - - - - - - - - - + + + + + + + + + + + + - + @@ -4909,8 +2757,7 @@ to upconvert to 32b float internally? fine derivatives and quad subgroup ops. - - + @@ -4923,12 +2770,12 @@ to upconvert to 32b float internally? - - - - - - + + + + + + @@ -4948,7 +2795,7 @@ to upconvert to 32b float internally? - + @@ -4958,17 +2805,17 @@ to upconvert to 32b float internally? - + - + - + per MRT @@ -4976,7 +2823,7 @@ to upconvert to 32b float internally? - + @@ -4985,7 +2832,7 @@ to upconvert to 32b float internally? - + @@ -5002,7 +2849,7 @@ to upconvert to 32b float internally? - + @@ -5016,7 +2863,7 @@ to upconvert to 32b float internally? - + @@ -5028,22 +2875,23 @@ to upconvert to 32b float internally? - + - + - + + - + @@ -5053,8 +2901,15 @@ to upconvert to 32b float internally? + + + + + + + - + If 0 - all 32k of shared storage is enabled, otherwise @@ -5065,32 +2920,36 @@ to upconvert to 32b float internally? always return 0) - - - + + + This defines the split between consts and local + memory in the Local Buffer. The programmed value + must be at least the actual CONSTLEN. + + - - - + + + - + - + - - + + - + - - + + - - + + @@ -5110,18 +2980,16 @@ to upconvert to 32b float internally? 
- - + How invocations/fibers within a workgroup are tiled. + - - - - + + + + @@ -5146,18 +3014,19 @@ to upconvert to 32b float internally? - - + + + - + - + - + Specify for which components the output color should be read from alias, e.g. for: @@ -5167,7 +3036,7 @@ to upconvert to 32b float internally? alias.1.b32.0 r1.x, c4.x alias.1.b32.0 r0.x, c0.x - the SP_PS_ALIASED_COMPONENTS would be 0x00001111 + the SP_PS_OUTPUT_CONST_MASK would be 0x00001111 @@ -5193,7 +3062,7 @@ to upconvert to 32b float internally? - + - - + + - + @@ -5248,8 +3117,8 @@ to upconvert to 32b float internally? - - + + @@ -5257,16 +3126,16 @@ to upconvert to 32b float internally? - + - - - + + + - + @@ -5281,7 +3150,7 @@ to upconvert to 32b float internally? - + @@ -5301,33 +3170,44 @@ to upconvert to 32b float internally? "a6xx_sp_ps_tp_cluster" but this actually specifies the border color base for compute shaders. --> - + - - - + - + - - - - - - + + + + + + + + + + + + + + + + + - + + + @@ -5336,42 +3216,45 @@ to upconvert to 32b float internally? badly named or the functionality moved in a6xx. But downstream kernel calls this "a6xx_sp_ps_tp_2d_cluster" --> - - + + - - + + - - + + - - - - + + + + - - - + + + - - - + + + - - + + - - + + @@ -5383,8 +3266,12 @@ to upconvert to 32b float internally? - - + + + + + + @@ -5422,34 +3309,44 @@ to upconvert to 32b float internally? - + - - - - + + + + - - - - + + + + - - - + + - - + + + + + + + + + + - - + + + + + - + @@ -5462,94 +3359,94 @@ to upconvert to 32b float internally? - + - + - + - + - + - + - + - - - - - - + + + + + + - + - - - - + + + + - + - + - + - + - + - + - + - + @@ -5557,7 +3454,7 @@ to upconvert to 32b float internally? - + - - - + + + - + - + - + - + - + - + - + - - - + + + @@ -5611,19 +3508,29 @@ to upconvert to 32b float internally? - + - - - - + + When this bit is enabled, the dispatch order interleaves + the z coordinate instead of launching all workgroups + with z=0, then all with z=1 and so on. 
+ + + + When both fields are non-0 then the dispatcher uses + these tile sizes to launch workgroups in a tiled manner + when the x and y workgroup counts are + both more than 1. + + + - - + + @@ -5641,29 +3548,27 @@ to upconvert to 32b float internally? - - + + - - - + - + - + - + - + This register clears pending loads queued up by CP_LOAD_STATE6. Each bit resets a particular kind(s) of @@ -5678,8 +3583,8 @@ to upconvert to 32b float internally? - - + + @@ -5690,20 +3595,20 @@ to upconvert to 32b float internally? - + - + - + - + This register clears pending loads queued up by CP_LOAD_STATE6. Each bit resets a particular kind(s) of @@ -5718,18 +3623,18 @@ to upconvert to 32b float internally? - - + + - - + + - + @@ -5738,7 +3643,7 @@ to upconvert to 32b float internally? const pool and 16 in the geometry const pool although only 8 are actually used (why?) and they are mapped to c504-c511 in each stage. Both VS and FS shared consts - are written using ST6_CONSTANTS/SB6_IBO, so that both + are written using ST6_CONSTANTS/SB6_UAV, so that both the geometry and FS shared consts can be written at once by using CP_LOAD_STATE6 rather than CP_LOAD_STATE6_FRAG/CP_LOAD_STATE6_GEOM. In addition @@ -5747,13 +3652,13 @@ to upconvert to 32b float internally? There is also a separate shared constant pool for CS, which is loaded through CP_LOAD_STATE6_FRAG with - ST6_UBO/ST6_IBO. However the only real difference for CS + ST6_UBO/ST6_UAV. However the only real difference for CS is the dword units. - + @@ -5788,10 +3693,10 @@ to upconvert to 32b float internally? sequence. 
The sequence used internally for an event looks like: - write EVENT_CMD pipe register - write CP_EVENT_START - - write HLSQ_EVENT_CMD with event or HLSQ_DRAW_CMD - - write PC_EVENT_CMD with event or PC_DRAW_CMD - - write HLSQ_EVENT_CMD(CONTEXT_DONE) - - write PC_EVENT_CMD(CONTEXT_DONE) + - write SP_EVENT_INITIATOR with event or SP_DRAW_INITIATOR + - write PC_EVENT_INITIATOR with event or PC_DRAW_INITIATOR + - write SP_EVENT_INITIATOR(CONTEXT_DONE) + - write PC_EVENT_INITIATOR(CONTEXT_DONE) - write CP_EVENT_END Writing to CP_EVENT_END seems to actually trigger the context roll --> @@ -5809,193 +3714,6 @@ to upconvert to 32b float internally? - - - Texture sampler dwords - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - clamp result to [0, 1] if the format is unorm or - [-1, 1] if the format is snorm, *after* - filtering. Has no effect for other formats. - - - - - - - - - - - - - - - - - - - Texture constant dwords - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - probably for D3D structured UAVs, normally set to 1 - - - - - - Pitch in bytes (so actually stride) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml new file mode 100644 index 0000000000000..307d43dda8a25 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml @@ -0,0 +1,198 @@ + + + + + + + + + Texture sampler dwords + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + clamp result to [0, 1] if the format is unorm or + [-1, 1] if the format is snorm, *after* + filtering. Has no effect for other formats. 
+ + + + + + + + + + + + + + + + + + + + + Texture constant dwords + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + probably for D3D structured UAVs, normally set to 1 + + + + + + Pitch in bytes (so actually stride) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml new file mode 100644 index 0000000000000..665539b098c63 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xmlspecial buffer type for usage as the source for buffer + to image copies with lower alignment requirements than + A6XX_TEX_2D, available since A7XX. + + + + + + Allow early z-test and early-lrz (if applicable) + + Disable early z-test and early-lrz test (if applicable) + + + A special mode that allows early-lrz (if applicable) or early-z + tests, but also does late-z tests at which point it writes depth. + + This mode is used when fragment can be killed (via discard or + sample mask) after early-z tests and it writes depth. In such case + depth can be written only at late-z stage, but it's ok to use + early-z to discard fragments. 
+ + However this mode is not compatible with: + - Lack of D/S attachment + - Stencil writes on stencil or depth test failures + - Per-sample shading + + + Not a real hw value, used internally by mesa + + + + + + + + + + + + + + + + diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.xml new file mode 100644 index 0000000000000..c446a2eb11202 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.xmldiff --git a/drivers/gpu/drm/msm/registers/adreno/a7xx_enums.xml b/drivers/gpu/drm/msm/registers/adreno/a7xx_enums.xml new file mode 100644 index 0000000000000..661b0dd0f675b --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a7xx_enums.xml @@ -0,0 +1,223 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.xml b/drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.xml new file mode 100644 index 0000000000000..9bf78b0a854b1 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.xmldiff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 4627134016228..7abc08635495c 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -21,9 +21,9 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - - - + + + @@ -31,8 +31,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - If A6XX_RB_SAMPLE_COUNT_CONTROL.copy is true, writes OQ Z 
passed - sample counts to RB_SAMPLE_COUNT_ADDR. This writes to main + If A6XX_RB_SAMPLE_COUNTER_CNTL.copy is true, writes OQ Z passed + sample counts to RB_SAMPLE_COUNTER_BASE. This writes to main memory, skipping UCHE. @@ -97,6 +97,13 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> + + Flip between the primary and secondary LRZ buffers. This is used + for concurrent binning, so that BV can write to one buffer while + BR reads from the other. + + + Clears based on GRAS_LRZ_CNTL configuration, could clear fast-clear buffer or LRZ direction. @@ -114,6 +121,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> + @@ -372,7 +380,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> Conditionally load a IB based on a flag, prefetch enabled - + Conditionally load a IB based on a flag, prefetch disabled Load a buffer with pre-fetch enabled @@ -538,7 +546,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - + @@ -650,6 +658,11 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> Reset various on-chip state used for synchronization + + Invalidates the "CCHE" introduced on a740 + + + @@ -792,14 +805,14 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) - - + + - + @@ -1121,39 +1134,93 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + + A mask of bins, starting at VSC_N, whose + visibility is OR'd together. A value of 0 is + interpreted as 1 (i.e. just use VSC_N for + visibility) for backwards compatibility. Only + exists on a7xx. + + + + + If this field is 1, VSC_MASK and VSC_N are + ignored and instead a new ordinal immediately + after specifies the full 32-bit mask of bins + to use. The mask is "absolute" instead of + relative to VSC_N. 
+ + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1164,23 +1231,42 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) stream is recorded. + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1198,6 +1284,9 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + @@ -1348,6 +1437,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + @@ -1655,8 +1746,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) - - + + @@ -1670,15 +1761,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + - - - - - - + @@ -1773,13 +1860,23 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) Tell CP the current operation mode, indicates save and restore procedure + + + + + + + + + - - - - - - + + + + + + + @@ -1789,23 +1886,40 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) --> - - - + + + + + + + + + - - + CP_SET_MARKER is used with these bits to create a + critical section around a workaround for ray tracing. + The workaround happens after BVH building, and appears + to invalidate the RTU's BVH node cache. It makes sure + that only one of BR/BV/LPAC is executing the + workaround at a time, and no draws using RT on BV/LPAC + are executing while the workaround is executed on BR (or + vice versa, that no draws on BV/BR using RT are executed + while the workaround executes on LPAC), by + hooking subsequent CP_EVENT_WRITE/CP_DRAW_*/CP_EXEC_CS. + The blob usage is: + + CP_SET_MARKER(RT_WA_START) + ... workaround here ... + CP_SET_MARKER(RT_WA_END) + ... + CP_SET_MARKER(SHADER_USES_RT) + CP_DRAW_INDX(...) or CP_EXEC_CS(...) + --> + + + @@ -1832,9 +1946,9 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) If concurrent binning is disabled then BR also does binning so it will also write the "real" registers in BR. --> - - - + + + @@ -1935,11 +2049,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) a bitmask of which modes pass the test. 
--> - + - + @@ -2014,10 +2128,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) - Used by the userspace and kernel drivers to set various IB's - which are executed during context save/restore for handling - state that isn't restored by the context switch routine itself. - + Used by the userspace and kernel drivers to set various IB's + which are executed during context save/restore for handling + state that isn't restored by the context switch routine itself. + Executed unconditionally when switching back to the context. @@ -2087,12 +2201,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) Tracks GRAS_LRZ_CNTL::GREATER, GRAS_LRZ_CNTL::DIR, and - GRAS_LRZ_DEPTH_VIEW with previous values, and if one of + GRAS_LRZ_VIEW_INFO with previous values, and if one of the following is true: - GRAS_LRZ_CNTL::GREATER has changed - GRAS_LRZ_CNTL::DIR has changed, the old value is not CUR_DIR_GE, and the new value is not CUR_DIR_DISABLED - - GRAS_LRZ_DEPTH_VIEW has changed + - GRAS_LRZ_VIEW_INFO has changed then it does a LRZ_FLUSH with GRAS_LRZ_CNTL::ENABLE forced to 1. Only exists in a650_sqe.fw. @@ -2207,7 +2321,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) - Best guess is that it is a faster way to fetch all the VSC_STATE registers + Best guess is that it is a faster way to fetch all the VSC_CHANNEL_VISIBILITY registers and keep them in a local scratch memory instead of fetching every time when skipping IBs. @@ -2260,6 +2374,16 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + + + + -- 2.47.2