]> git.ipfire.org Git - thirdparty/openwrt.git/commitdiff
bcm27xx: update 5.4 patches from RPi foundation
author Álvaro Fernández Rojas <noltari@gmail.com>
Tue, 31 Mar 2020 07:26:30 +0000 (09:26 +0200)
committer Álvaro Fernández Rojas <noltari@gmail.com>
Tue, 31 Mar 2020 11:18:08 +0000 (13:18 +0200)
Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
84 files changed:
target/linux/bcm27xx/bcm2708/config-5.4
target/linux/bcm27xx/bcm2709/config-5.4
target/linux/bcm27xx/bcm2710/config-5.4
target/linux/bcm27xx/bcm2711/config-5.4
target/linux/bcm27xx/patches-5.4/950-0435-ARM-dts-overlays-Create-custom-clocks-in.patch [moved from target/linux/bcm27xx/patches-5.4/950-0436-ARM-dts-overlays-Create-custom-clocks-in.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0436-staging-vc04_services-Fix-vcsm-overflow-bug-when-cou.patch [moved from target/linux/bcm27xx/patches-5.4/950-0437-staging-vc04_services-Fix-vcsm-overflow-bug-when-cou.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0437-overlays-Add-timeout_ms-parameter-to-gpio-poweroff.patch [moved from target/linux/bcm27xx/patches-5.4/950-0438-overlays-Add-timeout_ms-parameter-to-gpio-poweroff.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0438-of-overlay-Correct-symbol-path-fixups.patch [moved from target/linux/bcm27xx/patches-5.4/950-0439-of-overlay-Correct-symbol-path-fixups.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0439-overlays-sc16ic750-i2c-Fix-xtal-parameter.patch [moved from target/linux/bcm27xx/patches-5.4/950-0440-overlays-sc16ic750-i2c-Fix-xtal-parameter.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0440-of-address-Introduce-of_get_next_dma_parent-helper.patch [moved from target/linux/bcm27xx/patches-5.4/950-0441-of-address-Introduce-of_get_next_dma_parent-helper.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0441-of-address-Follow-DMA-parent-for-dma-coherent.patch [moved from target/linux/bcm27xx/patches-5.4/950-0442-of-address-Follow-DMA-parent-for-dma-coherent.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0442-of-Factor-out-addr-size-cells-parsing.patch [moved from target/linux/bcm27xx/patches-5.4/950-0443-of-Factor-out-addr-size-cells-parsing.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0443-of-address-Translate-dma-ranges-for-parent-nodes-mis.patch [moved from target/linux/bcm27xx/patches-5.4/950-0444-of-address-Translate-dma-ranges-for-parent-nodes-mis.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0444-of-Make-of_dma_get_range-work-on-bus-nodes.patch [moved from target/linux/bcm27xx/patches-5.4/950-0445-of-Make-of_dma_get_range-work-on-bus-nodes.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0445-arm64-mm-use-arm64_dma_phys_limit-instead-of-calling.patch [moved from target/linux/bcm27xx/patches-5.4/950-0446-arm64-mm-use-arm64_dma_phys_limit-instead-of-calling.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0446-arm64-rename-variables-used-to-calculate-ZONE_DMA32-.patch [moved from target/linux/bcm27xx/patches-5.4/950-0447-arm64-rename-variables-used-to-calculate-ZONE_DMA32-.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0447-arm64-use-both-ZONE_DMA-and-ZONE_DMA32.patch [moved from target/linux/bcm27xx/patches-5.4/950-0448-arm64-use-both-ZONE_DMA-and-ZONE_DMA32.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0448-mm-refresh-ZONE_DMA-and-ZONE_DMA32-comments-in-enum-.patch [moved from target/linux/bcm27xx/patches-5.4/950-0449-mm-refresh-ZONE_DMA-and-ZONE_DMA32-comments-in-enum-.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0449-resource-Add-a-resource_list_first_type-helper.patch [moved from target/linux/bcm27xx/patches-5.4/950-0450-resource-Add-a-resource_list_first_type-helper.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0450-dma-direct-turn-ARCH_ZONE_DMA_BITS-into-a-variable.patch [moved from target/linux/bcm27xx/patches-5.4/950-0451-dma-direct-turn-ARCH_ZONE_DMA_BITS-into-a-variable.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0451-x86-PCI-sta2x11-use-default-DMA-address-translation.patch [moved from target/linux/bcm27xx/patches-5.4/950-0452-x86-PCI-sta2x11-use-default-DMA-address-translation.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0452-PCI-of-Add-inbound-resource-parsing-to-helpers.patch [moved from target/linux/bcm27xx/patches-5.4/950-0453-PCI-of-Add-inbound-resource-parsing-to-helpers.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0453-dma-direct-unify-the-dma_capable-definitions.patch [moved from target/linux/bcm27xx/patches-5.4/950-0454-dma-direct-unify-the-dma_capable-definitions.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0454-dma-direct-avoid-a-forward-declaration-for-phys_to_d.patch [moved from target/linux/bcm27xx/patches-5.4/950-0455-dma-direct-avoid-a-forward-declaration-for-phys_to_d.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0455-dma-direct-exclude-dma_direct_map_resource-from-the-.patch [moved from target/linux/bcm27xx/patches-5.4/950-0456-dma-direct-exclude-dma_direct_map_resource-from-the-.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0456-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch [moved from target/linux/bcm27xx/patches-5.4/950-0457-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0457-ARM-dts-bcm2711-Enable-PCIe-controller.patch [moved from target/linux/bcm27xx/patches-5.4/950-0458-ARM-dts-bcm2711-Enable-PCIe-controller.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0458-PCI-brcmstb-Add-Broadcom-STB-PCIe-host-controller-dr.patch [moved from target/linux/bcm27xx/patches-5.4/950-0459-PCI-brcmstb-Add-Broadcom-STB-PCIe-host-controller-dr.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0459-PCI-brcmstb-Add-MSI-support.patch [moved from target/linux/bcm27xx/patches-5.4/950-0460-PCI-brcmstb-Add-MSI-support.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0460-PCI-brcmstb-Fix-build-on-32bit-ARM-platforms-with-ol.patch [moved from target/linux/bcm27xx/patches-5.4/950-0461-PCI-brcmstb-Fix-build-on-32bit-ARM-platforms-with-ol.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0461-bcm2711-rpi.dtsi-Use-upstream-pcie-node.patch [moved from target/linux/bcm27xx/patches-5.4/950-0462-bcm2711-rpi.dtsi-Use-upstream-pcie-node.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0462-media-dt-bindings-media-i2c-Add-IMX219-CMOS-sensor-b.patch [moved from target/linux/bcm27xx/patches-5.4/950-0463-media-dt-bindings-media-i2c-Add-IMX219-CMOS-sensor-b.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0463-media-i2c-Add-driver-for-Sony-IMX219-sensor.patch [moved from target/linux/bcm27xx/patches-5.4/950-0464-media-i2c-Add-driver-for-Sony-IMX219-sensor.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0464-overlays-imx219-Correct-link-frequency-to-match-the-.patch [moved from target/linux/bcm27xx/patches-5.4/950-0465-overlays-imx219-Correct-link-frequency-to-match-the-.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0465-Kbuild-Allow-.dtbo-overlays-to-be-built-adjust.patch [moved from target/linux/bcm27xx/patches-5.4/950-0466-Kbuild-Allow-.dtbo-overlays-to-be-built-adjust.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0466-media-ov5647-Fix-return-codes-from-ov5647_write-ov56.patch [moved from target/linux/bcm27xx/patches-5.4/950-0467-media-ov5647-Fix-return-codes-from-ov5647_write-ov56.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0467-media-ov5647-Add-basic-support-for-multiple-sensor-m.patch [moved from target/linux/bcm27xx/patches-5.4/950-0468-media-ov5647-Add-basic-support-for-multiple-sensor-m.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0468-media-ov5647-Add-V4L2-controls-for-analogue-gain-exp.patch [moved from target/linux/bcm27xx/patches-5.4/950-0469-media-ov5647-Add-V4L2-controls-for-analogue-gain-exp.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0469-media-ov5647-Add-extra-10-bit-sensor-modes.patch [moved from target/linux/bcm27xx/patches-5.4/950-0470-media-ov5647-Add-extra-10-bit-sensor-modes.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0470-media-ov5647-change-defaults-to-better-match-raw-cam.patch [moved from target/linux/bcm27xx/patches-5.4/950-0471-media-ov5647-change-defaults-to-better-match-raw-cam.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0471-drm-vc4-fkms-Change-crtc_state-structure-name-to-avo.patch [moved from target/linux/bcm27xx/patches-5.4/950-0472-drm-vc4-fkms-Change-crtc_state-structure-name-to-avo.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0472-drm-fourcc-Add-packed-10bit-YUV-4-2-0-format.patch [moved from target/linux/bcm27xx/patches-5.4/950-0473-drm-fourcc-Add-packed-10bit-YUV-4-2-0-format.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0473-drm-vc4-Add-DRM_FORMAT_P030-support-to-firmware-kms.patch [moved from target/linux/bcm27xx/patches-5.4/950-0474-drm-vc4-Add-DRM_FORMAT_P030-support-to-firmware-kms.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0474-gpio-ir-overlay-add-parameter-to-configure-signal-po.patch [moved from target/linux/bcm27xx/patches-5.4/950-0475-gpio-ir-overlay-add-parameter-to-configure-signal-po.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0475-Add-support-for-merus-amp-soundcard-and-ma120x0p-cod.patch [moved from target/linux/bcm27xx/patches-5.4/950-0476-Add-support-for-merus-amp-soundcard-and-ma120x0p-cod.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0476-ARM-dts-bcm2711-Add-32-bit-PMU-compatibility.patch [moved from target/linux/bcm27xx/patches-5.4/950-0477-ARM-dts-bcm2711-Add-32-bit-PMU-compatibility.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0477-ARM-dts-bcm271x-Use-a53-pmu-drop-RPI364.patch [moved from target/linux/bcm27xx/patches-5.4/950-0478-ARM-dts-bcm271x-Use-a53-pmu-drop-RPI364.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0478-net-bcmgenet-Clear-ID_MODE_DIS-in-EXT_RGMII_OOB_CTRL.patch [moved from target/linux/bcm27xx/patches-5.4/950-0479-net-bcmgenet-Clear-ID_MODE_DIS-in-EXT_RGMII_OOB_CTRL.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0479-drm-modes-parse_cmdline-Fix-possible-reference-past-.patch [moved from target/linux/bcm27xx/patches-5.4/950-0480-drm-modes-parse_cmdline-Fix-possible-reference-past-.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0480-drm-modes-parse_cmdline-Make-various-char-pointers-c.patch [moved from target/linux/bcm27xx/patches-5.4/950-0481-drm-modes-parse_cmdline-Make-various-char-pointers-c.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0481-drm-modes-parse_cmdline-Stop-parsing-extras-after-bp.patch [moved from target/linux/bcm27xx/patches-5.4/950-0482-drm-modes-parse_cmdline-Stop-parsing-extras-after-bp.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0482-drm-modes-parse_cmdline-Accept-extras-directly-after.patch [moved from target/linux/bcm27xx/patches-5.4/950-0483-drm-modes-parse_cmdline-Accept-extras-directly-after.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0483-drm-modes-parse_cmdline-Rework-drm_mode_parse_cmdlin.patch [moved from target/linux/bcm27xx/patches-5.4/950-0484-drm-modes-parse_cmdline-Rework-drm_mode_parse_cmdlin.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0484-drm-modes-parse_cmdline-Add-freestanding-argument-to.patch [moved from target/linux/bcm27xx/patches-5.4/950-0485-drm-modes-parse_cmdline-Add-freestanding-argument-to.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0485-drm-modes-parse_cmdline-Set-bpp-refresh_specified-af.patch [moved from target/linux/bcm27xx/patches-5.4/950-0486-drm-modes-parse_cmdline-Set-bpp-refresh_specified-af.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0486-drm-modes-parse_cmdline-Allow-specifying-stand-alone.patch [moved from target/linux/bcm27xx/patches-5.4/950-0487-drm-modes-parse_cmdline-Allow-specifying-stand-alone.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0487-drm-modes-parse_cmdline-Add-support-for-specifying-p.patch [moved from target/linux/bcm27xx/patches-5.4/950-0488-drm-modes-parse_cmdline-Add-support-for-specifying-p.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0488-drm-modes-parse_cmdline-Remove-some-unnecessary-code.patch [moved from target/linux/bcm27xx/patches-5.4/950-0489-drm-modes-parse_cmdline-Remove-some-unnecessary-code.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0489-drm-modes-parse_cmdline-Explicitly-memset-the-passed.patch [moved from target/linux/bcm27xx/patches-5.4/950-0490-drm-modes-parse_cmdline-Explicitly-memset-the-passed.patch with 100% similarity]
target/linux/bcm27xx/patches-5.4/950-0490-drm-v3d-Replace-wait_for-macros-to-remove-use-of-msl.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0491-Reduce-noise-from-rpi-poe-hat-fan.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0492-add-Sensirion-SPS30-to-i2c-sensor-overlay.patch [moved from target/linux/bcm27xx/patches-5.4/950-0491-add-Sensirion-SPS30-to-i2c-sensor-overlay.patch with 77% similarity]
target/linux/bcm27xx/patches-5.4/950-0493-media-add-V4L2_CTRL_TYPE_AREA-control-type.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0494-media-add-V4L2_CID_UNIT_CELL_SIZE-control.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0495-media-v4l2-common-add-pixel-encoding-support.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0496-media-v4l2-common-add-RGB565-and-RGB55-to-v4l2_forma.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0497-media-vb2-add-V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0498-media-v4l2-mem2mem-support-held-capture-buffers.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0499-media-videodev2.h-add-V4L2_DEC_CMD_FLUSH.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0500-media-v4l2-mem2mem-add-stateless_-try_-decoder_cmd-i.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0501-media-v4l2-mem2mem-add-new_frame-detection.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0502-media-Documentation-media-Document-V4L2_CTRL_TYPE_AR.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0503-media-v4l-Add-definitions-for-HEVC-stateless-decodin.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0504-media-v4l2-mem2mem-Fix-hold-buf-flag-checks.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0505-media-pixfmt-Document-the-HEVC-slice-pixel-format.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0506-media-uapi-hevc-Add-scaling-matrix-control.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0507-media-uapi-hevc-Add-segment-address-field.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0508-media-hevc_ctrls-Add-slice-param-dependent-slice-seg.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0509-media-uapi-Add-hevc-ctrls-for-WPP-decoding.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0510-media-videodev2.h-Add-a-format-for-column-YUV4-2-0-m.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0511-media-v4l2-mem2mem-allow-request-job-buffer-processi.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0512-media-dt-bindings-media-Add-binding-for-the-Raspberr.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0513-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch [new file with mode: 0644]
target/linux/bcm27xx/patches-5.4/950-0514-dtoverlays-Add-overlay-to-enable-the-HEVC-V4L2-drive.patch [new file with mode: 0644]

index 6e7d91e3f41d09b461d296353ef90911b5f7e9ee..1f12a36ee72c8e61ea1b19555e8cca01a2c1cbfd 100644 (file)
@@ -243,6 +243,8 @@ CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
 CONFIG_HAVE_UID16=y
 CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
 CONFIG_HW_CONSOLE=y
+CONFIG_HZ=100
+CONFIG_HZ_100=y
 CONFIG_HZ_FIXED=0
 CONFIG_I2C=y
 # CONFIG_I2C_BCM2708 is not set
index e6ce625d1be143b4694df4d1b1e6a1555bbb1a27..c1630c599d8d49dba7c61729023ad9d5c0b5645a 100644 (file)
@@ -311,6 +311,8 @@ CONFIG_HIGHMEM=y
 CONFIG_HIGHPTE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_HW_CONSOLE=y
+CONFIG_HZ=100
+CONFIG_HZ_100=y
 CONFIG_HZ_FIXED=0
 CONFIG_I2C=y
 # CONFIG_I2C_BCM2708 is not set
index a04d519b137cf61614c7bafb4b7ad34e0d723180..4a5b491b0fcdfa8b09661d8f5e1beb13843c08ef 100644 (file)
@@ -367,6 +367,8 @@ CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
 CONFIG_HOLES_IN_ZONE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_HW_CONSOLE=y
+CONFIG_HZ=250
+CONFIG_HZ_250=y
 CONFIG_I2C=y
 # CONFIG_I2C_BCM2708 is not set
 CONFIG_I2C_BOARDINFO=y
index 0b0e49ad62dd14ae8581a2f5f97f28ce390c30b6..abf6e8844a1048ae6c4dca32f9bc2204a50044a5 100644 (file)
@@ -373,6 +373,8 @@ CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
 CONFIG_HOLES_IN_ZONE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_HW_CONSOLE=y
+CONFIG_HZ=250
+CONFIG_HZ_250=y
 CONFIG_I2C=y
 # CONFIG_I2C_BCM2708 is not set
 CONFIG_I2C_BOARDINFO=y
diff --git a/target/linux/bcm27xx/patches-5.4/950-0490-drm-v3d-Replace-wait_for-macros-to-remove-use-of-msl.patch b/target/linux/bcm27xx/patches-5.4/950-0490-drm-v3d-Replace-wait_for-macros-to-remove-use-of-msl.patch
new file mode 100644 (file)
index 0000000..830bc11
--- /dev/null
@@ -0,0 +1,89 @@
+From 12b60ef71cc005ee7290f692169d46a7e78df01a Mon Sep 17 00:00:00 2001
+From: Yukimasa Sugizaki <4298265+Terminus-IMRC@users.noreply.github.com>
+Date: Fri, 20 Mar 2020 19:01:23 +0900
+Subject: [PATCH] drm/v3d: Replace wait_for macros to remove use of
+ msleep (#3510)
+
+commit 9daee6141cc9c75b09659b02b1cb9eeb2f5e16cc upstream.
+
+The wait_for macro's for Broadcom V3D driver used msleep, which is
+inappropriate due to its inaccuracy at low values (minimum wait time
+is about 30ms on the Raspberry Pi).  This sleep was triggering in
+v3d_clean_caches(), causing us to only be able to dispatch ~33 compute
+jobs per second.
+
+This patch replaces the macro with the one from the Intel i915 version
+which uses usleep_range to provide more accurate waits.
+
+v2: Split from the vc4 patch so that we can confidently apply to
+    stable (by anholt)
+
+Signed-off-by: James Hughes <james.hughes@raspberrypi.com>
+Signed-off-by: Eric Anholt <eric@anholt.net>
+Link: https://patchwork.freedesktop.org/patch/msgid/20200217153145.13780-1-james.hughes@raspberrypi.com
+Link: https://github.com/raspberrypi/linux/issues/3460
+Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
+
+Co-authored-by: James Hughes <james.hughes@raspberrypi.com>
+---
+ drivers/gpu/drm/v3d/v3d_drv.h | 41 ++++++++++++++++++++++++-----------
+ 1 file changed, 28 insertions(+), 13 deletions(-)
+
+--- a/drivers/gpu/drm/v3d/v3d_drv.h
++++ b/drivers/gpu/drm/v3d/v3d_drv.h
+@@ -260,27 +260,42 @@ struct v3d_csd_job {
+ };
+ /**
+- * _wait_for - magic (register) wait macro
++ * __wait_for - magic wait macro
+  *
+- * Does the right thing for modeset paths when run under kdgb or similar atomic
+- * contexts. Note that it's important that we check the condition again after
+- * having timed out, since the timeout could be due to preemption or similar and
+- * we've never had a chance to check the condition before the timeout.
++ * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
++ * important that we check the condition again after having timed out, since the
++ * timeout could be due to preemption or similar and we've never had a chance to
++ * check the condition before the timeout.
+  */
+-#define wait_for(COND, MS) ({ \
+-      unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;   \
+-      int ret__ = 0;                                                  \
+-      while (!(COND)) {                                               \
+-              if (time_after(jiffies, timeout__)) {                   \
+-                      if (!(COND))                                    \
+-                              ret__ = -ETIMEDOUT;                     \
++#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
++      const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
++      long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
++      int ret__;                                                      \
++      might_sleep();                                                  \
++      for (;;) {                                                      \
++              const bool expired__ = ktime_after(ktime_get_raw(), end__); \
++              OP;                                                     \
++              /* Guarantee COND check prior to timeout */             \
++              barrier();                                              \
++              if (COND) {                                             \
++                      ret__ = 0;                                      \
+                       break;                                          \
+               }                                                       \
+-              msleep(1);                                      \
++              if (expired__) {                                        \
++                      ret__ = -ETIMEDOUT;                             \
++                      break;                                          \
++              }                                                       \
++              usleep_range(wait__, wait__ * 2);                       \
++              if (wait__ < (Wmax))                                    \
++                      wait__ <<= 1;                                   \
+       }                                                               \
+       ret__;                                                          \
+ })
++#define _wait_for(COND, US, Wmin, Wmax)       __wait_for(, (COND), (US), (Wmin), \
++                                                 (Wmax))
++#define wait_for(COND, MS)            _wait_for((COND), (MS) * 1000, 10, 1000)
++
+ static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+ {
+       /* nsecs_to_jiffies64() does not guard against overflow */
diff --git a/target/linux/bcm27xx/patches-5.4/950-0491-Reduce-noise-from-rpi-poe-hat-fan.patch b/target/linux/bcm27xx/patches-5.4/950-0491-Reduce-noise-from-rpi-poe-hat-fan.patch
new file mode 100644 (file)
index 0000000..7c50843
--- /dev/null
@@ -0,0 +1,96 @@
+From 863dace20e48954a7e013a2e88e27c692ce165b0 Mon Sep 17 00:00:00 2001
+From: Nick B <nick@pelagiris.org>
+Date: Mon, 9 Mar 2020 09:05:39 -0400
+Subject: [PATCH] Reduce noise from rpi poe hat fan
+
+This adds 2 extra states, at 40c and 45c, with PWM of 31 and 63 (out
+of 255) for the rpi poe hat fan.  This significantly improves user
+experience by providing a smoother ramp up of the fan, from a pwm 0
+to 31 to 63 then finally to 150, and additionally makes it very easy
+for users to further tweak the values as needed for their specific
+application.
+
+The possible concerns I have are that a hysteresis of 2000 (2c) could
+be too narrow, and that running the fan more at a reduced temperature
+(40000 - 40c) could cause problems.
+
+Signed-off-by: Nick B <nick@pelagiris.org>
+---
+ .../arm/boot/dts/overlays/rpi-poe-overlay.dts | 35 ++++++++++++++++---
+ 1 file changed, 30 insertions(+), 5 deletions(-)
+
+--- a/arch/arm/boot/dts/overlays/rpi-poe-overlay.dts
++++ b/arch/arm/boot/dts/overlays/rpi-poe-overlay.dts
+@@ -14,9 +14,9 @@
+                               compatible = "raspberrypi,rpi-poe-fan";
+                               firmware = <&firmware>;
+                               cooling-min-state = <0>;
+-                              cooling-max-state = <2>;
++                              cooling-max-state = <4>;
+                               #cooling-cells = <2>;
+-                              cooling-levels = <0 150 255>;
++                              cooling-levels = <0 31 63 150 255>;
+                               status = "okay";
+                       };
+               };
+@@ -27,12 +27,21 @@
+               __overlay__ {
+                       trips {
+                               trip0: trip0 {
+-                                      temperature = <50000>;
+-                                      hysteresis = <5000>;
++                                      temperature = <40000>;
++                                      hysteresis = <2000>;
+                                       type = "active";
+                               };
+                               trip1: trip1 {
+-
++                                      temperature = <45000>;
++                                      hysteresis = <2000>;
++                                      type = "active";
++                              };
++                              trip2: trip2 {
++                                      temperature = <50000>;
++                                      hysteresis = <2000>;
++                                      type = "active";
++                              };
++                              trip3: trip3 {
+                                       temperature = <55000>;
+                                       hysteresis = <5000>;
+                                       type = "active";
+@@ -47,6 +56,14 @@
+                                       trip = <&trip1>;
+                                       cooling-device = <&fan0 1 2>;
+                               };
++                              map2 {
++                                      trip = <&trip2>;
++                                      cooling-device = <&fan0 2 3>;
++                              };
++                              map3 {
++                                      trip = <&trip3>;
++                                      cooling-device = <&fan0 3 4>;
++                              };
+                       };
+               };
+       };
+@@ -58,6 +75,10 @@
+                       poe_fan_temp0_hyst =    <&trip0>,"hysteresis:0";
+                       poe_fan_temp1 =         <&trip1>,"temperature:0";
+                       poe_fan_temp1_hyst =    <&trip1>,"hysteresis:0";
++                      poe_fan_temp2 =         <&trip2>,"temperature:0";
++                      poe_fan_temp2_hyst =    <&trip2>,"hysteresis:0";
++                      poe_fan_temp3 =         <&trip3>,"temperature:0";
++                      poe_fan_temp3_hyst =    <&trip3>,"hysteresis:0";
+               };
+       };
+@@ -66,5 +87,9 @@
+               poe_fan_temp0_hyst =    <&trip0>,"hysteresis:0";
+               poe_fan_temp1 =         <&trip1>,"temperature:0";
+               poe_fan_temp1_hyst =    <&trip1>,"hysteresis:0";
++              poe_fan_temp2 =         <&trip2>,"temperature:0";
++              poe_fan_temp2_hyst =    <&trip2>,"hysteresis:0";
++              poe_fan_temp3 =         <&trip3>,"temperature:0";
++              poe_fan_temp3_hyst =    <&trip3>,"hysteresis:0";
+       };
+ };
similarity index 77%
rename from target/linux/bcm27xx/patches-5.4/950-0491-add-Sensirion-SPS30-to-i2c-sensor-overlay.patch
rename to target/linux/bcm27xx/patches-5.4/950-0492-add-Sensirion-SPS30-to-i2c-sensor-overlay.patch
index e85329aa0837d25cde8febd51633be71bf668384..72941b59bbaf492062222669c05821a1a796c49c 100644 (file)
@@ -1,4 +1,4 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From 60f3874207c50db6f6d9dbac40977843cb77acd5 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= <ynezz@true.cz>
 Date: Sat, 7 Mar 2020 22:37:52 +0100
 Subject: [PATCH] add Sensirion SPS30 to i2c-sensor overlay
@@ -10,12 +10,14 @@ Add support for Sensirion SPS30 particulate matter sensor with fixed
 address 0x69.
 
 Signed-off-by: Petr Štetiar <ynezz@true.cz>
+---
+ arch/arm/boot/dts/overlays/README                 |  3 +++
+ arch/arm/boot/dts/overlays/i2c-sensor-overlay.dts | 15 +++++++++++++++
+ 2 files changed, 18 insertions(+)
 
-diff --git a/arch/arm/boot/dts/overlays/README b/arch/arm/boot/dts/overlays/README
-index 62ad35f78bad..0d7d00ac92c4 100644
 --- a/arch/arm/boot/dts/overlays/README
 +++ b/arch/arm/boot/dts/overlays/README
-@@ -1261,6 +1261,9 @@ Params: addr                    Set the address for the BME280, BME680, BMP280,
+@@ -1261,6 +1261,9 @@ Params: addr                    Set the
          si7020                  Select the Silicon Labs Si7013/20/21 humidity/
                                  temperature sensor
  
@@ -25,8 +27,6 @@ index 62ad35f78bad..0d7d00ac92c4 100644
          tmp102                  Select the Texas Instruments TMP102 temp sensor
                                  Valid addresses 0x48-0x4b, default 0x48
  
-diff --git a/arch/arm/boot/dts/overlays/i2c-sensor-overlay.dts b/arch/arm/boot/dts/overlays/i2c-sensor-overlay.dts
-index 40881d72a157..ce97837b0db5 100644
 --- a/arch/arm/boot/dts/overlays/i2c-sensor-overlay.dts
 +++ b/arch/arm/boot/dts/overlays/i2c-sensor-overlay.dts
 @@ -231,6 +231,20 @@
diff --git a/target/linux/bcm27xx/patches-5.4/950-0493-media-add-V4L2_CTRL_TYPE_AREA-control-type.patch b/target/linux/bcm27xx/patches-5.4/950-0493-media-add-V4L2_CTRL_TYPE_AREA-control-type.patch
new file mode 100644 (file)
index 0000000..265533e
--- /dev/null
@@ -0,0 +1,157 @@
+From 4af6218f1d01e5ae54dc43e4bd2421617c777570 Mon Sep 17 00:00:00 2001
+From: Ricardo Ribalda Delgado <ribalda@kernel.org>
+Date: Mon, 7 Oct 2019 12:06:31 -0300
+Subject: [PATCH] media: add V4L2_CTRL_TYPE_AREA control type
+
+Commit d1dc49370f8371b00e682ac409aa1987ce641e93 upstream.
+
+This type contains the width and the height of a rectangular area.
+
+Reviewed-by: Jacopo Mondi <jacopo@jmondi.org>
+Signed-off-by: Ricardo Ribalda Delgado <ribalda@kernel.org>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-ctrls.c | 21 ++++++++++++++
+ include/media/v4l2-ctrls.h           | 42 ++++++++++++++++++++++++++++
+ include/uapi/linux/videodev2.h       |  6 ++++
+ 3 files changed, 69 insertions(+)
+
+--- a/drivers/media/v4l2-core/v4l2-ctrls.c
++++ b/drivers/media/v4l2-core/v4l2-ctrls.c
+@@ -1673,6 +1673,7 @@ static int std_validate_compound(const s
+ {
+       struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
+       struct v4l2_ctrl_vp8_frame_header *p_vp8_frame_header;
++      struct v4l2_area *area;
+       void *p = ptr.p + idx * ctrl->elem_size;
+       switch ((u32)ctrl->type) {
+@@ -1749,6 +1750,11 @@ static int std_validate_compound(const s
+               zero_padding(p_vp8_frame_header->entropy_header);
+               zero_padding(p_vp8_frame_header->coder_state);
+               break;
++      case V4L2_CTRL_TYPE_AREA:
++              area = p;
++              if (!area->width || !area->height)
++                      return -EINVAL;
++              break;
+       default:
+               return -EINVAL;
+       }
+@@ -2422,6 +2428,9 @@ static struct v4l2_ctrl *v4l2_ctrl_new(s
+       case V4L2_CTRL_TYPE_VP8_FRAME_HEADER:
+               elem_size = sizeof(struct v4l2_ctrl_vp8_frame_header);
+               break;
++      case V4L2_CTRL_TYPE_AREA:
++              elem_size = sizeof(struct v4l2_area);
++              break;
+       default:
+               if (type < V4L2_CTRL_COMPOUND_TYPES)
+                       elem_size = sizeof(s32);
+@@ -4086,6 +4095,18 @@ int __v4l2_ctrl_s_ctrl_string(struct v4l
+ }
+ EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_string);
++int __v4l2_ctrl_s_ctrl_area(struct v4l2_ctrl *ctrl,
++                          const struct v4l2_area *area)
++{
++      lockdep_assert_held(ctrl->handler->lock);
++
++      /* It's a driver bug if this happens. */
++      WARN_ON(ctrl->type != V4L2_CTRL_TYPE_AREA);
++      *ctrl->p_new.p_area = *area;
++      return set_ctrl(NULL, ctrl, 0);
++}
++EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_area);
++
+ void v4l2_ctrl_request_complete(struct media_request *req,
+                               struct v4l2_ctrl_handler *main_hdl)
+ {
+--- a/include/media/v4l2-ctrls.h
++++ b/include/media/v4l2-ctrls.h
+@@ -50,6 +50,7 @@ struct poll_table_struct;
+  * @p_h264_slice_params:      Pointer to a struct v4l2_ctrl_h264_slice_params.
+  * @p_h264_decode_params:     Pointer to a struct v4l2_ctrl_h264_decode_params.
+  * @p_vp8_frame_header:               Pointer to a VP8 frame header structure.
++ * @p_area:                   Pointer to an area.
+  * @p:                                Pointer to a compound value.
+  */
+ union v4l2_ctrl_ptr {
+@@ -68,6 +69,7 @@ union v4l2_ctrl_ptr {
+       struct v4l2_ctrl_h264_slice_params *p_h264_slice_params;
+       struct v4l2_ctrl_h264_decode_params *p_h264_decode_params;
+       struct v4l2_ctrl_vp8_frame_header *p_vp8_frame_header;
++      struct v4l2_area *p_area;
+       void *p;
+ };
+@@ -1063,6 +1065,46 @@ static inline int v4l2_ctrl_s_ctrl_strin
+       v4l2_ctrl_unlock(ctrl);
+       return rval;
++}
++
++/**
++ * __v4l2_ctrl_s_ctrl_area() - Unlocked variant of v4l2_ctrl_s_ctrl_area().
++ *
++ * @ctrl:     The control.
++ * @area:     The new area.
++ *
++ * This sets the control's new area safely by going through the control
++ * framework. This function assumes the control's handler is already locked,
++ * allowing it to be used from within the &v4l2_ctrl_ops functions.
++ *
++ * This function is for area type controls only.
++ */
++int __v4l2_ctrl_s_ctrl_area(struct v4l2_ctrl *ctrl,
++                          const struct v4l2_area *area);
++
++/**
++ * v4l2_ctrl_s_ctrl_area() - Helper function to set a control's area value
++ *     from within a driver.
++ *
++ * @ctrl:     The control.
++ * @area:     The new area.
++ *
++ * This sets the control's new area safely by going through the control
++ * framework. This function will lock the control's handler, so it cannot be
++ * used from within the &v4l2_ctrl_ops functions.
++ *
++ * This function is for area type controls only.
++ */
++static inline int v4l2_ctrl_s_ctrl_area(struct v4l2_ctrl *ctrl,
++                                      const struct v4l2_area *area)
++{
++      int rval;
++
++      v4l2_ctrl_lock(ctrl);
++      rval = __v4l2_ctrl_s_ctrl_area(ctrl, area);
++      v4l2_ctrl_unlock(ctrl);
++
++      return rval;
+ }
+ /* Internal helper functions that deal with control events. */
+--- a/include/uapi/linux/videodev2.h
++++ b/include/uapi/linux/videodev2.h
+@@ -427,6 +427,11 @@ struct v4l2_fract {
+       __u32   denominator;
+ };
++struct v4l2_area {
++      __u32   width;
++      __u32   height;
++};
++
+ /**
+   * struct v4l2_capability - Describes V4L2 device caps returned by VIDIOC_QUERYCAP
+   *
+@@ -1725,6 +1730,7 @@ enum v4l2_ctrl_type {
+       V4L2_CTRL_TYPE_U8            = 0x0100,
+       V4L2_CTRL_TYPE_U16           = 0x0101,
+       V4L2_CTRL_TYPE_U32           = 0x0102,
++      V4L2_CTRL_TYPE_AREA          = 0x0106,
+ };
+ /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
diff --git a/target/linux/bcm27xx/patches-5.4/950-0494-media-add-V4L2_CID_UNIT_CELL_SIZE-control.patch b/target/linux/bcm27xx/patches-5.4/950-0494-media-add-V4L2_CID_UNIT_CELL_SIZE-control.patch
new file mode 100644 (file)
index 0000000..0c860c7
--- /dev/null
@@ -0,0 +1,52 @@
+From 12eba72027d415bb3dfd4c8124813a322b27c793 Mon Sep 17 00:00:00 2001
+From: Ricardo Ribalda Delgado <ribalda@kernel.org>
+Date: Mon, 7 Oct 2019 12:06:33 -0300
+Subject: [PATCH] media: add V4L2_CID_UNIT_CELL_SIZE control
+
+Commit 61fd036d01111679b01e4b92e6bd0cdd33809aea upstream.
+
+This control returns the unit cell size in nanometres. The struct provides
+the width and the height in separate fields to take into consideration
+asymmetric pixels and/or hardware binning.
+This control is required for automatic calibration of sensors/cameras.
+
+Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
+Signed-off-by: Ricardo Ribalda Delgado <ribalda@kernel.org>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-ctrls.c | 5 +++++
+ include/uapi/linux/v4l2-controls.h   | 1 +
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/media/v4l2-core/v4l2-ctrls.c
++++ b/drivers/media/v4l2-core/v4l2-ctrls.c
+@@ -995,6 +995,7 @@ const char *v4l2_ctrl_get_name(u32 id)
+       case V4L2_CID_AUTO_FOCUS_RANGE:         return "Auto Focus, Range";
+       case V4L2_CID_PAN_SPEED:                return "Pan, Speed";
+       case V4L2_CID_TILT_SPEED:               return "Tilt, Speed";
++      case V4L2_CID_UNIT_CELL_SIZE:           return "Unit Cell Size";
+       /* FM Radio Modulator controls */
+       /* Keep the order of the 'case's the same as in v4l2-controls.h! */
+@@ -1376,6 +1377,10 @@ void v4l2_ctrl_fill(u32 id, const char *
+       case V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER:
+               *type = V4L2_CTRL_TYPE_VP8_FRAME_HEADER;
+               break;
++      case V4L2_CID_UNIT_CELL_SIZE:
++              *type = V4L2_CTRL_TYPE_AREA;
++              *flags |= V4L2_CTRL_FLAG_READ_ONLY;
++              break;
+       default:
+               *type = V4L2_CTRL_TYPE_INTEGER;
+               break;
+--- a/include/uapi/linux/v4l2-controls.h
++++ b/include/uapi/linux/v4l2-controls.h
+@@ -1035,6 +1035,7 @@ enum v4l2_jpeg_chroma_subsampling {
+ #define V4L2_CID_TEST_PATTERN_GREENR          (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 5)
+ #define V4L2_CID_TEST_PATTERN_BLUE            (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 6)
+ #define V4L2_CID_TEST_PATTERN_GREENB          (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 7)
++#define V4L2_CID_UNIT_CELL_SIZE                       (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 8)
+ /* Image processing controls */
diff --git a/target/linux/bcm27xx/patches-5.4/950-0495-media-v4l2-common-add-pixel-encoding-support.patch b/target/linux/bcm27xx/patches-5.4/950-0495-media-v4l2-common-add-pixel-encoding-support.patch
new file mode 100644 (file)
index 0000000..aa127ab
--- /dev/null
@@ -0,0 +1,228 @@
+From c63ea6a840ad87e32239eb6b771ac8bbc3279b54 Mon Sep 17 00:00:00 2001
+From: Benoit Parrot <bparrot@ti.com>
+Date: Mon, 7 Oct 2019 12:10:07 -0300
+Subject: [PATCH] media: v4l2-common: add pixel encoding support
+
+Commit d5a897c8428b38053df4b427a4277b1a0722bfa0 upstream.
+
+It is often useful to figure out if a pixel_format is either YUV or RGB,
+especially for drivers that can perform the pixel encoding conversion.
+
+Instead of having each driver implement its own "is_this_yuv/rgb"
+function based on a restricted set of pixel values, it is better to do
+this in a centralized manner.
+
+We therefore add a pixel_enc member to the v4l2_format_info structure to
+quickly identify the related pixel encoding.
+And add helper functions to check pixel encoding.
+
+Signed-off-by: Benoit Parrot <bparrot@ti.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-common.c | 126 +++++++++++++-------------
+ include/media/v4l2-common.h           |  33 ++++++-
+ 2 files changed, 95 insertions(+), 64 deletions(-)
+
+--- a/drivers/media/v4l2-core/v4l2-common.c
++++ b/drivers/media/v4l2-core/v4l2-common.c
+@@ -236,77 +236,77 @@ const struct v4l2_format_info *v4l2_form
+ {
+       static const struct v4l2_format_info formats[] = {
+               /* RGB formats */
+-              { .format = V4L2_PIX_FMT_BGR24,   .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_RGB24,   .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_HSV24,   .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_BGR32,   .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_XBGR32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_BGRX32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_RGB32,   .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_XRGB32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_RGBX32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_HSV32,   .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_ARGB32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_RGBA32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_ABGR32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_BGRA32,  .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_GREY,    .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_BGR24,   .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_RGB24,   .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_HSV24,   .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_BGR32,   .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_XBGR32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_BGRX32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_RGB32,   .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_XRGB32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_RGBX32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_HSV32,   .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_ARGB32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_RGBA32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_ABGR32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_BGRA32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_GREY,    .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+               /* YUV packed formats */
+-              { .format = V4L2_PIX_FMT_YUYV,    .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_YVYU,    .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_UYVY,    .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_VYUY,    .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_YUYV,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_YVYU,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_UYVY,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_VYUY,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+               /* YUV planar formats */
+-              { .format = V4L2_PIX_FMT_NV12,    .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_NV21,    .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_NV16,    .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_NV61,    .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_NV24,    .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_NV42,    .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-
+-              { .format = V4L2_PIX_FMT_YUV410,  .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 4, .vdiv = 4 },
+-              { .format = V4L2_PIX_FMT_YVU410,  .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 4, .vdiv = 4 },
+-              { .format = V4L2_PIX_FMT_YUV411P, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 4, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_YUV420,  .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_YVU420,  .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_YUV422P, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_NV12,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_NV21,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_NV16,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_NV61,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_NV24,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_NV42,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++
++              { .format = V4L2_PIX_FMT_YUV410,  .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 4, .vdiv = 4 },
++              { .format = V4L2_PIX_FMT_YVU410,  .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 4, .vdiv = 4 },
++              { .format = V4L2_PIX_FMT_YUV411P, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 4, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_YUV420,  .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_YVU420,  .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_YUV422P, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 1 },
+               /* YUV planar formats, non contiguous variant */
+-              { .format = V4L2_PIX_FMT_YUV420M, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_YVU420M, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_YUV422M, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_YVU422M, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_YUV444M, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_YVU444M, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 1, .vdiv = 1 },
+-
+-              { .format = V4L2_PIX_FMT_NV12M,   .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_NV21M,   .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
+-              { .format = V4L2_PIX_FMT_NV16M,   .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_NV61M,   .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_YUV420M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_YVU420M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_YUV422M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_YVU422M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_YUV444M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_YVU444M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .hdiv = 1, .vdiv = 1 },
++
++              { .format = V4L2_PIX_FMT_NV12M,   .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_NV21M,   .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 2 },
++              { .format = V4L2_PIX_FMT_NV16M,   .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_NV61M,   .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .hdiv = 2, .vdiv = 1 },
+               /* Bayer RGB formats */
+-              { .format = V4L2_PIX_FMT_SBGGR8,        .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGBRG8,        .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGRBG8,        .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SRGGB8,        .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SBGGR10,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGBRG10,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGRBG10,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SRGGB10,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SBGGR10ALAW8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGBRG10ALAW8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGRBG10ALAW8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SRGGB10ALAW8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SBGGR10DPCM8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGBRG10DPCM8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGRBG10DPCM8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SRGGB10DPCM8,  .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SBGGR12,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGBRG12,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SGRBG12,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+-              { .format = V4L2_PIX_FMT_SRGGB12,       .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SBGGR8,        .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGBRG8,        .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGRBG8,        .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SRGGB8,        .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SBGGR10,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGBRG10,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGRBG10,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SRGGB10,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SBGGR10ALAW8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGBRG10ALAW8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGRBG10ALAW8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SRGGB10ALAW8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SBGGR10DPCM8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGBRG10DPCM8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGRBG10DPCM8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SRGGB10DPCM8,  .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SBGGR12,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGBRG12,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SGRBG12,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_SRGGB12,       .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+       };
+       unsigned int i;
+--- a/include/media/v4l2-common.h
++++ b/include/media/v4l2-common.h
+@@ -457,8 +457,24 @@ int v4l2_s_parm_cap(struct video_device
+ /* Pixel format and FourCC helpers */
+ /**
++ * enum v4l2_pixel_encoding - specifies the pixel encoding value
++ *
++ * @V4L2_PIXEL_ENC_UNKNOWN:   Pixel encoding is unknown/un-initialized
++ * @V4L2_PIXEL_ENC_YUV:               Pixel encoding is YUV
++ * @V4L2_PIXEL_ENC_RGB:               Pixel encoding is RGB
++ * @V4L2_PIXEL_ENC_BAYER:     Pixel encoding is Bayer
++ */
++enum v4l2_pixel_encoding {
++      V4L2_PIXEL_ENC_UNKNOWN = 0,
++      V4L2_PIXEL_ENC_YUV = 1,
++      V4L2_PIXEL_ENC_RGB = 2,
++      V4L2_PIXEL_ENC_BAYER = 3,
++};
++
++/**
+  * struct v4l2_format_info - information about a V4L2 format
+  * @format: 4CC format identifier (V4L2_PIX_FMT_*)
++ * @pixel_enc: Pixel encoding (see enum v4l2_pixel_encoding above)
+  * @mem_planes: Number of memory planes, which includes the alpha plane (1 to 4).
+  * @comp_planes: Number of component planes, which includes the alpha plane (1 to 4).
+  * @bpp: Array of per-plane bytes per pixel
+@@ -469,6 +485,7 @@ int v4l2_s_parm_cap(struct video_device
+  */
+ struct v4l2_format_info {
+       u32 format;
++      u8 pixel_enc;
+       u8 mem_planes;
+       u8 comp_planes;
+       u8 bpp[4];
+@@ -478,8 +495,22 @@ struct v4l2_format_info {
+       u8 block_h[4];
+ };
+-const struct v4l2_format_info *v4l2_format_info(u32 format);
++static inline bool v4l2_is_format_rgb(const struct v4l2_format_info *f)
++{
++      return f && f->pixel_enc == V4L2_PIXEL_ENC_RGB;
++}
++
++static inline bool v4l2_is_format_yuv(const struct v4l2_format_info *f)
++{
++      return f && f->pixel_enc == V4L2_PIXEL_ENC_YUV;
++}
++static inline bool v4l2_is_format_bayer(const struct v4l2_format_info *f)
++{
++      return f && f->pixel_enc == V4L2_PIXEL_ENC_BAYER;
++}
++
++const struct v4l2_format_info *v4l2_format_info(u32 format);
+ void v4l2_apply_frmsize_constraints(u32 *width, u32 *height,
+                                   const struct v4l2_frmsize_stepwise *frmsize);
+ int v4l2_fill_pixfmt(struct v4l2_pix_format *pixfmt, u32 pixelformat,
diff --git a/target/linux/bcm27xx/patches-5.4/950-0496-media-v4l2-common-add-RGB565-and-RGB55-to-v4l2_forma.patch b/target/linux/bcm27xx/patches-5.4/950-0496-media-v4l2-common-add-RGB565-and-RGB55-to-v4l2_forma.patch
new file mode 100644 (file)
index 0000000..0171cdf
--- /dev/null
@@ -0,0 +1,28 @@
+From 560f3a9051578499e72ce4b1beaedd007ff46f96 Mon Sep 17 00:00:00 2001
+From: Benoit Parrot <bparrot@ti.com>
+Date: Mon, 7 Oct 2019 12:10:08 -0300
+Subject: [PATCH] media: v4l2-common: add RGB565 and RGB555 to
+ v4l2_format_info
+
+Commit b373f84d77e1c409aacb4ff5bb5726c45fc8b166 upstream.
+
+Add RGB565 and RGB555 to the v4l2_format_info table.
+
+Signed-off-by: Benoit Parrot <bparrot@ti.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-common.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/media/v4l2-core/v4l2-common.c
++++ b/drivers/media/v4l2-core/v4l2-common.c
+@@ -251,6 +251,8 @@ const struct v4l2_format_info *v4l2_form
+               { .format = V4L2_PIX_FMT_ABGR32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+               { .format = V4L2_PIX_FMT_BGRA32,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+               { .format = V4L2_PIX_FMT_GREY,    .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_RGB565,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
++              { .format = V4L2_PIX_FMT_RGB555,  .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 1, .vdiv = 1 },
+               /* YUV packed formats */
+               { .format = V4L2_PIX_FMT_YUYV,    .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .hdiv = 2, .vdiv = 1 },
diff --git a/target/linux/bcm27xx/patches-5.4/950-0497-media-vb2-add-V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF.patch b/target/linux/bcm27xx/patches-5.4/950-0497-media-vb2-add-V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF.patch
new file mode 100644 (file)
index 0000000..b114aef
--- /dev/null
@@ -0,0 +1,184 @@
+From dfcdc4ed9a514cd5d77dd18c6527f257f8aaf378 Mon Sep 17 00:00:00 2001
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Date: Fri, 11 Oct 2019 06:32:40 -0300
+Subject: [PATCH] media: vb2: add V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF
+
+This patch adds support for the V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF
+flag.
+
+It also adds a new V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF
+capability.
+
+Drivers should set vb2_queue->subsystem_flags to
+VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF to indicate support
+for this flag.
+
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ Documentation/media/uapi/v4l/buffer.rst         | 13 +++++++++++++
+ Documentation/media/uapi/v4l/vidioc-reqbufs.rst |  6 ++++++
+ drivers/media/common/videobuf2/videobuf2-v4l2.c | 12 ++++++++++--
+ include/media/videobuf2-core.h                  |  3 +++
+ include/media/videobuf2-v4l2.h                  |  5 +++++
+ include/uapi/linux/videodev2.h                  | 13 ++++++++-----
+ 6 files changed, 45 insertions(+), 7 deletions(-)
+
+--- a/Documentation/media/uapi/v4l/buffer.rst
++++ b/Documentation/media/uapi/v4l/buffer.rst
+@@ -607,6 +607,19 @@ Buffer Flags
+       applications shall use this flag for output buffers if the data in
+       this buffer has not been created by the CPU but by some
+       DMA-capable unit, in which case caches have not been used.
++    * .. _`V4L2-BUF-FLAG-M2M-HOLD-CAPTURE-BUF`:
++
++      - ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF``
++      - 0x00000200
++      - Only valid if ``V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF`` is
++      set. It is typically used with stateless decoders where multiple
++      output buffers each decode to a slice of the decoded frame.
++      Applications can set this flag when queueing the output buffer
++      to prevent the driver from dequeueing the capture buffer after
++      the output buffer has been decoded (i.e. the capture buffer is
++      'held'). If the timestamp of this output buffer differs from that
++      of the previous output buffer, then that indicates the start of a
++      new frame and the previously held capture buffer is dequeued.
+     * .. _`V4L2-BUF-FLAG-LAST`:
+       - ``V4L2_BUF_FLAG_LAST``
+--- a/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
++++ b/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
+@@ -125,6 +125,7 @@ aborting or finishing any DMA in progres
+ .. _V4L2-BUF-CAP-SUPPORTS-DMABUF:
+ .. _V4L2-BUF-CAP-SUPPORTS-REQUESTS:
+ .. _V4L2-BUF-CAP-SUPPORTS-ORPHANED-BUFS:
++.. _V4L2-BUF-CAP-SUPPORTS-M2M-HOLD-CAPTURE-BUF:
+ .. cssclass:: longtable
+@@ -150,6 +151,11 @@ aborting or finishing any DMA in progres
+       - The kernel allows calling :ref:`VIDIOC_REQBUFS` while buffers are still
+         mapped or exported via DMABUF. These orphaned buffers will be freed
+         when they are unmapped or when the exported DMABUF fds are closed.
++    * - ``V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF``
++      - 0x00000020
++      - Only valid for stateless decoders. If set, then userspace can set the
++        ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` flag to hold off on returning the
++      capture buffer until the OUTPUT timestamp changes.
+ Return Value
+ ============
+--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
++++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
+@@ -49,8 +49,11 @@ module_param(debug, int, 0644);
+                                V4L2_BUF_FLAG_REQUEST_FD | \
+                                V4L2_BUF_FLAG_TIMESTAMP_MASK)
+ /* Output buffer flags that should be passed on to the driver */
+-#define V4L2_BUFFER_OUT_FLAGS (V4L2_BUF_FLAG_PFRAME | V4L2_BUF_FLAG_BFRAME | \
+-                               V4L2_BUF_FLAG_KEYFRAME | V4L2_BUF_FLAG_TIMECODE)
++#define V4L2_BUFFER_OUT_FLAGS (V4L2_BUF_FLAG_PFRAME | \
++                               V4L2_BUF_FLAG_BFRAME | \
++                               V4L2_BUF_FLAG_KEYFRAME | \
++                               V4L2_BUF_FLAG_TIMECODE | \
++                               V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF)
+ /*
+  * __verify_planes_array() - verify that the planes array passed in struct
+@@ -194,6 +197,7 @@ static int vb2_fill_vb2_v4l2_buffer(stru
+       }
+       vbuf->sequence = 0;
+       vbuf->request_fd = -1;
++      vbuf->is_held = false;
+       if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) {
+               switch (b->memory) {
+@@ -321,6 +325,8 @@ static int vb2_fill_vb2_v4l2_buffer(stru
+                */
+               vbuf->flags &= ~V4L2_BUF_FLAG_TIMECODE;
+               vbuf->field = b->field;
++              if (!(q->subsystem_flags & VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF))
++                      vbuf->flags &= ~V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
+       } else {
+               /* Zero any output buffer flags as this is a capture buffer */
+               vbuf->flags &= ~V4L2_BUFFER_OUT_FLAGS;
+@@ -654,6 +660,8 @@ static void fill_buf_caps(struct vb2_que
+               *caps |= V4L2_BUF_CAP_SUPPORTS_USERPTR;
+       if (q->io_modes & VB2_DMABUF)
+               *caps |= V4L2_BUF_CAP_SUPPORTS_DMABUF;
++      if (q->subsystem_flags & VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
++              *caps |= V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF;
+ #ifdef CONFIG_MEDIA_CONTROLLER_REQUEST_API
+       if (q->supports_requests)
+               *caps |= V4L2_BUF_CAP_SUPPORTS_REQUESTS;
+--- a/include/media/videobuf2-core.h
++++ b/include/media/videobuf2-core.h
+@@ -505,6 +505,8 @@ struct vb2_buf_ops {
+  * @buf_ops:  callbacks to deliver buffer information.
+  *            between user-space and kernel-space.
+  * @drv_priv: driver private data.
++ * @subsystem_flags: Flags specific to the subsystem (V4L2/DVB/etc.). Not used
++ *            by the vb2 core.
+  * @buf_struct_size: size of the driver-specific buffer structure;
+  *            "0" indicates the driver doesn't want to use a custom buffer
+  *            structure type. for example, ``sizeof(struct vb2_v4l2_buffer)``
+@@ -571,6 +573,7 @@ struct vb2_queue {
+       const struct vb2_buf_ops        *buf_ops;
+       void                            *drv_priv;
++      u32                             subsystem_flags;
+       unsigned int                    buf_struct_size;
+       u32                             timestamp_flags;
+       gfp_t                           gfp_flags;
+--- a/include/media/videobuf2-v4l2.h
++++ b/include/media/videobuf2-v4l2.h
+@@ -33,6 +33,7 @@
+  * @timecode: frame timecode.
+  * @sequence: sequence count of this frame.
+  * @request_fd:       the request_fd associated with this buffer
++ * @is_held:  if true, then this capture buffer was held
+  * @planes:   plane information (userptr/fd, length, bytesused, data_offset).
+  *
+  * Should contain enough information to be able to cover all the fields
+@@ -46,9 +47,13 @@ struct vb2_v4l2_buffer {
+       struct v4l2_timecode    timecode;
+       __u32                   sequence;
+       __s32                   request_fd;
++      bool                    is_held;
+       struct vb2_plane        planes[VB2_MAX_PLANES];
+ };
++/* VB2 V4L2 flags as set in vb2_queue.subsystem_flags */
++#define VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF (1 << 0)
++
+ /*
+  * to_vb2_v4l2_buffer() - cast struct vb2_buffer * to struct vb2_v4l2_buffer *
+  */
+--- a/include/uapi/linux/videodev2.h
++++ b/include/uapi/linux/videodev2.h
+@@ -925,11 +925,12 @@ struct v4l2_requestbuffers {
+ };
+ /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */
+-#define V4L2_BUF_CAP_SUPPORTS_MMAP    (1 << 0)
+-#define V4L2_BUF_CAP_SUPPORTS_USERPTR (1 << 1)
+-#define V4L2_BUF_CAP_SUPPORTS_DMABUF  (1 << 2)
+-#define V4L2_BUF_CAP_SUPPORTS_REQUESTS        (1 << 3)
+-#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4)
++#define V4L2_BUF_CAP_SUPPORTS_MMAP                    (1 << 0)
++#define V4L2_BUF_CAP_SUPPORTS_USERPTR                 (1 << 1)
++#define V4L2_BUF_CAP_SUPPORTS_DMABUF                  (1 << 2)
++#define V4L2_BUF_CAP_SUPPORTS_REQUESTS                        (1 << 3)
++#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS           (1 << 4)
++#define V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF    (1 << 5)
+ /**
+  * struct v4l2_plane - plane info for multi-planar buffers
+@@ -1051,6 +1052,8 @@ static inline __u64 v4l2_timeval_to_ns(c
+ #define V4L2_BUF_FLAG_IN_REQUEST              0x00000080
+ /* timecode field is valid */
+ #define V4L2_BUF_FLAG_TIMECODE                        0x00000100
++/* Don't return the capture buffer until OUTPUT timestamp changes */
++#define V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF    0x00000200
+ /* Buffer is prepared for queuing */
+ #define V4L2_BUF_FLAG_PREPARED                        0x00000400
+ /* Cache handling flags */
diff --git a/target/linux/bcm27xx/patches-5.4/950-0498-media-v4l2-mem2mem-support-held-capture-buffers.patch b/target/linux/bcm27xx/patches-5.4/950-0498-media-v4l2-mem2mem-support-held-capture-buffers.patch
new file mode 100644 (file)
index 0000000..bb66baf
--- /dev/null
@@ -0,0 +1,260 @@
+From dc9b786e4b9a1262b536b3c9d0fa88e34a2b3f8f Mon Sep 17 00:00:00 2001
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Date: Fri, 11 Oct 2019 06:32:41 -0300
+Subject: [PATCH] media: v4l2-mem2mem: support held capture buffers
+
+Commit f8cca8c97a63d77f48334cde81d15014f43530ef upstream.
+
+Check for held buffers that are ready to be returned to vb2 in
+__v4l2_m2m_try_queue(). This avoids drivers having to handle this
+case.
+
+Add v4l2_m2m_buf_done_and_job_finish() to correctly return source
+and destination buffers and mark the job as finished while taking
+a held destination buffer into account (i.e. that buffer won't be
+returned). This has to be done while job_spinlock is held to avoid
+race conditions.
+
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-mem2mem.c | 130 ++++++++++++++++++-------
+ include/media/v4l2-mem2mem.h           |  33 ++++++-
+ 2 files changed, 128 insertions(+), 35 deletions(-)
+
+--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
++++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
+@@ -284,7 +284,8 @@ static void v4l2_m2m_try_run(struct v4l2
+ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
+                                struct v4l2_m2m_ctx *m2m_ctx)
+ {
+-      unsigned long flags_job, flags_out, flags_cap;
++      unsigned long flags_job;
++      struct vb2_v4l2_buffer *dst, *src;
+       dprintk("Trying to schedule a job for m2m_ctx: %p\n", m2m_ctx);
+@@ -307,20 +308,30 @@ static void __v4l2_m2m_try_queue(struct
+               goto job_unlock;
+       }
+-      spin_lock_irqsave(&m2m_ctx->out_q_ctx.rdy_spinlock, flags_out);
+-      if (list_empty(&m2m_ctx->out_q_ctx.rdy_queue)
+-          && !m2m_ctx->out_q_ctx.buffered) {
++      src = v4l2_m2m_next_src_buf(m2m_ctx);
++      dst = v4l2_m2m_next_dst_buf(m2m_ctx);
++      if (!src && !m2m_ctx->out_q_ctx.buffered) {
+               dprintk("No input buffers available\n");
+-              goto out_unlock;
++              goto job_unlock;
+       }
+-      spin_lock_irqsave(&m2m_ctx->cap_q_ctx.rdy_spinlock, flags_cap);
+-      if (list_empty(&m2m_ctx->cap_q_ctx.rdy_queue)
+-          && !m2m_ctx->cap_q_ctx.buffered) {
++      if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
+               dprintk("No output buffers available\n");
+-              goto cap_unlock;
++              goto job_unlock;
++      }
++
++      if (src && dst &&
++          dst->is_held && dst->vb2_buf.copied_timestamp &&
++          dst->vb2_buf.timestamp != src->vb2_buf.timestamp) {
++              dst->is_held = false;
++              v4l2_m2m_dst_buf_remove(m2m_ctx);
++              v4l2_m2m_buf_done(dst, VB2_BUF_STATE_DONE);
++              dst = v4l2_m2m_next_dst_buf(m2m_ctx);
++
++              if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
++                      dprintk("No output buffers available after returning held buffer\n");
++                      goto job_unlock;
++              }
+       }
+-      spin_unlock_irqrestore(&m2m_ctx->cap_q_ctx.rdy_spinlock, flags_cap);
+-      spin_unlock_irqrestore(&m2m_ctx->out_q_ctx.rdy_spinlock, flags_out);
+       if (m2m_dev->m2m_ops->job_ready
+               && (!m2m_dev->m2m_ops->job_ready(m2m_ctx->priv))) {
+@@ -331,13 +342,6 @@ static void __v4l2_m2m_try_queue(struct
+       list_add_tail(&m2m_ctx->queue, &m2m_dev->job_queue);
+       m2m_ctx->job_flags |= TRANS_QUEUED;
+-      spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags_job);
+-      return;
+-
+-cap_unlock:
+-      spin_unlock_irqrestore(&m2m_ctx->cap_q_ctx.rdy_spinlock, flags_cap);
+-out_unlock:
+-      spin_unlock_irqrestore(&m2m_ctx->out_q_ctx.rdy_spinlock, flags_out);
+ job_unlock:
+       spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags_job);
+ }
+@@ -412,37 +416,97 @@ static void v4l2_m2m_cancel_job(struct v
+       }
+ }
+-void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
+-                       struct v4l2_m2m_ctx *m2m_ctx)
++/*
++ * Schedule the next job, called from v4l2_m2m_job_finish() or
++ * v4l2_m2m_buf_done_and_job_finish().
++ */
++static void v4l2_m2m_schedule_next_job(struct v4l2_m2m_dev *m2m_dev,
++                                     struct v4l2_m2m_ctx *m2m_ctx)
+ {
+-      unsigned long flags;
++      /*
++       * This instance might have more buffers ready, but since we do not
++       * allow more than one job on the job_queue per instance, each has
++       * to be scheduled separately after the previous one finishes.
++       */
++      __v4l2_m2m_try_queue(m2m_dev, m2m_ctx);
+-      spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
++      /*
++       * We might be running in atomic context,
++       * but the job must be run in non-atomic context.
++       */
++      schedule_work(&m2m_dev->job_work);
++}
++
++/*
++ * Assumes job_spinlock is held, called from v4l2_m2m_job_finish() or
++ * v4l2_m2m_buf_done_and_job_finish().
++ */
++static bool _v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
++                               struct v4l2_m2m_ctx *m2m_ctx)
++{
+       if (!m2m_dev->curr_ctx || m2m_dev->curr_ctx != m2m_ctx) {
+-              spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+               dprintk("Called by an instance not currently running\n");
+-              return;
++              return false;
+       }
+       list_del(&m2m_dev->curr_ctx->queue);
+       m2m_dev->curr_ctx->job_flags &= ~(TRANS_QUEUED | TRANS_RUNNING);
+       wake_up(&m2m_dev->curr_ctx->finished);
+       m2m_dev->curr_ctx = NULL;
++      return true;
++}
+-      spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+-
+-      /* This instance might have more buffers ready, but since we do not
+-       * allow more than one job on the job_queue per instance, each has
+-       * to be scheduled separately after the previous one finishes. */
+-      __v4l2_m2m_try_queue(m2m_dev, m2m_ctx);
++void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
++                       struct v4l2_m2m_ctx *m2m_ctx)
++{
++      unsigned long flags;
++      bool schedule_next;
+-      /* We might be running in atomic context,
+-       * but the job must be run in non-atomic context.
++      /*
++       * This function should not be used for drivers that support
++       * holding capture buffers. Those should use
++       * v4l2_m2m_buf_done_and_job_finish() instead.
+        */
+-      schedule_work(&m2m_dev->job_work);
++      WARN_ON(m2m_ctx->cap_q_ctx.q.subsystem_flags &
++              VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF);
++      spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
++      schedule_next = _v4l2_m2m_job_finish(m2m_dev, m2m_ctx);
++      spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
++
++      if (schedule_next)
++              v4l2_m2m_schedule_next_job(m2m_dev, m2m_ctx);
+ }
+ EXPORT_SYMBOL(v4l2_m2m_job_finish);
++void v4l2_m2m_buf_done_and_job_finish(struct v4l2_m2m_dev *m2m_dev,
++                                    struct v4l2_m2m_ctx *m2m_ctx,
++                                    enum vb2_buffer_state state)
++{
++      struct vb2_v4l2_buffer *src_buf, *dst_buf;
++      bool schedule_next = false;
++      unsigned long flags;
++
++      spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
++      src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
++      dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx);
++
++      if (WARN_ON(!src_buf || !dst_buf))
++              goto unlock;
++      v4l2_m2m_buf_done(src_buf, state);
++      dst_buf->is_held = src_buf->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
++      if (!dst_buf->is_held) {
++              v4l2_m2m_dst_buf_remove(m2m_ctx);
++              v4l2_m2m_buf_done(dst_buf, state);
++      }
++      schedule_next = _v4l2_m2m_job_finish(m2m_dev, m2m_ctx);
++unlock:
++      spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
++
++      if (schedule_next)
++              v4l2_m2m_schedule_next_job(m2m_dev, m2m_ctx);
++}
++EXPORT_SYMBOL(v4l2_m2m_buf_done_and_job_finish);
++
+ int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
+                    struct v4l2_requestbuffers *reqbufs)
+ {
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -21,7 +21,8 @@
+  *            callback.
+  *            The job does NOT have to end before this callback returns
+  *            (and it will be the usual case). When the job finishes,
+- *            v4l2_m2m_job_finish() has to be called.
++ *            v4l2_m2m_job_finish() or v4l2_m2m_buf_done_and_job_finish()
++ *            has to be called.
+  * @job_ready:        optional. Should return 0 if the driver does not have a job
+  *            fully prepared to run yet (i.e. it will not be able to finish a
+  *            transaction without sleeping). If not provided, it will be
+@@ -33,7 +34,8 @@
+  *            stop the device safely; e.g. in the next interrupt handler),
+  *            even if the transaction would not have been finished by then.
+  *            After the driver performs the necessary steps, it has to call
+- *            v4l2_m2m_job_finish() (as if the transaction ended normally).
++ *            v4l2_m2m_job_finish() or v4l2_m2m_buf_done_and_job_finish() as
++ *            if the transaction ended normally.
+  *            This function does not have to (and will usually not) wait
+  *            until the device enters a state when it can be stopped.
+  */
+@@ -173,6 +175,33 @@ void v4l2_m2m_try_schedule(struct v4l2_m
+ void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
+                        struct v4l2_m2m_ctx *m2m_ctx);
++/**
++ * v4l2_m2m_buf_done_and_job_finish() - return source/destination buffers with
++ * state and inform the framework that a job has been finished and have it
++ * clean up
++ *
++ * @m2m_dev: opaque pointer to the internal data to handle M2M context
++ * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
++ * @state: vb2 buffer state passed to v4l2_m2m_buf_done().
++ *
++ * Drivers that set V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF must use this
++ * function instead of job_finish() to take held buffers into account. It is
++ * optional for other drivers.
++ *
++ * This function removes the source buffer from the ready list and returns
++ * it with the given state. The same is done for the destination buffer, unless
++ * it is marked 'held'. In that case the buffer is kept on the ready list.
++ *
++ * After that the job is finished (see job_finish()).
++ *
++ * This allows for multiple output buffers to be used to fill in a single
++ * capture buffer. This is typically used by stateless decoders where
++ * multiple e.g. H.264 slices contribute to a single decoded frame.
++ */
++void v4l2_m2m_buf_done_and_job_finish(struct v4l2_m2m_dev *m2m_dev,
++                                    struct v4l2_m2m_ctx *m2m_ctx,
++                                    enum vb2_buffer_state state);
++
+ static inline void
+ v4l2_m2m_buf_done(struct vb2_v4l2_buffer *buf, enum vb2_buffer_state state)
+ {
diff --git a/target/linux/bcm27xx/patches-5.4/950-0499-media-videodev2.h-add-V4L2_DEC_CMD_FLUSH.patch b/target/linux/bcm27xx/patches-5.4/950-0499-media-videodev2.h-add-V4L2_DEC_CMD_FLUSH.patch
new file mode 100644 (file)
index 0000000..ef075fd
--- /dev/null
@@ -0,0 +1,57 @@
+From b2ea711d2c21ec021de4ff09a0a2b5b4224f9749 Mon Sep 17 00:00:00 2001
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Date: Fri, 11 Oct 2019 06:32:42 -0300
+Subject: [PATCH] media: videodev2.h: add V4L2_DEC_CMD_FLUSH
+
+Add this new V4L2_DEC_CMD_FLUSH decoder command and document it.
+
+Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
+Reviewed-by: Alexandre Courbot <acourbot@chromium.org>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst | 10 +++++++++-
+ Documentation/media/videodev2.h.rst.exceptions      |  1 +
+ include/uapi/linux/videodev2.h                      |  1 +
+ 3 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst
++++ b/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst
+@@ -208,7 +208,15 @@ introduced in Linux 3.3. They are, howev
+       been started yet, the driver will return an ``EPERM`` error code. When
+       the decoder is already running, this command does nothing. No
+       flags are defined for this command.
+-
++    * - ``V4L2_DEC_CMD_FLUSH``
++      - 4
++      - Flush any held capture buffers. Only valid for stateless decoders.
++      This command is typically used when the application reached the
++      end of the stream and the last output buffer had the
++      ``V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF`` flag set. This would prevent
++      dequeueing the capture buffer containing the last decoded frame.
++      So this command can be used to explicitly flush that final decoded
++      frame. This command does nothing if there are no held capture buffers.
+ Return Value
+ ============
+--- a/Documentation/media/videodev2.h.rst.exceptions
++++ b/Documentation/media/videodev2.h.rst.exceptions
+@@ -434,6 +434,7 @@ replace define V4L2_DEC_CMD_START decode
+ replace define V4L2_DEC_CMD_STOP decoder-cmds
+ replace define V4L2_DEC_CMD_PAUSE decoder-cmds
+ replace define V4L2_DEC_CMD_RESUME decoder-cmds
++replace define V4L2_DEC_CMD_FLUSH decoder-cmds
+ replace define V4L2_DEC_CMD_START_MUTE_AUDIO decoder-cmds
+ replace define V4L2_DEC_CMD_PAUSE_TO_BLACK decoder-cmds
+--- a/include/uapi/linux/videodev2.h
++++ b/include/uapi/linux/videodev2.h
+@@ -1989,6 +1989,7 @@ struct v4l2_encoder_cmd {
+ #define V4L2_DEC_CMD_STOP        (1)
+ #define V4L2_DEC_CMD_PAUSE       (2)
+ #define V4L2_DEC_CMD_RESUME      (3)
++#define V4L2_DEC_CMD_FLUSH       (4)
+ /* Flags for V4L2_DEC_CMD_START */
+ #define V4L2_DEC_CMD_START_MUTE_AUDIO (1 << 0)
diff --git a/target/linux/bcm27xx/patches-5.4/950-0500-media-v4l2-mem2mem-add-stateless_-try_-decoder_cmd-i.patch b/target/linux/bcm27xx/patches-5.4/950-0500-media-v4l2-mem2mem-add-stateless_-try_-decoder_cmd-i.patch
new file mode 100644 (file)
index 0000000..0b74dbf
--- /dev/null
@@ -0,0 +1,96 @@
+From 1decb017f990ea61ab421e316bf1af3a5199b73a Mon Sep 17 00:00:00 2001
+From: Jernej Skrabec <jernej.skrabec@siol.net>
+Date: Fri, 11 Oct 2019 06:32:43 -0300
+Subject: [PATCH] media: v4l2-mem2mem: add stateless_(try_)decoder_cmd
+ ioctl helpers
+
+Commit bef41d93aac64b54c3008ca6170bec54f85784f5 upstream.
+
+These helpers are used by stateless codecs when they support multiple
+slices per frame and the hold capture buffer flag is set. It's expected that
+all such codecs will use this code.
+
+Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
+Co-developed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-mem2mem.c | 53 ++++++++++++++++++++++++++
+ include/media/v4l2-mem2mem.h           |  4 ++
+ 2 files changed, 57 insertions(+)
+
+--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
++++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
+@@ -1218,6 +1218,59 @@ int v4l2_m2m_ioctl_try_decoder_cmd(struc
+ }
+ EXPORT_SYMBOL_GPL(v4l2_m2m_ioctl_try_decoder_cmd);
++int v4l2_m2m_ioctl_stateless_try_decoder_cmd(struct file *file, void *fh,
++                                           struct v4l2_decoder_cmd *dc)
++{
++      if (dc->cmd != V4L2_DEC_CMD_FLUSH)
++              return -EINVAL;
++
++      dc->flags = 0;
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(v4l2_m2m_ioctl_stateless_try_decoder_cmd);
++
++int v4l2_m2m_ioctl_stateless_decoder_cmd(struct file *file, void *priv,
++                                       struct v4l2_decoder_cmd *dc)
++{
++      struct v4l2_fh *fh = file->private_data;
++      struct vb2_v4l2_buffer *out_vb, *cap_vb;
++      struct v4l2_m2m_dev *m2m_dev = fh->m2m_ctx->m2m_dev;
++      unsigned long flags;
++      int ret;
++
++      ret = v4l2_m2m_ioctl_stateless_try_decoder_cmd(file, priv, dc);
++      if (ret < 0)
++              return ret;
++
++      spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
++      out_vb = v4l2_m2m_last_src_buf(fh->m2m_ctx);
++      cap_vb = v4l2_m2m_last_dst_buf(fh->m2m_ctx);
++
++      /*
++       * If there is an out buffer pending, then clear any HOLD flag.
++       *
++       * By clearing this flag we ensure that when this output
++       * buffer is processed any held capture buffer will be released.
++       */
++      if (out_vb) {
++              out_vb->flags &= ~V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
++      } else if (cap_vb && cap_vb->is_held) {
++              /*
++               * If there were no output buffers, but there is a
++               * capture buffer that is held, then release that
++               * buffer.
++               */
++              cap_vb->is_held = false;
++              v4l2_m2m_dst_buf_remove(fh->m2m_ctx);
++              v4l2_m2m_buf_done(cap_vb, VB2_BUF_STATE_DONE);
++      }
++      spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(v4l2_m2m_ioctl_stateless_decoder_cmd);
++
+ /*
+  * v4l2_file_operations helpers. It is assumed here same lock is used
+  * for the output and the capture buffer queue.
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -701,6 +701,10 @@ int v4l2_m2m_ioctl_try_encoder_cmd(struc
+                                  struct v4l2_encoder_cmd *ec);
+ int v4l2_m2m_ioctl_try_decoder_cmd(struct file *file, void *fh,
+                                  struct v4l2_decoder_cmd *dc);
++int v4l2_m2m_ioctl_stateless_try_decoder_cmd(struct file *file, void *fh,
++                                           struct v4l2_decoder_cmd *dc);
++int v4l2_m2m_ioctl_stateless_decoder_cmd(struct file *file, void *priv,
++                                       struct v4l2_decoder_cmd *dc);
+ int v4l2_m2m_fop_mmap(struct file *file, struct vm_area_struct *vma);
+ __poll_t v4l2_m2m_fop_poll(struct file *file, poll_table *wait);
diff --git a/target/linux/bcm27xx/patches-5.4/950-0501-media-v4l2-mem2mem-add-new_frame-detection.patch b/target/linux/bcm27xx/patches-5.4/950-0501-media-v4l2-mem2mem-add-new_frame-detection.patch
new file mode 100644 (file)
index 0000000..3c77792
--- /dev/null
@@ -0,0 +1,69 @@
+From 1d55acac432983ad8301f5430c42ac549b4b4c6f Mon Sep 17 00:00:00 2001
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Date: Fri, 11 Oct 2019 06:32:44 -0300
+Subject: [PATCH] media: v4l2-mem2mem: add new_frame detection
+
+Commit f07602ac388723233e9e3c5a05b54baf34e0a3e9 upstream.
+
+Drivers that support VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF
+typically want to know if a new frame is started (i.e. the first
+slice is about to be processed). Add a new_frame bool to v4l2_m2m_ctx
+and set it accordingly.
+
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-mem2mem.c | 11 +++++++++--
+ include/media/v4l2-mem2mem.h           |  7 +++++++
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
++++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
+@@ -319,8 +319,10 @@ static void __v4l2_m2m_try_queue(struct
+               goto job_unlock;
+       }
+-      if (src && dst &&
+-          dst->is_held && dst->vb2_buf.copied_timestamp &&
++      m2m_ctx->new_frame = true;
++
++      if (src && dst && dst->is_held &&
++          dst->vb2_buf.copied_timestamp &&
+           dst->vb2_buf.timestamp != src->vb2_buf.timestamp) {
+               dst->is_held = false;
+               v4l2_m2m_dst_buf_remove(m2m_ctx);
+@@ -333,6 +335,11 @@ static void __v4l2_m2m_try_queue(struct
+               }
+       }
++      if (src && dst && (m2m_ctx->cap_q_ctx.q.subsystem_flags &
++                         VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF))
++              m2m_ctx->new_frame = !dst->vb2_buf.copied_timestamp ||
++                      dst->vb2_buf.timestamp != src->vb2_buf.timestamp;
++
+       if (m2m_dev->m2m_ops->job_ready
+               && (!m2m_dev->m2m_ops->job_ready(m2m_ctx->priv))) {
+               dprintk("Driver not ready\n");
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -75,6 +75,11 @@ struct v4l2_m2m_queue_ctx {
+  * struct v4l2_m2m_ctx - Memory to memory context structure
+  *
+  * @q_lock: struct &mutex lock
++ * @new_frame: valid in the device_run callback: if true, then this
++ *            starts a new frame; if false, then this is a new slice
++ *            for an existing frame. This is always true unless
++ *            V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF is set, which
++ *            indicates slicing support.
+  * @m2m_dev: opaque pointer to the internal data to handle M2M context
+  * @cap_q_ctx: Capture (output to memory) queue context
+  * @out_q_ctx: Output (input from memory) queue context
+@@ -91,6 +96,8 @@ struct v4l2_m2m_ctx {
+       /* optional cap/out vb2 queues lock */
+       struct mutex                    *q_lock;
++      bool                            new_frame;
++
+       /* internal use only */
+       struct v4l2_m2m_dev             *m2m_dev;
diff --git a/target/linux/bcm27xx/patches-5.4/950-0502-media-Documentation-media-Document-V4L2_CTRL_TYPE_AR.patch b/target/linux/bcm27xx/patches-5.4/950-0502-media-Documentation-media-Document-V4L2_CTRL_TYPE_AR.patch
new file mode 100644 (file)
index 0000000..1d478fc
--- /dev/null
@@ -0,0 +1,46 @@
+From 20076d276d045c03f809bb16f0e1fafcfe63a81f Mon Sep 17 00:00:00 2001
+From: Ricardo Ribalda Delgado <ribalda@kernel.org>
+Date: Mon, 7 Oct 2019 12:06:32 -0300
+Subject: [PATCH] media: Documentation: media: Document
+ V4L2_CTRL_TYPE_AREA
+
+Commit 8ae3a0862993c09a8ef0f9abb379553370c517e3 upstream.
+
+A struct v4l2_area containing the width and the height of a rectangular
+area.
+
+Reviewed-by: Jacopo Mondi <jacopo@jmondi.org>
+Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
+Signed-off-by: Ricardo Ribalda Delgado <ribalda@kernel.org>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ Documentation/media/uapi/v4l/vidioc-queryctrl.rst | 6 ++++++
+ Documentation/media/videodev2.h.rst.exceptions    | 1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
++++ b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
+@@ -443,6 +443,12 @@ See also the examples in :ref:`control`.
+       - n/a
+       - A struct :c:type:`v4l2_ctrl_mpeg2_quantization`, containing MPEG-2
+       quantization matrices for stateless video decoders.
++    * - ``V4L2_CTRL_TYPE_AREA``
++      - n/a
++      - n/a
++      - n/a
++      - A struct :c:type:`v4l2_area`, containing the width and the height
++        of a rectangular area. Units depend on the use case.
+     * - ``V4L2_CTRL_TYPE_H264_SPS``
+       - n/a
+       - n/a
+--- a/Documentation/media/videodev2.h.rst.exceptions
++++ b/Documentation/media/videodev2.h.rst.exceptions
+@@ -141,6 +141,7 @@ replace symbol V4L2_CTRL_TYPE_H264_PPS :
+ replace symbol V4L2_CTRL_TYPE_H264_SCALING_MATRIX :c:type:`v4l2_ctrl_type`
+ replace symbol V4L2_CTRL_TYPE_H264_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
+ replace symbol V4L2_CTRL_TYPE_H264_DECODE_PARAMS :c:type:`v4l2_ctrl_type`
++replace symbol V4L2_CTRL_TYPE_AREA :c:type:`v4l2_ctrl_type`
+ # V4L2 capability defines
+ replace define V4L2_CAP_VIDEO_CAPTURE device-capabilities
diff --git a/target/linux/bcm27xx/patches-5.4/950-0503-media-v4l-Add-definitions-for-HEVC-stateless-decodin.patch b/target/linux/bcm27xx/patches-5.4/950-0503-media-v4l-Add-definitions-for-HEVC-stateless-decodin.patch
new file mode 100644 (file)
index 0000000..0fe0f8c
--- /dev/null
@@ -0,0 +1,1093 @@
+From 5f6c08984a6578201fe3a2394ccb0d3a30fdf027 Mon Sep 17 00:00:00 2001
+From: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+Date: Tue, 22 Oct 2019 12:26:52 -0300
+Subject: [PATCH] media: v4l: Add definitions for HEVC stateless
+ decoding
+
+This introduces the required definitions for HEVC decoding support with
+stateless VPUs. The controls associated to the HEVC slice format provide
+the required meta-data for decoding slices extracted from the bitstream.
+
+They are not exported to the public V4L2 API since reworking this API
+will likely be needed for covering various use-cases and new hardware.
+
+Multi-slice decoding is exposed as a valid decoding mode to match current
+H.264 support but it is not yet implemented.
+
+The interface comes with the following limitations:
+* No custom quantization matrices (scaling lists);
+* Support for a single temporal layer only;
+* No slice entry point offsets support;
+* No conformance window support;
+* No VUI parameters support;
+* No support for SPS extensions: range, multilayer, 3d, scc, 4 bits;
+* No support for PPS extensions: range, multilayer, 3d, scc, 4 bits.
+
+Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+[hverkuil-cisco@xs4all.nl: use 1ULL in flags defines in hevc-ctrls.h]
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ Documentation/media/uapi/v4l/biblio.rst       |   9 +
+ .../media/uapi/v4l/ext-ctrls-codec.rst        | 553 +++++++++++++++++-
+ .../media/uapi/v4l/vidioc-queryctrl.rst       |  18 +
+ .../media/videodev2.h.rst.exceptions          |   3 +
+ drivers/media/v4l2-core/v4l2-ctrls.c          | 109 +++-
+ drivers/media/v4l2-core/v4l2-ioctl.c          |   1 +
+ include/media/hevc-ctrls.h                    | 212 +++++++
+ include/media/v4l2-ctrls.h                    |   7 +
+ 8 files changed, 908 insertions(+), 4 deletions(-)
+ create mode 100644 include/media/hevc-ctrls.h
+
+--- a/Documentation/media/uapi/v4l/biblio.rst
++++ b/Documentation/media/uapi/v4l/biblio.rst
+@@ -131,6 +131,15 @@ ITU-T Rec. H.264 Specification (04/2017
+ :author:    International Telecommunication Union (http://www.itu.ch)
++.. _hevc:
++
++ITU H.265/HEVC
++==============
++
++:title:     ITU-T Rec. H.265 | ISO/IEC 23008-2 "High Efficiency Video Coding"
++
++:author:    International Telecommunication Union (http://www.itu.ch), International Organisation for Standardisation (http://www.iso.ch)
++
+ .. _jfif:
+ JFIF
+--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
++++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+@@ -1983,9 +1983,9 @@ enum v4l2_mpeg_video_h264_hierarchical_c
+       - ``reference_ts``
+       - Timestamp of the V4L2 capture buffer to use as reference, used
+         with B-coded and P-coded frames. The timestamp refers to the
+-      ``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
+-      :c:func:`v4l2_timeval_to_ns()` function to convert the struct
+-      :c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
++        ``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
++        :c:func:`v4l2_timeval_to_ns()` function to convert the struct
++        :c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
+     * - __u16
+       - ``frame_num``
+       -
+@@ -3693,3 +3693,550 @@ enum v4l2_mpeg_video_hevc_size_of_length
+     Indicates whether to generate SPS and PPS at every IDR. Setting it to 0
+     disables generating SPS and PPS at every IDR. Setting it to one enables
+     generating SPS and PPS at every IDR.
++
++.. _v4l2-mpeg-hevc:
++
++``V4L2_CID_MPEG_VIDEO_HEVC_SPS (struct)``
++    Specifies the Sequence Parameter Set fields (as extracted from the
++    bitstream) for the associated HEVC slice data.
++    These bitstream parameters are defined according to :ref:`hevc`.
++    They are described in section 7.4.3.2 "Sequence parameter set RBSP
++    semantics" of the specification.
++
++.. c:type:: v4l2_ctrl_hevc_sps
++
++.. cssclass:: longtable
++
++.. flat-table:: struct v4l2_ctrl_hevc_sps
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - __u16
++      - ``pic_width_in_luma_samples``
++      -
++    * - __u16
++      - ``pic_height_in_luma_samples``
++      -
++    * - __u8
++      - ``bit_depth_luma_minus8``
++      -
++    * - __u8
++      - ``bit_depth_chroma_minus8``
++      -
++    * - __u8
++      - ``log2_max_pic_order_cnt_lsb_minus4``
++      -
++    * - __u8
++      - ``sps_max_dec_pic_buffering_minus1``
++      -
++    * - __u8
++      - ``sps_max_num_reorder_pics``
++      -
++    * - __u8
++      - ``sps_max_latency_increase_plus1``
++      -
++    * - __u8
++      - ``log2_min_luma_coding_block_size_minus3``
++      -
++    * - __u8
++      - ``log2_diff_max_min_luma_coding_block_size``
++      -
++    * - __u8
++      - ``log2_min_luma_transform_block_size_minus2``
++      -
++    * - __u8
++      - ``log2_diff_max_min_luma_transform_block_size``
++      -
++    * - __u8
++      - ``max_transform_hierarchy_depth_inter``
++      -
++    * - __u8
++      - ``max_transform_hierarchy_depth_intra``
++      -
++    * - __u8
++      - ``pcm_sample_bit_depth_luma_minus1``
++      -
++    * - __u8
++      - ``pcm_sample_bit_depth_chroma_minus1``
++      -
++    * - __u8
++      - ``log2_min_pcm_luma_coding_block_size_minus3``
++      -
++    * - __u8
++      - ``log2_diff_max_min_pcm_luma_coding_block_size``
++      -
++    * - __u8
++      - ``num_short_term_ref_pic_sets``
++      -
++    * - __u8
++      - ``num_long_term_ref_pics_sps``
++      -
++    * - __u8
++      - ``chroma_format_idc``
++      -
++    * - __u64
++      - ``flags``
++      - See :ref:`Sequence Parameter Set Flags <hevc_sps_flags>`
++
++.. _hevc_sps_flags:
++
++``Sequence Parameter Set Flags``
++
++.. cssclass:: longtable
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - ``V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE``
++      - 0x00000001
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED``
++      - 0x00000002
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_AMP_ENABLED``
++      - 0x00000004
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET``
++      - 0x00000008
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_PCM_ENABLED``
++      - 0x00000010
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED``
++      - 0x00000020
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT``
++      - 0x00000040
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED``
++      - 0x00000080
++      -
++    * - ``V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED``
++      - 0x00000100
++      -
++
++``V4L2_CID_MPEG_VIDEO_HEVC_PPS (struct)``
++    Specifies the Picture Parameter Set fields (as extracted from the
++    bitstream) for the associated HEVC slice data.
++    These bitstream parameters are defined according to :ref:`hevc`.
++    They are described in section 7.4.3.3 "Picture parameter set RBSP
++    semantics" of the specification.
++
++.. c:type:: v4l2_ctrl_hevc_pps
++
++.. cssclass:: longtable
++
++.. flat-table:: struct v4l2_ctrl_hevc_pps
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - __u8
++      - ``num_extra_slice_header_bits``
++      -
++    * - __s8
++      - ``init_qp_minus26``
++      -
++    * - __u8
++      - ``diff_cu_qp_delta_depth``
++      -
++    * - __s8
++      - ``pps_cb_qp_offset``
++      -
++    * - __s8
++      - ``pps_cr_qp_offset``
++      -
++    * - __u8
++      - ``num_tile_columns_minus1``
++      -
++    * - __u8
++      - ``num_tile_rows_minus1``
++      -
++    * - __u8
++      - ``column_width_minus1[20]``
++      -
++    * - __u8
++      - ``row_height_minus1[22]``
++      -
++    * - __s8
++      - ``pps_beta_offset_div2``
++      -
++    * - __s8
++      - ``pps_tc_offset_div2``
++      -
++    * - __u8
++      - ``log2_parallel_merge_level_minus2``
++      -
++    * - __u8
++      - ``padding[4]``
++      - Applications and drivers must set this to zero.
++    * - __u64
++      - ``flags``
++      - See :ref:`Picture Parameter Set Flags <hevc_pps_flags>`
++
++.. _hevc_pps_flags:
++
++``Picture Parameter Set Flags``
++
++.. cssclass:: longtable
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT``
++      - 0x00000001
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT``
++      - 0x00000002
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED``
++      - 0x00000004
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT``
++      - 0x00000008
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED``
++      - 0x00000010
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED``
++      - 0x00000020
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED``
++      - 0x00000040
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT``
++      - 0x00000080
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED``
++      - 0x00000100
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED``
++      - 0x00000200
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED``
++      - 0x00000400
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_TILES_ENABLED``
++      - 0x00000800
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED``
++      - 0x00001000
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED``
++      - 0x00002000
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED``
++      - 0x00004000
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED``
++      - 0x00008000
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER``
++      - 0x00010000
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT``
++      - 0x00020000
++      -
++    * - ``V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT``
++      - 0x00040000
++      -
++
++``V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (struct)``
++    Specifies various slice-specific parameters, especially from the NAL unit
++    header, general slice segment header and weighted prediction parameter
++    parts of the bitstream.
++    These bitstream parameters are defined according to :ref:`hevc`.
++    They are described in section 7.4.7 "General slice segment header
++    semantics" of the specification.
++
++.. c:type:: v4l2_ctrl_hevc_slice_params
++
++.. cssclass:: longtable
++
++.. flat-table:: struct v4l2_ctrl_hevc_slice_params
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - __u32
++      - ``bit_size``
++      - Size (in bits) of the current slice data.
++    * - __u32
++      - ``data_bit_offset``
++      - Offset (in bits) to the video data in the current slice data.
++    * - __u8
++      - ``nal_unit_type``
++      -
++    * - __u8
++      - ``nuh_temporal_id_plus1``
++      -
++    * - __u8
++      - ``slice_type``
++      -
++        (V4L2_HEVC_SLICE_TYPE_I, V4L2_HEVC_SLICE_TYPE_P or
++        V4L2_HEVC_SLICE_TYPE_B).
++    * - __u8
++      - ``colour_plane_id``
++      -
++    * - __u16
++      - ``slice_pic_order_cnt``
++      -
++    * - __u8
++      - ``num_ref_idx_l0_active_minus1``
++      -
++    * - __u8
++      - ``num_ref_idx_l1_active_minus1``
++      -
++    * - __u8
++      - ``collocated_ref_idx``
++      -
++    * - __u8
++      - ``five_minus_max_num_merge_cand``
++      -
++    * - __s8
++      - ``slice_qp_delta``
++      -
++    * - __s8
++      - ``slice_cb_qp_offset``
++      -
++    * - __s8
++      - ``slice_cr_qp_offset``
++      -
++    * - __s8
++      - ``slice_act_y_qp_offset``
++      -
++    * - __s8
++      - ``slice_act_cb_qp_offset``
++      -
++    * - __s8
++      - ``slice_act_cr_qp_offset``
++      -
++    * - __s8
++      - ``slice_beta_offset_div2``
++      -
++    * - __s8
++      - ``slice_tc_offset_div2``
++      -
++    * - __u8
++      - ``pic_struct``
++      -
++    * - __u8
++      - ``num_active_dpb_entries``
++      - The number of entries in ``dpb``.
++    * - __u8
++      - ``ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
++      - The list of L0 reference elements as indices in the DPB.
++    * - __u8
++      - ``ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
++      - The list of L1 reference elements as indices in the DPB.
++    * - __u8
++      - ``num_rps_poc_st_curr_before``
++      - The number of reference pictures in the short-term set that come before
++        the current frame.
++    * - __u8
++      - ``num_rps_poc_st_curr_after``
++      - The number of reference pictures in the short-term set that come after
++        the current frame.
++    * - __u8
++      - ``num_rps_poc_lt_curr``
++      - The number of reference pictures in the long-term set.
++    * - __u8
++      - ``padding[7]``
++      - Applications and drivers must set this to zero.
++    * - struct :c:type:`v4l2_hevc_dpb_entry`
++      - ``dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
++      - The decoded picture buffer, for meta-data about reference frames.
++    * - struct :c:type:`v4l2_hevc_pred_weight_table`
++      - ``pred_weight_table``
++      - The prediction weight coefficients for inter-picture prediction.
++    * - __u64
++      - ``flags``
++      - See :ref:`Slice Parameters Flags <hevc_slice_params_flags>`
++
++.. _hevc_slice_params_flags:
++
++``Slice Parameters Flags``
++
++.. cssclass:: longtable
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA``
++      - 0x00000001
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA``
++      - 0x00000002
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED``
++      - 0x00000004
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO``
++      - 0x00000008
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT``
++      - 0x00000010
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0``
++      - 0x00000020
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV``
++      - 0x00000040
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED``
++      - 0x00000080
++      -
++    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED``
++      - 0x00000100
++      -
++
++.. c:type:: v4l2_hevc_dpb_entry
++
++.. cssclass:: longtable
++
++.. flat-table:: struct v4l2_hevc_dpb_entry
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - __u64
++      - ``timestamp``
++      - Timestamp of the V4L2 capture buffer to use as reference, used
++        with B-coded and P-coded frames. The timestamp refers to the
++        ``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
++        :c:func:`v4l2_timeval_to_ns()` function to convert the struct
++        :c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
++    * - __u8
++      - ``rps``
++      - The reference set for the reference frame
++        (V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE,
++        V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER or
++        V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR)
++    * - __u8
++      - ``field_pic``
++      - Whether the reference is a field picture or a frame.
++    * - __u16
++      - ``pic_order_cnt[2]``
++      - The picture order count of the reference. Only the first element of the
++        array is used for frame pictures, while the first element identifies the
++        top field and the second the bottom field in field-coded pictures.
++    * - __u8
++      - ``padding[2]``
++      - Applications and drivers must set this to zero.
++
++.. c:type:: v4l2_hevc_pred_weight_table
++
++.. cssclass:: longtable
++
++.. flat-table:: struct v4l2_hevc_pred_weight_table
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - __u8
++      - ``luma_log2_weight_denom``
++      -
++    * - __s8
++      - ``delta_chroma_log2_weight_denom``
++      -
++    * - __s8
++      - ``delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
++      -
++    * - __s8
++      - ``luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
++      -
++    * - __s8
++      - ``delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
++      -
++    * - __s8
++      - ``chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
++      -
++    * - __s8
++      - ``delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
++      -
++    * - __s8
++      - ``luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
++      -
++    * - __s8
++      - ``delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
++      -
++    * - __s8
++      - ``chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]``
++      -
++    * - __u8
++      - ``padding[6]``
++      - Applications and drivers must set this to zero.
++
++``V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (enum)``
++    Specifies the decoding mode to use. Currently exposes slice-based and
++    frame-based decoding but new modes might be added later on.
++    This control is used as a modifier for V4L2_PIX_FMT_HEVC_SLICE
++    pixel format. Applications that support V4L2_PIX_FMT_HEVC_SLICE
++    are required to set this control in order to specify the decoding mode
++    that is expected for the buffer.
++    Drivers may expose a single or multiple decoding modes, depending
++    on what they can support.
++
++    .. note::
++
++       This menu control is not yet part of the public kernel API and
++       it is expected to change.
++
++.. c:type:: v4l2_mpeg_video_hevc_decode_mode
++
++.. cssclass:: longtable
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - ``V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED``
++      - 0
++      - Decoding is done at the slice granularity.
++        The OUTPUT buffer must contain a single slice.
++    * - ``V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED``
++      - 1
++      - Decoding is done at the frame granularity.
++        The OUTPUT buffer must contain all slices needed to decode the
++        frame. The OUTPUT buffer must also contain both fields.
++
++``V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (enum)``
++    Specifies the HEVC slice start code expected for each slice.
++    This control is used as a modifier for V4L2_PIX_FMT_HEVC_SLICE
++    pixel format. Applications that support V4L2_PIX_FMT_HEVC_SLICE
++    are required to set this control in order to specify the start code
++    that is expected for the buffer.
++    Drivers may expose a single or multiple start codes, depending
++    on what they can support.
++
++    .. note::
++
++       This menu control is not yet part of the public kernel API and
++       it is expected to change.
++
++.. c:type:: v4l2_mpeg_video_hevc_start_code
++
++.. cssclass:: longtable
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - ``V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE``
++      - 0
++      - Selecting this value specifies that HEVC slices are passed
++        to the driver without any start code.
++    * - ``V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B``
++      - 1
++      - Selecting this value specifies that HEVC slices are expected
++        to be prefixed by Annex B start codes. According to :ref:`hevc`
++        valid start codes can be 3-bytes 0x000001 or 4-bytes 0x00000001.
+--- a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
++++ b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst
+@@ -479,6 +479,24 @@ See also the examples in :ref:`control`.
+       - n/a
+       - A struct :c:type:`v4l2_ctrl_h264_decode_params`, containing H264
+       decode parameters for stateless video decoders.
++    * - ``V4L2_CTRL_TYPE_HEVC_SPS``
++      - n/a
++      - n/a
++      - n/a
++      - A struct :c:type:`v4l2_ctrl_hevc_sps`, containing HEVC Sequence
++        Parameter Set for stateless video decoders.
++    * - ``V4L2_CTRL_TYPE_HEVC_PPS``
++      - n/a
++      - n/a
++      - n/a
++      - A struct :c:type:`v4l2_ctrl_hevc_pps`, containing HEVC Picture
++        Parameter Set for stateless video decoders.
++    * - ``V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS``
++      - n/a
++      - n/a
++      - n/a
++      - A struct :c:type:`v4l2_ctrl_hevc_slice_params`, containing HEVC
++        slice parameters for stateless video decoders.
+ .. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
+--- a/Documentation/media/videodev2.h.rst.exceptions
++++ b/Documentation/media/videodev2.h.rst.exceptions
+@@ -141,6 +141,9 @@ replace symbol V4L2_CTRL_TYPE_H264_PPS :
+ replace symbol V4L2_CTRL_TYPE_H264_SCALING_MATRIX :c:type:`v4l2_ctrl_type`
+ replace symbol V4L2_CTRL_TYPE_H264_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
+ replace symbol V4L2_CTRL_TYPE_H264_DECODE_PARAMS :c:type:`v4l2_ctrl_type`
++replace symbol V4L2_CTRL_TYPE_HEVC_SPS :c:type:`v4l2_ctrl_type`
++replace symbol V4L2_CTRL_TYPE_HEVC_PPS :c:type:`v4l2_ctrl_type`
++replace symbol V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
+ replace symbol V4L2_CTRL_TYPE_AREA :c:type:`v4l2_ctrl_type`
+ # V4L2 capability defines
+--- a/drivers/media/v4l2-core/v4l2-ctrls.c
++++ b/drivers/media/v4l2-core/v4l2-ctrls.c
+@@ -567,6 +567,16 @@ const char * const *v4l2_ctrl_get_menu(u
+               "Disabled at slice boundary",
+               "NULL",
+       };
++      static const char * const hevc_decode_mode[] = {
++              "Slice-Based",
++              "Frame-Based",
++              NULL,
++      };
++      static const char * const hevc_start_code[] = {
++              "No Start Code",
++              "Annex B Start Code",
++              NULL,
++      };
+       switch (id) {
+       case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
+@@ -688,7 +698,10 @@ const char * const *v4l2_ctrl_get_menu(u
+               return hevc_tier;
+       case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:
+               return hevc_loop_filter_mode;
+-
++      case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:
++              return hevc_decode_mode;
++      case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:
++              return hevc_start_code;
+       default:
+               return NULL;
+       }
+@@ -958,6 +971,11 @@ const char *v4l2_ctrl_get_name(u32 id)
+       case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:     return "HEVC Size of Length Field";
+       case V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES:        return "Reference Frames for a P-Frame";
+       case V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR:         return "Prepend SPS and PPS to IDR";
++      case V4L2_CID_MPEG_VIDEO_HEVC_SPS:                      return "HEVC Sequence Parameter Set";
++      case V4L2_CID_MPEG_VIDEO_HEVC_PPS:                      return "HEVC Picture Parameter Set";
++      case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:             return "HEVC Slice Parameters";
++      case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:              return "HEVC Decode Mode";
++      case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:               return "HEVC Start Code";
+       /* CAMERA controls */
+       /* Keep the order of the 'case's the same as in v4l2-controls.h! */
+@@ -1267,6 +1285,8 @@ void v4l2_ctrl_fill(u32 id, const char *
+       case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:
+       case V4L2_CID_MPEG_VIDEO_HEVC_TIER:
+       case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:
++      case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:
++      case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:
+               *type = V4L2_CTRL_TYPE_MENU;
+               break;
+       case V4L2_CID_LINK_FREQ:
+@@ -1377,6 +1397,15 @@ void v4l2_ctrl_fill(u32 id, const char *
+       case V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER:
+               *type = V4L2_CTRL_TYPE_VP8_FRAME_HEADER;
+               break;
++      case V4L2_CID_MPEG_VIDEO_HEVC_SPS:
++              *type = V4L2_CTRL_TYPE_HEVC_SPS;
++              break;
++      case V4L2_CID_MPEG_VIDEO_HEVC_PPS:
++              *type = V4L2_CTRL_TYPE_HEVC_PPS;
++              break;
++      case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:
++              *type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS;
++              break;
+       case V4L2_CID_UNIT_CELL_SIZE:
+               *type = V4L2_CTRL_TYPE_AREA;
+               *flags |= V4L2_CTRL_FLAG_READ_ONLY;
+@@ -1678,8 +1707,12 @@ static int std_validate_compound(const s
+ {
+       struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
+       struct v4l2_ctrl_vp8_frame_header *p_vp8_frame_header;
++      struct v4l2_ctrl_hevc_sps *p_hevc_sps;
++      struct v4l2_ctrl_hevc_pps *p_hevc_pps;
++      struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params;
+       struct v4l2_area *area;
+       void *p = ptr.p + idx * ctrl->elem_size;
++      unsigned int i;
+       switch ((u32)ctrl->type) {
+       case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
+@@ -1755,11 +1788,76 @@ static int std_validate_compound(const s
+               zero_padding(p_vp8_frame_header->entropy_header);
+               zero_padding(p_vp8_frame_header->coder_state);
+               break;
++
++      case V4L2_CTRL_TYPE_HEVC_SPS:
++              p_hevc_sps = p;
++
++              if (!(p_hevc_sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) {
++                      p_hevc_sps->pcm_sample_bit_depth_luma_minus1 = 0;
++                      p_hevc_sps->pcm_sample_bit_depth_chroma_minus1 = 0;
++                      p_hevc_sps->log2_min_pcm_luma_coding_block_size_minus3 = 0;
++                      p_hevc_sps->log2_diff_max_min_pcm_luma_coding_block_size = 0;
++              }
++
++              if (!(p_hevc_sps->flags &
++                    V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT))
++                      p_hevc_sps->num_long_term_ref_pics_sps = 0;
++              break;
++
++      case V4L2_CTRL_TYPE_HEVC_PPS:
++              p_hevc_pps = p;
++
++              if (!(p_hevc_pps->flags &
++                    V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED))
++                      p_hevc_pps->diff_cu_qp_delta_depth = 0;
++
++              if (!(p_hevc_pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
++                      p_hevc_pps->num_tile_columns_minus1 = 0;
++                      p_hevc_pps->num_tile_rows_minus1 = 0;
++                      memset(&p_hevc_pps->column_width_minus1, 0,
++                             sizeof(p_hevc_pps->column_width_minus1));
++                      memset(&p_hevc_pps->row_height_minus1, 0,
++                             sizeof(p_hevc_pps->row_height_minus1));
++
++                      p_hevc_pps->flags &=
++                              ~V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
++              }
++
++              if (p_hevc_pps->flags &
++                  V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER) {
++                      p_hevc_pps->pps_beta_offset_div2 = 0;
++                      p_hevc_pps->pps_tc_offset_div2 = 0;
++              }
++
++              zero_padding(*p_hevc_pps);
++              break;
++
++      case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
++              p_hevc_slice_params = p;
++
++              if (p_hevc_slice_params->num_active_dpb_entries >
++                  V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
++                      return -EINVAL;
++
++              zero_padding(p_hevc_slice_params->pred_weight_table);
++
++              for (i = 0; i < p_hevc_slice_params->num_active_dpb_entries;
++                   i++) {
++                      struct v4l2_hevc_dpb_entry *dpb_entry =
++                              &p_hevc_slice_params->dpb[i];
++
++                      zero_padding(*dpb_entry);
++              }
++
++              zero_padding(*p_hevc_slice_params);
++              break;
++
+       case V4L2_CTRL_TYPE_AREA:
+               area = p;
+               if (!area->width || !area->height)
+                       return -EINVAL;
+               break;
++
+       default:
+               return -EINVAL;
+       }
+@@ -2433,6 +2531,15 @@ static struct v4l2_ctrl *v4l2_ctrl_new(s
+       case V4L2_CTRL_TYPE_VP8_FRAME_HEADER:
+               elem_size = sizeof(struct v4l2_ctrl_vp8_frame_header);
+               break;
++      case V4L2_CTRL_TYPE_HEVC_SPS:
++              elem_size = sizeof(struct v4l2_ctrl_hevc_sps);
++              break;
++      case V4L2_CTRL_TYPE_HEVC_PPS:
++              elem_size = sizeof(struct v4l2_ctrl_hevc_pps);
++              break;
++      case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
++              elem_size = sizeof(struct v4l2_ctrl_hevc_slice_params);
++              break;
+       case V4L2_CTRL_TYPE_AREA:
+               elem_size = sizeof(struct v4l2_area);
+               break;
+--- a/drivers/media/v4l2-core/v4l2-ioctl.c
++++ b/drivers/media/v4l2-core/v4l2-ioctl.c
+@@ -1356,6 +1356,7 @@ static void v4l_fill_fmtdesc(struct v4l2
+               case V4L2_PIX_FMT_VP8_FRAME:    descr = "VP8 Frame"; break;
+               case V4L2_PIX_FMT_VP9:          descr = "VP9"; break;
+               case V4L2_PIX_FMT_HEVC:         descr = "HEVC"; break; /* aka H.265 */
++              case V4L2_PIX_FMT_HEVC_SLICE:   descr = "HEVC Parsed Slice Data"; break;
+               case V4L2_PIX_FMT_FWHT:         descr = "FWHT"; break; /* used in vicodec */
+               case V4L2_PIX_FMT_FWHT_STATELESS:       descr = "FWHT Stateless"; break; /* used in vicodec */
+               case V4L2_PIX_FMT_CPIA1:        descr = "GSPCA CPiA YUV"; break;
+--- /dev/null
++++ b/include/media/hevc-ctrls.h
+@@ -0,0 +1,212 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are the HEVC state controls for use with stateless HEVC
++ * codec drivers.
++ *
++ * It turns out that these structs are not stable yet and will undergo
++ * more changes. So keep them private until they are stable and ready to
++ * become part of the official public API.
++ */
++
++#ifndef _HEVC_CTRLS_H_
++#define _HEVC_CTRLS_H_
++
++#include <linux/videodev2.h>
++
++/* The pixel format isn't stable at the moment and will likely be renamed. */
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS          (V4L2_CID_MPEG_BASE + 1008)
++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS          (V4L2_CID_MPEG_BASE + 1009)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE  (V4L2_CID_MPEG_BASE + 1015)
++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE   (V4L2_CID_MPEG_BASE + 1016)
++
++/* enum v4l2_ctrl_type type values */
++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
++
++enum v4l2_mpeg_video_hevc_decode_mode {
++      V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
++      V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
++};
++
++enum v4l2_mpeg_video_hevc_start_code {
++      V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
++      V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B        0
++#define V4L2_HEVC_SLICE_TYPE_P        1
++#define V4L2_HEVC_SLICE_TYPE_I        2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE              (1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED                       (1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED                                (1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET             (1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED                                (1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED           (1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT         (1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED           (1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED     (1ULL << 8)
++
++/* The controls are not stable at the moment and will likely be reworked. */
++struct v4l2_ctrl_hevc_sps {
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++      __u16   pic_width_in_luma_samples;
++      __u16   pic_height_in_luma_samples;
++      __u8    bit_depth_luma_minus8;
++      __u8    bit_depth_chroma_minus8;
++      __u8    log2_max_pic_order_cnt_lsb_minus4;
++      __u8    sps_max_dec_pic_buffering_minus1;
++      __u8    sps_max_num_reorder_pics;
++      __u8    sps_max_latency_increase_plus1;
++      __u8    log2_min_luma_coding_block_size_minus3;
++      __u8    log2_diff_max_min_luma_coding_block_size;
++      __u8    log2_min_luma_transform_block_size_minus2;
++      __u8    log2_diff_max_min_luma_transform_block_size;
++      __u8    max_transform_hierarchy_depth_inter;
++      __u8    max_transform_hierarchy_depth_intra;
++      __u8    pcm_sample_bit_depth_luma_minus1;
++      __u8    pcm_sample_bit_depth_chroma_minus1;
++      __u8    log2_min_pcm_luma_coding_block_size_minus3;
++      __u8    log2_diff_max_min_pcm_luma_coding_block_size;
++      __u8    num_short_term_ref_pic_sets;
++      __u8    num_long_term_ref_pics_sps;
++      __u8    chroma_format_idc;
++
++      __u8    padding;
++
++      __u64   flags;
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT            (1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT                        (1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED           (1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT                 (1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED             (1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED             (1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED                        (1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT        (1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED                      (1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED                    (1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED          (1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED                      (1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED                (1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED   (1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER      (1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT         (1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++
++struct v4l2_ctrl_hevc_pps {
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++      __u8    num_extra_slice_header_bits;
++      __s8    init_qp_minus26;
++      __u8    diff_cu_qp_delta_depth;
++      __s8    pps_cb_qp_offset;
++      __s8    pps_cr_qp_offset;
++      __u8    num_tile_columns_minus1;
++      __u8    num_tile_rows_minus1;
++      __u8    column_width_minus1[20];
++      __u8    row_height_minus1[22];
++      __s8    pps_beta_offset_div2;
++      __s8    pps_tc_offset_div2;
++      __u8    log2_parallel_merge_level_minus2;
++
++      __u8    padding[4];
++      __u64   flags;
++};
++
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE        0x01
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02
++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR               0x03
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX         16
++
++struct v4l2_hevc_dpb_entry {
++      __u64   timestamp;
++      __u8    rps;
++      __u8    field_pic;
++      __u16   pic_order_cnt[2];
++      __u8    padding[2];
++};
++
++struct v4l2_hevc_pred_weight_table {
++      __s8    delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++      __s8    luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++      __s8    delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++      __s8    chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++      __s8    delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++      __s8    luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++      __s8    delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++      __s8    chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++      __u8    padding[6];
++
++      __u8    luma_log2_weight_denom;
++      __s8    delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA            (1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA          (1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED        (1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO                       (1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT                        (1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0                (1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV            (1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++
++struct v4l2_ctrl_hevc_slice_params {
++      __u32   bit_size;
++      __u32   data_bit_offset;
++
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++      __u8    nal_unit_type;
++      __u8    nuh_temporal_id_plus1;
++
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++      __u8    slice_type;
++      __u8    colour_plane_id;
++      __u16   slice_pic_order_cnt;
++      __u8    num_ref_idx_l0_active_minus1;
++      __u8    num_ref_idx_l1_active_minus1;
++      __u8    collocated_ref_idx;
++      __u8    five_minus_max_num_merge_cand;
++      __s8    slice_qp_delta;
++      __s8    slice_cb_qp_offset;
++      __s8    slice_cr_qp_offset;
++      __s8    slice_act_y_qp_offset;
++      __s8    slice_act_cb_qp_offset;
++      __s8    slice_act_cr_qp_offset;
++      __s8    slice_beta_offset_div2;
++      __s8    slice_tc_offset_div2;
++
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++      __u8    pic_struct;
++
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++      __u8    num_active_dpb_entries;
++      __u8    ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++      __u8    ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++      __u8    num_rps_poc_st_curr_before;
++      __u8    num_rps_poc_st_curr_after;
++      __u8    num_rps_poc_lt_curr;
++
++      __u8    padding;
++
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++      struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++      struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++      __u64   flags;
++};
++
++#endif
+--- a/include/media/v4l2-ctrls.h
++++ b/include/media/v4l2-ctrls.h
+@@ -21,6 +21,7 @@
+ #include <media/fwht-ctrls.h>
+ #include <media/h264-ctrls.h>
+ #include <media/vp8-ctrls.h>
++#include <media/hevc-ctrls.h>
+ /* forward references */
+ struct file;
+@@ -50,6 +51,9 @@ struct poll_table_struct;
+  * @p_h264_slice_params:      Pointer to a struct v4l2_ctrl_h264_slice_params.
+  * @p_h264_decode_params:     Pointer to a struct v4l2_ctrl_h264_decode_params.
+  * @p_vp8_frame_header:               Pointer to a VP8 frame header structure.
++ * @p_hevc_sps:                       Pointer to an HEVC sequence parameter set structure.
++ * @p_hevc_pps:                       Pointer to an HEVC picture parameter set structure.
++ * @p_hevc_slice_params:      Pointer to an HEVC slice parameters structure.
+  * @p_area:                   Pointer to an area.
+  * @p:                                Pointer to a compound value.
+  */
+@@ -69,6 +73,9 @@ union v4l2_ctrl_ptr {
+       struct v4l2_ctrl_h264_slice_params *p_h264_slice_params;
+       struct v4l2_ctrl_h264_decode_params *p_h264_decode_params;
+       struct v4l2_ctrl_vp8_frame_header *p_vp8_frame_header;
++      struct v4l2_ctrl_hevc_sps *p_hevc_sps;
++      struct v4l2_ctrl_hevc_pps *p_hevc_pps;
++      struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params;
+       struct v4l2_area *p_area;
+       void *p;
+ };
diff --git a/target/linux/bcm27xx/patches-5.4/950-0504-media-v4l2-mem2mem-Fix-hold-buf-flag-checks.patch b/target/linux/bcm27xx/patches-5.4/950-0504-media-v4l2-mem2mem-Fix-hold-buf-flag-checks.patch
new file mode 100644 (file)
index 0000000..18073a8
--- /dev/null
@@ -0,0 +1,37 @@
+From 73d8a76ec5b5e1240af4142a9ccbd39179d779af Mon Sep 17 00:00:00 2001
+From: Jernej Skrabec <jernej.skrabec@siol.net>
+Date: Wed, 6 Nov 2019 08:02:53 +0100
+Subject: [PATCH] media: v4l2-mem2mem: Fix hold buf flag checks
+
+Commit 1076df3a77b490d33429560a9e0603b3673223e2 upstream.
+
+Hold buf flag is set on output queue, not capture. Fix that.
+
+Fixes: f07602ac3887 ("media: v4l2-mem2mem: add new_frame detection")
+Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+---
+ drivers/media/v4l2-core/v4l2-mem2mem.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
++++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
+@@ -335,7 +335,7 @@ static void __v4l2_m2m_try_queue(struct
+               }
+       }
+-      if (src && dst && (m2m_ctx->cap_q_ctx.q.subsystem_flags &
++      if (src && dst && (m2m_ctx->out_q_ctx.q.subsystem_flags &
+                          VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF))
+               m2m_ctx->new_frame = !dst->vb2_buf.copied_timestamp ||
+                       dst->vb2_buf.timestamp != src->vb2_buf.timestamp;
+@@ -474,7 +474,7 @@ void v4l2_m2m_job_finish(struct v4l2_m2m
+        * holding capture buffers. Those should use
+        * v4l2_m2m_buf_done_and_job_finish() instead.
+        */
+-      WARN_ON(m2m_ctx->cap_q_ctx.q.subsystem_flags &
++      WARN_ON(m2m_ctx->out_q_ctx.q.subsystem_flags &
+               VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF);
+       spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
+       schedule_next = _v4l2_m2m_job_finish(m2m_dev, m2m_ctx);
diff --git a/target/linux/bcm27xx/patches-5.4/950-0505-media-pixfmt-Document-the-HEVC-slice-pixel-format.patch b/target/linux/bcm27xx/patches-5.4/950-0505-media-pixfmt-Document-the-HEVC-slice-pixel-format.patch
new file mode 100644 (file)
index 0000000..7398807
--- /dev/null
@@ -0,0 +1,50 @@
+From 662256810630f6ac6d06ee0cdc5f4660b25f7e98 Mon Sep 17 00:00:00 2001
+From: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+Date: Tue, 22 Oct 2019 12:26:53 -0300
+Subject: [PATCH] media: pixfmt: Document the HEVC slice pixel format
+
+Commit de06f289283298e2938445019999cec46435375c upstream.
+
+Document the current state of the HEVC slice pixel format.
+The format will need to evolve in the future, which is why it is
+not part of the public API.
+
+Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
+---
+ .../media/uapi/v4l/pixfmt-compressed.rst      | 23 +++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
++++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+@@ -188,6 +188,29 @@ Compressed Formats
+       If :ref:`VIDIOC_ENUM_FMT` reports ``V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM``
+       then the decoder has no requirements since it can parse all the
+       information from the raw bytestream.
++    * .. _V4L2-PIX-FMT-HEVC-SLICE:
++
++      - ``V4L2_PIX_FMT_HEVC_SLICE``
++      - 'S265'
++      - HEVC parsed slice data, as extracted from the HEVC bitstream.
++      This format is adapted for stateless video decoders that implement a
++      HEVC pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
++      This pixelformat has two modifiers that must be set at least once
++      through the ``V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE``
++        and ``V4L2_CID_MPEG_VIDEO_HEVC_START_CODE`` controls.
++      Metadata associated with the frame to decode is required to be passed
++      through the following controls :
++        * ``V4L2_CID_MPEG_VIDEO_HEVC_SPS``
++        * ``V4L2_CID_MPEG_VIDEO_HEVC_PPS``
++        * ``V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS``
++      See the :ref:`associated Codec Control IDs <v4l2-mpeg-hevc>`.
++      Buffers associated with this pixel format must contain the appropriate
++      number of macroblocks to decode a full corresponding frame.
++
++      .. note::
++
++         This format is not yet part of the public kernel API and it
++         is expected to change.
+     * .. _V4L2-PIX-FMT-FWHT:
+       - ``V4L2_PIX_FMT_FWHT``
diff --git a/target/linux/bcm27xx/patches-5.4/950-0506-media-uapi-hevc-Add-scaling-matrix-control.patch b/target/linux/bcm27xx/patches-5.4/950-0506-media-uapi-hevc-Add-scaling-matrix-control.patch
new file mode 100644 (file)
index 0000000..c2cf27a
--- /dev/null
@@ -0,0 +1,150 @@
+From 70b5a28786215c996503210abd3e44c200771640 Mon Sep 17 00:00:00 2001
+From: Jernej Skrabec <jernej.skrabec@siol.net>
+Date: Fri, 13 Dec 2019 17:04:25 +0100
+Subject: [PATCH] media: uapi: hevc: Add scaling matrix control
+
+Taken from https://patchwork.linuxtv.org/patch/60728/
+Changes (mainly documentation) have been requested.
+
+HEVC has a scaling matrix concept. Add support for it.
+
+Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
+---
+ .../media/uapi/v4l/ext-ctrls-codec.rst        | 41 +++++++++++++++++++
+ .../media/uapi/v4l/pixfmt-compressed.rst      |  1 +
+ drivers/media/v4l2-core/v4l2-ctrls.c          | 10 +++++
+ include/media/hevc-ctrls.h                    | 11 +++++
+ 4 files changed, 63 insertions(+)
+
+--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
++++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+@@ -4174,6 +4174,47 @@ enum v4l2_mpeg_video_hevc_size_of_length
+       - ``padding[6]``
+       - Applications and drivers must set this to zero.
++``V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (struct)``
++    Specifies the scaling matrix (as extracted from the bitstream) for
++    the associated HEVC slice data. The bitstream parameters are
++    defined according to :ref:`hevc`, section 7.4.5 "Scaling list
++    data semantics". For further documentation, refer to the above
++    specification, unless there is an explicit comment stating
++    otherwise.
++
++    .. note::
++
++       This compound control is not yet part of the public kernel API and
++       it is expected to change.
++
++.. c:type:: v4l2_ctrl_hevc_scaling_matrix
++
++.. cssclass:: longtable
++
++.. flat-table:: struct v4l2_ctrl_hevc_scaling_matrix
++    :header-rows:  0
++    :stub-columns: 0
++    :widths:       1 1 2
++
++    * - __u8
++      - ``scaling_list_4x4[6][16]``
++      -
++    * - __u8
++      - ``scaling_list_8x8[6][64]``
++      -
++    * - __u8
++      - ``scaling_list_16x16[6][64]``
++      -
++    * - __u8
++      - ``scaling_list_32x32[2][64]``
++      -
++    * - __u8
++      - ``scaling_list_dc_coef_16x16[6]``
++      -
++    * - __u8
++      - ``scaling_list_dc_coef_32x32[2]``
++      -
++
+ ``V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (enum)``
+     Specifies the decoding mode to use. Currently exposes slice-based and
+     frame-based decoding but new modes might be added later on.
+--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
++++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+@@ -203,6 +203,7 @@ Compressed Formats
+         * ``V4L2_CID_MPEG_VIDEO_HEVC_SPS``
+         * ``V4L2_CID_MPEG_VIDEO_HEVC_PPS``
+         * ``V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS``
++        * ``V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX``
+       See the :ref:`associated Codec Control IDs <v4l2-mpeg-hevc>`.
+       Buffers associated with this pixel format must contain the appropriate
+       number of macroblocks to decode a full corresponding frame.
+--- a/drivers/media/v4l2-core/v4l2-ctrls.c
++++ b/drivers/media/v4l2-core/v4l2-ctrls.c
+@@ -974,6 +974,7 @@ const char *v4l2_ctrl_get_name(u32 id)
+       case V4L2_CID_MPEG_VIDEO_HEVC_SPS:                      return "HEVC Sequence Parameter Set";
+       case V4L2_CID_MPEG_VIDEO_HEVC_PPS:                      return "HEVC Picture Parameter Set";
+       case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:             return "HEVC Slice Parameters";
++      case V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX:           return "HEVC Scaling Matrix";
+       case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:              return "HEVC Decode Mode";
+       case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:               return "HEVC Start Code";
+@@ -1406,6 +1407,9 @@ void v4l2_ctrl_fill(u32 id, const char *
+       case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:
+               *type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS;
+               break;
++      case V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX:
++              *type = V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX;
++              break;
+       case V4L2_CID_UNIT_CELL_SIZE:
+               *type = V4L2_CTRL_TYPE_AREA;
+               *flags |= V4L2_CTRL_FLAG_READ_ONLY;
+@@ -1852,6 +1856,9 @@ static int std_validate_compound(const s
+               zero_padding(*p_hevc_slice_params);
+               break;
++      case V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX:
++              break;
++
+       case V4L2_CTRL_TYPE_AREA:
+               area = p;
+               if (!area->width || !area->height)
+@@ -2540,6 +2547,9 @@ static struct v4l2_ctrl *v4l2_ctrl_new(s
+       case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
+               elem_size = sizeof(struct v4l2_ctrl_hevc_slice_params);
+               break;
++      case V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX:
++              elem_size = sizeof(struct v4l2_ctrl_hevc_scaling_matrix);
++              break;
+       case V4L2_CTRL_TYPE_AREA:
+               elem_size = sizeof(struct v4l2_area);
+               break;
+--- a/include/media/hevc-ctrls.h
++++ b/include/media/hevc-ctrls.h
+@@ -19,6 +19,7 @@
+ #define V4L2_CID_MPEG_VIDEO_HEVC_SPS          (V4L2_CID_MPEG_BASE + 1008)
+ #define V4L2_CID_MPEG_VIDEO_HEVC_PPS          (V4L2_CID_MPEG_BASE + 1009)
+ #define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX       (V4L2_CID_MPEG_BASE + 1011)
+ #define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE  (V4L2_CID_MPEG_BASE + 1015)
+ #define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE   (V4L2_CID_MPEG_BASE + 1016)
+@@ -26,6 +27,7 @@
+ #define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
+ #define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
+ #define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
+ enum v4l2_mpeg_video_hevc_decode_mode {
+       V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
+@@ -209,4 +211,13 @@ struct v4l2_ctrl_hevc_slice_params {
+       __u64   flags;
+ };
++struct v4l2_ctrl_hevc_scaling_matrix {
++      __u8    scaling_list_4x4[6][16];
++      __u8    scaling_list_8x8[6][64];
++      __u8    scaling_list_16x16[6][64];
++      __u8    scaling_list_32x32[2][64];
++      __u8    scaling_list_dc_coef_16x16[6];
++      __u8    scaling_list_dc_coef_32x32[2];
++};
++
+ #endif
diff --git a/target/linux/bcm27xx/patches-5.4/950-0507-media-uapi-hevc-Add-segment-address-field.patch b/target/linux/bcm27xx/patches-5.4/950-0507-media-uapi-hevc-Add-segment-address-field.patch
new file mode 100644 (file)
index 0000000..91f195b
--- /dev/null
@@ -0,0 +1,61 @@
+From 88eb3b015b6f61252fd214d39fc7fc0379ee0442 Mon Sep 17 00:00:00 2001
+From: Jernej Skrabec <jernej.skrabec@siol.net>
+Date: Fri, 13 Dec 2019 17:04:27 +0100
+Subject: [PATCH] media: uapi: hevc: Add segment address field
+
+From https://patchwork.linuxtv.org/patch/60725/
+Changes requested, but mainly docs.
+
+If HEVC frame consists of multiple slices, segment address has to be
+known in order to properly decode it.
+
+Add segment address field to slice parameters.
+
+Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
+---
+ Documentation/media/uapi/v4l/ext-ctrls-codec.rst | 5 ++++-
+ include/media/hevc-ctrls.h                       | 5 ++++-
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
++++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+@@ -3969,6 +3969,9 @@ enum v4l2_mpeg_video_hevc_size_of_length
+     * - __u32
+       - ``data_bit_offset``
+       - Offset (in bits) to the video data in the current slice data.
++    * - __u32
++      - ``slice_segment_addr``
++      -
+     * - __u8
+       - ``nal_unit_type``
+       -
+@@ -4046,7 +4049,7 @@ enum v4l2_mpeg_video_hevc_size_of_length
+       - ``num_rps_poc_lt_curr``
+       - The number of reference pictures in the long-term set.
+     * - __u8
+-      - ``padding[7]``
++      - ``padding[5]``
+       - Applications and drivers must set this to zero.
+     * - struct :c:type:`v4l2_hevc_dpb_entry`
+       - ``dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+--- a/include/media/hevc-ctrls.h
++++ b/include/media/hevc-ctrls.h
+@@ -167,6 +167,9 @@ struct v4l2_ctrl_hevc_slice_params {
+       __u32   bit_size;
+       __u32   data_bit_offset;
++      /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++      __u32   slice_segment_addr;
++
+       /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
+       __u8    nal_unit_type;
+       __u8    nuh_temporal_id_plus1;
+@@ -200,7 +203,7 @@ struct v4l2_ctrl_hevc_slice_params {
+       __u8    num_rps_poc_st_curr_after;
+       __u8    num_rps_poc_lt_curr;
+-      __u8    padding;
++      __u8    padding[5];
+       /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
+       struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
diff --git a/target/linux/bcm27xx/patches-5.4/950-0508-media-hevc_ctrls-Add-slice-param-dependent-slice-seg.patch b/target/linux/bcm27xx/patches-5.4/950-0508-media-hevc_ctrls-Add-slice-param-dependent-slice-seg.patch
new file mode 100644 (file)
index 0000000..1353480
--- /dev/null
@@ -0,0 +1,23 @@
+From e8355c6b60adb6704c9fb863f380f2d7b457d82c Mon Sep 17 00:00:00 2001
+From: Dave Stevenson <dave.stevenson@raspberrypi.com>
+Date: Mon, 23 Mar 2020 18:34:01 +0000
+Subject: [PATCH] media: hevc_ctrls: Add slice param dependent slice
+ segment
+
+Adds V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT define.
+
+Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
+---
+ include/media/hevc-ctrls.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/media/hevc-ctrls.h
++++ b/include/media/hevc-ctrls.h
+@@ -162,6 +162,7 @@ struct v4l2_hevc_pred_weight_table {
+ #define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV            (1ULL << 6)
+ #define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
+ #define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT   (1ULL << 9)
+ struct v4l2_ctrl_hevc_slice_params {
+       __u32   bit_size;
diff --git a/target/linux/bcm27xx/patches-5.4/950-0509-media-uapi-Add-hevc-ctrls-for-WPP-decoding.patch b/target/linux/bcm27xx/patches-5.4/950-0509-media-uapi-Add-hevc-ctrls-for-WPP-decoding.patch
new file mode 100644 (file)
index 0000000..234cb82
--- /dev/null
@@ -0,0 +1,40 @@
+From 6a42d17668699234bfa2d459e29cc2732e59759b Mon Sep 17 00:00:00 2001
+From: Dave Stevenson <dave.stevenson@raspberrypi.com>
+Date: Mon, 23 Mar 2020 19:00:17 +0000
+Subject: [PATCH] media: uapi: Add hevc ctrls for WPP decoding
+
+WPP can allow greater parallelism within the decode, but needs
+offset information to be passed in.
+
+Adds num_entry_point_offsets and entry_point_offset_minus1 to
+v4l2_ctrl_hevc_slice_params.
+
+This is based on Jernej Skrabec's patches for cedrus which
+implement the same feature.
+
+Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
+---
+ include/media/hevc-ctrls.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/media/hevc-ctrls.h
++++ b/include/media/hevc-ctrls.h
+@@ -170,6 +170,7 @@ struct v4l2_ctrl_hevc_slice_params {
+       /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
+       __u32   slice_segment_addr;
++      __u32   num_entry_point_offsets;
+       /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
+       __u8    nal_unit_type;
+@@ -204,7 +205,9 @@ struct v4l2_ctrl_hevc_slice_params {
+       __u8    num_rps_poc_st_curr_after;
+       __u8    num_rps_poc_lt_curr;
+-      __u8    padding[5];
++      __u8    padding;
++
++      __u32   entry_point_offset_minus1[256];
+       /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
+       struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
diff --git a/target/linux/bcm27xx/patches-5.4/950-0510-media-videodev2.h-Add-a-format-for-column-YUV4-2-0-m.patch b/target/linux/bcm27xx/patches-5.4/950-0510-media-videodev2.h-Add-a-format-for-column-YUV4-2-0-m.patch
new file mode 100644 (file)
index 0000000..840541c
--- /dev/null
@@ -0,0 +1,302 @@
+From a8f52dad0ed65192eb880a4a1ca90b236e99711e Mon Sep 17 00:00:00 2001
+From: Dave Stevenson <dave.stevenson@raspberrypi.com>
+Date: Fri, 24 Jan 2020 14:28:21 +0000
+Subject: [PATCH] media: videodev2.h: Add a format for column YUV4:2:0
+ modes
+
+Some of the Broadcom codec blocks use a column based YUV4:2:0 image
+format, so add the documentation and defines for both 8 and 10 bit
+versions.
+
+Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
+---
+ .../media/uapi/v4l/pixfmt-nv12-col128.rst     | 215 ++++++++++++++++++
+ Documentation/media/uapi/v4l/pixfmt-nv12.rst  |  14 +-
+ Documentation/media/uapi/v4l/yuv-formats.rst  |   1 +
+ drivers/media/v4l2-core/v4l2-ioctl.c          |   2 +
+ include/uapi/linux/videodev2.h                |   4 +
+ 5 files changed, 233 insertions(+), 3 deletions(-)
+ create mode 100644 Documentation/media/uapi/v4l/pixfmt-nv12-col128.rst
+
+--- /dev/null
++++ b/Documentation/media/uapi/v4l/pixfmt-nv12-col128.rst
+@@ -0,0 +1,215 @@
++.. Permission is granted to copy, distribute and/or modify this
++.. document under the terms of the GNU Free Documentation License,
++.. Version 1.1 or any later version published by the Free Software
++.. Foundation, with no Invariant Sections, no Front-Cover Texts
++.. and no Back-Cover Texts. A copy of the license is included at
++.. Documentation/media/uapi/fdl-appendix.rst.
++..
++.. TODO: replace it to GFDL-1.1-or-later WITH no-invariant-sections
++
++.. _V4L2_PIX_FMT_NV12_COL128:
++.. _V4L2_PIX_FMT_NV12_10_COL128:
++
++********************************************************************************
++V4L2_PIX_FMT_NV12_COL128, V4L2_PIX_FMT_NV12_10_COL128
++********************************************************************************
++
++
++V4L2_PIX_FMT_NV12_COL128
++Formats with ½ horizontal and vertical chroma resolution. This format
++has two planes - one for luminance and one for chrominance. Chroma
++samples are interleaved. The difference to ``V4L2_PIX_FMT_NV12`` is the
++memory layout. The image is split into columns of 128 bytes wide rather than
++being in raster order.
++
++V4L2_PIX_FMT_NV12_10_COL128
++Follows the same 128 byte wide column pattern as ``V4L2_PIX_FMT_NV12_COL128``,
++but is a 10-bit format with three 10-bit samples packed into 4 bytes. Each
++128 byte wide column therefore contains 96 samples.
++
++
++Description
++===========
++
++These are the two-plane versions of the YUV 4:2:0 format where data is
++grouped into 128 byte wide columns. The three components are separated into
++two sub-images or planes. The Y plane has one byte per pixel and pixels
++are grouped into 128 byte wide columns. The CbCr plane has the same width,
++in bytes, as the Y plane (and the image), but is half as tall in pixels.
++The chroma plane is also in 128 byte columns, reflecting 64 Cb and 64 Cr
++samples.
++
++The chroma samples for a column follow the luma samples. If there is any
++padding, then that will be reflected via the selection API.
++The luma height must be a multiple of 2 lines.
++
++The normal bytesperline is effectively fixed at 128. However the format
++requires knowledge of the stride between columns, therefore the bytesperline
++value has been repurposed to denote the number of 128 byte long lines between
++the start of each column.
++
++**Byte Order.**
++
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths: 12 12 12 12 12 4 12 12 12 12
++
++    * - start + 0:
++      - Y'\ :sub:`0,0`
++      - Y'\ :sub:`0,1`
++      - Y'\ :sub:`0,2`
++      - Y'\ :sub:`0,3`
++      - ...
++      - Y'\ :sub:`0,124`
++      - Y'\ :sub:`0,125`
++      - Y'\ :sub:`0,126`
++      - Y'\ :sub:`0,127`
++    * - start + 128:
++      - Y'\ :sub:`1,0`
++      - Y'\ :sub:`1,1`
++      - Y'\ :sub:`1,2`
++      - Y'\ :sub:`1,3`
++      - ...
++      - Y'\ :sub:`1,124`
++      - Y'\ :sub:`1,125`
++      - Y'\ :sub:`1,126`
++      - Y'\ :sub:`1,127`
++    * - start + 256:
++      - Y'\ :sub:`2,0`
++      - Y'\ :sub:`2,1`
++      - Y'\ :sub:`2,2`
++      - Y'\ :sub:`2,3`
++      - ...
++      - Y'\ :sub:`2,124`
++      - Y'\ :sub:`2,125`
++      - Y'\ :sub:`2,126`
++      - Y'\ :sub:`2,127`
++    * - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++    * - start + ((height-1) * 128):
++      - Y'\ :sub:`height-1,0`
++      - Y'\ :sub:`height-1,1`
++      - Y'\ :sub:`height-1,2`
++      - Y'\ :sub:`height-1,3`
++      - ...
++      - Y'\ :sub:`height-1,124`
++      - Y'\ :sub:`height-1,125`
++      - Y'\ :sub:`height-1,126`
++      - Y'\ :sub:`height-1,127`
++    * - start + ((height) * 128):
++      - Cb\ :sub:`0,0`
++      - Cr\ :sub:`0,0`
++      - Cb\ :sub:`0,1`
++      - Cr\ :sub:`0,1`
++      - ...
++      - Cb\ :sub:`0,62`
++      - Cr\ :sub:`0,62`
++      - Cb\ :sub:`0,63`
++      - Cr\ :sub:`0,63`
++    * - start + ((height+1) * 128):
++      - Cb\ :sub:`1,0`
++      - Cr\ :sub:`1,0`
++      - Cb\ :sub:`1,1`
++      - Cr\ :sub:`1,1`
++      - ...
++      - Cb\ :sub:`1,62`
++      - Cr\ :sub:`1,62`
++      - Cb\ :sub:`1,63`
++      - Cr\ :sub:`1,63`
++    * - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++    * - start + ((height+(height/2)-1) * 128):
++      - Cb\ :sub:`(height/2)-1,0`
++      - Cr\ :sub:`(height/2)-1,0`
++      - Cb\ :sub:`(height/2)-1,1`
++      - Cr\ :sub:`(height/2)-1,1`
++      - ...
++      - Cb\ :sub:`(height/2)-1,62`
++      - Cr\ :sub:`(height/2)-1,62`
++      - Cb\ :sub:`(height/2)-1,63`
++      - Cr\ :sub:`(height/2)-1,63`
++    * - start + (bytesperline * 128):
++      - Y'\ :sub:`0,128`
++      - Y'\ :sub:`0,129`
++      - Y'\ :sub:`0,130`
++      - Y'\ :sub:`0,131`
++      - ...
++      - Y'\ :sub:`0,252`
++      - Y'\ :sub:`0,253`
++      - Y'\ :sub:`0,254`
++      - Y'\ :sub:`0,255`
++    * - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++      - ...
++
++V4L2_PIX_FMT_NV12_10_COL128 uses the same 128 byte column structure, but
++encodes 10-bit YUV.
++3 10-bit values are packed into 4 bytes as bits 9:0, 19:10, and 29:20, with
++bits 30 & 31 unused. For the luma plane, bits 9:0 are Y0, 19:10 are Y1, and
++29:20 are Y2. For the chroma plane the samples always come in pairs of Cr
++and Cb, so it needs to be considered 6 values packed in 8 bytes.
++
++Bit-packed representation.
++
++.. raw:: latex
++
++    \small
++
++.. tabularcolumns:: |p{1.2cm}||p{1.2cm}||p{1.2cm}||p{1.2cm}|p{3.2cm}|p{3.2cm}|
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths: 8 8 8 8
++
++    * - Y'\ :sub:`00[7:0]`
++      - Y'\ :sub:`01[5:0]`\ (bits 7--2) Y'\ :sub:`00[9:8]`\ (bits 1--0)
++      - Y'\ :sub:`02[3:0]`\ (bits 7--4) Y'\ :sub:`01[9:6]`\ (bits 3--0)
++      - unused (bits 7--6) Y'\ :sub:`02[9:4]`\ (bits 5--0)
++
++.. raw:: latex
++
++    \small
++
++.. tabularcolumns:: |p{1.2cm}||p{1.2cm}||p{1.2cm}||p{1.2cm}|p{3.2cm}|p{3.2cm}|
++
++.. flat-table::
++    :header-rows:  0
++    :stub-columns: 0
++    :widths: 12 12 12 12 12 12 12 12
++
++    * - Cb\ :sub:`00[7:0]`
++      - Cr\ :sub:`00[5:0]`\ (bits 7--2) Cb\ :sub:`00[9:8]`\ (bits 1--0)
++      - Cb\ :sub:`01[3:0]`\ (bits 7--4) Cr\ :sub:`00[9:6]`\ (bits 3--0)
++      - unused (bits 7--6) Cb\ :sub:`01[9:4]`\ (bits 5--0)
++      - Cr\ :sub:`01[7:0]`
++      - Cb\ :sub:`02[5:0]`\ (bits 7--2) Cr\ :sub:`01[9:8]`\ (bits 1--0)
++      - Cr\ :sub:`02[3:0]`\ (bits 7--4) Cb\ :sub:`02[9:6]`\ (bits 3--0)
++      - unused (bits 7--6) Cr\ :sub:`02[9:4]`\ (bits 5--0)
++
++.. raw:: latex
++
++    \normalsize
++
++
++
++
+--- a/Documentation/media/uapi/v4l/pixfmt-nv12.rst
++++ b/Documentation/media/uapi/v4l/pixfmt-nv12.rst
+@@ -10,9 +10,9 @@
+ .. _V4L2-PIX-FMT-NV12:
+ .. _V4L2-PIX-FMT-NV21:
+-******************************************************
+-V4L2_PIX_FMT_NV12 ('NV12'), V4L2_PIX_FMT_NV21 ('NV21')
+-******************************************************
++********************************************************************************
++V4L2_PIX_FMT_NV12 ('NV12'), V4L2_PIX_FMT_NV21 ('NV21'), V4L2_PIX_FMT_NV12_COL128
++********************************************************************************
+ V4L2_PIX_FMT_NV21
+@@ -38,6 +38,14 @@ with a Cr byte.
+ If the Y plane has pad bytes after each row, then the CbCr plane has as
+ many pad bytes after its rows.
++``V4L2_PIX_FMT_NV12_COL128`` is the tiled version of
++``V4L2_PIX_FMT_NV12`` with the image broken down into 128 pixel wide columns of
++Y followed by the associated combined CbCr plane.
++The normal bytesperline is effectively fixed at 128. However the format
++requires knowledge of the stride between columns, therefore the bytesperline
++value has been repurposed to denote the number of 128 byte long lines between
++the start of each column.
++
+ **Byte Order.**
+ Each cell is one byte.
+--- a/Documentation/media/uapi/v4l/yuv-formats.rst
++++ b/Documentation/media/uapi/v4l/yuv-formats.rst
+@@ -57,6 +57,7 @@ to brightness information.
+     pixfmt-nv12
+     pixfmt-nv12m
+     pixfmt-nv12mt
++    pixfmt-nv12-col128
+     pixfmt-nv16
+     pixfmt-nv16m
+     pixfmt-nv24
+--- a/drivers/media/v4l2-core/v4l2-ioctl.c
++++ b/drivers/media/v4l2-core/v4l2-ioctl.c
+@@ -1258,6 +1258,8 @@ static void v4l_fill_fmtdesc(struct v4l2
+       case V4L2_PIX_FMT_NV61M:        descr = "Y/CrCb 4:2:2 (N-C)"; break;
+       case V4L2_PIX_FMT_NV12MT:       descr = "Y/CbCr 4:2:0 (64x32 MB, N-C)"; break;
+       case V4L2_PIX_FMT_NV12MT_16X16: descr = "Y/CbCr 4:2:0 (16x16 MB, N-C)"; break;
++      case V4L2_PIX_FMT_NV12_COL128:  descr = "Y/CbCr 4:2:0 (128b cols)"; break;
++      case V4L2_PIX_FMT_NV12_10_COL128: descr = "10-bit Y/CbCr 4:2:0 (128b cols)"; break;
+       case V4L2_PIX_FMT_YUV420M:      descr = "Planar YUV 4:2:0 (N-C)"; break;
+       case V4L2_PIX_FMT_YVU420M:      descr = "Planar YVU 4:2:0 (N-C)"; break;
+       case V4L2_PIX_FMT_YUV422M:      descr = "Planar YUV 4:2:2 (N-C)"; break;
+--- a/include/uapi/linux/videodev2.h
++++ b/include/uapi/linux/videodev2.h
+@@ -737,6 +737,10 @@ struct v4l2_pix_format {
+ #define V4L2_PIX_FMT_INZI     v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */
+ #define V4L2_PIX_FMT_SUNXI_TILED_NV12 v4l2_fourcc('S', 'T', '1', '2') /* Sunxi Tiled NV12 Format */
+ #define V4L2_PIX_FMT_CNF4     v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++                                                              /* Y/CbCr 4:2:0 10bpc, 3x10 packed as 4 bytes in
++                                                               * a 128 bytes / 96 pixel wide column */
+ /* 10bit raw bayer packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */
+ #define V4L2_PIX_FMT_IPU3_SBGGR10     v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */
diff --git a/target/linux/bcm27xx/patches-5.4/950-0511-media-v4l2-mem2mem-allow-request-job-buffer-processi.patch b/target/linux/bcm27xx/patches-5.4/950-0511-media-v4l2-mem2mem-allow-request-job-buffer-processi.patch
new file mode 100644 (file)
index 0000000..a3023ca
--- /dev/null
@@ -0,0 +1,274 @@
+From b8ae9d55d468a9f55524296247dba93531c29c99 Mon Sep 17 00:00:00 2001
+From: John Cox <jc@kynesim.co.uk>
+Date: Thu, 5 Mar 2020 14:46:54 +0000
+Subject: [PATCH] media: v4l2-mem2mem: allow request job buffer
+ processing after job finish
+
+Allow the capture buffer to be detached from a v4l2 request job such
+that another job can start before the capture buffer is returned. This
+allows h/w codecs that can process multiple requests at the same time
+to operate more efficiently.
+
+Signed-off-by: John Cox <jc@kynesim.co.uk>
+---
+ drivers/media/v4l2-core/v4l2-mem2mem.c | 105 +++++++++++++++++++++++--
+ include/media/v4l2-mem2mem.h           |  47 +++++++++++
+ include/media/videobuf2-v4l2.h         |   3 +
+ 3 files changed, 149 insertions(+), 6 deletions(-)
+
+--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
++++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
+@@ -399,15 +399,18 @@ static void v4l2_m2m_cancel_job(struct v
+ {
+       struct v4l2_m2m_dev *m2m_dev;
+       unsigned long flags;
++      bool det_abort_req;
+       m2m_dev = m2m_ctx->m2m_dev;
+       spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
++      det_abort_req = !list_empty(&m2m_ctx->det_list);
+       m2m_ctx->job_flags |= TRANS_ABORT;
+       if (m2m_ctx->job_flags & TRANS_RUNNING) {
+               spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+               if (m2m_dev->m2m_ops->job_abort)
+                       m2m_dev->m2m_ops->job_abort(m2m_ctx->priv);
++              det_abort_req = false;
+               dprintk("m2m_ctx %p running, will wait to complete\n", m2m_ctx);
+               wait_event(m2m_ctx->finished,
+                               !(m2m_ctx->job_flags & TRANS_RUNNING));
+@@ -421,6 +424,11 @@ static void v4l2_m2m_cancel_job(struct v
+               /* Do nothing, was not on queue/running */
+               spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+       }
++
++      /* Wait for detached buffers to come back too */
++      if (det_abort_req && m2m_dev->m2m_ops->job_abort)
++              m2m_dev->m2m_ops->job_abort(m2m_ctx->priv);
++      wait_event(m2m_ctx->det_empty, list_empty(&m2m_ctx->det_list));
+ }
+ /*
+@@ -458,6 +466,7 @@ static bool _v4l2_m2m_job_finish(struct
+       list_del(&m2m_dev->curr_ctx->queue);
+       m2m_dev->curr_ctx->job_flags &= ~(TRANS_QUEUED | TRANS_RUNNING);
++      m2m_ctx->cap_detached = false;
+       wake_up(&m2m_dev->curr_ctx->finished);
+       m2m_dev->curr_ctx = NULL;
+       return true;
+@@ -485,6 +494,80 @@ void v4l2_m2m_job_finish(struct v4l2_m2m
+ }
+ EXPORT_SYMBOL(v4l2_m2m_job_finish);
++/*
++ * Take the next ready capture buffer off the context's ready queue, append
++ * it to the context's detached list and mark it as held and in flight
++ * (det_state = VB2_BUF_STATE_ACTIVE).
++ *
++ * Returns the detached buffer, or NULL if no capture buffer was ready.
++ * The callers in this file invoke it with m2m_dev->job_spinlock held.
++ *
++ * NOTE(review): not declared in the header and not exported - consider
++ * making it static if no other user exists.
++ */
++struct vb2_v4l2_buffer *_v4l2_m2m_cap_buf_detach(struct v4l2_m2m_ctx *m2m_ctx)
++{
++      struct vb2_v4l2_buffer *buf;
++
++      buf = v4l2_m2m_dst_buf_remove(m2m_ctx);
++      /* Nothing ready: do not link a bogus node or dereference NULL */
++      if (!buf)
++              return NULL;
++
++      list_add_tail(&container_of(buf, struct v4l2_m2m_buffer, vb)->list,
++                    &m2m_ctx->det_list);
++      m2m_ctx->cap_detached = true;
++      buf->is_held = true;
++      buf->det_state = VB2_BUF_STATE_ACTIVE;
++
++      return buf;
++}
++
++/*
++ * Detach the current job's capture buffer so that the job can be finished
++ * before the buffer is handed back.
++ *
++ * Returns the detached buffer, or NULL when nothing was detached: no source
++ * buffer is queued, the source buffer asks for the capture buffer to be held
++ * (V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF), or this job's capture buffer has
++ * already been detached.
++ */
++struct vb2_v4l2_buffer *v4l2_m2m_cap_buf_detach(struct v4l2_m2m_dev *m2m_dev,
++                                              struct v4l2_m2m_ctx *m2m_ctx)
++{
++      unsigned long flags;
++      struct vb2_v4l2_buffer *src_buf, *dst_buf;
++
++      spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
++
++      dst_buf = NULL;
++      src_buf = v4l2_m2m_next_src_buf(m2m_ctx);
++
++      /* src_buf is NULL when the OUTPUT ready queue is empty */
++      if (src_buf &&
++          !(src_buf->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) &&
++          !m2m_ctx->cap_detached)
++              dst_buf = _v4l2_m2m_cap_buf_detach(m2m_ctx);
++
++      spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
++      return dst_buf;
++}
++EXPORT_SYMBOL(v4l2_m2m_cap_buf_detach);
++
++/*
++ * Record the final state of a detached capture buffer and complete as many
++ * buffers from the head of det_list as are ready.
++ *
++ * @m2m_ctx: context owning the detached list
++ * @buf: detached buffer whose processing has ended
++ * @state: final state recorded for @buf
++ *
++ * The callers in this file invoke it with m2m_dev->job_spinlock held.
++ * det_empty is only woken when the list has been fully drained.
++ */
++static void _v4l2_m2m_cap_buf_return(struct v4l2_m2m_ctx *m2m_ctx,
++                                   struct vb2_v4l2_buffer *buf,
++                                   enum vb2_buffer_state state)
++{
++      buf->det_state = state;
++
++      /*
++       * Always signal done in the order we got stuff
++       * Stop if we find a buf that is still in use
++       */
++      while (!list_empty(&m2m_ctx->det_list)) {
++              /* buf and state are reused here for the list head entry */
++              buf = &list_first_entry(&m2m_ctx->det_list,
++                                      struct v4l2_m2m_buffer, list)->vb;
++              state = buf->det_state;
++              if (state != VB2_BUF_STATE_DONE &&
++                  state != VB2_BUF_STATE_ERROR)
++                      return;
++              list_del(&container_of(buf, struct v4l2_m2m_buffer, vb)->list);
++              /* Clear the recorded state before completing the buffer */
++              buf->det_state = VB2_BUF_STATE_DEQUEUED;
++              v4l2_m2m_buf_done(buf, state);
++      }
++      wake_up(&m2m_ctx->det_empty);
++}
++
++/*
++ * Locked entry point for handing back a detached capture buffer: takes
++ * m2m_dev->job_spinlock around _v4l2_m2m_cap_buf_return(). A NULL @buf is
++ * accepted and ignored.
++ */
++void v4l2_m2m_cap_buf_return(struct v4l2_m2m_dev *m2m_dev,
++                           struct v4l2_m2m_ctx *m2m_ctx,
++                           struct vb2_v4l2_buffer *buf,
++                           enum vb2_buffer_state state)
++{
++      unsigned long irq_flags;
++
++      if (buf) {
++              spin_lock_irqsave(&m2m_dev->job_spinlock, irq_flags);
++              _v4l2_m2m_cap_buf_return(m2m_ctx, buf, state);
++              spin_unlock_irqrestore(&m2m_dev->job_spinlock, irq_flags);
++      }
++}
++EXPORT_SYMBOL(v4l2_m2m_cap_buf_return);
++
+ void v4l2_m2m_buf_done_and_job_finish(struct v4l2_m2m_dev *m2m_dev,
+                                     struct v4l2_m2m_ctx *m2m_ctx,
+                                     enum vb2_buffer_state state)
+@@ -495,15 +578,23 @@ void v4l2_m2m_buf_done_and_job_finish(st
+       spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
+       src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
+-      dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx);
+-      if (WARN_ON(!src_buf || !dst_buf))
++      if (WARN_ON(!src_buf))
+               goto unlock;
+       v4l2_m2m_buf_done(src_buf, state);
+-      dst_buf->is_held = src_buf->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
+-      if (!dst_buf->is_held) {
+-              v4l2_m2m_dst_buf_remove(m2m_ctx);
+-              v4l2_m2m_buf_done(dst_buf, state);
++
++      if (!m2m_ctx->cap_detached) {
++              dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx);
++              if (WARN_ON(!dst_buf))
++                      goto unlock;
++
++              dst_buf->is_held = src_buf->flags
++                                  & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
++
++              if (!dst_buf->is_held) {
++                      dst_buf = _v4l2_m2m_cap_buf_detach(m2m_ctx);
++                      _v4l2_m2m_cap_buf_return(m2m_ctx, dst_buf, state);
++              }
+       }
+       schedule_next = _v4l2_m2m_job_finish(m2m_dev, m2m_ctx);
+ unlock:
+@@ -983,12 +1074,14 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(s
+       m2m_ctx->priv = drv_priv;
+       m2m_ctx->m2m_dev = m2m_dev;
+       init_waitqueue_head(&m2m_ctx->finished);
++      init_waitqueue_head(&m2m_ctx->det_empty);
+       out_q_ctx = &m2m_ctx->out_q_ctx;
+       cap_q_ctx = &m2m_ctx->cap_q_ctx;
+       INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
+       INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
++      INIT_LIST_HEAD(&m2m_ctx->det_list);
+       spin_lock_init(&out_q_ctx->rdy_spinlock);
+       spin_lock_init(&cap_q_ctx->rdy_spinlock);
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -88,6 +88,9 @@ struct v4l2_m2m_queue_ctx {
+  *            %TRANS_QUEUED, %TRANS_RUNNING and %TRANS_ABORT.
+  * @finished: Wait queue used to signalize when a job queue finished.
+  * @priv: Instance private data
++ * @cap_detached: Current job's capture buffer has been detached
++ * @det_list: List of detached (post-job but still in flight) capture buffers
++ * @det_empty: Wait queue signalled when det_list goes empty
+  *
+  * The memory to memory context is specific to a file handle, NOT to e.g.
+  * a device.
+@@ -111,6 +114,11 @@ struct v4l2_m2m_ctx {
+       wait_queue_head_t               finished;
+       void                            *priv;
++
++      /* Detached buffer handling */
++      bool    cap_detached;
++      struct list_head                det_list;
++      wait_queue_head_t               det_empty;
+ };
+ /**
+@@ -216,6 +224,45 @@ v4l2_m2m_buf_done(struct vb2_v4l2_buffer
+ }
+ /**
++ * v4l2_m2m_cap_buf_detach() - detach the capture buffer from the job and
++ * return it.
++ *
++ * @m2m_dev: opaque pointer to the internal data to handle M2M context
++ * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
++ *
++ * This function is designed to be used in conjunction with
++ * v4l2_m2m_buf_done_and_job_finish(). It allows the next job to start
++ * execution before the capture buffer is returned to the user which can be
++ * important if the underlying processing has multiple phases that are more
++ * efficiently executed in parallel.
++ *
++ * If used then it must be called before v4l2_m2m_buf_done_and_job_finish()
++ * as otherwise the buffer will have already gone.
++ *
++ * It is the caller's responsibility to ensure that all detached buffers are
++ * returned.
++ */
++struct vb2_v4l2_buffer *v4l2_m2m_cap_buf_detach(struct v4l2_m2m_dev *m2m_dev,
++                                              struct v4l2_m2m_ctx *m2m_ctx);
++
++/**
++ * v4l2_m2m_cap_buf_return() - return a capture buffer, previously detached
++ * with v4l2_m2m_cap_buf_detach() to the user.
++ *
++ * @m2m_dev: opaque pointer to the internal data to handle M2M context
++ * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
++ * @buf: the buffer to return
++ * @state: vb2 buffer state passed to v4l2_m2m_buf_done().
++ *
++ * Buffers returned by this function will be returned to the user in the order
++ * of the original jobs rather than the order in which this function is called.
++ */
++void v4l2_m2m_cap_buf_return(struct v4l2_m2m_dev *m2m_dev,
++                           struct v4l2_m2m_ctx *m2m_ctx,
++                           struct vb2_v4l2_buffer *buf,
++                           enum vb2_buffer_state state);
++
++/**
+  * v4l2_m2m_reqbufs() - multi-queue-aware REQBUFS multiplexer
+  *
+  * @file: pointer to struct &file
+--- a/include/media/videobuf2-v4l2.h
++++ b/include/media/videobuf2-v4l2.h
+@@ -35,6 +35,8 @@
+  * @request_fd:       the request_fd associated with this buffer
+  * @is_held:  if true, then this capture buffer was held
+  * @planes:   plane information (userptr/fd, length, bytesused, data_offset).
++ * @det_state:        if a detached request capture buffer then this contains its
++ *            current state
+  *
+  * Should contain enough information to be able to cover all the fields
+  * of &struct v4l2_buffer at ``videodev2.h``.
+@@ -49,6 +51,7 @@ struct vb2_v4l2_buffer {
+       __s32                   request_fd;
+       bool                    is_held;
+       struct vb2_plane        planes[VB2_MAX_PLANES];
++      enum vb2_buffer_state   det_state;
+ };
+ /* VB2 V4L2 flags as set in vb2_queue.subsystem_flags */
diff --git a/target/linux/bcm27xx/patches-5.4/950-0512-media-dt-bindings-media-Add-binding-for-the-Raspberr.patch b/target/linux/bcm27xx/patches-5.4/950-0512-media-dt-bindings-media-Add-binding-for-the-Raspberr.patch
new file mode 100644 (file)
index 0000000..203e112
--- /dev/null
@@ -0,0 +1,106 @@
+From 15b4e8fa2d5101b989856c42cdae6ec764c99db0 Mon Sep 17 00:00:00 2001
+From: Dave Stevenson <dave.stevenson@raspberrypi.com>
+Date: Tue, 17 Mar 2020 10:53:16 +0000
+Subject: [PATCH] media: dt-bindings: media: Add binding for the
+ Raspberry PI HEVC decoder
+
+Adds a binding for the HEVC decoder found on the BCM2711 / Raspberry Pi 4.
+
+Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
+---
+ .../bindings/media/rpivid_hevc.yaml           | 72 +++++++++++++++++++
+ MAINTAINERS                                   |  7 ++
+ 2 files changed, 79 insertions(+)
+ create mode 100644 Documentation/devicetree/bindings/media/rpivid_hevc.yaml
+
+--- /dev/null
++++ b/Documentation/devicetree/bindings/media/rpivid_hevc.yaml
+@@ -0,0 +1,72 @@
++# SPDX-License-Identifier: GPL-2.0-only
++%YAML 1.2
++---
++$id: http://devicetree.org/schemas/media/rpivid_hevc.yaml#
++$schema: http://devicetree.org/meta-schemas/core.yaml#
++
++title: Raspberry Pi HEVC Decoder
++
++maintainers:
++  - Raspberry Pi <kernel-list@raspberrypi.com>
++
++description: |-
++  The Raspberry Pi HEVC decoder is the hardware video decoder block found
++  on the BCM2711 (Raspberry Pi 4). It decodes HEVC/H.265 bitstreams and is
++  driven through the V4L2 stateless decoder API by the rpivid staging
++  driver.
++
++properties:
++  compatible:
++    enum:
++      - raspberrypi,rpivid-vid-decoder
++
++  reg:
++    minItems: 2
++    items:
++      - description: The HEVC main register region
++      - description: The Interrupt controller register region
++
++  reg-names:
++    minItems: 2
++    items:
++      - const: hevc
++      - const: intc
++
++  interrupts:
++    maxItems: 1
++
++  clocks:
++    items:
++      - description: The HEVC block clock
++
++  clock-names:
++    items:
++      - const: hevc
++
++required:
++  - compatible
++  - reg
++  - reg-names
++  - interrupts
++  - clocks
++
++additionalProperties: false
++
++examples:
++  - |
++    #include <dt-bindings/interrupt-controller/arm-gic.h>
++
++    video-codec@7eb10000 {
++        compatible = "raspberrypi,rpivid-vid-decoder";
++        reg = <0x0 0x7eb10000 0x1000>,        /* INTC */
++              <0x0 0x7eb00000 0x10000>; /* HEVC */
++        reg-names = "intc",
++                    "hevc";
++
++        interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
++
++        clocks = <&clk 0>;
++        clock-names = "hevc";
++    };
++
++...
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -3198,6 +3198,13 @@ N:      bcm2711
+ N:    bcm2835
+ F:    drivers/staging/vc04_services
++BROADCOM BCM2711 HEVC DECODER
++M:    Raspberry Pi Kernel Maintenance <kernel-list@raspberrypi.com>
++L:    linux-media@vger.kernel.org
++S:    Maintained
++F:    Documentation/devicetree/bindings/media/rpivid_hevc.yaml
++F:    drivers/staging/media/rpivid
++
+ BROADCOM BCM2835 CAMERA DRIVER
+ M:    Dave Stevenson <dave.stevenson@raspberrypi.org>
+ L:    linux-media@vger.kernel.org
diff --git a/target/linux/bcm27xx/patches-5.4/950-0513-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch b/target/linux/bcm27xx/patches-5.4/950-0513-staging-media-Add-Raspberry-Pi-V4L2-H265-decoder.patch
new file mode 100644 (file)
index 0000000..134a685
--- /dev/null
@@ -0,0 +1,4341 @@
+From 82bbd353e2dc364bf37e6f0b91890cb432b1a72f Mon Sep 17 00:00:00 2001
+From: John Cox <jc@kynesim.co.uk>
+Date: Thu, 5 Mar 2020 18:30:41 +0000
+Subject: [PATCH] staging: media: Add Raspberry Pi V4L2 H265 decoder
+
+This driver is for the HEVC/H265 decoder block on the Raspberry
+Pi 4, and conforms to the V4L2 stateless decoder API.
+
+Signed-off-by: John Cox <jc@kynesim.co.uk>
+---
+ drivers/staging/media/Kconfig               |    2 +
+ drivers/staging/media/Makefile              |    1 +
+ drivers/staging/media/rpivid/Kconfig        |   16 +
+ drivers/staging/media/rpivid/Makefile       |    5 +
+ drivers/staging/media/rpivid/rpivid.c       |  432 ++++
+ drivers/staging/media/rpivid/rpivid.h       |  181 ++
+ drivers/staging/media/rpivid/rpivid_dec.c   |   79 +
+ drivers/staging/media/rpivid/rpivid_dec.h   |   19 +
+ drivers/staging/media/rpivid/rpivid_h265.c  | 2275 +++++++++++++++++++
+ drivers/staging/media/rpivid/rpivid_hw.c    |  321 +++
+ drivers/staging/media/rpivid/rpivid_hw.h    |  300 +++
+ drivers/staging/media/rpivid/rpivid_video.c |  593 +++++
+ drivers/staging/media/rpivid/rpivid_video.h |   30 +
+ 14 files changed, 4256 insertions(+)
+ create mode 100644 drivers/staging/media/rpivid/Kconfig
+ create mode 100644 drivers/staging/media/rpivid/Makefile
+ create mode 100644 drivers/staging/media/rpivid/rpivid.c
+ create mode 100644 drivers/staging/media/rpivid/rpivid.h
+ create mode 100644 drivers/staging/media/rpivid/rpivid_dec.c
+ create mode 100644 drivers/staging/media/rpivid/rpivid_dec.h
+ create mode 100644 drivers/staging/media/rpivid/rpivid_h265.c
+ create mode 100644 drivers/staging/media/rpivid/rpivid_hw.c
+ create mode 100644 drivers/staging/media/rpivid/rpivid_hw.h
+ create mode 100644 drivers/staging/media/rpivid/rpivid_video.c
+ create mode 100644 drivers/staging/media/rpivid/rpivid_video.h
+
+--- a/drivers/staging/media/Kconfig
++++ b/drivers/staging/media/Kconfig
+@@ -30,6 +30,8 @@ source "drivers/staging/media/meson/vdec
+ source "drivers/staging/media/omap4iss/Kconfig"
++source "drivers/staging/media/rpivid/Kconfig"
++
+ source "drivers/staging/media/sunxi/Kconfig"
+ source "drivers/staging/media/tegra-vde/Kconfig"
+--- a/drivers/staging/media/Makefile
++++ b/drivers/staging/media/Makefile
+@@ -3,6 +3,7 @@ obj-$(CONFIG_VIDEO_ALLEGRO_DVT)        += alleg
+ obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx/
+ obj-$(CONFIG_VIDEO_MESON_VDEC)        += meson/vdec/
+ obj-$(CONFIG_VIDEO_OMAP4)     += omap4iss/
++obj-$(CONFIG_VIDEO_RPIVID)    += rpivid/
+ obj-$(CONFIG_VIDEO_SUNXI)     += sunxi/
+ obj-$(CONFIG_TEGRA_VDE)               += tegra-vde/
+ obj-$(CONFIG_VIDEO_HANTRO)    += hantro/
+--- /dev/null
++++ b/drivers/staging/media/rpivid/Kconfig
+@@ -0,0 +1,16 @@
++# SPDX-License-Identifier: GPL-2.0
++
++config VIDEO_RPIVID
++      tristate "Rpi H265 driver"
++      depends on VIDEO_DEV && VIDEO_V4L2
++      depends on MEDIA_CONTROLLER
++      depends on OF
++      depends on MEDIA_CONTROLLER_REQUEST_API
++      select VIDEOBUF2_DMA_CONTIG
++      select V4L2_MEM2MEM_DEV
++      help
++        Support for the Rpi H265 h/w decoder.
++
++        To compile this driver as a module, choose M here: the module
++        will be called rpivid-hevc.
++
+--- /dev/null
++++ b/drivers/staging/media/rpivid/Makefile
+@@ -0,0 +1,5 @@
++# SPDX-License-Identifier: GPL-2.0
++obj-$(CONFIG_VIDEO_RPIVID) += rpivid-hevc.o
++
++rpivid-hevc-y = rpivid.o rpivid_video.o rpivid_dec.o \
++               rpivid_hw.o rpivid_h265.o
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid.c
+@@ -0,0 +1,432 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#include <linux/platform_device.h>
++#include <linux/module.h>
++#include <linux/of.h>
++
++#include <media/v4l2-device.h>
++#include <media/v4l2-ioctl.h>
++#include <media/v4l2-ctrls.h>
++#include <media/v4l2-mem2mem.h>
++
++#include "rpivid.h"
++#include "rpivid_video.h"
++#include "rpivid_hw.h"
++#include "rpivid_dec.h"
++
++/*
++ * Default /dev/videoN node number.
++ * Deliberately avoid the very low numbers as these are often taken by webcams
++ * etc, and simple apps tend to only go for /dev/video0.
++ */
++static int video_nr = 19;
++module_param(video_nr, int, 0644);
++MODULE_PARM_DESC(video_nr, "decoder video device number");
++
++/*
++ * Controls created on every context's control handler by
++ * rpivid_init_ctrls(). Entries with .required set must be present in each
++ * request; rpivid_request_validate() rejects requests that lack them.
++ */
++static const struct rpivid_control rpivid_ctrls[] = {
++      {
++              .cfg = {
++                      .id     = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
++              },
++              .required       = true,
++      },
++      {
++              .cfg = {
++                      .id     = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
++              },
++              .required       = true,
++      },
++      {
++              .cfg = {
++                      .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
++              },
++              .required       = false,
++      },
++      {
++              .cfg = {
++                      .id     = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
++              },
++              .required       = true,
++      },
++      {
++              .cfg = {
++                      .id     = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE,
++                      .max    = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
++                      .def    = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
++              },
++              .required       = false,
++      },
++      {
++              .cfg = {
++                      .id     = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE,
++                      .max    = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
++                      .def    = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
++              },
++              .required       = false,
++      },
++};
++
++/* Number of entries in rpivid_ctrls[] */
++#define rpivid_ctrls_COUNT    ARRAY_SIZE(rpivid_ctrls)
++
++/*
++ * Look up the current payload pointer (p_cur.p) of the context control
++ * whose id matches @id. ctx->ctrls is walked up to its first NULL entry.
++ * Returns NULL when no control with that id is found.
++ */
++void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id)
++{
++      unsigned int idx = 0;
++
++      while (ctx->ctrls[idx]) {
++              if (ctx->ctrls[idx]->id == id)
++                      return ctx->ctrls[idx]->p_cur.p;
++              idx++;
++      }
++
++      return NULL;
++}
++
++/*
++ * Create the control handler for a context and instantiate every entry of
++ * rpivid_ctrls[] on it. The created controls are also stored in the
++ * NULL-terminated array ctx->ctrls, which rpivid_find_control_data() walks.
++ *
++ * Returns 0 on success or a negative errno. On failure the handler has
++ * been freed and ctx->ctrls is not left allocated.
++ */
++static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx)
++{
++      struct v4l2_ctrl_handler *hdl = &ctx->hdl;
++      struct v4l2_ctrl *ctrl;
++      unsigned int ctrl_size;
++      unsigned int i;
++      int ret;
++
++      v4l2_ctrl_handler_init(hdl, rpivid_ctrls_COUNT);
++      if (hdl->error) {
++              v4l2_err(&dev->v4l2_dev,
++                       "Failed to initialize control handler\n");
++              return hdl->error;
++      }
++
++      /*
++       * One pointer per control plus one zeroed slot as terminator.
++       * Note the parentheses: "COUNT + 1" pointers, not "+ 1" byte.
++       */
++      ctrl_size = sizeof(ctrl) * (rpivid_ctrls_COUNT + 1);
++
++      ctx->ctrls = kzalloc(ctrl_size, GFP_KERNEL);
++      if (!ctx->ctrls) {
++              v4l2_ctrl_handler_free(hdl);
++              return -ENOMEM;
++      }
++
++      for (i = 0; i < rpivid_ctrls_COUNT; i++) {
++              ctrl = v4l2_ctrl_new_custom(hdl, &rpivid_ctrls[i].cfg,
++                                          NULL);
++              if (hdl->error) {
++                      v4l2_err(&dev->v4l2_dev,
++                               "Failed to create new custom control id=%#x\n",
++                               rpivid_ctrls[i].cfg.id);
++
++                      /*
++                       * Save the error first: freeing the handler
++                       * resets hdl->error.
++                       */
++                      ret = hdl->error;
++                      v4l2_ctrl_handler_free(hdl);
++                      kfree(ctx->ctrls);
++                      ctx->ctrls = NULL;
++                      return ret;
++              }
++
++              ctx->ctrls[i] = ctrl;
++      }
++
++      ctx->fh.ctrl_handler = hdl;
++      v4l2_ctrl_handler_setup(hdl);
++
++      return 0;
++}
++
++/*
++ * Media request validation hook.
++ *
++ * Finds the driver context from the first buffer object in the request,
++ * checks that the request carries exactly one buffer and that every
++ * control marked .required in rpivid_ctrls[] is present in it, then hands
++ * over to vb2_request_validate().
++ *
++ * Returns -ENOENT when the request has no buffer, no control handler or is
++ * missing a required control, -EINVAL when it has more than one buffer,
++ * otherwise the result of vb2_request_validate().
++ */
++static int rpivid_request_validate(struct media_request *req)
++{
++      struct media_request_object *obj;
++      struct v4l2_ctrl_handler *parent_hdl, *hdl;
++      struct rpivid_ctx *ctx = NULL;
++      struct v4l2_ctrl *ctrl_test;
++      unsigned int count;
++      unsigned int i;
++
++      list_for_each_entry(obj, &req->objects, list) {
++              struct vb2_buffer *vb;
++
++              if (vb2_request_object_is_buffer(obj)) {
++                      vb = container_of(obj, struct vb2_buffer, req_obj);
++                      ctx = vb2_get_drv_priv(vb->vb2_queue);
++
++                      break;
++              }
++      }
++
++      if (!ctx)
++              return -ENOENT;
++
++      count = vb2_request_buffer_cnt(req);
++      if (!count) {
++              v4l2_info(&ctx->dev->v4l2_dev,
++                        "No buffer was provided with the request\n");
++              return -ENOENT;
++      } else if (count > 1) {
++              v4l2_info(&ctx->dev->v4l2_dev,
++                        "More than one buffer was provided with the request\n");
++              return -EINVAL;
++      }
++
++      parent_hdl = &ctx->hdl;
++
++      hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl);
++      if (!hdl) {
++              v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n");
++              return -ENOENT;
++      }
++
++      for (i = 0; i < rpivid_ctrls_COUNT; i++) {
++              if (!rpivid_ctrls[i].required)
++                      continue;
++
++              ctrl_test =
++                      v4l2_ctrl_request_hdl_ctrl_find(hdl,
++                                                      rpivid_ctrls[i].cfg.id);
++              if (!ctrl_test) {
++                      v4l2_info(&ctx->dev->v4l2_dev,
++                                "Missing required codec control\n");
++                      /* Drop the reference taken by hdl_find above */
++                      v4l2_ctrl_request_hdl_put(hdl);
++                      return -ENOENT;
++              }
++      }
++
++      v4l2_ctrl_request_hdl_put(hdl);
++
++      return vb2_request_validate(req);
++}
++
++/*
++ * open() handler: allocate a per-file context, create its controls and
++ * mem2mem context, and set the default source format.
++ *
++ * Runs under dev->dev_mutex. Returns 0 on success, -ERESTARTSYS if the
++ * mutex wait was interrupted, -ENOMEM on allocation failure, or the error
++ * from control / mem2mem context setup.
++ */
++static int rpivid_open(struct file *file)
++{
++      struct rpivid_dev *dev = video_drvdata(file);
++      struct rpivid_ctx *ctx = NULL;
++      int ret;
++
++      if (mutex_lock_interruptible(&dev->dev_mutex))
++              return -ERESTARTSYS;
++
++      ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
++      if (!ctx) {
++              mutex_unlock(&dev->dev_mutex);
++              return -ENOMEM;
++      }
++
++      v4l2_fh_init(&ctx->fh, video_devdata(file));
++      file->private_data = &ctx->fh;
++      ctx->dev = dev;
++
++      ret = rpivid_init_ctrls(dev, ctx);
++      if (ret)
++              goto err_free;
++
++      ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx,
++                                          &rpivid_queue_init);
++      if (IS_ERR(ctx->fh.m2m_ctx)) {
++              ret = PTR_ERR(ctx->fh.m2m_ctx);
++              goto err_ctrls;
++      }
++
++      /* The only bit of format info that we can guess now is H265 src
++       * Everything else we need more info for
++       */
++      ctx->src_fmt.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
++      rpivid_prepare_src_format(&ctx->src_fmt);
++
++      v4l2_fh_add(&ctx->fh);
++
++      mutex_unlock(&dev->dev_mutex);
++
++      return 0;
++
++err_ctrls:
++      v4l2_ctrl_handler_free(&ctx->hdl);
++      /* Allocated by rpivid_init_ctrls(); release() never runs on failure */
++      kfree(ctx->ctrls);
++err_free:
++      kfree(ctx);
++      mutex_unlock(&dev->dev_mutex);
++
++      return ret;
++}
++
++/*
++ * release() handler: tear down everything rpivid_open() set up for this
++ * file handle, under dev->dev_mutex. Always returns 0.
++ */
++static int rpivid_release(struct file *file)
++{
++      struct rpivid_dev *dev = video_drvdata(file);
++      struct rpivid_ctx *ctx = container_of(file->private_data,
++                                            struct rpivid_ctx, fh);
++
++      mutex_lock(&dev->dev_mutex);
++
++      v4l2_fh_del(&ctx->fh);
++      v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
++
++      /* Controls: the handler first, then the lookup array */
++      v4l2_ctrl_handler_free(&ctx->hdl);
++      kfree(ctx->ctrls);
++
++      v4l2_fh_exit(&ctx->fh);
++
++      kfree(ctx);
++
++      mutex_unlock(&dev->dev_mutex);
++
++      return 0;
++}
++
++/* File operations: driver open/release, mem2mem helpers for poll and mmap */
++static const struct v4l2_file_operations rpivid_fops = {
++      .owner          = THIS_MODULE,
++      .open           = rpivid_open,
++      .release        = rpivid_release,
++      .poll           = v4l2_m2m_fop_poll,
++      .unlocked_ioctl = video_ioctl2,
++      .mmap           = v4l2_m2m_fop_mmap,
++};
++
++/* Template copied into dev->vfd by rpivid_probe() */
++static const struct video_device rpivid_video_device = {
++      .name           = RPIVID_NAME,
++      .vfl_dir        = VFL_DIR_M2M,
++      .fops           = &rpivid_fops,
++      .ioctl_ops      = &rpivid_ioctl_ops,
++      .minor          = -1,
++      .release        = video_device_release_empty,
++      .device_caps    = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING,
++};
++
++/* mem2mem callbacks: only device_run is provided (no job_abort) */
++static const struct v4l2_m2m_ops rpivid_m2m_ops = {
++      .device_run     = rpivid_device_run,
++};
++
++/* Media request hooks: driver validation, generic mem2mem queueing */
++static const struct media_device_ops rpivid_m2m_media_ops = {
++      .req_validate   = rpivid_request_validate,
++      .req_queue      = v4l2_m2m_request_queue,
++};
++
++/*
++ * rpivid_probe() - bind the driver to a matching platform device
++ * @pdev: platform device being probed
++ *
++ * Allocates the devm-managed driver state, probes the hardware and then
++ * registers, in order: the V4L2 device, the mem2mem device, the media
++ * device state, the video node, the mem2mem media controller and the media
++ * device itself.  If any step fails, every earlier step is undone in
++ * reverse order, including the hardware probe and media_device_init().
++ *
++ * Return: 0 on success or a negative errno.
++ */
++static int rpivid_probe(struct platform_device *pdev)
++{
++      struct rpivid_dev *dev;
++      struct video_device *vfd;
++      int ret;
++
++      dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
++      if (!dev)
++              return -ENOMEM;
++
++      dev->vfd = rpivid_video_device;
++      dev->dev = &pdev->dev;
++      dev->pdev = pdev;
++
++      ret = rpivid_hw_probe(dev);
++      if (ret) {
++              dev_err(&pdev->dev, "Failed to probe hardware\n");
++              return ret;
++      }
++
++      dev->dec_ops = &rpivid_dec_ops_h265;
++
++      mutex_init(&dev->dev_mutex);
++
++      ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
++      if (ret) {
++              dev_err(&pdev->dev, "Failed to register V4L2 device\n");
++              goto err_hw;
++      }
++
++      vfd = &dev->vfd;
++      vfd->lock = &dev->dev_mutex;
++      vfd->v4l2_dev = &dev->v4l2_dev;
++
++      snprintf(vfd->name, sizeof(vfd->name), "%s", rpivid_video_device.name);
++      video_set_drvdata(vfd, dev);
++
++      dev->m2m_dev = v4l2_m2m_init(&rpivid_m2m_ops);
++      if (IS_ERR(dev->m2m_dev)) {
++              v4l2_err(&dev->v4l2_dev,
++                       "Failed to initialize V4L2 M2M device\n");
++              ret = PTR_ERR(dev->m2m_dev);
++
++              goto err_v4l2;
++      }
++
++      dev->mdev.dev = &pdev->dev;
++      strscpy(dev->mdev.model, RPIVID_NAME, sizeof(dev->mdev.model));
++      strscpy(dev->mdev.bus_info, "platform:" RPIVID_NAME,
++              sizeof(dev->mdev.bus_info));
++
++      media_device_init(&dev->mdev);
++      dev->mdev.ops = &rpivid_m2m_media_ops;
++      dev->v4l2_dev.mdev = &dev->mdev;
++
++      ret = video_register_device(vfd, VFL_TYPE_GRABBER, video_nr);
++      if (ret) {
++              v4l2_err(&dev->v4l2_dev, "Failed to register video device\n");
++              goto err_m2m;
++      }
++
++      v4l2_info(&dev->v4l2_dev,
++                "Device registered as /dev/video%d\n", vfd->num);
++
++      ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd,
++                                               MEDIA_ENT_F_PROC_VIDEO_DECODER);
++      if (ret) {
++              v4l2_err(&dev->v4l2_dev,
++                       "Failed to initialize V4L2 M2M media controller\n");
++              goto err_video;
++      }
++
++      ret = media_device_register(&dev->mdev);
++      if (ret) {
++              v4l2_err(&dev->v4l2_dev, "Failed to register media device\n");
++              goto err_m2m_mc;
++      }
++
++      platform_set_drvdata(pdev, dev);
++
++      return 0;
++
++err_m2m_mc:
++      v4l2_m2m_unregister_media_controller(dev->m2m_dev);
++err_video:
++      video_unregister_device(&dev->vfd);
++err_m2m:
++      /* Every jump to err_m2m or later happens after media_device_init() */
++      media_device_cleanup(&dev->mdev);
++      v4l2_m2m_release(dev->m2m_dev);
++err_v4l2:
++      v4l2_device_unregister(&dev->v4l2_dev);
++err_hw:
++      /* Mirror rpivid_remove(): undo the successful rpivid_hw_probe() */
++      rpivid_hw_remove(dev);
++
++      return ret;
++}
++
++/*
++ * rpivid_remove() - unbind the driver from a platform device
++ * @pdev: platform device whose drvdata was set by rpivid_probe()
++ *
++ * Tears down the media device (only if its devnode is still registered),
++ * then the mem2mem device, the video node and the V4L2 device, and finally
++ * releases the hardware.
++ *
++ * Return: always 0.
++ */
++static int rpivid_remove(struct platform_device *pdev)
++{
++      struct rpivid_dev *dev = platform_get_drvdata(pdev);
++
++      if (media_devnode_is_registered(dev->mdev.devnode)) {
++              media_device_unregister(&dev->mdev);
++              v4l2_m2m_unregister_media_controller(dev->m2m_dev);
++              media_device_cleanup(&dev->mdev);
++      }
++
++      v4l2_m2m_release(dev->m2m_dev);
++      video_unregister_device(&dev->vfd);
++      v4l2_device_unregister(&dev->v4l2_dev);
++
++      rpivid_hw_remove(dev);
++
++      return 0;
++}
++
++/* Device tree compatible strings handled by this driver */
++static const struct of_device_id rpivid_dt_match[] = {
++      {
++              .compatible = "raspberrypi,rpivid-vid-decoder",
++      },
++      { /* sentinel */ }
++};
++MODULE_DEVICE_TABLE(of, rpivid_dt_match);
++
++/* Platform driver glue; module init/exit come from module_platform_driver() */
++static struct platform_driver rpivid_driver = {
++      .probe          = rpivid_probe,
++      .remove         = rpivid_remove,
++      .driver         = {
++              .name = RPIVID_NAME,
++              .of_match_table = of_match_ptr(rpivid_dt_match),
++      },
++};
++module_platform_driver(rpivid_driver);
++
++MODULE_LICENSE("GPL v2");
++MODULE_AUTHOR("John Cox <jc@kynesim.co.uk>");
++MODULE_DESCRIPTION("Raspberry Pi HEVC V4L2 driver");
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid.h
+@@ -0,0 +1,181 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#ifndef _RPIVID_H_
++#define _RPIVID_H_
++
++#include <linux/clk.h>
++#include <linux/platform_device.h>
++#include <media/v4l2-ctrls.h>
++#include <media/v4l2-device.h>
++#include <media/v4l2-mem2mem.h>
++#include <media/videobuf2-v4l2.h>
++#include <media/videobuf2-dma-contig.h>
++
++#define OPT_DEBUG_POLL_IRQ  0
++
++#define RPIVID_NAME                   "rpivid"
++
++#define RPIVID_CAPABILITY_UNTILED     BIT(0)
++#define RPIVID_CAPABILITY_H265_DEC    BIT(1)
++
++#define RPIVID_QUIRK_NO_DMA_OFFSET    BIT(0)
++
++#define RPIVID_SRC_PIXELFORMAT_DEFAULT        V4L2_PIX_FMT_HEVC_SLICE
++
++/*
++ * Interrupt outcome codes.
++ * NOTE(review): no user is visible in this part of the patch; the meanings
++ * are presumably "nothing pending", "decode error" and "decode ok" - confirm
++ * against rpivid_hw.c.
++ */
++enum rpivid_irq_status {
++      RPIVID_IRQ_NONE,
++      RPIVID_IRQ_ERROR,
++      RPIVID_IRQ_OK,
++};
++
++/* A V4L2 control description plus a one-bit "required" marker */
++struct rpivid_control {
++      struct v4l2_ctrl_config cfg;
++      unsigned char           required:1;
++};
++
++/*
++ * HEVC control payloads for one decode run; filled in by rpivid_device_run()
++ * from rpivid_find_control_data() for the matching V4L2 HEVC control ids.
++ */
++struct rpivid_h265_run {
++      const struct v4l2_ctrl_hevc_sps                 *sps;
++      const struct v4l2_ctrl_hevc_pps                 *pps;
++      const struct v4l2_ctrl_hevc_slice_params        *slice_params;
++      const struct v4l2_ctrl_hevc_scaling_matrix      *scaling_matrix;
++};
++
++/* Everything handed to dec_ops->setup() for a single device_run call */
++struct rpivid_run {
++      struct vb2_v4l2_buffer  *src;   /* next mem2mem source buffer */
++      struct vb2_v4l2_buffer  *dst;   /* next mem2mem destination buffer */
++
++      struct rpivid_h265_run  h265;
++};
++
++/* Driver buffer type: currently only wraps the mem2mem buffer */
++struct rpivid_buffer {
++      struct v4l2_m2m_buffer          m2m_buf;
++};
++
++struct rpivid_dec_state;
++struct rpivid_dec_env;
++#define RPIVID_DEC_ENV_COUNT 3
++
++/*
++ * Book-keeping for one dma_alloc_attrs() allocation (see gptr_alloc() and
++ * gptr_free() in rpivid_h265.c).
++ */
++struct rpivid_gptr {
++      size_t size;            /* size passed to dma_alloc_attrs() */
++      __u8 *ptr;              /* value returned by dma_alloc_attrs() */
++      dma_addr_t addr;        /* DMA handle filled in by dma_alloc_attrs() */
++      unsigned long attrs;    /* DMA_ATTR_* flags used for alloc and free */
++};
++
++struct rpivid_dev;
++typedef void (*rpivid_irq_callback)(struct rpivid_dev *dev, void *ctx);
++
++struct rpivid_q_aux;
++#define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME
++
++#define RPIVID_P2BUF_COUNT 2
++
++/* Per-open-file decoder context */
++struct rpivid_ctx {
++      struct v4l2_fh                  fh;
++      struct rpivid_dev               *dev;
++
++      struct v4l2_pix_format          src_fmt;
++      struct v4l2_pix_format          dst_fmt;
++      int dst_fmt_set;
++
++      struct v4l2_ctrl_handler        hdl;
++      struct v4l2_ctrl                **ctrls;
++
++      /* Decode state (kept per context despite the stateless decoder API) */
++      /* state contains stuff that is only needed in phase0
++       * it could be held in dec_env but that would be wasteful
++       */
++      struct rpivid_dec_state *state;
++      struct rpivid_dec_env *dec0;
++
++      /* Spinlock protecting dec_free */
++      spinlock_t dec_lock;
++      struct rpivid_dec_env *dec_free;
++
++      struct rpivid_dec_env *dec_pool;
++
++      /* Some of these should be in dev */
++      struct rpivid_gptr bitbufs[1];  /* Will be 2 */
++      struct rpivid_gptr cmdbufs[1];  /* Will be 2 */
++      unsigned int p2idx;
++      atomic_t p2out;
++      struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT];
++      struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT];
++
++      /* Spinlock protecting aux_free */
++      spinlock_t aux_lock;
++      /* Singly linked list (via ->next) of unused aux entries */
++      struct rpivid_q_aux *aux_free;
++
++      /* In-use aux entries, indexed by the q_index given to aux_q_new() */
++      struct rpivid_q_aux *aux_ents[RPIVID_AUX_ENT_COUNT];
++
++      unsigned int colmv_stride;
++      /* Size in bytes of each aux entry's "col" DMA buffer */
++      unsigned int colmv_picsize;
++};
++
++/*
++ * Codec specific operations.  rpivid_device_run() calls setup() and then
++ * trigger() for each run; callers of start()/stop() are not visible here.
++ */
++struct rpivid_dec_ops {
++      void (*setup)(struct rpivid_ctx *ctx, struct rpivid_run *run);
++      int (*start)(struct rpivid_ctx *ctx);
++      void (*stop)(struct rpivid_ctx *ctx);
++      void (*trigger)(struct rpivid_ctx *ctx);
++};
++
++/*
++ * Hardware variant description.
++ * NOTE(review): presumably capabilities/quirks hold RPIVID_CAPABILITY_* and
++ * RPIVID_QUIRK_* bits; no user is visible here - confirm.
++ */
++struct rpivid_variant {
++      unsigned int    capabilities;
++      unsigned int    quirks;
++      unsigned int    mod_rate;
++};
++
++struct rpivid_hw_irq_ent;
++
++/*
++ * Scheduling state for one hardware phase; struct rpivid_dev holds two of
++ * these (ic_active1 and ic_active2).
++ */
++struct rpivid_hw_irq_ctrl {
++      /* Spinlock protecting claim and tail */
++      spinlock_t lock;
++      struct rpivid_hw_irq_ent *claim;
++      struct rpivid_hw_irq_ent *tail;
++
++      /* Ent for pending irq - also prevents sched */
++      struct rpivid_hw_irq_ent *irq;
++      /* Non-zero => do not start a new job - outer layer sched pending */
++      int no_sched;
++      /* Thread CB requested */
++      bool thread_reqed;
++};
++
++/* Per-device driver state, allocated with devm_kzalloc() in rpivid_probe() */
++struct rpivid_dev {
++      struct v4l2_device      v4l2_dev;
++      struct video_device     vfd;    /* copy of rpivid_video_device */
++      struct media_device     mdev;
++      struct media_pad        pad[2];
++      struct platform_device  *pdev;
++      struct device           *dev;   /* &pdev->dev, used for DMA allocs */
++      struct v4l2_m2m_dev     *m2m_dev;
++      struct rpivid_dec_ops   *dec_ops;       /* set to the H.265 ops */
++
++      /* Device file mutex */
++      struct mutex            dev_mutex;
++
++      void __iomem            *base_irq;
++      void __iomem            *base_h265;
++
++      struct clk              *clock;
++
++      struct rpivid_hw_irq_ctrl ic_active1;
++      struct rpivid_hw_irq_ctrl ic_active2;
++};
++
++extern struct rpivid_dec_ops rpivid_dec_ops_h265;
++
++void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id);
++
++#endif
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid_dec.c
+@@ -0,0 +1,79 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#include <media/v4l2-device.h>
++#include <media/v4l2-ioctl.h>
++#include <media/v4l2-event.h>
++#include <media/v4l2-mem2mem.h>
++
++#include "rpivid.h"
++#include "rpivid_dec.h"
++
++/*
++ * rpivid_device_run() - mem2mem device_run callback
++ * @priv: the struct rpivid_ctx of the context to run
++ *
++ * Takes the next source and destination buffers, applies the controls of
++ * the source buffer's media request (if there is one), looks up the HEVC
++ * control payloads and passes the run to the decoder ops: setup() first,
++ * then trigger() once the request controls have been completed.
++ */
++void rpivid_device_run(void *priv)
++{
++      struct rpivid_ctx *ctx = priv;
++      struct rpivid_dev *dev = ctx->dev;
++      struct rpivid_run run = {};
++      struct media_request *src_req;
++
++      run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
++      run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
++
++      if (!run.src || !run.dst) {
++              v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n",
++                       __func__, run.src, run.dst);
++              /* We are stuffed - this probably won't dig us out of our
++               * current situation but it is better than nothing
++               */
++              v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
++                                               VB2_BUF_STATE_ERROR);
++              return;
++      }
++
++      /* Apply request(s) controls if needed. */
++      src_req = run.src->vb2_buf.req_obj.req;
++
++      if (src_req)
++              v4l2_ctrl_request_setup(src_req, &ctx->hdl);
++
++      /* Only HEVC slice input has control payloads to collect */
++      switch (ctx->src_fmt.pixelformat) {
++      case V4L2_PIX_FMT_HEVC_SLICE:
++              run.h265.sps =
++                      rpivid_find_control_data(ctx,
++                                               V4L2_CID_MPEG_VIDEO_HEVC_SPS);
++              run.h265.pps =
++                      rpivid_find_control_data(ctx,
++                                               V4L2_CID_MPEG_VIDEO_HEVC_PPS);
++              run.h265.slice_params =
++                      rpivid_find_control_data(ctx,
++                                               V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
++              run.h265.scaling_matrix =
++                      rpivid_find_control_data(ctx,
++                                               V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX);
++              break;
++
++      default:
++              break;
++      }
++
++      v4l2_m2m_buf_copy_metadata(run.src, run.dst, true);
++
++      dev->dec_ops->setup(ctx, &run);
++
++      /* Complete request(s) controls if needed. */
++
++      if (src_req)
++              v4l2_ctrl_request_complete(src_req, &ctx->hdl);
++
++      dev->dec_ops->trigger(ctx);
++}
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid_dec.h
+@@ -0,0 +1,19 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#ifndef _RPIVID_DEC_H_
++#define _RPIVID_DEC_H_
++
++void rpivid_device_run(void *priv);
++
++#endif
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid_h265.c
+@@ -0,0 +1,2275 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#include <linux/delay.h>
++#include <linux/types.h>
++
++#include <media/videobuf2-dma-contig.h>
++
++#include "rpivid.h"
++#include "rpivid_hw.h"
++
++#define DEBUG_TRACE_P1_CMD 0
++#define DEBUG_TRACE_EXECUTION 0
++
++/*
++ * Execution tracing helpers.  With DEBUG_TRACE_EXECUTION non-zero each macro
++ * logs the calling function name and the decode_order of @de_ (or -1 when
++ * @de_ is NULL) through v4l2_info(); otherwise they expand to nothing.
++ */
++#if DEBUG_TRACE_EXECUTION
++#define xtrace_in(dev_, de_)\
++      v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n",   __func__,\
++                (de_) == NULL ? -1 : (de_)->decode_order)
++#define xtrace_ok(dev_, de_)\
++      v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: ok\n",   __func__,\
++                (de_) == NULL ? -1 : (de_)->decode_order)
++#define xtrace_fin(dev_, de_)\
++      v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: finish\n", __func__,\
++                (de_) == NULL ? -1 : (de_)->decode_order)
++#define xtrace_fail(dev_, de_)\
++      v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: FAIL\n", __func__,\
++                (de_) == NULL ? -1 : (de_)->decode_order)
++#else
++#define xtrace_in(dev_, de_)
++#define xtrace_ok(dev_, de_)
++#define xtrace_fin(dev_, de_)
++#define xtrace_fail(dev_, de_)
++#endif
++
++/* Slice type values as compared against the slice_params slice_type field */
++enum hevc_slice_type {
++      HEVC_SLICE_B = 0,
++      HEVC_SLICE_P = 1,
++      HEVC_SLICE_I = 2,
++};
++
++/* Indices into the two-entry nb_refs[] array (reference list 0 / list 1) */
++enum hevc_layer { L0 = 0, L1 = 1 };
++
++/*
++ * Allocate a DMA buffer of @size bytes with DMA attributes @attrs and record
++ * it in @gptr.  @size and @attrs are stored in @gptr even when the
++ * allocation fails.
++ *
++ * Returns 0 on success or -ENOMEM.
++ */
++static int gptr_alloc(struct rpivid_dev *const dev, struct rpivid_gptr *gptr,
++                    size_t size, unsigned long attrs)
++{
++      gptr->addr = 0;
++      gptr->attrs = attrs;
++      gptr->size = size;
++
++      gptr->ptr = dma_alloc_attrs(dev->dev, size, &gptr->addr, GFP_KERNEL,
++                                  attrs);
++      if (!gptr->ptr)
++              return -ENOMEM;
++
++      return 0;
++}
++
++/*
++ * Release the DMA buffer described by @gptr (if there is one) and reset
++ * every field of @gptr to zero/NULL so it can be freed again harmlessly.
++ */
++static void gptr_free(struct rpivid_dev *const dev,
++                    struct rpivid_gptr *const gptr)
++{
++      if (gptr->ptr != NULL) {
++              dma_free_attrs(dev->dev, gptr->size, gptr->ptr, gptr->addr,
++                             gptr->attrs);
++      }
++
++      gptr->ptr = NULL;
++      gptr->addr = 0;
++      gptr->attrs = 0;
++      gptr->size = 0;
++}
++
++/*
++ * Realloc but do not copy.
++ *
++ * Replaces the buffer in @gptr with a fresh one of @size bytes, reusing the
++ * DMA attributes already stored in @gptr.  The old contents are discarded.
++ * Nothing is done when @size equals the recorded size.
++ *
++ * On failure @gptr is left describing "no buffer" (ptr NULL, size 0), so a
++ * later call with the same @size allocates again instead of being treated
++ * as a no-op on a missing buffer.
++ *
++ * Returns 0 on success or -ENOMEM.
++ */
++static int gptr_realloc_new(struct rpivid_dev * const dev,
++                          struct rpivid_gptr * const gptr, size_t size)
++{
++      if (size == gptr->size)
++              return 0;
++
++      if (gptr->ptr)
++              dma_free_attrs(dev->dev, gptr->size, gptr->ptr,
++                             gptr->addr, gptr->attrs);
++
++      gptr->addr = 0;
++      gptr->size = size;
++      gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size,
++                                  &gptr->addr, GFP_KERNEL, gptr->attrs);
++      if (!gptr->ptr) {
++              /* Keep size consistent with the (absent) buffer */
++              gptr->size = 0;
++              return -ENOMEM;
++      }
++
++      return 0;
++}
++
++/*
++ * floor(log2(x)) by binary search over the shift steps 16, 8, 4, 2, 1.
++ * Returns 0 for x == 0 and never more than 31.
++ */
++static unsigned int log2_size(size_t x)
++{
++      static const unsigned int steps[] = { 16, 8, 4, 2, 1 };
++      unsigned int bits = 0;
++      unsigned int k;
++
++      for (k = 0; k < sizeof(steps) / sizeof(steps[0]); k++) {
++              /* Anything set at or above bit steps[k]? */
++              if (x >> steps[k]) {
++                      bits += steps[k];
++                      x >>= steps[k];
++              }
++      }
++      return bits;
++}
++
++/*
++ * Pick the next allocation size above @x from the series 3 << n, 4 << n.
++ * For @x below 256 the shift is fixed at 8, so the result is 768; otherwise
++ * n is floor(log2(x)) - 1, which gives 1.5 * 2^k or 2^(k+1) for @x in
++ * [2^k, 2^(k+1)).
++ */
++static size_t round_up_size(const size_t x)
++{
++      unsigned int shift;
++
++      /* Admit no size < 256 */
++      if (x < 256)
++              shift = 8;
++      else
++              shift = log2_size(x) - 1;
++
++      if (x >= (3 << shift))
++              return 4 << shift;
++
++      return 3 << shift;
++}
++
++/* Allocation size to use for a buffer that must hold more than @x bytes */
++static size_t next_size(const size_t x)
++{
++      const size_t wanted = x + 1;
++
++      return round_up_size(wanted);
++}
++
++#define NUM_SCALING_FACTORS 4064 /* Not a typo = 0xbe0 + 0x400 */
++
++#define AXI_BASE64 0
++
++#define PROB_BACKUP ((20 << 12) + (20 << 6) + (0 << 0))
++#define PROB_RELOAD ((20 << 12) + (20 << 0) + (0 << 6))
++
++#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX
++
++//////////////////////////////////////////////////////////////////////////////
++
++/* One entry of the phase 1 command FIFO, as filled in by p1_apb_write() */
++struct rpi_cmd {
++      u32 addr;
++      u32 data;
++} __packed;
++
++/*
++ * Reference counted auxiliary data attached to a queue slot (see the
++ * aux_q_*() helpers).
++ */
++struct rpivid_q_aux {
++      unsigned int refcount;          /* protected by ctx->aux_lock */
++      unsigned int q_index;           /* index into ctx->aux_ents[] */
++      struct rpivid_q_aux *next;      /* link on the ctx->aux_free list */
++      struct rpivid_gptr col;         /* DMA buffer of ctx->colmv_picsize */
++};
++
++//////////////////////////////////////////////////////////////////////////////
++
++/*
++ * Progress of a decode environment.
++ * NOTE(review): the transitions between these states are not visible in
++ * this part of the file - see the users of rpivid_dec_env.state.
++ */
++enum rpivid_decode_state {
++      RPIVID_DECODE_SLICE_START,
++      RPIVID_DECODE_SLICE_CONTINUE,
++      RPIVID_DECODE_ERROR_CONTINUE,
++      RPIVID_DECODE_ERROR_DONE,
++      RPIVID_DECODE_PHASE1,
++      RPIVID_DECODE_END,
++};
++
++/* Per-frame decode environment: command FIFO plus the state it refers to */
++struct rpivid_dec_env {
++      struct rpivid_ctx *ctx;
++      struct rpivid_dec_env *next;
++
++      enum rpivid_decode_state state;
++      unsigned int decode_order;
++      int p1_status;          /* P1 status - what to realloc */
++
++      struct rpivid_dec_env *phase_wait_q_next;
++
++      /* Phase 1 command FIFO: cmd_len entries used out of cmd_max */
++      struct rpi_cmd *cmd_fifo;
++      unsigned int cmd_len, cmd_max;
++      unsigned int num_slice_msgs;
++      unsigned int pic_width_in_ctbs_y;
++      unsigned int pic_height_in_ctbs_y;
++      unsigned int dpbno_col;
++      u32 reg_slicestart;
++      int collocated_from_l0_flag;
++      unsigned int wpp_entry_x;
++      unsigned int wpp_entry_y;
++
++      u32 rpi_config2;
++      u32 rpi_framesize;
++      u32 rpi_currpoc;
++
++      struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer
++      unsigned int frame_c_offset;
++      unsigned int frame_stride;
++      dma_addr_t frame_addr;
++      dma_addr_t ref_addrs[16];
++      struct rpivid_q_aux *frame_aux;
++      struct rpivid_q_aux *col_aux;
++
++      dma_addr_t pu_base_vc;
++      dma_addr_t coeff_base_vc;
++      u32 pu_stride;
++      u32 coeff_stride;
++
++      /* Bounce buffer for bitstream data and bytes already used in it */
++      struct rpivid_gptr *bit_copy_gptr;
++      size_t bit_copy_len;
++      struct rpivid_gptr *cmd_copy_gptr;
++
++      u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3];
++      u8 scaling_factors[NUM_SCALING_FACTORS];
++
++      struct rpivid_hw_irq_ent irq_ent;
++};
++
++/* sizeof() a struct member without needing an instance of the struct */
++#define member_size(type, member) sizeof(((type *)0)->member)
++
++/* Decoder state held per context (ctx->state) */
++struct rpivid_dec_state {
++      struct v4l2_ctrl_hevc_sps sps;
++      struct v4l2_ctrl_hevc_pps pps;
++
++      // Helper vars & tables derived from sps/pps
++      unsigned int log2_ctb_size; /* log2 width of a CTB */
++      unsigned int ctb_width; /* Width in CTBs */
++      unsigned int ctb_height; /* Height in CTBs */
++      unsigned int ctb_size; /* Pic area in CTBs */
++      unsigned int num_tile_columns;
++      unsigned int num_tile_rows;
++      u8 column_width[member_size(struct v4l2_ctrl_hevc_pps,
++                                  column_width_minus1)];
++      u8 row_height[member_size(struct v4l2_ctrl_hevc_pps,
++                                row_height_minus1)];
++
++      // Tile boundary tables and CTB address maps
++      int *col_bd;
++      int *row_bd;
++      int *ctb_addr_rs_to_ts;
++      int *ctb_addr_ts_to_rs;
++      int *tile_id;
++
++      // Aux storage for DPB
++      // Hold refs
++      struct rpivid_q_aux *ref_aux[HEVC_MAX_REFS];
++      struct rpivid_q_aux *frame_aux;
++
++      // Slice vars
++      unsigned int slice_idx;
++      bool frame_end;
++      bool slice_temporal_mvp;  /* Slice flag but constant for frame */
++
++      // Temp vars per run - don't actually need to persist
++      u8 *src_buf;
++      dma_addr_t src_addr;
++      const struct v4l2_ctrl_hevc_slice_params *sh;
++      unsigned int nb_refs[2];
++      unsigned int slice_qp;
++      unsigned int max_num_merge_cand; // 0 if I-slice
++      bool dependent_slice_segment_flag;
++};
++
++/* Clamp @x to [@lo, @hi]; the lower bound is checked first */
++static inline int clip_int(const int x, const int lo, const int hi)
++{
++      if (x < lo)
++              return lo;
++      if (x > hi)
++              return hi;
++      return x;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Phase 1 command and bit FIFOs
++
++#if DEBUG_TRACE_P1_CMD
++static int p1_z;
++#endif
++
++/*
++ * p1_apb_write() - append one register write to the phase 1 command FIFO
++ * @de:   decode environment owning the FIFO
++ * @addr: register address; passed as u16 but stored in the FIFO's u32 field
++ * @data: value to be written
++ *
++ * The FIFO doubles in size whenever it is full.  If it cannot be grown the
++ * existing FIFO and its capacity are left untouched and the command is NOT
++ * queued; the previous code would have dereferenced the NULL result and
++ * leaked the old buffer.
++ *
++ * Return: index of the new FIFO entry, or -ENOMEM if the FIFO is full and
++ * could not be enlarged.
++ */
++static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr,
++                      const u32 data)
++{
++      if (de->cmd_len == de->cmd_max) {
++              const unsigned int new_max = de->cmd_max * 2;
++              struct rpi_cmd *const fifo =
++                      krealloc(de->cmd_fifo,
++                               new_max * sizeof(struct rpi_cmd),
++                               GFP_KERNEL);
++
++              /* Only commit the new pointer and capacity on success */
++              if (!fifo)
++                      return -ENOMEM;
++              de->cmd_fifo = fifo;
++              de->cmd_max = new_max;
++      }
++      de->cmd_fifo[de->cmd_len].addr = addr;
++      de->cmd_fifo[de->cmd_len].data = data;
++
++#if DEBUG_TRACE_P1_CMD
++      if (++p1_z < 256) {
++              v4l2_info(&de->ctx->dev->v4l2_dev, "[%02x] %x %x\n",
++                        de->cmd_len, addr, data);
++      }
++#endif
++
++      return de->cmd_len++;
++}
++
++/*
++ * ctb_to_tile() - find the tile containing a CTB column or row
++ * @ctb: CTB column (or row) index
++ * @bd:  ascending tile boundaries; @num + 1 entries with bd[0] == 0
++ * @num: number of tiles described by @bd
++ *
++ * Returns the index i such that bd[i] <= @ctb < bd[i + 1].  A @ctb at or
++ * beyond the final boundary is clamped to the last tile instead of scanning
++ * past the end of @bd, so callers can always index bd[result + 1].
++ */
++static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num)
++{
++      int i;
++
++      /* Stop at the last tile: bd[num] is never needed to decide */
++      for (i = 1; i < num && ctb >= bd[i]; i++)
++              ;
++      return i - 1;
++}
++
++/*
++ * Width (or height) in samples of the CTB at column (or row) @ctb.
++ * CTBs before the start of the last tile (bd[num - 1]) are always full
++ * size; from there on the remainder of @width modulo @ctb_size is returned
++ * when it is non-zero, and the full @ctb_size otherwise.
++ */
++static int ctb_to_slice_w_h(unsigned int ctb, int ctb_size, int width,
++                          unsigned int *bd, int num)
++{
++      int rem;
++
++      if (ctb < bd[num - 1])
++              return ctb_size;
++
++      rem = width % ctb_size;
++      return rem ? rem : ctb_size;
++}
++
++/* Free an aux entry together with its "col" DMA buffer */
++static void aux_q_free(struct rpivid_ctx *const ctx,
++                     struct rpivid_q_aux *const aq)
++{
++      gptr_free(ctx->dev, &aq->col);
++      kfree(aq);
++}
++
++/*
++ * Allocate a new aux entry with a reference count of 1 and a "col" DMA
++ * buffer of ctx->colmv_picsize bytes (physically contiguous, no kernel
++ * mapping).  Returns NULL if either allocation fails.
++ */
++static struct rpivid_q_aux *aux_q_alloc(struct rpivid_ctx *const ctx)
++{
++      struct rpivid_q_aux *aq;
++      int err;
++
++      aq = kzalloc(sizeof(*aq), GFP_KERNEL);
++      if (!aq)
++              return NULL;
++
++      aq->refcount = 1;
++
++      err = gptr_alloc(ctx->dev, &aq->col, ctx->colmv_picsize,
++                       DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING);
++      if (err) {
++              kfree(aq);
++              return NULL;
++      }
++
++      return aq;
++}
++
++/*
++ * Obtain an aux entry for queue slot @q_index: reuse one from the context's
++ * free list if possible, otherwise allocate a new one.  The entry is
++ * returned with a reference count of 1 and recorded in ctx->aux_ents[].
++ * Returns NULL if a new entry was needed and could not be allocated.
++ *
++ * NOTE(review): @q_index is not range checked here and ctx->aux_ents[] is
++ * written outside aux_lock - confirm callers guarantee both are safe.
++ */
++static struct rpivid_q_aux *aux_q_new(struct rpivid_ctx *const ctx,
++                                    const unsigned int q_index)
++{
++      struct rpivid_q_aux *aq;
++      unsigned long lockflags;
++
++      /* Pop the head of the free list, if any */
++      spin_lock_irqsave(&ctx->aux_lock, lockflags);
++      aq = ctx->aux_free;
++      if (aq) {
++              ctx->aux_free = aq->next;
++              aq->next = NULL;
++              aq->refcount = 1;
++      }
++      spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
++
++      if (!aq) {
++              aq = aux_q_alloc(ctx);
++              if (!aq)
++                      return NULL;
++      }
++
++      aq->q_index = q_index;
++      ctx->aux_ents[q_index] = aq;
++      return aq;
++}
++
++/*
++ * Take an extra reference on @aq under ctx->aux_lock.  A NULL @aq is
++ * accepted and simply returned.  Returns @aq.
++ */
++static struct rpivid_q_aux *aux_q_ref(struct rpivid_ctx *const ctx,
++                                    struct rpivid_q_aux *const aq)
++{
++      unsigned long lockflags;
++
++      if (!aq)
++              return aq;
++
++      spin_lock_irqsave(&ctx->aux_lock, lockflags);
++      ++aq->refcount;
++      spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
++
++      return aq;
++}
++
++/*
++ * Drop the reference held through *@paq and set *@paq to NULL.  When the
++ * last reference goes the entry is pushed onto the context's free list and
++ * its slot in ctx->aux_ents[] is cleared; the entry itself is not freed
++ * here.  A NULL *@paq is a no-op.
++ */
++static void aux_q_release(struct rpivid_ctx *const ctx,
++                        struct rpivid_q_aux **const paq)
++{
++      struct rpivid_q_aux *const aq = *paq;
++      *paq = NULL;
++
++      if (aq) {
++              unsigned long lockflags;
++
++              spin_lock_irqsave(&ctx->aux_lock, lockflags);
++
++              if (--aq->refcount == 0) {
++                      aq->next = ctx->aux_free;
++                      ctx->aux_free = aq;
++                      ctx->aux_ents[aq->q_index] = NULL;
++              }
++
++              spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
++      }
++}
++
++/* Start the context with an empty aux free list and a ready aux_lock */
++static void aux_q_init(struct rpivid_ctx *const ctx)
++{
++      ctx->aux_free = NULL;
++      spin_lock_init(&ctx->aux_lock);
++}
++
++/*
++ * Reset the colmv geometry and free every entry on the aux free list.
++ * Entries that are still referenced (not on the free list) are not touched.
++ */
++static void aux_q_uninit(struct rpivid_ctx *const ctx)
++{
++      struct rpivid_q_aux *aq;
++      struct rpivid_q_aux *following;
++
++      ctx->colmv_stride = 0;
++      ctx->colmv_picsize = 0;
++
++      for (aq = ctx->aux_free; aq; aq = following) {
++              /* Unlink before freeing so the list head is always valid */
++              following = aq->next;
++              ctx->aux_free = following;
++              aux_q_free(ctx, aq);
++      }
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++/*
++ * Initialisation process for context variables (CABAC init)
++ * see H.265 9.3.2.2
++ *
++ * N.B. If comparing with FFmpeg note that this h/w uses slightly different
++ * offsets from those in FFmpeg's array
++ */
++
++/* Actual number of values */
++#define RPI_PROB_VALS 154U
++/* Rounded up as we copy words */
++#define RPI_PROB_ARRAY_SIZE ((154 + 3) & ~3)
++
++/*
++ * Initialiser values - see tables H.265 9-4 through 9-42
++ *
++ * One row per CABAC init type as selected in write_prob().  Each row holds
++ * RPI_PROB_VALS (154) values followed by two zero bytes of padding, i.e.
++ * RPI_PROB_ARRAY_SIZE (156) entries.
++ */
++static const u8 prob_init[3][156] = {
++      {
++              153, 200, 139, 141, 157, 154, 154, 154, 154, 154, 184, 154, 154,
++              154, 184, 63,  154, 154, 154, 154, 154, 154, 154, 154, 154, 154,
++              154, 154, 154, 153, 138, 138, 111, 141, 94,  138, 182, 154, 154,
++              154, 140, 92,  137, 138, 140, 152, 138, 139, 153, 74,  149, 92,
++              139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197, 110,
++              110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
++              79,  108, 123, 63,  110, 110, 124, 125, 140, 153, 125, 127, 140,
++              109, 111, 143, 127, 111, 79,  108, 123, 63,  91,  171, 134, 141,
++              138, 153, 136, 167, 152, 152, 139, 139, 111, 111, 125, 110, 110,
++              94,  124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141,
++              179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182,
++              152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, 0,   0,
++      },
++      {
++              153, 185, 107, 139, 126, 197, 185, 201, 154, 149, 154, 139, 154,
++              154, 154, 152, 110, 122, 95,  79,  63,  31,  31,  153, 153, 168,
++              140, 198, 79,  124, 138, 94,  153, 111, 149, 107, 167, 154, 154,
++              154, 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136,
++              153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182, 125,
++              110, 94,  110, 95,  79,  125, 111, 110, 78,  110, 111, 111, 95,
++              94,  108, 123, 108, 125, 110, 94,  110, 95,  79,  125, 111, 110,
++              78,  110, 111, 111, 95,  94,  108, 123, 108, 121, 140, 61,  154,
++              107, 167, 91,  122, 107, 167, 139, 139, 155, 154, 139, 153, 139,
++              123, 123, 63,  153, 166, 183, 140, 136, 153, 154, 166, 183, 140,
++              136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123,
++              107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, 0,   0,
++      },
++      {
++              153, 160, 107, 139, 126, 197, 185, 201, 154, 134, 154, 139, 154,
++              154, 183, 152, 154, 137, 95,  79,  63,  31,  31,  153, 153, 168,
++              169, 198, 79,  224, 167, 122, 153, 111, 149, 92,  167, 154, 154,
++              154, 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136,
++              153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182, 125,
++              110, 124, 110, 95,  94,  125, 111, 111, 79,  125, 126, 111, 111,
++              79,  108, 123, 93,  125, 110, 124, 110, 95,  94,  125, 111, 111,
++              79,  125, 126, 111, 111, 79,  108, 123, 93,  121, 140, 61,  154,
++              107, 167, 91,  107, 107, 167, 139, 139, 170, 154, 139, 153, 139,
++              123, 123, 63,  124, 166, 183, 140, 136, 153, 154, 166, 183, 140,
++              136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138,
++              122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, 0,   0,
++      },
++};
++
++/*
++ * Compute the initial CABAC context values for the current slice and queue
++ * them as phase 1 commands at addresses 0x1000 onwards, four values per
++ * 32-bit word (lowest index in the least significant byte).
++ *
++ * NOTE(review): slice_type indexes prob_init[] without a range check in
++ * this function - presumably validated before we get here; confirm.
++ */
++static void write_prob(struct rpivid_dec_env *const de,
++                     const struct rpivid_dec_state *const s)
++{
++      u8 dst[RPI_PROB_ARRAY_SIZE];
++
++      /*
++       * With the cabac_init flag set on a non-I slice the row is
++       * slice_type + 1, otherwise it is 2 - slice_type.
++       */
++      const unsigned int init_type =
++              ((s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT) != 0 &&
++               s->sh->slice_type != HEVC_SLICE_I) ?
++                      s->sh->slice_type + 1 :
++                      2 - s->sh->slice_type;
++      const u8 *p = prob_init[init_type];
++      const int q = clip_int(s->slice_qp, 0, 51);
++      unsigned int i;
++
++      for (i = 0; i < RPI_PROB_VALS; i++) {
++              int init_value = p[i];
++              /* Slope from the high nibble, offset from the low nibble */
++              int m = (init_value >> 4) * 5 - 45;
++              int n = ((init_value & 15) << 3) - 16;
++              int pre = 2 * (((m * q) >> 4) + n) - 127;
++
++              /* Negative values become their one's complement */
++              pre ^= pre >> 31;
++              /* Cap at 124 or 125, preserving the low bit */
++              if (pre > 124)
++                      pre = 124 + (pre & 1);
++              dst[i] = pre;
++      }
++      /* Zero the padding so that whole words can be written */
++      for (i = RPI_PROB_VALS; i != RPI_PROB_ARRAY_SIZE; ++i)
++              dst[i] = 0;
++
++      for (i = 0; i < RPI_PROB_ARRAY_SIZE; i += 4)
++              p1_apb_write(de, 0x1000 + i,
++                           dst[i] + (dst[i + 1] << 8) + (dst[i + 2] << 16) +
++                                   (dst[i + 3] << 24));
++}
++
++/*
++ * Queue the scaling factor table of @de as phase 1 commands at addresses
++ * 0x2000 onwards, four bytes per 32-bit word with the lowest index in the
++ * least significant byte.
++ *
++ * Each byte is widened to u32 before shifting so that a top byte of 0x80 or
++ * more is never shifted into the sign bit of a promoted int.
++ */
++static void write_scaling_factors(struct rpivid_dec_env *const de)
++{
++      const u8 *p = de->scaling_factors;
++      unsigned int i;
++
++      for (i = 0; i < NUM_SCALING_FACTORS; i += 4, p += 4)
++              p1_apb_write(de, 0x2000 + i,
++                           (u32)p[0] | ((u32)p[1] << 8) |
++                           ((u32)p[2] << 16) | ((u32)p[3] << 24));
++}
++
++/* Convert a DMA address to the value programmed into the h/w: addr / 64 */
++static inline __u32 dma_to_axi_addr(dma_addr_t a)
++{
++      const dma_addr_t blocks = a >> 6;
++
++      return (__u32)blocks;
++}
++
++/*
++ * Queue the commands that point the hardware at the slice data.
++ *
++ * The data starts one byte after the byte containing data_bit_offset and
++ * runs to the end of the slice (bit_size rounded up to whole bytes).  If the
++ * source buffer has a DMA address it is used in place; otherwise the bytes
++ * are copied into the bit copy buffer, which is consumed in multiples of 64
++ * bytes.  The address is programmed as a 64-byte block number (RPI_BFBASE)
++ * plus the offset within that block (low bits of RPI_BFCONTROL).
++ *
++ * NOTE(review): len is unsigned and wraps if offset exceeds the slice size
++ * in bytes, and the copy is not checked against bit_copy_gptr->size here -
++ * confirm both are validated by the caller.
++ */
++static void write_bitstream(struct rpivid_dec_env *const de,
++                          const struct rpivid_dec_state *const s)
++{
++      // Note that FFmpeg removes emulation prevention bytes, so this is
++      // matched in the configuration here.
++      // Whether that is the correct behaviour or not is not clear in the
++      // spec.
++      const int rpi_use_emu = 1;
++      unsigned int offset = s->sh->data_bit_offset / 8 + 1;
++      const unsigned int len = (s->sh->bit_size + 7) / 8 - offset;
++      dma_addr_t addr;
++
++      if (s->src_addr != 0) {
++              addr = s->src_addr + offset;
++      } else {
++              memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len,
++                     s->src_buf + offset, len);
++              addr = de->bit_copy_gptr->addr + de->bit_copy_len;
++              de->bit_copy_len += (len + 63) & ~63;
++      }
++      /* From here on offset is the position within the 64-byte block */
++      offset = addr & 63;
++
++      p1_apb_write(de, RPI_BFBASE, dma_to_axi_addr(addr));
++      p1_apb_write(de, RPI_BFNUM, len);
++      p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop
++      p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6));
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++/*
++ * Build and queue the RPI_SLICE command word:
++ *   bits 0..   max_num_merge_cand
++ *   bits 4..   number of L0 references
++ *   bits 8..   number of L1 references
++ *   bits 12..  slice type
++ *   bit  14    SAO luma enabled
++ *   bit  15    SAO chroma enabled
++ *   bit  16    mvd_l1_zero flag (B slices only)
++ *   bits 17..  @slice_w
++ *   bits 24..  @slice_h
++ */
++static void write_slice(struct rpivid_dec_env *const de,
++                      const struct rpivid_dec_state *const s,
++                      const unsigned int slice_w,
++                      const unsigned int slice_h)
++{
++      const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
++      const bool sao_luma =
++              (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA) != 0;
++      const bool sao_chroma =
++              (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA) != 0;
++      u32 word;
++
++      word = (sh->slice_type << 12) + (sao_luma << 14) + (sao_chroma << 15) +
++             (slice_w << 17) + (slice_h << 24);
++
++      word |= (s->max_num_merge_cand << 0) + (s->nb_refs[L0] << 4) +
++              (s->nb_refs[L1] << 8);
++
++      if (sh->slice_type == HEVC_SLICE_B) {
++              const bool mvd_l1_zero =
++                      (sh->flags &
++                       V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO) != 0;
++
++              word |= mvd_l1_zero << 16;
++      }
++
++      p1_apb_write(de, RPI_SLICE, word);
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Tiles mode
++
++/*
++ * Queue the phase 1 commands that start decoding at a new entry point.
++ * @de:          decode environment receiving the commands
++ * @s:           decoder state (tile tables, SPS, current slice)
++ * @do_bte:      non-zero to also write RPI_BEGINTILEEND
++ * @reset_qp_y:  non-zero to reload RPI_QP from the slice QP
++ * @ctb_addr_ts: CTB address, in tile scan order, of the entry point
++ *
++ * The CTB is converted to raster column/row, its tile is located, and the
++ * tile start/end, slice word, optional QP, mode and control registers are
++ * written in that order.
++ */
++static void new_entry_point(struct rpivid_dec_env *const de,
++                          const struct rpivid_dec_state *const s,
++                          const int do_bte,
++                          const int reset_qp_y, const int ctb_addr_ts)
++{
++      int ctb_col = s->ctb_addr_ts_to_rs[ctb_addr_ts] %
++                                                      de->pic_width_in_ctbs_y;
++      int ctb_row = s->ctb_addr_ts_to_rs[ctb_addr_ts] /
++                                                      de->pic_width_in_ctbs_y;
++
++      int tile_x = ctb_to_tile(ctb_col, s->col_bd, s->num_tile_columns);
++      int tile_y = ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows);
++
++      /* Last CTB column/row belonging to this tile */
++      int endx = s->col_bd[tile_x + 1] - 1;
++      int endy = s->row_bd[tile_y + 1] - 1;
++
++      u8 slice_w = ctb_to_slice_w_h(ctb_col, 1 << s->log2_ctb_size,
++                                    s->sps.pic_width_in_luma_samples,
++                                    s->col_bd, s->num_tile_columns);
++      u8 slice_h = ctb_to_slice_w_h(ctb_row, 1 << s->log2_ctb_size,
++                                    s->sps.pic_height_in_luma_samples,
++                                    s->row_bd, s->num_tile_rows);
++
++      p1_apb_write(de, RPI_TILESTART,
++                   s->col_bd[tile_x] + (s->row_bd[tile_y] << 16));
++      p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
++
++      if (do_bte)
++              p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
++
++      write_slice(de, s, slice_w, slice_h);
++
++      if (reset_qp_y) {
++              unsigned int sps_qp_bd_offset =
++                      6 * s->sps.bit_depth_luma_minus8;
++
++              p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp);
++      }
++
++      /* Bits 17/18 flag the last tile column/row */
++      p1_apb_write(de, RPI_MODE,
++                   (0xFFFF << 0) + (0x0 << 16) +
++                           ((tile_x == s->num_tile_columns - 1) << 17) +
++                           ((tile_y == s->num_tile_rows - 1) << 18));
++
++      p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++// Write the per-slice-segment parameter registers: RPI_SPS0, RPI_SPS1 and
++// RPI_PPS packed from the SPS/PPS/slice header controls, the scaling
++// factors when scaling lists are enabled, and finally RPI_SLICESTART.
++// de->reg_slicestart is only recomputed for independent slice segments, so
++// a dependent segment re-sends the value stored by an earlier call.
++static void new_slice_segment(struct rpivid_dec_env *const de,
++                            const struct rpivid_dec_state *const s)
++{
++      const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
++      const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
++
++      // Block size logs shared by several fields
++      const int log2_min_cb =
++              sps->log2_min_luma_coding_block_size_minus3 + 3;
++      const int log2_min_tb =
++              sps->log2_min_luma_transform_block_size_minus2 + 2;
++      const int log2_min_pcm =
++              sps->log2_min_pcm_luma_coding_block_size_minus3 + 3;
++
++      // SPS flags reduced to 0/1
++      const int amp = !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED);
++      const int pcm = !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED);
++      const int scaling_list =
++              !!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED);
++      const int strong_intra =
++              !!(sps->flags &
++                 V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED);
++      // chroma_format_idc is forced to 0 with separate colour planes
++      const int chroma_idc =
++              (sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE) ?
++                      0 : sps->chroma_format_idc;
++
++      // PPS flags reduced to 0/1
++      const int cu_qp_delta =
++              !!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED);
++      const int transquant_bypass =
++              !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED);
++      const int transform_skip =
++              !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED);
++      const int sign_data_hiding =
++              !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED);
++      const int constrained_intra =
++              !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED);
++
++      p1_apb_write(de, RPI_SPS0,
++                   (log2_min_cb << 0) |
++                   (s->log2_ctb_size << 4) |
++                   (log2_min_tb << 8) |
++                   ((log2_min_tb +
++                     sps->log2_diff_max_min_luma_transform_block_size)
++                                              << 12) |
++                   ((sps->bit_depth_luma_minus8 + 8) << 16) |
++                   ((sps->bit_depth_chroma_minus8 + 8) << 20) |
++                   (sps->max_transform_hierarchy_depth_intra << 24) |
++                   (sps->max_transform_hierarchy_depth_inter << 28));
++
++      p1_apb_write(de, RPI_SPS1,
++                   ((sps->pcm_sample_bit_depth_luma_minus1 + 1) << 0) |
++                   ((sps->pcm_sample_bit_depth_chroma_minus1 + 1) << 4) |
++                   (log2_min_pcm << 8) |
++                   ((log2_min_pcm +
++                     sps->log2_diff_max_min_pcm_luma_coding_block_size)
++                                              << 12) |
++                   (chroma_idc << 16) |
++                   (amp << 18) |
++                   (pcm << 19) |
++                   (scaling_list << 20) |
++                   (strong_intra << 21));
++
++      // The two QP offset fields carry (pps offset + slice offset) mod 256
++      p1_apb_write(de, RPI_PPS,
++                   ((s->log2_ctb_size - pps->diff_cu_qp_delta_depth) << 0) |
++                   (cu_qp_delta << 4) |
++                   (transquant_bypass << 5) |
++                   (transform_skip << 6) |
++                   (sign_data_hiding << 7) |
++                   (((pps->pps_cb_qp_offset + s->sh->slice_cb_qp_offset) & 255)
++                                              << 8) |
++                   (((pps->pps_cr_qp_offset + s->sh->slice_cr_qp_offset) & 255)
++                                              << 16) |
++                   (constrained_intra << 24));
++
++      if (scaling_list)
++              write_scaling_factors(de);
++
++      if (!s->dependent_slice_segment_flag) {
++              const int start_col = s->sh->slice_segment_addr %
++                                              de->pic_width_in_ctbs_y;
++              const int start_row = s->sh->slice_segment_addr /
++                                              de->pic_width_in_ctbs_y;
++
++              de->reg_slicestart = (start_col << 0) + (start_row << 16);
++      }
++
++      p1_apb_write(de, RPI_SLICESTART, de->reg_slicestart);
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Slice messages
++
++// Append one 16-bit message to de->slice_msgs and bump the message count.
++// No bounds check is done here; the caller is relied upon to stay within
++// the size of de->slice_msgs.
++static void msg_slice(struct rpivid_dec_env *const de, const u16 msg)
++{
++      de->slice_msgs[de->num_slice_msgs] = msg;
++      de->num_slice_msgs++;
++}
++
++// Emit the queued slice messages: first RPI_SLICECMDS with the message
++// count in the low bits and sliceid shifted to bit 8, then one write per
++// message at consecutive 4-byte steps starting from address 0x4000.
++static void program_slicecmds(struct rpivid_dec_env *const de,
++                            const int sliceid)
++{
++      int n = 0;
++
++      p1_apb_write(de, RPI_SLICECMDS, de->num_slice_msgs + (sliceid << 8));
++
++      while (n < de->num_slice_msgs) {
++              p1_apb_write(de, 0x4000 + 4 * n, de->slice_msgs[n] & 0xffff);
++              n++;
++      }
++}
++
++// NoBackwardPredictionFlag 8.3.5 - decided purely by comparing POCs.
++// Note the sense of the result: returns 1 when NONE of the n references
++// selected by idx[] has a POC ahead of cur_poc, and 0 as soon as one does.
++static int has_backward(const struct v4l2_hevc_dpb_entry *const dpb,
++                      const __u8 *const idx, const unsigned int n,
++                      const unsigned int cur_poc)
++{
++      unsigned int k = 0;
++
++      while (k != n) {
++              // The difference is examined mod 2^16: bit 15 set means it
++              // is negative as a 16-bit value. Only u16 POCs are
++              // available, and 8.3.1 requires every DiffPicOrderCnt()
++              // used in decoding to lie in -2^15 .. 2^15 - 1, so 16 bits
++              // are enough.
++              const unsigned int diff =
++                      cur_poc - dpb[idx[k]].pic_order_cnt[0];
++
++              if ((diff & 0x8000) != 0)
++                      return 0;
++              k++;
++      }
++      return 1;
++}
++
++// Append the six weighted-prediction messages for entry idx of one
++// reference list (is_l1 == 0 reads the *_l0 tables, otherwise the *_l1
++// tables): luma weight, luma offset, then weight and offset for each of
++// the two chroma components. Each weight message carries the log2
++// denominator in the low bits and (delta + (1 << log2 denominator)),
++// masked to 9 bits, shifted to bit 3. Offsets are masked to 8 bits.
++static void msg_slice_pred_weights(struct rpivid_dec_env *const de,
++                                 const struct v4l2_hevc_pred_weight_table
++                                      *const w,
++                                 const int is_l1, const unsigned int idx)
++{
++      const int luma_weight_denom = (1 << w->luma_log2_weight_denom);
++      const unsigned int chroma_log2_weight_denom =
++              (w->luma_log2_weight_denom +
++               w->delta_chroma_log2_weight_denom);
++      const int chroma_weight_denom = (1 << chroma_log2_weight_denom);
++      const int delta_luma_weight = is_l1 ?
++              w->delta_luma_weight_l1[idx] : w->delta_luma_weight_l0[idx];
++      const int luma_offset = is_l1 ?
++              w->luma_offset_l1[idx] : w->luma_offset_l0[idx];
++      unsigned int c;
++
++      msg_slice(de,
++                w->luma_log2_weight_denom |
++                (((delta_luma_weight + luma_weight_denom) & 0x1ff) << 3));
++      msg_slice(de, luma_offset & 0xff);
++
++      for (c = 0; c < 2; c++) {
++              const int delta_chroma_weight = is_l1 ?
++                      w->delta_chroma_weight_l1[idx][c] :
++                      w->delta_chroma_weight_l0[idx][c];
++              const int chroma_offset = is_l1 ?
++                      w->chroma_offset_l1[idx][c] :
++                      w->chroma_offset_l0[idx][c];
++
++              msg_slice(de,
++                        chroma_log2_weight_denom |
++                        (((delta_chroma_weight + chroma_weight_denom) &
++                          0x1ff) << 3));
++              msg_slice(de, chroma_offset & 0xff);
++      }
++}
++
++// Append the reference picture descriptions of one reference list
++// (is_l1 == 0: ref_idx_l0, otherwise ref_idx_l1). For each of the nb_refs
++// entries this queues: the DPB index with bit 4 set for long-term
++// references and bits 5-6 set when weighted prediction is in use, then the
++// reference POC, then (weighted prediction only) the weight messages.
++static void msg_slice_ref_list(struct rpivid_dec_env *const de,
++                             const struct v4l2_ctrl_hevc_slice_params
++                                      *const sh,
++                             const int is_l1, const unsigned int nb_refs,
++                             const int weighted_pred_flag)
++{
++      const __u8 *const rpl = is_l1 ? sh->ref_idx_l1 : sh->ref_idx_l0;
++      unsigned int idx;
++
++      for (idx = 0; idx < nb_refs; ++idx) {
++              const unsigned int dpb_no = rpl[idx];
++
++              msg_slice(de,
++                        dpb_no |
++                        (sh->dpb[dpb_no].rps ==
++                              V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ?
++                                       (1 << 4) : 0) |
++                        (weighted_pred_flag ? (3 << 5) : 0));
++              msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]);
++
++              if (weighted_pred_flag)
++                      msg_slice_pred_weights(de, &sh->pred_weight_table,
++                                             is_l1, idx);
++      }
++}
++
++// Rebuild the slice message list (de->slice_msgs) for the current slice.
++// The list is reset and then filled, in order, with:
++//  - the slice command word (slice type, reference counts, flags),
++//  - for P/B slices, the L0 then L1 reference picture descriptions,
++//  - the deblocking / loop filter word,
++//  - the slice Cb/Cr QP offset word (CMD_QPOFF).
++// For P/B slices with temporal MVP, de->dpbno_col is also set to the DPB
++// index of the collocated picture.
++static void pre_slice_decode(struct rpivid_dec_env *const de,
++                           const struct rpivid_dec_state *const s)
++{
++      const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
++      int weighted_pred_flag;
++      u16 cmd_slice;
++      unsigned int collocated_from_l0_flag;
++
++      de->num_slice_msgs = 0;
++
++      // Slice type code in the low bits: I=1, P=2, B=3 (0 if none match)
++      cmd_slice = 0;
++      if (sh->slice_type == HEVC_SLICE_I)
++              cmd_slice = 1;
++      if (sh->slice_type == HEVC_SLICE_P)
++              cmd_slice = 2;
++      if (sh->slice_type == HEVC_SLICE_B)
++              cmd_slice = 3;
++
++      cmd_slice |= (s->nb_refs[L0] << 2) | (s->nb_refs[L1] << 6) |
++                   (s->max_num_merge_cand << 11);
++
++      // Treated as "from L0" unless this is a B slice with temporal MVP
++      // whose header says otherwise
++      collocated_from_l0_flag =
++              !s->slice_temporal_mvp ||
++              sh->slice_type != HEVC_SLICE_B ||
++              (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0);
++      cmd_slice |= collocated_from_l0_flag << 14;
++
++      if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
++              // Flag to say all reference pictures are from the past
++              // (has_backward() returns 1 when a list has no reference
++              // with a POC ahead of the current one)
++              const int no_backward_pred_flag =
++                      has_backward(sh->dpb, sh->ref_idx_l0, s->nb_refs[L0],
++                                   sh->slice_pic_order_cnt) &&
++                      has_backward(sh->dpb, sh->ref_idx_l1, s->nb_refs[L1],
++                                   sh->slice_pic_order_cnt);
++              cmd_slice |= no_backward_pred_flag << 10;
++              msg_slice(de, cmd_slice);
++
++              if (s->slice_temporal_mvp) {
++                      const __u8 *const rpl = collocated_from_l0_flag ?
++                                              sh->ref_idx_l0 : sh->ref_idx_l1;
++                      de->dpbno_col = rpl[sh->collocated_ref_idx];
++              }
++
++              // Write reference picture descriptions; P slices follow the
++              // PPS weighted_pred flag, B slices the weighted_bipred flag
++              weighted_pred_flag =
++                      sh->slice_type == HEVC_SLICE_P ?
++                              !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) :
++                              !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED);
++
++              msg_slice_ref_list(de, sh, 0, s->nb_refs[L0],
++                                 weighted_pred_flag);
++              msg_slice_ref_list(de, sh, 1, s->nb_refs[L1],
++                                 weighted_pred_flag);
++      } else {
++              msg_slice(de, cmd_slice);
++      }
++
++      // Deblocking offsets (4 bits each) plus filter control flags
++      msg_slice(de,
++                (sh->slice_beta_offset_div2 & 15) |
++                ((sh->slice_tc_offset_div2 & 15) << 4) |
++                ((sh->flags &
++                  V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED) ?
++                                              1 << 8 : 0) |
++                ((sh->flags &
++                        V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED) ?
++                                              1 << 9 : 0) |
++                ((s->pps.flags &
++                        V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED) ?
++                                              1 << 10 : 0));
++
++      msg_slice(de, ((sh->slice_cr_qp_offset & 31) << 5) +
++                     (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Write STATUS register with expected end CTU address of previous slice
++
++// Write RPI_STATUS with the raster column/row of the CTB that precedes
++// ctb_addr_ts in tile scan order. ctb_addr_ts must be non-zero, since
++// entry ctb_addr_ts - 1 of the ts->rs map is read.
++static void end_previous_slice(struct rpivid_dec_env *const de,
++                             const struct rpivid_dec_state *const s,
++                             const int ctb_addr_ts)
++{
++      const int prev_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
++      const int last_x = prev_rs % de->pic_width_in_ctbs_y;
++      const int last_y = prev_rs / de->pic_width_in_ctbs_y;
++
++      p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
++}
++
++// Emit the four-write "pause" sequence for CTB row ctb_row: RPI_STATUS,
++// a PROB_BACKUP transfer, RPI_MODE (a different constant is used for the
++// last CTB row of the picture) and RPI_CONTROL.
++static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row)
++{
++      p1_apb_write(de, RPI_STATUS, (ctb_row << 18) + 0x25);
++      p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
++
++      if (ctb_row == de->pic_height_in_ctbs_y - 1)
++              p1_apb_write(de, RPI_MODE, 0x70000);
++      else
++              p1_apb_write(de, RPI_MODE, 0x30000);
++
++      p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
++}
++
++// Wavefront variant of end_previous_slice(): optionally pauses first,
++// writes RPI_STATUS for the CTB preceding ctb_addr_ts, then optionally
++// issues a PROB_BACKUP transfer. Both decisions depend on where the
++// previous entry point started (de->wpp_entry_x/y), where the new slice
++// segment starts and on the picture width in CTBs. ctb_addr_ts must be
++// non-zero.
++static void wpp_end_previous_slice(struct rpivid_dec_env *const de,
++                                 const struct rpivid_dec_state *const s,
++                                 int ctb_addr_ts)
++{
++      // Start position of the new slice segment
++      const int new_x = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
++      const int new_y = s->sh->slice_segment_addr / de->pic_width_in_ctbs_y;
++      // Position of the last CTB of the previous slice
++      const int prev_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
++      const int last_x = prev_rs % de->pic_width_in_ctbs_y;
++      const int last_y = prev_rs / de->pic_width_in_ctbs_y;
++      int need_backup;
++
++      if (de->pic_width_in_ctbs_y > 2 && de->wpp_entry_x < 2) {
++              if (new_x > 2 || de->wpp_entry_y < new_y)
++                      wpp_pause(de, last_y);
++      }
++
++      p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
++
++      if (new_x == 2)
++              need_backup = 1;
++      else
++              need_backup = de->pic_width_in_ctbs_y == 2 &&
++                            de->wpp_entry_y < new_y;
++
++      if (need_backup)
++              p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Wavefront mode
++
++// Emit the register writes that start decoding at ctb_addr_ts in
++// wavefront mode. The entry position is recorded in de->wpp_entry_x/y.
++// The tile end is programmed as the last CTB column of the entry row, and
++// the slice height passed to write_slice() is one CTB unless the entry is
++// in the last CTB row of the picture.
++static void wpp_entry_point(struct rpivid_dec_env *const de,
++                          const struct rpivid_dec_state *const s,
++                          const int do_bte,
++                          const int reset_qp_y, const int ctb_addr_ts)
++{
++      const int ctb_size = 1 << s->log2_ctb_size;
++      const int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
++      int ctb_col, ctb_row;
++      int endx, endy;
++      u8 slice_w, slice_h;
++
++      // Remember where this entry point starts
++      de->wpp_entry_x = ctb_addr_rs % de->pic_width_in_ctbs_y;
++      ctb_col = de->wpp_entry_x;
++      de->wpp_entry_y = ctb_addr_rs / de->pic_width_in_ctbs_y;
++      ctb_row = de->wpp_entry_y;
++
++      endx = de->pic_width_in_ctbs_y - 1;
++      endy = ctb_row;
++
++      slice_w = ctb_to_slice_w_h(ctb_col, ctb_size,
++                                 s->sps.pic_width_in_luma_samples,
++                                 s->col_bd, s->num_tile_columns);
++      slice_h = ctb_to_slice_w_h(ctb_row, ctb_size,
++                                 s->sps.pic_height_in_luma_samples,
++                                 s->row_bd, s->num_tile_rows);
++
++      p1_apb_write(de, RPI_TILESTART, 0);
++      p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
++
++      if (do_bte)
++              p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
++
++      if (ctb_row == de->pic_height_in_ctbs_y - 1)
++              write_slice(de, s, slice_w, slice_h);
++      else
++              write_slice(de, s, slice_w, ctb_size);
++
++      if (reset_qp_y) {
++              const unsigned int qp_bd_offset =
++                      6 * s->sps.bit_depth_luma_minus8;
++
++              p1_apb_write(de, RPI_QP, qp_bd_offset + s->slice_qp);
++      }
++
++      // A different mode constant is used for the last CTB row
++      if (ctb_row == de->pic_height_in_ctbs_y - 1)
++              p1_apb_write(de, RPI_MODE, 0x60001);
++      else
++              p1_apb_write(de, RPI_MODE, 0x20001);
++
++      p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Wavefront mode
++
++// Emit the command sequence for one slice segment in wavefront mode,
++// starting at tile-scan address ctb_addr_ts, followed by one further
++// entry point per entry point offset in the slice header. Each further
++// entry point advances ctb_addr_ts by s->column_width[0].
++// The sh parameter is not used; the slice header is read through s->sh.
++static void wpp_decode_slice(struct rpivid_dec_env *const de,
++                           const struct rpivid_dec_state *const s,
++                           const struct v4l2_ctrl_hevc_slice_params *sh,
++                           int ctb_addr_ts)
++{
++      const int independent = !s->dependent_slice_segment_flag;
++      const int start_col =
++              s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
++      int reset_qp_y = 1;
++      int entry;
++
++      if (ctb_addr_ts)
++              wpp_end_previous_slice(de, s, ctb_addr_ts);
++      pre_slice_decode(de, s);
++      write_bitstream(de, s);
++
++      if (ctb_addr_ts != 0 && !independent &&
++          de->pic_width_in_ctbs_y != 1) {
++              // Dependent segment that is not first in the picture:
++              // reload the probabilities at the start of a row, otherwise
++              // carry on and leave the QP alone
++              if (start_col == 0)
++                      p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
++              else
++                      reset_qp_y = 0;
++      } else {
++              write_prob(de, s);
++      }
++
++      program_slicecmds(de, s->slice_idx);
++      new_slice_segment(de, s);
++      wpp_entry_point(de, s, independent, reset_qp_y, ctb_addr_ts);
++
++      for (entry = 0; entry < s->sh->num_entry_point_offsets; entry++) {
++              const int rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
++              const int row = rs / de->pic_width_in_ctbs_y;
++              const int last_x = de->pic_width_in_ctbs_y - 1;
++
++              // Finish the current row ...
++              if (de->pic_width_in_ctbs_y > 2)
++                      wpp_pause(de, row);
++              p1_apb_write(de, RPI_STATUS,
++                           (row << 18) + (last_x << 5) + 2);
++              if (de->pic_width_in_ctbs_y == 2)
++                      p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
++
++              // ... set up the probabilities for the next one ...
++              if (de->pic_width_in_ctbs_y == 1)
++                      write_prob(de, s);
++              else
++                      p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
++
++              // ... and start it
++              ctb_addr_ts += s->column_width[0];
++              wpp_entry_point(de, s, 0, 1, ctb_addr_ts);
++      }
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Tiles mode
++
++static void decode_slice(struct rpivid_dec_env *const de,
++                       const struct rpivid_dec_state *const s,
++                       const struct v4l2_ctrl_hevc_slice_params *const sh,
++                       int ctb_addr_ts)
++{
++      int i, reset_qp_y;
++
++      if (ctb_addr_ts)
++              end_previous_slice(de, s, ctb_addr_ts);
++
++      pre_slice_decode(de, s);
++      write_bitstream(de, s);
++
++#if DEBUG_TRACE_P1_CMD
++      if (p1_z < 256) {
++              v4l2_info(&de->ctx->dev->v4l2_dev,
++                        "TS=%d, tile=%d/%d, dss=%d, flags=%#llx\n",
++                        ctb_addr_ts, s->tile_id[ctb_addr_ts],
++                        s->tile_id[ctb_addr_ts - 1],
++                        s->dependent_slice_segment_flag, sh->flags);
++      }
++#endif
++
++      reset_qp_y = ctb_addr_ts == 0 ||
++                 s->tile_id[ctb_addr_ts] != s->tile_id[ctb_addr_ts - 1] ||
++                 !s->dependent_slice_segment_flag;
++      if (reset_qp_y)
++              write_prob(de, s);
++
++      program_slicecmds(de, s->slice_idx);
++      new_slice_segment(de, s);
++      new_entry_point(de, s, !s->dependent_slice_segment_flag, reset_qp_y,
++                      ctb_addr_ts);
++
++      for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
++              int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
++              int ctb_col = ctb_addr_rs % de->pic_width_in_ctbs_y;
++              int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
++              int tile_x = ctb_to_tile(ctb_col, s->col_bd,
++                                       s->num_tile_columns - 1);
++              int tile_y =
++                      ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows - 1);
++              int last_x = s->col_bd[tile_x + 1] - 1;
++              int last_y = s->row_bd[tile_y + 1] - 1;
++
++              p1_apb_write(de, RPI_STATUS,
++                           2 + (last_x << 5) + (last_y << 18));
++              write_prob(de, s);
++              ctb_addr_ts += s->column_width[tile_x] * s->row_height[tile_y];
++              new_entry_point(de, s, 0, 1, ctb_addr_ts);
++      }
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Scaling factors
++
++// Expand one scaling list from src0 into dst0.
++//  size_id 0: 16 bytes are copied unchanged.
++//  size_id 1: 64 bytes are copied unchanged.
++//  size_id 2: the 8x8 source is upsampled to 16x16 by repeating every
++//             value twice horizontally and vertically.
++//  otherwise: the 8x8 source is upsampled to 32x32 (four-fold repeat).
++// For the two upsampled sizes the first output byte is then replaced by
++// dc. dc is ignored for size_id 0 and 1; matrix_id is not used at all.
++static void expand_scaling_list(const unsigned int size_id,
++                              const unsigned int matrix_id, u8 *const dst0,
++                              const u8 *const src0, uint8_t dc)
++{
++      unsigned int rep, row, col, k;
++      u8 *out = dst0;
++
++      // FIXME: matrix_id is unused ?
++      if (size_id == 0) {
++              memcpy(dst0, src0, 16);
++              return;
++      }
++      if (size_id == 1) {
++              memcpy(dst0, src0, 64);
++              return;
++      }
++
++      // Replication factor in each direction
++      rep = (size_id == 2) ? 2 : 4;
++
++      for (row = 0; row != 8 * rep; row++) {
++              // rep consecutive output rows share one source row
++              const u8 *const in = src0 + (row / rep) * 8;
++
++              for (col = 0; col != 8; col++)
++                      for (k = 0; k != rep; k++)
++                              *out++ = in[col];
++      }
++      dst0[0] = dc;
++}
++
++// Fill de->scaling_factors from the scaling matrix control of this run.
++// Every list is expanded by expand_scaling_list() at a fixed byte offset
++// taken from the table below: six 4x4, six 8x8 and six 16x16 lists, then
++// two 32x32 lists. The 16x16 and 32x32 lists also pass their DC
++// coefficient. The s parameter is not used.
++static void populate_scaling_factors(const struct rpivid_run *const run,
++                                   struct rpivid_dec_env *const de,
++                                   const struct rpivid_dec_state *const s)
++{
++      // Row indices of the offset table (size ids)
++      enum { SID_4X4 = 0, SID_8X8 = 1, SID_16X16 = 2, SID_32X32 = 3 };
++      // Destination offset of each list, indexed [size id][matrix id]
++      static const u32 sf_offset[4][6] = {
++              // MID0    MID1    MID2    MID3    MID4    MID5
++              // SID0 (4x4)
++              { 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050 },
++              // SID1 (8x8)
++              { 0x0060, 0x00A0, 0x00E0, 0x0120, 0x0160, 0x01A0 },
++              // SID2 (16x16)
++              { 0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0 },
++              // SID3 (32x32) - only two matrices exist at this size
++              { 0x07E0, 0x0BE0, 0x0000, 0x0000, 0x0000, 0x0000 }
++      };
++      const struct v4l2_ctrl_hevc_scaling_matrix *const sl =
++              run->h265.scaling_matrix;
++      unsigned int m;
++
++      for (m = 0; m != 6; m++)
++              expand_scaling_list(SID_4X4, m,
++                                  de->scaling_factors + sf_offset[SID_4X4][m],
++                                  sl->scaling_list_4x4[m], 0);
++
++      for (m = 0; m != 6; m++)
++              expand_scaling_list(SID_8X8, m,
++                                  de->scaling_factors + sf_offset[SID_8X8][m],
++                                  sl->scaling_list_8x8[m], 0);
++
++      for (m = 0; m != 6; m++)
++              expand_scaling_list(SID_16X16, m,
++                                  de->scaling_factors +
++                                          sf_offset[SID_16X16][m],
++                                  sl->scaling_list_16x16[m],
++                                  sl->scaling_list_dc_coef_16x16[m]);
++
++      for (m = 0; m != 2; m++)
++              expand_scaling_list(SID_32X32, m,
++                                  de->scaling_factors +
++                                          sf_offset[SID_32X32][m],
++                                  sl->scaling_list_32x32[m],
++                                  sl->scaling_list_dc_coef_32x32[m]);
++}
++
++// Free the five tables that updated_ps() derives from the parameter sets
++// and reset their pointers to NULL.
++static void free_ps_info(struct rpivid_dec_state *const s)
++{
++      // CTB address maps and per-CTB tile ids
++      kfree(s->ctb_addr_rs_to_ts);
++      kfree(s->ctb_addr_ts_to_rs);
++      kfree(s->tile_id);
++      // Tile column / row boundaries
++      kfree(s->col_bd);
++      kfree(s->row_bd);
++
++      // Leave no stale pointers behind
++      s->ctb_addr_rs_to_ts = NULL;
++      s->ctb_addr_ts_to_rs = NULL;
++      s->tile_id = NULL;
++      s->col_bd = NULL;
++      s->row_bd = NULL;
++}
++
++// Recompute everything that is derived from the current SPS/PPS in s:
++// CTB size and picture size in CTBs, the tile layout (column widths, row
++// heights and their boundary arrays col_bd/row_bd), the raster-scan <->
++// tile-scan CTB address maps and the tile id of every CTB (indexed by
++// tile-scan address). Tables from a previous call are freed first.
++//
++// Returns 0 on success or -ENOMEM if a table could not be allocated, in
++// which case all five table pointers are left NULL.
++//
++// NOTE(review): the tile widths/heights come straight from the PPS control
++// and are not checked here to add up to ctb_width/ctb_height - confirm
++// that they are validated before this point.
++static int updated_ps(struct rpivid_dec_state *const s)
++{
++      unsigned int ctb_addr_rs;
++      int j, x, y, tile_id;
++      unsigned int i;
++
++      free_ps_info(s);
++
++      // Inferred parameters
++      s->log2_ctb_size = s->sps.log2_min_luma_coding_block_size_minus3 + 3 +
++                         s->sps.log2_diff_max_min_luma_coding_block_size;
++
++      // Picture size in CTBs, rounded up
++      s->ctb_width = (s->sps.pic_width_in_luma_samples +
++                      (1 << s->log2_ctb_size) - 1) >>
++                     s->log2_ctb_size;
++      s->ctb_height = (s->sps.pic_height_in_luma_samples +
++                       (1 << s->log2_ctb_size) - 1) >>
++                      s->log2_ctb_size;
++      s->ctb_size = s->ctb_width * s->ctb_height;
++
++      // Tile layout: a single tile covering the picture unless tiles are
++      // enabled in the PPS
++      if (!(s->pps.flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
++              s->num_tile_columns = 1;
++              s->num_tile_rows = 1;
++              s->column_width[0] = s->ctb_width;
++              s->row_height[0] = s->ctb_height;
++      } else {
++              s->num_tile_columns = s->pps.num_tile_columns_minus1 + 1;
++              s->num_tile_rows = s->pps.num_tile_rows_minus1 + 1;
++              for (i = 0; i < s->num_tile_columns; ++i)
++                      s->column_width[i] = s->pps.column_width_minus1[i] + 1;
++              for (i = 0; i < s->num_tile_rows; ++i)
++                      s->row_height[i] = s->pps.row_height_minus1[i] + 1;
++      }
++
++      // Allocate all tables up front so that one check covers them all
++      s->col_bd = kmalloc_array(s->num_tile_columns + 1,
++                                sizeof(*s->col_bd), GFP_KERNEL);
++      s->row_bd = kmalloc_array(s->num_tile_rows + 1,
++                                sizeof(*s->row_bd), GFP_KERNEL);
++      s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size,
++                                           sizeof(*s->ctb_addr_rs_to_ts),
++                                           GFP_KERNEL);
++      s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size,
++                                           sizeof(*s->ctb_addr_ts_to_rs),
++                                           GFP_KERNEL);
++      s->tile_id = kmalloc_array(s->ctb_size, sizeof(*s->tile_id),
++                                 GFP_KERNEL);
++
++      if (!s->col_bd || !s->row_bd || !s->ctb_addr_rs_to_ts ||
++          !s->ctb_addr_ts_to_rs || !s->tile_id) {
++              // Drop whatever was allocated; pointers go back to NULL
++              free_ps_info(s);
++              return -ENOMEM;
++      }
++
++      // Boundaries: col_bd[i] / row_bd[i] is the first CTB column / row
++      // of tile i, with one extra entry marking the end of the last tile
++      s->col_bd[0] = 0;
++      for (i = 0; i < s->num_tile_columns; i++)
++              s->col_bd[i + 1] = s->col_bd[i] + s->column_width[i];
++
++      s->row_bd[0] = 0;
++      for (i = 0; i < s->num_tile_rows; i++)
++              s->row_bd[i + 1] = s->row_bd[i] + s->row_height[i];
++
++      // Raster scan <-> tile scan address maps
++      for (ctb_addr_rs = 0; ctb_addr_rs < s->ctb_size; ctb_addr_rs++) {
++              int tb_x = ctb_addr_rs % s->ctb_width;
++              int tb_y = ctb_addr_rs / s->ctb_width;
++              int tile_x = 0;
++              int tile_y = 0;
++              int val = 0;
++
++              // Find the tile that contains this CTB
++              for (i = 0; i < s->num_tile_columns; i++) {
++                      if (tb_x < s->col_bd[i + 1]) {
++                              tile_x = i;
++                              break;
++                      }
++              }
++
++              for (i = 0; i < s->num_tile_rows; i++) {
++                      if (tb_y < s->row_bd[i + 1]) {
++                              tile_y = i;
++                              break;
++                      }
++              }
++
++              // CTBs in the tiles to the left within this tile row ...
++              for (i = 0; i < tile_x; i++)
++                      val += s->row_height[tile_y] * s->column_width[i];
++              // ... plus all CTBs in the tile rows above ...
++              for (i = 0; i < tile_y; i++)
++                      val += s->ctb_width * s->row_height[i];
++
++              // ... plus the raster offset inside the tile itself
++              val += (tb_y - s->row_bd[tile_y]) * s->column_width[tile_x] +
++                     tb_x - s->col_bd[tile_x];
++
++              s->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
++              s->ctb_addr_ts_to_rs[val] = ctb_addr_rs;
++      }
++
++      // Tile id of every CTB; tiles are numbered in raster order
++      for (j = 0, tile_id = 0; j < s->num_tile_rows; j++)
++              for (i = 0; i < s->num_tile_columns; i++, tile_id++)
++                      for (y = s->row_bd[j]; y < s->row_bd[j + 1]; y++)
++                              for (x = s->col_bd[i];
++                                   x < s->col_bd[i + 1];
++                                   x++)
++                                      s->tile_id[s->ctb_addr_rs_to_ts
++                                                         [y * s->ctb_width +
++                                                          x]] = tile_id;
++
++      return 0;
++}
++
++/*
++ * frame_end() - finish the phase 1 command list and copy it to the DMA buffer
++ * @dev: device, used for logging and for (re)allocating the command buffer
++ * @de:  decode env holding the command fifo and the destination gptr
++ * @s:   decode state supplying the tile boundaries and PPS flags
++ *
++ * Queues the final RPI_STATUS write (preceded by a WPP pause when entropy
++ * coding sync is enabled and the entry conditions below hold), then copies
++ * the whole command fifo into de->cmd_copy_gptr, growing that buffer first
++ * if it is missing or too small.
++ *
++ * Return: 0 on success, -ENOMEM if the command copy buffer cannot be
++ * allocated.
++ */
++static int frame_end(struct rpivid_dev *const dev,
++                   struct rpivid_dec_env *const de,
++                   const struct rpivid_dec_state *const s)
++{
++      // col_bd[]/row_bd[] are cumulative tile boundaries so the final entry
++      // minus one is the index of the last column/row
++      const unsigned int last_x = s->col_bd[s->num_tile_columns] - 1;
++      const unsigned int last_y = s->row_bd[s->num_tile_rows] - 1;
++      size_t cmd_size;
++
++      if (s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) {
++              if (de->wpp_entry_x < 2 && de->pic_width_in_ctbs_y > 2)
++                      wpp_pause(de, last_y);
++      }
++      p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
++
++      // Copy commands out to dma buf
++      cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
++
++      if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) {
++              size_t cmd_alloc = round_up_size(cmd_size);
++
++              // cmd_alloc is a size_t so it must be printed with %zu
++              if (gptr_realloc_new(dev, de->cmd_copy_gptr, cmd_alloc)) {
++                      v4l2_err(&dev->v4l2_dev,
++                               "Alloc cmd buffer (%zu): FAILED\n", cmd_alloc);
++                      return -ENOMEM;
++              }
++              v4l2_info(&dev->v4l2_dev, "Alloc cmd buffer (%zu): OK\n",
++                        cmd_alloc);
++      }
++
++      memcpy(de->cmd_copy_gptr->ptr, de->cmd_fifo, cmd_size);
++      return 0;
++}
++
++/*
++ * Derive the collocated motion vector buffer geometry from the SPS picture
++ * size: the stride is the luma width rounded up to a multiple of 64 and the
++ * per-picture size is that stride multiplied by one sixteenth of the luma
++ * height rounded up to a multiple of 64.
++ * @run is not used.
++ */
++static void setup_colmv(struct rpivid_ctx *const ctx, struct rpivid_run *run,
++                      struct rpivid_dec_state *const s)
++{
++      const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
++
++      ctx->colmv_stride = ALIGN(sps->pic_width_in_luma_samples, 64);
++      ctx->colmv_picsize = (ALIGN(sps->pic_height_in_luma_samples, 64) >> 4) *
++                           ctx->colmv_stride;
++}
++
++// Take a decode env off the head of the context's free list.
++// Returns the env with its state set to RPIVID_DECODE_SLICE_START and its
++// next pointer cleared, or NULL if the free list is empty.
++// Can be called from irq context
++static struct rpivid_dec_env *dec_env_new(struct rpivid_ctx *const ctx)
++{
++      struct rpivid_dec_env *de;
++      unsigned long lock_flags;
++
++      // dec_lock protects the free list
++      spin_lock_irqsave(&ctx->dec_lock, lock_flags);
++
++      de = ctx->dec_free;
++      if (de) {
++              // Unlink the head entry and mark it as in use
++              ctx->dec_free = de->next;
++              de->next = NULL;
++              de->state = RPIVID_DECODE_SLICE_START;
++      }
++
++      spin_unlock_irqrestore(&ctx->dec_lock, lock_flags);
++      return de;
++}
++
++// Return a decode env to the context's free list.
++// Releases the env's frame_aux and col_aux entries (before taking the lock),
++// then marks the env RPIVID_DECODE_END and pushes it onto the head of the
++// free list.
++// Can be called from irq context
++static void dec_env_delete(struct rpivid_dec_env *const de)
++{
++      struct rpivid_ctx * const ctx = de->ctx;
++      unsigned long lock_flags;
++
++      aux_q_release(ctx, &de->frame_aux);
++      aux_q_release(ctx, &de->col_aux);
++
++      // dec_lock protects the free list
++      spin_lock_irqsave(&ctx->dec_lock, lock_flags);
++
++      de->state = RPIVID_DECODE_END;
++      de->next = ctx->dec_free;
++      ctx->dec_free = de;
++
++      spin_unlock_irqrestore(&ctx->dec_lock, lock_flags);
++}
++
++/*
++ * Free the decode env pool: the command fifo of every env and then the pool
++ * itself. Leaves ctx->dec_pool and ctx->dec_free NULL, so it is harmless to
++ * call when no pool was ever allocated.
++ */
++static void dec_env_uninit(struct rpivid_ctx *const ctx)
++{
++      struct rpivid_dec_env *const pool = ctx->dec_pool;
++      unsigned int n;
++
++      if (pool) {
++              for (n = 0; n < RPIVID_DEC_ENV_COUNT; n++)
++                      kfree(pool[n].cmd_fifo);
++
++              kfree(pool);
++      }
++
++      ctx->dec_pool = NULL;
++      ctx->dec_free = NULL;
++}
++
++/*
++ * dec_env_init() - allocate the decode env pool and chain it as a free list
++ * @ctx: decoder context that owns the pool
++ *
++ * Allocates RPIVID_DEC_ENV_COUNT zeroed envs, links them into ctx->dec_free
++ * and gives each one a command fifo of 1024 entries.
++ *
++ * Return: 0 on success, -1 if any allocation fails; in that case everything
++ * allocated so far has been released by dec_env_uninit().
++ */
++static int dec_env_init(struct rpivid_ctx *const ctx)
++{
++      unsigned int i;
++
++      // kcalloc zeroes the pool and checks the count * size multiplication
++      // for overflow
++      ctx->dec_pool = kcalloc(RPIVID_DEC_ENV_COUNT, sizeof(*ctx->dec_pool),
++                              GFP_KERNEL);
++      if (!ctx->dec_pool)
++              return -1;
++
++      spin_lock_init(&ctx->dec_lock);
++
++      // Build free chain - the last entry's next stays NULL from the zeroed
++      // allocation
++      ctx->dec_free = ctx->dec_pool;
++      for (i = 0; i != RPIVID_DEC_ENV_COUNT - 1; ++i)
++              ctx->dec_pool[i].next = ctx->dec_pool + i + 1;
++
++      // Fill in other bits
++      for (i = 0; i != RPIVID_DEC_ENV_COUNT; ++i) {
++              struct rpivid_dec_env *const de = ctx->dec_pool + i;
++
++              de->ctx = ctx;
++              de->decode_order = i;
++              de->cmd_max = 1024;
++              de->cmd_fifo = kmalloc_array(de->cmd_max,
++                                           sizeof(struct rpi_cmd),
++                                           GFP_KERNEL);
++              if (!de->cmd_fifo)
++                      goto fail;
++      }
++
++      return 0;
++
++fail:
++      // Envs not reached yet still have a NULL cmd_fifo so the kfree() calls
++      // in dec_env_uninit() are safe
++      dec_env_uninit(ctx);
++      return -1;
++}
++
++// Assume that we get exactly the same DPB for every slice
++// it makes no real sense otherwise
++#if V4L2_HEVC_DPB_ENTRIES_NUM_MAX > 16
++#error HEVC_DPB_ENTRIES > h/w slots
++#endif
++
++/*
++ * Build the value later written to RPI_CONFIG2 (see phase2_claimed()) from the
++ * cached SPS/PPS and per-frame state in @s. Each field's bit position is
++ * noted where it is inserted.
++ */
++static u32 mk_config2(const struct rpivid_dec_state *const s)
++{
++      const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
++      const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
++      u32 c;
++      // BitDepthY, from bit 0
++      c = (sps->bit_depth_luma_minus8 + 8) << 0;
++      // BitDepthC, from bit 4
++      c |= (sps->bit_depth_chroma_minus8 + 8) << 4;
++      // Bit 8: set when luma is deeper than 8 bits
++      if (sps->bit_depth_luma_minus8)
++              c |= BIT(8);
++      // Bit 9: set when chroma is deeper than 8 bits
++      if (sps->bit_depth_chroma_minus8)
++              c |= BIT(9);
++      // log2 of the CTB size, from bit 10
++      c |= s->log2_ctb_size << 10;
++      if (pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)
++              c |= BIT(13);
++      if (sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED)
++              c |= BIT(14);
++      if (sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED)
++              c |= BIT(15); /* Write motion vectors to external memory */
++      // Log2ParMrgLevel, from bit 16
++      c |= (pps->log2_parallel_merge_level_minus2 + 2) << 16;
++      // Bit 19: the frame's slice temporal MVP flag
++      if (s->slice_temporal_mvp)
++              c |= BIT(19);
++      if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED)
++              c |= BIT(20);
++      // Low 5 bits of the Cb and Cr QP offsets, from bits 21 and 26
++      c |= (pps->pps_cb_qp_offset & 31) << 21;
++      c |= (pps->pps_cr_qp_offset & 31) << 26;
++      return c;
++}
++
++/*
++ * rpivid_h265_setup() - build the phase 1 command list for one slice
++ * @ctx: decoder context
++ * @run: source/destination buffers and HEVC controls for this slice
++ *
++ * On the first slice of a frame this refreshes the cached SPS/PPS, takes a
++ * new decode env, checks the format and buffer constraints and pre-computes
++ * the phase 2 registers. Every slice is then turned into commands by
++ * decode_slice() or wpp_decode_slice(). When the source buffer does not have
++ * V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF set the frame is closed: the command
++ * list is copied out by frame_end(), reference addresses and aux entries are
++ * resolved and the env moves to RPIVID_DECODE_PHASE1.
++ *
++ * Nothing is returned; on failure the env state is set to
++ * RPIVID_DECODE_ERROR_DONE or RPIVID_DECODE_ERROR_CONTINUE.
++ */
++static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
++{
++      struct rpivid_dev *const dev = ctx->dev;
++      const struct v4l2_ctrl_hevc_slice_params *const sh =
++                                              run->h265.slice_params;
++      const struct v4l2_hevc_pred_weight_table *pred_weight_table;
++      struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++      struct rpivid_dec_state *const s = ctx->state;
++      struct vb2_queue *vq;
++      struct rpivid_dec_env *de;
++      int ctb_addr_ts;
++      unsigned int i;
++      int use_aux;
++      bool slice_temporal_mvp;
++
++      pred_weight_table = &sh->pred_weight_table;
++
++      // The frame ends with this slice unless the capture buffer is held
++      s->frame_end =
++              ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
++
++      de = ctx->dec0;
++      slice_temporal_mvp = (sh->flags &
++                 V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
++
++      if (de && de->state != RPIVID_DECODE_END) {
++              /* Continuation of a frame already in progress */
++              ++s->slice_idx;
++
++              switch (de->state) {
++              case RPIVID_DECODE_SLICE_CONTINUE:
++                      // Expected state
++                      break;
++              default:
++                      v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n",
++                               __func__, de->state);
++              /* FALLTHRU */
++              case RPIVID_DECODE_ERROR_CONTINUE:
++                      // Uncleared error - fail now
++                      goto fail;
++              }
++
++              if (s->slice_temporal_mvp != slice_temporal_mvp) {
++                      v4l2_warn(&dev->v4l2_dev,
++                                "Slice Temporal MVP non-constant\n");
++                      goto fail;
++              }
++      } else {
++              /* Frame start */
++              unsigned int ctb_size_y;
++              bool sps_changed = false;
++
++              if (memcmp(&s->sps, run->h265.sps, sizeof(s->sps)) != 0) {
++                      /* SPS changed */
++                      v4l2_info(&dev->v4l2_dev, "SPS changed\n");
++                      memcpy(&s->sps, run->h265.sps, sizeof(s->sps));
++                      sps_changed = true;
++              }
++              if (sps_changed ||
++                  memcmp(&s->pps, run->h265.pps, sizeof(s->pps)) != 0) {
++                      /* PPS changed, or SPS changed so derived data is stale */
++                      v4l2_info(&dev->v4l2_dev, "PPS changed\n");
++                      memcpy(&s->pps, run->h265.pps, sizeof(s->pps));
++
++                      /* Recalc stuff as required */
++                      updated_ps(s);
++              }
++
++              de = dec_env_new(ctx);
++              if (!de) {
++                      v4l2_err(&dev->v4l2_dev,
++                               "Failed to find free decode env\n");
++                      goto fail;
++              }
++              ctx->dec0 = de;
++
++              ctb_size_y =
++                      1U << (s->sps.log2_min_luma_coding_block_size_minus3 +
++                             3 +
++                             s->sps.log2_diff_max_min_luma_coding_block_size);
++
++              de->pic_width_in_ctbs_y =
++                      (s->sps.pic_width_in_luma_samples + ctb_size_y - 1) /
++                              ctb_size_y; // 7-15
++              de->pic_height_in_ctbs_y =
++                      (s->sps.pic_height_in_luma_samples + ctb_size_y - 1) /
++                              ctb_size_y; // 7-17
++              de->cmd_len = 0;
++              de->dpbno_col = ~0U;
++
++              de->bit_copy_gptr = ctx->bitbufs + 0;
++              de->bit_copy_len = 0;
++              de->cmd_copy_gptr = ctx->cmdbufs + 0;
++
++              de->frame_c_offset = ctx->dst_fmt.height * 128;
++              de->frame_stride = ctx->dst_fmt.bytesperline * 128;
++              de->frame_addr =
++                      vb2_dma_contig_plane_dma_addr(&run->dst->vb2_buf, 0);
++              de->frame_aux = NULL;
++
++              if (s->sps.bit_depth_luma_minus8 !=
++                  s->sps.bit_depth_chroma_minus8) {
++                      v4l2_warn(&dev->v4l2_dev,
++                                "Chroma depth (%d) != Luma depth (%d)\n",
++                                s->sps.bit_depth_chroma_minus8 + 8,
++                                s->sps.bit_depth_luma_minus8 + 8);
++                      goto fail;
++              }
++              if (s->sps.bit_depth_luma_minus8 == 0) {
++                      if (ctx->dst_fmt.pixelformat !=
++                                              V4L2_PIX_FMT_NV12_COL128) {
++                              v4l2_err(&dev->v4l2_dev,
++                                       "Pixel format %#x != NV12_COL128 for 8-bit output\n",
++                                       ctx->dst_fmt.pixelformat);
++                              goto fail;
++                      }
++              } else if (s->sps.bit_depth_luma_minus8 == 2) {
++                      if (ctx->dst_fmt.pixelformat !=
++                                              V4L2_PIX_FMT_NV12_10_COL128) {
++                              v4l2_err(&dev->v4l2_dev,
++                                       "Pixel format %#x != NV12_10_COL128 for 10-bit output\n",
++                                       ctx->dst_fmt.pixelformat);
++                              goto fail;
++                      }
++              } else {
++                      v4l2_warn(&dev->v4l2_dev,
++                                "Luma depth (%d) unsupported\n",
++                                s->sps.bit_depth_luma_minus8 + 8);
++                      goto fail;
++              }
++              if (run->dst->vb2_buf.num_planes != 1) {
++                      v4l2_warn(&dev->v4l2_dev, "Capture planes (%d) != 1\n",
++                                run->dst->vb2_buf.num_planes);
++                      goto fail;
++              }
++              if (run->dst->planes[0].length <
++                  ctx->dst_fmt.sizeimage) {
++                      v4l2_warn(&dev->v4l2_dev,
++                                "Capture plane[0] length (%d) < sizeimage (%d)\n",
++                                run->dst->planes[0].length,
++                                ctx->dst_fmt.sizeimage);
++                      goto fail;
++              }
++
++              if (s->sps.pic_width_in_luma_samples > 4096 ||
++                  s->sps.pic_height_in_luma_samples > 4096) {
++                      v4l2_warn(&dev->v4l2_dev,
++                                "Pic dimension (%dx%d) exeeds 4096\n",
++                                s->sps.pic_width_in_luma_samples,
++                                s->sps.pic_height_in_luma_samples);
++                      goto fail;
++              }
++
++              // Fill in ref planes with our address s.t. if we mess
++              // up refs somehow then we still have a valid address
++              // entry
++              for (i = 0; i != 16; ++i)
++                      de->ref_addrs[i] = de->frame_addr;
++
++              /*
++               * Stash initial temporal_mvp flag
++               * This must be the same for all pic slices (7.4.7.1)
++               */
++              s->slice_temporal_mvp = slice_temporal_mvp;
++
++              // Phase 2 reg pre-calc
++              de->rpi_config2 = mk_config2(s);
++              de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
++                                  s->sps.pic_width_in_luma_samples;
++              de->rpi_currpoc = sh->slice_pic_order_cnt;
++
++              if (s->sps.flags &
++                  V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
++                      setup_colmv(ctx, run, s);
++              }
++
++              s->slice_idx = 0;
++
++              if (sh->slice_segment_addr != 0) {
++                      v4l2_warn(&dev->v4l2_dev,
++                                "New frame but segment_addr=%d\n",
++                                sh->slice_segment_addr);
++                      goto fail;
++              }
++
++              /* Allocate a bitbuf if we need one - don't need one if single
++               * slice as we can use the src buf directly
++               */
++              if (!s->frame_end && !de->bit_copy_gptr->ptr) {
++                      const size_t wxh = s->sps.pic_width_in_luma_samples *
++                              s->sps.pic_height_in_luma_samples;
++                      size_t bits_alloc;
++
++                      /* Annex A gives a min compression of 2 @ lvl 3.1
++                       * (wxh <= 983040) and min 4 thereafter but avoid
++                       * the oddity of 983041 having a lower limit than
++                       * 983040.
++                       * Multiply by 3/2 for 4:2:0
++                       */
++                      bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
++                              wxh < 983040 * 2 ? 983040 * 3 / 4 :
++                              wxh * 3 / 8;
++                      bits_alloc = round_up_size(bits_alloc);
++
++                      // bits_alloc is a size_t so it must be printed with %zu
++                      if (gptr_alloc(dev, de->bit_copy_gptr,
++                                     bits_alloc,
++                                     DMA_ATTR_FORCE_CONTIGUOUS) != 0) {
++                              v4l2_err(&dev->v4l2_dev,
++                                       "Unable to alloc buf (%zu) for bit copy\n",
++                                       bits_alloc);
++                              goto fail;
++                      }
++                      v4l2_info(&dev->v4l2_dev,
++                                "Alloc buf (%zu) for bit copy OK\n",
++                                bits_alloc);
++              }
++      }
++
++      // Pre calc a few things
++      // The src buffer is used by DMA address on a frame's last slice and
++      // by kernel virtual address otherwise
++      s->src_addr =
++              !s->frame_end ?
++                      0 :
++                      vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);
++      s->src_buf = s->src_addr != 0 ? NULL :
++                                      vb2_plane_vaddr(&run->src->vb2_buf, 0);
++      if (!s->src_addr && !s->src_buf) {
++              v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n");
++              goto fail;
++      }
++
++      s->sh = sh;
++      s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
++      s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
++                                      0 :
++                                      (5 - sh->five_minus_max_num_merge_cand);
++      // * SH DSS flag invented by me - but clearly needed
++      s->dependent_slice_segment_flag =
++              ((sh->flags &
++                V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
++
++      s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
++                              0 :
++                              sh->num_ref_idx_l0_active_minus1 + 1;
++      s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
++                              0 :
++                              sh->num_ref_idx_l1_active_minus1 + 1;
++
++      if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
++              populate_scaling_factors(run, de, s);
++
++      // slice_segment_addr is taken from the slice params control and
++      // ctb_addr_rs_to_ts[] only has ctb_size entries - reject anything
++      // that would index past the end of the table
++      if (sh->slice_segment_addr >= s->ctb_size) {
++              v4l2_warn(&dev->v4l2_dev,
++                        "Slice segment_addr=%d out of range\n",
++                        sh->slice_segment_addr);
++              goto fail;
++      }
++
++      ctb_addr_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
++
++      if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
++              wpp_decode_slice(de, s, sh, ctb_addr_ts);
++      else
++              decode_slice(de, s, sh, ctb_addr_ts);
++
++      if (!s->frame_end)
++              return;
++
++      // Frame end
++      memset(dpb_q_aux, 0,
++             sizeof(*dpb_q_aux) * V4L2_HEVC_DPB_ENTRIES_NUM_MAX);
++      /*
++       * Need Aux ents for all (ref) DPB ents if temporal MV could
++       * be enabled for any pic
++       * ** At the moment we have aux ents for all pics whether or not
++       *    they are ref
++       */
++      use_aux = ((s->sps.flags &
++                V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) != 0);
++
++      // Locate ref frames
++      // At least in the current implementation this is constant across all
++      // slices. If this changes we will need idx mapping code.
++      // Uses sh so here rather than trigger
++
++      vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
++
++      if (!vq) {
++              v4l2_err(&dev->v4l2_dev, "VQ gone!\n");
++              goto fail;
++      }
++
++      //        v4l2_info(&dev->v4l2_dev, "rpivid_h265_end of frame\n");
++      if (frame_end(dev, de, s))
++              goto fail;
++
++      // dpb_q_aux[] only has V4L2_HEVC_DPB_ENTRIES_NUM_MAX slots and the
++      // entry count is taken from the slice params control - reject counts
++      // that would write past the end of it (and of de->ref_addrs[])
++      if (sh->num_active_dpb_entries > V4L2_HEVC_DPB_ENTRIES_NUM_MAX) {
++              v4l2_warn(&dev->v4l2_dev, "Too many DPB entries (%d)\n",
++                        sh->num_active_dpb_entries);
++              goto fail;
++      }
++
++      for (i = 0; i < sh->num_active_dpb_entries; ++i) {
++              int buffer_index =
++                      vb2_find_timestamp(vq, sh->dpb[i].timestamp, 0);
++              struct vb2_buffer *buf = buffer_index < 0 ?
++                                      NULL :
++                                      vb2_get_buffer(vq, buffer_index);
++
++              if (!buf) {
++                      // ref_addrs[i] keeps the current frame's address
++                      // that was filled in at frame start
++                      v4l2_warn(&dev->v4l2_dev,
++                                "Missing DPB ent %d, timestamp=%lld, index=%d\n",
++                                i, (long long)sh->dpb[i].timestamp,
++                                buffer_index);
++                      continue;
++              }
++
++              if (use_aux) {
++                      dpb_q_aux[i] = aux_q_ref(ctx,
++                                               ctx->aux_ents[buffer_index]);
++                      if (!dpb_q_aux[i])
++                              v4l2_warn(&dev->v4l2_dev,
++                                        "Missing DPB AUX ent %d index=%d\n",
++                                        i, buffer_index);
++              }
++
++              de->ref_addrs[i] =
++                      vb2_dma_contig_plane_dma_addr(buf, 0);
++      }
++
++      // Move DPB from temp
++      for (i = 0; i != V4L2_HEVC_DPB_ENTRIES_NUM_MAX; ++i) {
++              aux_q_release(ctx, &s->ref_aux[i]);
++              s->ref_aux[i] = dpb_q_aux[i];
++      }
++      // Unref the old frame aux too - it is either in the DPB or not
++      // now
++      aux_q_release(ctx, &s->frame_aux);
++
++      if (use_aux) {
++              // New frame so new aux ent
++              // ??? Do we need this if non-ref ??? can we tell
++              s->frame_aux = aux_q_new(ctx, run->dst->vb2_buf.index);
++
++              if (!s->frame_aux) {
++                      v4l2_err(&dev->v4l2_dev,
++                               "Failed to obtain aux storage for frame\n");
++                      goto fail;
++              }
++
++              de->frame_aux = aux_q_ref(ctx, s->frame_aux);
++      }
++
++      if (de->dpbno_col != ~0U) {
++              if (de->dpbno_col >= sh->num_active_dpb_entries) {
++                      v4l2_err(&dev->v4l2_dev,
++                               "Col ref index %d >= %d\n",
++                               de->dpbno_col,
++                               sh->num_active_dpb_entries);
++              } else {
++                      // Standard requires that the col pic is
++                      // constant for the duration of the pic
++                      // (text of collocated_ref_idx in H265-2 2018
++                      // 7.4.7.1)
++
++                      // Spot the collocated ref in passing
++                      de->col_aux = aux_q_ref(ctx,
++                                              dpb_q_aux[de->dpbno_col]);
++
++                      if (!de->col_aux) {
++                              v4l2_warn(&dev->v4l2_dev,
++                                        "Missing DPB ent for col\n");
++                              // Probably need to abort if this fails
++                              // as P2 may explode on bad data
++                              goto fail;
++                      }
++              }
++      }
++
++      de->state = RPIVID_DECODE_PHASE1;
++      return;
++
++fail:
++      // de is NULL if no decode env could be obtained at frame start
++      if (de)
++              // Actual error reporting happens in Trigger
++              de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
++                                         RPIVID_DECODE_ERROR_CONTINUE;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Handle PU and COEFF stream overflow
++
++// Returns:
++// -1  Phase 1 decode error
++//  0  OK
++// >0  Out of space (bitmask)
++
++#define STATUS_COEFF_EXHAUSTED        8
++#define STATUS_PU_EXHAUSTED   16
++
++/*
++ * Read back the phase 1 result registers.
++ * Returns 0 when RPI_CFSTATUS equals RPI_CFNUM, otherwise the
++ * STATUS_PU_EXHAUSTED / STATUS_COEFF_EXHAUSTED bits found in RPI_STATUS,
++ * or -1 if neither of those bits is set.
++ */
++static int check_status(const struct rpivid_dev *const dev)
++{
++      // All three registers are always read, in this order
++      const u32 cf_status = apb_read(dev, RPI_CFSTATUS);
++      const u32 cf_num = apb_read(dev, RPI_CFNUM);
++      const u32 exhausted = apb_read(dev, RPI_STATUS) &
++                            (STATUS_PU_EXHAUSTED | STATUS_COEFF_EXHAUSTED);
++
++      // this is the definition of successful completion of phase 1
++      // it assures that status register is zero and all blocks in each tile
++      // have completed
++      if (cf_status == cf_num)
++              return 0;       //No error
++
++      // Handle PU and COEFF stream overflow, anything else is a decode error
++      return exhausted ? (int)exhausted : -1;
++}
++
++/*
++ * Phase 2 completion callback, registered by phase2_claimed().
++ * @v is the decode env for the frame. Returns the detached capture buffer as
++ * DONE, recycles the env and decrements the count of outstanding phase 2
++ * jobs.
++ */
++static void cb_phase2(struct rpivid_dev *const dev, void *v)
++{
++      struct rpivid_dec_env *const de = v;
++      struct rpivid_ctx *const ctx = de->ctx;
++
++      xtrace_in(dev, de);
++
++      // frame_buf was detached in cb_phase1()
++      v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf,
++                              VB2_BUF_STATE_DONE);
++      de->frame_buf = NULL;
++
++      /* Delete de before finish as finish might immediately trigger a reuse
++       * of de
++       */
++      dec_env_delete(de);
++
++      // cb_phase1() skips the job finish when its increment takes p2out to
++      // RPIVID_P2BUF_COUNT or above; in that case the finish is done here
++      // once the count drops back
++      // NOTE(review): de is already back on the free list when it is passed
++      // to the xtrace calls below - confirm they do not dereference it
++      if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) {
++              xtrace_fin(dev, de);
++              v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
++                                               VB2_BUF_STATE_DONE);
++      }
++
++      xtrace_ok(dev, de);
++}
++
++/*
++ * Program the phase 2 registers for the decode env passed in @v.
++ * Passed to rpivid_hw_irq_active2_claim() by cb_phase1(). Writes the PU/coeff
++ * read bases, output and reference frame addresses, the pre-calculated
++ * config/framesize/POC values and the collocated MV buffers, registers
++ * cb_phase2() as the irq callback and finishes with the RPI_NUMROWS write.
++ */
++static void phase2_claimed(struct rpivid_dev *const dev, void *v)
++{
++      struct rpivid_dec_env *const de = v;
++      unsigned int i;
++
++      xtrace_in(dev, de);
++
++      // Read side of the PU/coeff buffers that phase 1 wrote
++      // (same base/stride values as set in phase1_claimed())
++      apb_write_vc_addr(dev, RPI_PURBASE, de->pu_base_vc);
++      apb_write_vc_len(dev, RPI_PURSTRIDE, de->pu_stride);
++      apb_write_vc_addr(dev, RPI_COEFFRBASE, de->coeff_base_vc);
++      apb_write_vc_len(dev, RPI_COEFFRSTRIDE, de->coeff_stride);
++
++      // Output frame: chroma follows luma at frame_c_offset, same stride
++      apb_write_vc_addr(dev, RPI_OUTYBASE, de->frame_addr);
++      apb_write_vc_addr(dev, RPI_OUTCBASE,
++                        de->frame_addr + de->frame_c_offset);
++      apb_write_vc_len(dev, RPI_OUTYSTRIDE, de->frame_stride);
++      apb_write_vc_len(dev, RPI_OUTCSTRIDE, de->frame_stride);
++
++      //    v4l2_info(&dev->v4l2_dev, "Frame: Y=%llx, C=%llx, Stride=%x\n",
++      //              de->frame_addr, de->frame_addr + de->frame_c_offset,
++      //              de->frame_stride);
++
++      // 16 reference slots, 16 bytes of register space each from 0x9000:
++      // Y addr, Y stride, C addr, C stride
++      for (i = 0; i < 16; i++) {
++              // Strides are in fact unused but fill in anyway
++              apb_write_vc_addr(dev, 0x9000 + 16 * i, de->ref_addrs[i]);
++              apb_write_vc_len(dev, 0x9004 + 16 * i, de->frame_stride);
++              apb_write_vc_addr(dev, 0x9008 + 16 * i,
++                                de->ref_addrs[i] + de->frame_c_offset);
++              apb_write_vc_len(dev, 0x900C + 16 * i, de->frame_stride);
++      }
++
++      // Values pre-calculated at frame start in rpivid_h265_setup()
++      apb_write(dev, RPI_CONFIG2, de->rpi_config2);
++      apb_write(dev, RPI_FRAMESIZE, de->rpi_framesize);
++      apb_write(dev, RPI_CURRPOC, de->rpi_currpoc);
++      //    v4l2_info(&dev->v4l2_dev, "Config2=%#x, FrameSize=%#x, POC=%#x\n",
++      //    de->rpi_config2, de->rpi_framesize, de->rpi_currpoc);
++
++      // collocated reads/writes
++      // A base of 0 is written when the env has no aux entry
++      apb_write_vc_len(dev, RPI_COLSTRIDE,
++                       de->ctx->colmv_stride); // Read vals
++      apb_write_vc_len(dev, RPI_MVSTRIDE,
++                       de->ctx->colmv_stride); // Write vals
++      apb_write_vc_addr(dev, RPI_MVBASE,
++                        !de->frame_aux ? 0 : de->frame_aux->col.addr);
++      apb_write_vc_addr(dev, RPI_COLBASE,
++                        !de->col_aux ? 0 : de->col_aux->col.addr);
++
++      //v4l2_info(&dev->v4l2_dev,
++      //         "Mv=%llx, Col=%llx, Stride=%x, Buf=%llx->%llx\n",
++      //         de->rpi_mvbase, de->rpi_colbase, de->ctx->colmv_stride,
++      //         de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr +
++      //         de->ctx->colmvbuf.size);
++
++      // Register the completion callback before the final register write
++      rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de);
++
++      // NOTE(review): presumably this final write is what starts phase 2 -
++      // confirm against apb_write_final()
++      apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y);
++
++      xtrace_ok(dev, de);
++}
++
++static void phase1_claimed(struct rpivid_dev *const dev, void *v);
++
++/*
++ * Grow the PU and/or coefficient buffer after phase 1 ran out of space and
++ * then run phase 1 again.
++ * Scheduled by cb_phase1() through rpivid_hw_irq_active1_thread() when
++ * de->p1_status is positive; the bits set in it select which buffers are
++ * reallocated. If a reallocation fails the env is recycled and the job is
++ * finished with VB2_BUF_STATE_ERROR.
++ */
++static void phase1_thread(struct rpivid_dev *const dev, void *v)
++{
++      struct rpivid_dec_env *const de = v;
++      struct rpivid_ctx *const ctx = de->ctx;
++
++      // Buffers of the current phase 2 slot, the same ones that
++      // phase1_claimed() programs
++      struct rpivid_gptr *const pu_gptr = ctx->pu_bufs + ctx->p2idx;
++      struct rpivid_gptr *const coeff_gptr = ctx->coeff_bufs + ctx->p2idx;
++
++      xtrace_in(dev, de);
++
++      if (de->p1_status & STATUS_PU_EXHAUSTED) {
++              if (gptr_realloc_new(dev, pu_gptr, next_size(pu_gptr->size))) {
++                      v4l2_err(&dev->v4l2_dev,
++                               "%s: PU realloc (%#x) failed\n",
++                               __func__, pu_gptr->size);
++                      goto fail;
++              }
++              v4l2_info(&dev->v4l2_dev, "%s: PU realloc (%#x) OK\n",
++                        __func__, pu_gptr->size);
++      }
++
++      if (de->p1_status & STATUS_COEFF_EXHAUSTED) {
++              if (gptr_realloc_new(dev, coeff_gptr,
++                                   next_size(coeff_gptr->size))) {
++                      v4l2_err(&dev->v4l2_dev,
++                               "%s: Coeff realloc (%#x) failed\n",
++                               __func__, coeff_gptr->size);
++                      goto fail;
++              }
++              v4l2_info(&dev->v4l2_dev, "%s: Coeff realloc (%#x) OK\n",
++                        __func__, coeff_gptr->size);
++      }
++
++      // Restart phase 1 with the enlarged buffers
++      phase1_claimed(dev, de);
++      xtrace_ok(dev, de);
++      return;
++
++fail:
++      dec_env_delete(de);
++      xtrace_fin(dev, de);
++      v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
++                                       VB2_BUF_STATE_ERROR);
++      xtrace_fail(dev, de);
++}
++
++/*
++ * Phase 1 completion callback, registered by phase1_claimed().
++ * @v is the decode env. Depending on check_status() this either fails the
++ * job (negative status), hands over to phase1_thread() to grow buffers
++ * (positive status), or detaches the capture buffer and queues the env for
++ * phase 2 (zero status).
++ *
++ * Always called in irq context (this is good)
++ */
++static void cb_phase1(struct rpivid_dev *const dev, void *v)
++{
++      struct rpivid_dec_env *const de = v;
++      struct rpivid_ctx *const ctx = de->ctx;
++
++      xtrace_in(dev, de);
++
++      de->p1_status = check_status(dev);
++      if (de->p1_status != 0) {
++              v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n",
++                        __func__, de->p1_status);
++
++              // Negative means a phase 1 decode error
++              if (de->p1_status < 0)
++                      goto fail;
++
++              /* Need to realloc - push onto a thread rather than IRQ */
++              rpivid_hw_irq_active1_thread(dev, &de->irq_ent,
++                                           phase1_thread, de);
++              return;
++      }
++
++      /* After the frame-buf is detached it must be returned but from
++       * this point onward (phase2_claimed, cb_phase2) there are no error
++       * paths so the return at the end of cb_phase2 is all that is needed
++       */
++      de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx);
++      if (!de->frame_buf) {
++              v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__);
++              goto fail;
++      }
++
++      // Advance to the next PU/coeff buffer slot, wrapping at
++      // RPIVID_P2BUF_COUNT
++      ctx->p2idx =
++              (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1;
++
++      // Enable the next setup if our Q isn't too big
++      // (otherwise cb_phase2() finishes the job once the count drops)
++      if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) {
++              xtrace_fin(dev, de);
++              v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
++                                               VB2_BUF_STATE_DONE);
++      }
++
++      rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de);
++
++      xtrace_ok(dev, de);
++      return;
++
++fail:
++      dec_env_delete(de);
++      xtrace_fin(dev, de);
++      v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
++                                       VB2_BUF_STATE_ERROR);
++      xtrace_fail(dev, de);
++}
++
++/*
++ * Program the phase 1 registers for the decode env passed in @v and start
++ * the hardware on its command list.
++ * Also called directly from phase1_thread() to rerun phase 1 after the
++ * PU/coeff buffers have been enlarged.
++ */
++static void phase1_claimed(struct rpivid_dev *const dev, void *v)
++{
++      struct rpivid_dec_env *const de = v;
++      struct rpivid_ctx *const ctx = de->ctx;
++
++      // PU/coeff buffers of the current phase 2 slot
++      const struct rpivid_gptr * const pu_gptr = ctx->pu_bufs + ctx->p2idx;
++      const struct rpivid_gptr * const coeff_gptr = ctx->coeff_bufs +
++                                                    ctx->p2idx;
++
++      xtrace_in(dev, de);
++
++      // Split each buffer evenly across the CTB rows, with the per-row
++      // stride rounded down to a multiple of 64
++      // NOTE(review): pic_height_in_ctbs_y is the divisor here and is
++      // derived from the SPS height in rpivid_h265_setup() - confirm a zero
++      // height cannot reach this point
++      de->pu_base_vc = pu_gptr->addr;
++      de->pu_stride =
++              ALIGN_DOWN(pu_gptr->size / de->pic_height_in_ctbs_y, 64);
++
++      de->coeff_base_vc = coeff_gptr->addr;
++      de->coeff_stride =
++              ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64);
++
++      apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc);
++      apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride);
++      apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc);
++      apb_write_vc_len(dev, RPI_COEFFWSTRIDE, de->coeff_stride);
++
++      // Trigger command FIFO
++      apb_write(dev, RPI_CFNUM, de->cmd_len);
++
++      // Claim irq
++      rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de);
++
++      // And start the h/w
++      // (the address is that of the command copy filled in by frame_end())
++      apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr);
++
++      xtrace_ok(dev, de);
++}
++
++/*
++ * Detach and free the decode state of ctx, if there is one.
++ * ctx->state is cleared before anything is released; the parameter-set
++ * info and every aux buffer reference held by the state are dropped
++ * before the state itself is freed.
++ */
++static void dec_state_delete(struct rpivid_ctx *const ctx)
++{
++      struct rpivid_dec_state *const state = ctx->state;
++      unsigned int ref;
++
++      if (state) {
++              ctx->state = NULL;
++
++              free_ps_info(state);
++
++              // Reference aux buffers first, then the current frame's
++              for (ref = 0; ref < HEVC_MAX_REFS; ref++)
++                      aux_q_release(ctx, &state->ref_aux[ref]);
++              aux_q_release(ctx, &state->frame_aux);
++
++              kfree(state);
++      }
++}
++
++/*
++ * Release everything rpivid_h265_start() set up for this context:
++ * decode envs, decode state, the aux buffer queue and all bit, command,
++ * PU and coefficient buffers.
++ */
++static void rpivid_h265_stop(struct rpivid_ctx *ctx)
++{
++      struct rpivid_dev *const dev = ctx->dev;
++      unsigned int n;
++
++      v4l2_info(&dev->v4l2_dev, "%s\n", __func__);
++
++      // The envs and the state must go before aux_q_uninit() so that
++      // their buffers have been returned to the free pool by then
++      dec_env_uninit(ctx);
++      dec_state_delete(ctx);
++      aux_q_uninit(ctx);
++
++      for (n = 0; n < ARRAY_SIZE(ctx->bitbufs); n++)
++              gptr_free(dev, &ctx->bitbufs[n]);
++
++      for (n = 0; n < ARRAY_SIZE(ctx->cmdbufs); n++)
++              gptr_free(dev, &ctx->cmdbufs[n]);
++
++      for (n = 0; n < ARRAY_SIZE(ctx->pu_bufs); n++)
++              gptr_free(dev, &ctx->pu_bufs[n]);
++
++      for (n = 0; n < ARRAY_SIZE(ctx->coeff_bufs); n++)
++              gptr_free(dev, &ctx->coeff_bufs[n]);
++}
++
++/*
++ * Allocate the decode state, decode envs and working buffers for a context.
++ *
++ * Buffer sizes are estimated from the destination format: an unset width
++ * or height is replaced by 1920 / 1088 respectively and each dimension is
++ * capped at 4096. The PU and coefficient buffers are only a first guess
++ * and are reallocated later if they turn out to be too small.
++ *
++ * Returns 0 on success. On any failure everything allocated so far is
++ * released via rpivid_h265_stop() and -ENOMEM is returned.
++ */
++static int rpivid_h265_start(struct rpivid_ctx *ctx)
++{
++      struct rpivid_dev *const dev = ctx->dev;
++      unsigned int i;
++
++      unsigned int w = ctx->dst_fmt.width;
++      unsigned int h = ctx->dst_fmt.height;
++      unsigned int wxh;
++      size_t pu_alloc;
++      size_t coeff_alloc;
++
++      // Generate a sanitised WxH for memory alloc
++      // Assume HD if unset
++      if (w == 0)
++              w = 1920;
++      if (w > 4096)
++              w = 4096;
++      // Default the height (not the width) when the height is unset
++      if (h == 0)
++              h = 1088;
++      if (h > 4096)
++              h = 4096;
++      wxh = w * h;
++
++      v4l2_info(&dev->v4l2_dev, "%s: (%dx%d)\n", __func__,
++                ctx->dst_fmt.width, ctx->dst_fmt.height);
++
++      ctx->dec0 = NULL;
++      ctx->state = kzalloc(sizeof(*ctx->state), GFP_KERNEL);
++      if (!ctx->state) {
++              v4l2_err(&dev->v4l2_dev, "Failed to allocate decode state\n");
++              goto fail;
++      }
++
++      if (dec_env_init(ctx) != 0) {
++              v4l2_err(&dev->v4l2_dev, "Failed to allocate decode envs\n");
++              goto fail;
++      }
++
++      // 16k is plenty for most purposes but we will realloc if needed
++      for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) {
++              if (gptr_alloc(dev, ctx->cmdbufs + i, 0x4000,
++                             DMA_ATTR_FORCE_CONTIGUOUS))
++                      goto fail;
++      }
++
++      // Finger in the air PU & Coeff alloc
++      // Will be realloced if too small
++      coeff_alloc = round_up_size(wxh);
++      pu_alloc = round_up_size(wxh / 4);
++      for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) {
++              // Don't actually need a kernel mapping here
++              if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc,
++                             DMA_ATTR_FORCE_CONTIGUOUS |
++                                      DMA_ATTR_NO_KERNEL_MAPPING))
++                      goto fail;
++              if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc,
++                             DMA_ATTR_FORCE_CONTIGUOUS |
++                                      DMA_ATTR_NO_KERNEL_MAPPING))
++                      goto fail;
++      }
++      aux_q_init(ctx);
++
++      return 0;
++
++fail:
++      // stop() copes with a partially initialised context
++      rpivid_h265_stop(ctx);
++      return -ENOMEM;
++}
++
++/*
++ * Act on the state of the context's current decode env (ctx->dec0).
++ *
++ * A missing env is handled as RPIVID_DECODE_ERROR_CONTINUE. Slice states
++ * finish the m2m job as DONE, error states finish it as ERROR, and
++ * RPIVID_DECODE_PHASE1 hands the env to the Active1 claim queue.
++ * The case order matters: several cases deliberately fall through.
++ */
++static void rpivid_h265_trigger(struct rpivid_ctx *ctx)
++{
++      struct rpivid_dev *const dev = ctx->dev;
++      struct rpivid_dec_env *const de = ctx->dec0;
++
++      xtrace_in(dev, de);
++
++      // de == NULL selects ERROR_CONTINUE, so every case that dereferences
++      // de (including default) only runs with a valid env
++      switch (!de ? RPIVID_DECODE_ERROR_CONTINUE : de->state) {
++      case RPIVID_DECODE_SLICE_START:
++              de->state = RPIVID_DECODE_SLICE_CONTINUE;
++      /* FALLTHRU */
++      case RPIVID_DECODE_SLICE_CONTINUE:
++              v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
++                                               VB2_BUF_STATE_DONE);
++              break;
++      default:
++              // Unknown state: report it, then treat it as ERROR_DONE
++              v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__,
++                       de->state);
++      /* FALLTHRU */
++      case RPIVID_DECODE_ERROR_DONE:
++              ctx->dec0 = NULL;
++              dec_env_delete(de);
++              // NOTE(review): de is still handed to xtrace_fin/xtrace_ok
++              // below after dec_env_delete() - confirm they never
++              // dereference it
++      /* FALLTHRU */
++      case RPIVID_DECODE_ERROR_CONTINUE:
++              xtrace_fin(dev, de);
++              v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
++                                               VB2_BUF_STATE_ERROR);
++              break;
++      case RPIVID_DECODE_PHASE1:
++              // The env is detached from the context before being queued;
++              // phase1_claimed() receives it as the claim argument
++              ctx->dec0 = NULL;
++              rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed,
++                                          de);
++              break;
++      }
++
++      xtrace_ok(dev, de);
++}
++
++/* Decoder operations table binding the H.265 entry points in this file */
++struct rpivid_dec_ops rpivid_dec_ops_h265 = {
++      .setup = rpivid_h265_setup,
++      .start = rpivid_h265_start,
++      .stop = rpivid_h265_stop,
++      .trigger = rpivid_h265_trigger,
++};
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid_hw.c
+@@ -0,0 +1,321 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++#include <linux/clk.h>
++#include <linux/component.h>
++#include <linux/dma-mapping.h>
++#include <linux/interrupt.h>
++#include <linux/io.h>
++#include <linux/of_reserved_mem.h>
++#include <linux/of_device.h>
++#include <linux/of_platform.h>
++#include <linux/platform_device.h>
++#include <linux/regmap.h>
++#include <linux/reset.h>
++
++#include <media/videobuf2-core.h>
++#include <media/v4l2-mem2mem.h>
++
++#include "rpivid.h"
++#include "rpivid_hw.h"
++
++/*
++ * Register ient (callback cb, argument v) as the entry to run when the
++ * interrupt for ictl is next serviced.
++ * If an entry is already registered an error is logged and nothing is
++ * changed.
++ */
++static void pre_irq(struct rpivid_dev *dev, struct rpivid_hw_irq_ent *ient,
++                  rpivid_irq_callback cb, void *v,
++                  struct rpivid_hw_irq_ctrl *ictl)
++{
++      unsigned long flags;
++
++      // NOTE(review): this test reads ictl->irq without holding ictl->lock
++      if (ictl->irq) {
++              v4l2_err(&dev->v4l2_dev, "Attempt to claim IRQ when already claimed\n");
++              return;
++      }
++
++      // Fill in the entry before it becomes visible through ictl->irq
++      ient->cb = cb;
++      ient->v = v;
++
++      // Not sure this lock is actually required
++      spin_lock_irqsave(&ictl->lock, flags);
++      ictl->irq = ient;
++      spin_unlock_irqrestore(&ictl->lock, flags);
++}
++
++/*
++ * Drop one scheduling hold on ictl and, if that leaves the controller
++ * idle, run queued claim callbacks.
++ *
++ * Each pass decrements ictl->no_sched. Once it reaches zero and no IRQ
++ * callback is registered, the head of the claim queue is removed,
++ * no_sched is set back to 1 for the duration of its callback, and the
++ * callback is invoked outside the lock. The loop ends as soon as there
++ * is nothing that may be run.
++ */
++static void sched_claim(struct rpivid_dev * const dev,
++                      struct rpivid_hw_irq_ctrl * const ictl)
++{
++      for (;;) {
++              struct rpivid_hw_irq_ent *ient = NULL;
++              unsigned long flags;
++
++              spin_lock_irqsave(&ictl->lock, flags);
++
++              if (--ictl->no_sched <= 0) {
++                      ient = ictl->claim;
++                      if (!ictl->irq && ient) {
++                              ictl->claim = ient->next;
++                              ictl->no_sched = 1;
++                      } else {
++                              // An IRQ callback is still registered (or the
++                              // queue is empty): the head must stay queued
++                              // and must not be called yet
++                              ient = NULL;
++                      }
++              }
++
++              spin_unlock_irqrestore(&ictl->lock, flags);
++
++              if (!ient)
++                      break;
++
++              ient->cb(dev, ient->v);
++      }
++}
++
++/* Should only ever be called from its own IRQ cb so no lock required
++ *
++ * Registers ient (callback cb, argument v) to be run from the threaded
++ * handler: the entry is stored in ictl->irq, thread_reqed is raised so
++ * the hard IRQ handler asks for the thread to be woken, and no_sched is
++ * bumped so queued claims stay held until the thread callback has run.
++ */
++static void pre_thread(struct rpivid_dev *dev,
++                     struct rpivid_hw_irq_ent *ient,
++                     rpivid_irq_callback cb, void *v,
++                     struct rpivid_hw_irq_ctrl *ictl)
++{
++      ient->cb = cb;
++      ient->v = v;
++      ictl->irq = ient;
++      ictl->thread_reqed = true;
++      ictl->no_sched++;
++}
++
++// Called in irq context
++// Takes the registered IRQ entry for ictl (if any), runs its callback and
++// then lets queued claims be scheduled.
++static void do_irq(struct rpivid_dev * const dev,
++                 struct rpivid_hw_irq_ctrl * const ictl)
++{
++      struct rpivid_hw_irq_ent *ient;
++      unsigned long flags;
++
++      spin_lock_irqsave(&ictl->lock, flags);
++      ient = ictl->irq;
++      if (ient) {
++              // Hold off claim scheduling while the callback runs; the
++              // matching decrement is in sched_claim()
++              ictl->no_sched++;
++              ictl->irq = NULL;
++      }
++      spin_unlock_irqrestore(&ictl->lock, flags);
++
++      // Callback is invoked without the lock held
++      if (ient) {
++              ient->cb(dev, ient->v);
++
++              sched_claim(dev, ictl);
++      }
++}
++
++/*
++ * Request use of the hardware block managed by ictl.
++ *
++ * ient is filled in with cb/v. If the controller is idle the callback is
++ * run immediately from this call; otherwise ient is appended to the claim
++ * queue and will be run later by sched_claim().
++ */
++static void do_claim(struct rpivid_dev * const dev,
++                   struct rpivid_hw_irq_ent *ient,
++                   const rpivid_irq_callback cb, void * const v,
++                   struct rpivid_hw_irq_ctrl * const ictl)
++{
++      unsigned long flags;
++
++      ient->next = NULL;
++      ient->cb = cb;
++      ient->v = v;
++
++      spin_lock_irqsave(&ictl->lock, flags);
++
++      // In the two queueing branches ient is set to NULL so that the
++      // immediate-call path after the unlock is skipped
++      if (ictl->claim) {
++              // If we have a Q then add to end
++              ictl->tail->next = ient;
++              ictl->tail = ient;
++              ient = NULL;
++      } else if (ictl->no_sched || ictl->irq) {
++              // Empty Q but other activity in progress so Q
++              ictl->claim = ient;
++              ictl->tail = ient;
++              ient = NULL;
++      } else {
++              // Nothing else going on - schedule immediately and
++              // prevent anything else scheduling claims
++              ictl->no_sched = 1;
++      }
++
++      spin_unlock_irqrestore(&ictl->lock, flags);
++
++      if (ient) {
++              ient->cb(dev, ient->v);
++
++              // Releases the no_sched hold taken above and runs anything
++              // that was queued in the meantime
++              sched_claim(dev, ictl);
++      }
++}
++
++/*
++ * Put an IRQ controller into its idle state: lock initialised, empty
++ * claim queue, no IRQ entry registered, no thread requested and no
++ * scheduling holds.
++ */
++static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl)
++{
++      spin_lock_init(&ictl->lock);
++      ictl->claim = NULL;
++      ictl->tail = NULL;
++      ictl->irq = NULL;
++      // Read by the IRQ handler and do_thread(), so give it a defined
++      // value here rather than relying on the caller zeroing the struct
++      ictl->thread_reqed = false;
++      ictl->no_sched = 0;
++}
++
++/* Counterpart of ictl_init(); currently there is nothing to release */
++static void ictl_uninit(struct rpivid_hw_irq_ctrl * const ictl)
++{
++      // Nothing to do
++}
++
++#if !OPT_DEBUG_POLL_IRQ
++/*
++ * Hard IRQ handler. data is the struct rpivid_dev.
++ *
++ * Reads ARG_IC_ICTRL, returns IRQ_NONE if none of the known interrupt
++ * bits is set, otherwise writes the value back (reserved bits forced to
++ * zero) and services Active2 then Active1. Returns IRQ_WAKE_THREAD when
++ * either controller has a thread callback pending, else IRQ_HANDLED.
++ */
++static irqreturn_t rpivid_irq_irq(int irq, void *data)
++{
++      struct rpivid_dev * const dev = data;
++      __u32 ictrl;
++
++      ictrl = irq_read(dev, ARG_IC_ICTRL);
++      if (!(ictrl & ARG_IC_ICTRL_ALL_IRQ_MASK)) {
++              v4l2_warn(&dev->v4l2_dev, "IRQ but no IRQ bits set\n");
++              return IRQ_NONE;
++      }
++
++      // Cancel any/all irqs
++      // (the latched *_INT bits are documented as write-1-to-clear, so
++      // writing back what was read clears every interrupt that was set)
++      irq_write(dev, ARG_IC_ICTRL, ictrl & ~ARG_IC_ICTRL_SET_ZERO_MASK);
++
++      // Service Active2 before Active1 so Phase 1 can transition to Phase 2
++      // without delay
++      if (ictrl & ARG_IC_ICTRL_ACTIVE2_INT_SET)
++              do_irq(dev, &dev->ic_active2);
++      if (ictrl & ARG_IC_ICTRL_ACTIVE1_INT_SET)
++              do_irq(dev, &dev->ic_active1);
++
++      return dev->ic_active1.thread_reqed || dev->ic_active2.thread_reqed ?
++              IRQ_WAKE_THREAD : IRQ_HANDLED;
++}
++
++/*
++ * Threaded-handler work for one controller: if a thread callback was
++ * requested, take the registered entry, clear the request, run the
++ * callback outside the lock and then let queued claims be scheduled.
++ */
++static void do_thread(struct rpivid_dev * const dev,
++                    struct rpivid_hw_irq_ctrl *const ictl)
++{
++      unsigned long flags;
++      struct rpivid_hw_irq_ent *ient = NULL;
++
++      spin_lock_irqsave(&ictl->lock, flags);
++
++      if (ictl->thread_reqed) {
++              ient = ictl->irq;
++              ictl->thread_reqed = false;
++              ictl->irq = NULL;
++      }
++
++      spin_unlock_irqrestore(&ictl->lock, flags);
++
++      if (ient) {
++              ient->cb(dev, ient->v);
++
++              // Drops the no_sched hold that pre_thread() took
++              sched_claim(dev, ictl);
++      }
++}
++
++/*
++ * Threaded IRQ handler. data is the struct rpivid_dev.
++ * Runs any pending thread callback for Active1 and then Active2 and
++ * always returns IRQ_HANDLED.
++ */
++static irqreturn_t rpivid_irq_thread(int irq, void *data)
++{
++      struct rpivid_dev * const dev = data;
++
++      do_thread(dev, &dev->ic_active1);
++      do_thread(dev, &dev->ic_active2);
++
++      return IRQ_HANDLED;
++}
++#endif
++
++/* May only be called from Active1 CB
++ * IRQs should not be expected until execution continues in the cb
++ *
++ * Requests that thread_cb(dev, ctx) be run from the threaded IRQ handler
++ * for the Active1 controller, using ient as the entry storage.
++ */
++void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
++                                struct rpivid_hw_irq_ent *ient,
++                                rpivid_irq_callback thread_cb, void *ctx)
++{
++      pre_thread(dev, ient, thread_cb, ctx, &dev->ic_active1);
++}
++
++/*
++ * Claim the Active1 controller: ready_cb(dev, ctx) runs immediately if
++ * the controller is idle, otherwise once earlier activity has finished.
++ */
++void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
++                               struct rpivid_hw_irq_ent *ient,
++                               rpivid_irq_callback ready_cb, void *ctx)
++{
++      do_claim(dev, ient, ready_cb, ctx, &dev->ic_active1);
++}
++
++/*
++ * Register irq_cb(dev, ctx) to be called when the Active1 interrupt is
++ * next serviced.
++ */
++void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
++                             struct rpivid_hw_irq_ent *ient,
++                             rpivid_irq_callback irq_cb, void *ctx)
++{
++      pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active1);
++}
++
++/*
++ * Claim the Active2 controller: ready_cb(dev, ctx) runs immediately if
++ * the controller is idle, otherwise once earlier activity has finished.
++ */
++void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
++                               struct rpivid_hw_irq_ent *ient,
++                               rpivid_irq_callback ready_cb, void *ctx)
++{
++      do_claim(dev, ient, ready_cb, ctx, &dev->ic_active2);
++}
++
++/*
++ * Register irq_cb(dev, ctx) to be called when the Active2 interrupt is
++ * next serviced.
++ */
++void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
++                             struct rpivid_hw_irq_ent *ient,
++                             rpivid_irq_callback irq_cb, void *ctx)
++{
++      pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active2);
++}
++
++/*
++ * Map the "intc" and "hevc" register ranges, look up the "hevc" clock,
++ * quiesce the interrupt controller and (unless polling is compiled in)
++ * install the threaded IRQ handler.
++ *
++ * All resources are device-managed, so no explicit unwinding is needed.
++ *
++ * Returns 0 on success, -ENODEV if a register range is missing, -ENOMEM
++ * if a range cannot be mapped, -ENXIO if no usable IRQ number is found,
++ * or the error reported by the clock / IRQ lookup or IRQ request.
++ */
++int rpivid_hw_probe(struct rpivid_dev *dev)
++{
++      struct resource *res;
++      __u32 irq_stat;
++      int irq_dec;
++      int ret = 0;
++
++      ictl_init(&dev->ic_active1);
++      ictl_init(&dev->ic_active2);
++
++      res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc");
++      if (!res)
++              return -ENODEV;
++
++      // devm_ioremap() reports failure with NULL, not an ERR_PTR
++      dev->base_irq = devm_ioremap(dev->dev, res->start, resource_size(res));
++      if (!dev->base_irq)
++              return -ENOMEM;
++
++      res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "hevc");
++      if (!res)
++              return -ENODEV;
++
++      dev->base_h265 = devm_ioremap(dev->dev, res->start, resource_size(res));
++      if (!dev->base_h265)
++              return -ENOMEM;
++
++      dev->clock = devm_clk_get(&dev->pdev->dev, "hevc");
++      if (IS_ERR(dev->clock))
++              return PTR_ERR(dev->clock);
++
++      // Disable IRQs & reset anything pending
++      irq_write(dev, ARG_IC_ICTRL,
++                ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET);
++      irq_stat = irq_read(dev, ARG_IC_ICTRL);
++      irq_write(dev, ARG_IC_ICTRL, irq_stat);
++
++#if !OPT_DEBUG_POLL_IRQ
++      irq_dec = platform_get_irq(dev->pdev, 0);
++      // Never report success without an IRQ: map a zero result to an error
++      if (irq_dec <= 0)
++              return irq_dec ? irq_dec : -ENXIO;
++      ret = devm_request_threaded_irq(dev->dev, irq_dec,
++                                      rpivid_irq_irq,
++                                      rpivid_irq_thread,
++                                      0, dev_name(dev->dev), dev);
++      if (ret) {
++              dev_err(dev->dev, "Failed to request IRQ - %d\n", ret);
++
++              return ret;
++      }
++#endif
++      return ret;
++}
++
++/* Undo rpivid_hw_probe(): tears down both IRQ controller structures */
++void rpivid_hw_remove(struct rpivid_dev *dev)
++{
++      // IRQ auto freed on unload so no need to do it here
++      ictl_uninit(&dev->ic_active1);
++      ictl_uninit(&dev->ic_active2);
++}
++
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid_hw.h
+@@ -0,0 +1,300 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#ifndef _RPIVID_HW_H_
++#define _RPIVID_HW_H_
++
++/* One pending callback for an IRQ controller (claim, irq or thread) */
++struct rpivid_hw_irq_ent {
++      /* Link to the following entry while on a claim queue */
++      struct rpivid_hw_irq_ent *next;
++      /* Function to call, as cb(dev, v) */
++      rpivid_irq_callback cb;
++      /* Opaque argument handed back to cb */
++      void *v;
++};
++
++/* Phase 1 Register offsets
++ * Byte offsets for use with apb_write() / apb_read() and friends
++ */
++
++#define RPI_SPS0 0
++#define RPI_SPS1 4
++#define RPI_PPS 8
++#define RPI_SLICE 12
++#define RPI_TILESTART 16
++#define RPI_TILEEND 20
++#define RPI_SLICESTART 24
++#define RPI_MODE 28
++#define RPI_LEFT0 32
++#define RPI_LEFT1 36
++#define RPI_LEFT2 40
++#define RPI_LEFT3 44
++#define RPI_QP 48
++#define RPI_CONTROL 52
++#define RPI_STATUS 56
++#define RPI_VERSION 60
++#define RPI_BFBASE 64
++#define RPI_BFNUM 68
++#define RPI_BFCONTROL 72
++#define RPI_BFSTATUS 76
++#define RPI_PUWBASE 80
++#define RPI_PUWSTRIDE 84
++#define RPI_COEFFWBASE 88
++#define RPI_COEFFWSTRIDE 92
++#define RPI_SLICECMDS 96
++#define RPI_BEGINTILEEND 100
++#define RPI_TRANSFER 104
++#define RPI_CFBASE 108
++#define RPI_CFNUM 112
++#define RPI_CFSTATUS 116
++
++/* Phase 2 Register offsets
++ * Byte offsets for use with apb_write() / apb_read() and friends
++ */
++
++#define RPI_PURBASE 0x8000
++#define RPI_PURSTRIDE 0x8004
++#define RPI_COEFFRBASE 0x8008
++#define RPI_COEFFRSTRIDE 0x800C
++#define RPI_NUMROWS 0x8010
++#define RPI_CONFIG2 0x8014
++#define RPI_OUTYBASE 0x8018
++#define RPI_OUTYSTRIDE 0x801C
++#define RPI_OUTCBASE 0x8020
++#define RPI_OUTCSTRIDE 0x8024
++#define RPI_STATUS2 0x8028
++#define RPI_FRAMESIZE 0x802C
++#define RPI_MVBASE 0x8030
++#define RPI_MVSTRIDE 0x8034
++#define RPI_COLBASE 0x8038
++#define RPI_COLSTRIDE 0x803C
++#define RPI_CURRPOC 0x8040
++
++/*
++ * Write a general register value
++ * Order is unimportant
++ *
++ * Writes val to the HEVC register block (dev->base_h265) at byte offset
++ * `offset` using writel_relaxed().
++ */
++static inline void apb_write(const struct rpivid_dev * const dev,
++                           const unsigned int offset, const u32 val)
++{
++      writel_relaxed(val, dev->base_h265 + offset);
++}
++
++/* Write the final register value that actually starts the phase
++ * Same as apb_write() but uses writel() rather than writel_relaxed()
++ */
++static inline void apb_write_final(const struct rpivid_dev * const dev,
++                                 const unsigned int offset, const u32 val)
++{
++      writel(val, dev->base_h265 + offset);
++}
++
++/* Read the HEVC register at byte offset `offset` from dev->base_h265 */
++static inline u32 apb_read(const struct rpivid_dev * const dev,
++                         const unsigned int offset)
++{
++      return readl(dev->base_h265 + offset);
++}
++
++/* Write val to the interrupt-control block (dev->base_irq) at `offset` */
++static inline void irq_write(const struct rpivid_dev * const dev,
++                           const unsigned int offset, const u32 val)
++{
++      writel(val, dev->base_irq + offset);
++}
++
++/* Read the interrupt-control register at `offset` from dev->base_irq */
++static inline u32 irq_read(const struct rpivid_dev * const dev,
++                         const unsigned int offset)
++{
++      return readl(dev->base_irq + offset);
++}
++
++/*
++ * Write a DMA address to an HEVC register in the form the register takes:
++ * the address shifted right by 6 (i.e. in units of 64 bytes), truncated
++ * to 32 bits.
++ */
++static inline void apb_write_vc_addr(const struct rpivid_dev * const dev,
++                                   const unsigned int offset,
++                                   const dma_addr_t a)
++{
++      apb_write(dev, offset, (u32)(a >> 6));
++}
++
++/*
++ * As apb_write_vc_addr() but issued through apb_write_final(), for the
++ * address write that starts a phase.
++ */
++static inline void apb_write_vc_addr_final(const struct rpivid_dev * const dev,
++                                         const unsigned int offset,
++                                         const dma_addr_t a)
++{
++      apb_write_final(dev, offset, (u32)(a >> 6));
++}
++
++/*
++ * Write a byte length to an HEVC register in units of 64 bytes, rounding
++ * any partial unit up.
++ */
++static inline void apb_write_vc_len(const struct rpivid_dev * const dev,
++                                  const unsigned int offset,
++                                  const unsigned int x)
++{
++      apb_write(dev, offset, (x + 63) >> 6);
++}
++
++/* *ARG_IC_ICTRL - Interrupt control for ARGON Core*
++ * Offset (byte space) = 40'h2b10000
++ * Physical Address (byte space) = 40'h7eb10000
++ * Verilog Macro Address = `ARG_IC_REG_START + `ARGON_INTCTRL_ICTRL
++ * Reset Value = 32'b100x100x_100xxxxx_xxxxxxx0_x100x100
++ * Access = RW (32-bit only)
++ * Interrupt control logic for ARGON Core.
++ */
++#define ARG_IC_ICTRL 0
++
++/* acc=LWC ACTIVE1_INT FIELD ACCESS: LWC
++ *
++ * Interrupt 1
++ * This is set and held when an hevc_active1 interrupt edge is detected
++ * The polarity of the edge is set by the ACTIVE1_EDGE field
++ * Write a 1 to this bit to clear down the latched interrupt
++ * The latched interrupt is only enabled out onto the interrupt line if
++ * ACTIVE1_EN is set
++ * Reset value is *0* decimal.
++ */
++#define ARG_IC_ICTRL_ACTIVE1_INT_SET          BIT(0)
++
++/* ACTIVE1_EDGE Sets the polarity of the interrupt edge detection logic
++ * This logic detects edges of the hevc_active1 line from the argon core
++ * 0 = negedge, 1 = posedge
++ * Reset value is *0* decimal.
++ */
++#define ARG_IC_ICTRL_ACTIVE1_EDGE_SET         BIT(1)
++
++/* ACTIVE1_EN Enables ACTIVE1_INT out onto the argon interrupt line.
++ * If this isn't set, the interrupt logic will work but no interrupt will be
++ * set to the interrupt controller
++ * Reset value is *1* decimal.
++ *
++ * [JC] The above appears to be a lie - if unset then b0 is never set
++ */
++#define ARG_IC_ICTRL_ACTIVE1_EN_SET           BIT(2)
++
++/* acc=RO ACTIVE1_STATUS FIELD ACCESS: RO
++ *
++ * The current status of the hevc_active1 signal
++ */
++#define ARG_IC_ICTRL_ACTIVE1_STATUS_SET               BIT(3)
++
++/* acc=LWC ACTIVE2_INT FIELD ACCESS: LWC
++ *
++ * Interrupt 2
++ * This is set and held when an hevc_active2 interrupt edge is detected
++ * The polarity of the edge is set by the ACTIVE2_EDGE field
++ * Write a 1 to this bit to clear down the latched interrupt
++ * The latched interrupt is only enabled out onto the interrupt line if
++ * ACTIVE2_EN is set
++ * Reset value is *0* decimal.
++ */
++#define ARG_IC_ICTRL_ACTIVE2_INT_SET          BIT(4)
++
++/* ACTIVE2_EDGE Sets the polarity of the interrupt edge detection logic
++ * This logic detects edges of the hevc_active2 line from the argon core
++ * 0 = negedge, 1 = posedge
++ * Reset value is *0* decimal.
++ */
++#define ARG_IC_ICTRL_ACTIVE2_EDGE_SET         BIT(5)
++
++/* ACTIVE2_EN Enables ACTIVE2_INT out onto the argon interrupt line.
++ * If this isn't set, the interrupt logic will work but no interrupt will be
++ * set to the interrupt controller
++ * Reset value is *1* decimal.
++ */
++#define ARG_IC_ICTRL_ACTIVE2_EN_SET           BIT(6)
++
++/* acc=RO ACTIVE2_STATUS FIELD ACCESS: RO
++ *
++ * The current status of the hevc_active2 signal
++ */
++#define ARG_IC_ICTRL_ACTIVE2_STATUS_SET               BIT(7)
++
++/* TEST_INT Forces the argon int high for test purposes.
++ * Reset value is *0* decimal.
++ */
++#define ARG_IC_ICTRL_TEST_INT                 BIT(8)
++#define ARG_IC_ICTRL_SPARE                    BIT(9)
++
++/* acc=RO VP9_INTERRUPT_STATUS FIELD ACCESS: RO
++ *
++ * The current status of the vp9_interrupt signal
++ */
++#define ARG_IC_ICTRL_VP9_INTERRUPT_STATUS     BIT(10)
++
++/* AIO_INT_ENABLE 1 = Or the AIO int in with the Argon int so the VPU can see
++ * it
++ * 0 = the AIO int is masked. (It should still be connected to the GIC though).
++ */
++#define ARG_IC_ICTRL_AIO_INT_ENABLE           BIT(20)
++#define ARG_IC_ICTRL_H264_ACTIVE_INT          BIT(21)
++#define ARG_IC_ICTRL_H264_ACTIVE_EDGE         BIT(22)
++#define ARG_IC_ICTRL_H264_ACTIVE_EN           BIT(23)
++#define ARG_IC_ICTRL_H264_ACTIVE_STATUS               BIT(24)
++#define ARG_IC_ICTRL_H264_INTERRUPT_INT               BIT(25)
++#define ARG_IC_ICTRL_H264_INTERRUPT_EDGE      BIT(26)
++#define ARG_IC_ICTRL_H264_INTERRUPT_EN                BIT(27)
++
++/* acc=RO H264_INTERRUPT_STATUS FIELD ACCESS: RO
++ *
++ * The current status of the h264_interrupt signal
++ */
++#define ARG_IC_ICTRL_H264_INTERRUPT_STATUS    BIT(28)
++
++/* acc=LWC VP9_INTERRUPT_INT FIELD ACCESS: LWC
++ *
++ * Interrupt 1
++ * This is set and held when a vp9_interrupt interrupt edge is detected
++ * The polarity of the edge is set by the VP9_INTERRUPT_EDGE field
++ * Write a 1 to this bit to clear down the latched interrupt
++ * The latched interrupt is only enabled out onto the interrupt line if
++ * VP9_INTERRUPT_EN is set
++ * Reset value is *0* decimal.
++ */
++#define ARG_IC_ICTRL_VP9_INTERRUPT_INT                BIT(29)
++
++/* VP9_INTERRUPT_EDGE Sets the polarity of the interrupt edge detection logic
++ * This logic detects edges of the vp9_interrupt line from the argon h264 core
++ * 0 = negedge, 1 = posedge
++ * Reset value is *0* decimal.
++ */
++#define ARG_IC_ICTRL_VP9_INTERRUPT_EDGE               BIT(30)
++
++/* VP9_INTERRUPT_EN Enables VP9_INTERRUPT_INT out onto the argon interrupt line.
++ * If this isn't set, the interrupt logic will work but no interrupt will be
++ * set to the interrupt controller
++ * Reset value is *1* decimal.
++ */
++#define ARG_IC_ICTRL_VP9_INTERRUPT_EN         BIT(31)
++
++/* Bits 19:12, 11 reserved - read ?, write 0 */
++#define ARG_IC_ICTRL_SET_ZERO_MASK            ((0xff << 12) | BIT(11))
++
++/* All IRQ bits
++ * Latched interrupt flags for VP9, H264 interrupt, Active1 and Active2.
++ * Note that ARG_IC_ICTRL_H264_ACTIVE_INT is not part of this mask.
++ */
++#define ARG_IC_ICTRL_ALL_IRQ_MASK   (\
++              ARG_IC_ICTRL_VP9_INTERRUPT_INT  |\
++              ARG_IC_ICTRL_H264_INTERRUPT_INT |\
++              ARG_IC_ICTRL_ACTIVE1_INT_SET    |\
++              ARG_IC_ICTRL_ACTIVE2_INT_SET)
++
++/* Auto release once all CBs called */
++void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
++                               struct rpivid_hw_irq_ent *ient,
++                               rpivid_irq_callback ready_cb, void *ctx);
++/* May only be called in claim cb */
++void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
++                             struct rpivid_hw_irq_ent *ient,
++                             rpivid_irq_callback irq_cb, void *ctx);
++/* May only be called in irq cb */
++void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
++                                struct rpivid_hw_irq_ent *ient,
++                                rpivid_irq_callback thread_cb, void *ctx);
++
++/* Auto release once all CBs called */
++void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
++                               struct rpivid_hw_irq_ent *ient,
++                               rpivid_irq_callback ready_cb, void *ctx);
++/* May only be called in claim cb */
++void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
++                             struct rpivid_hw_irq_ent *ient,
++                             rpivid_irq_callback irq_cb, void *ctx);
++
++int rpivid_hw_probe(struct rpivid_dev *dev);
++void rpivid_hw_remove(struct rpivid_dev *dev);
++
++#endif
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid_video.c
+@@ -0,0 +1,593 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#include <media/videobuf2-dma-contig.h>
++#include <media/v4l2-device.h>
++#include <media/v4l2-ioctl.h>
++#include <media/v4l2-event.h>
++#include <media/v4l2-mem2mem.h>
++
++#include "rpivid.h"
++#include "rpivid_video.h"
++#include "rpivid_dec.h"
++
++#define RPIVID_DECODE_SRC     BIT(0)
++#define RPIVID_DECODE_DST     BIT(1)
++
++#define RPIVID_MIN_WIDTH      16U
++#define RPIVID_MIN_HEIGHT     16U
++#define RPIVID_MAX_WIDTH      4096U
++#define RPIVID_MAX_HEIGHT     4096U
++
++/*
++ * Map an open file to its driver context: file->private_data is treated
++ * as a pointer to the `fh` member embedded in struct rpivid_ctx.
++ */
++static inline struct rpivid_ctx *rpivid_file2ctx(struct file *file)
++{
++      return container_of(file->private_data, struct rpivid_ctx, fh);
++}
++
++/*
++ * Accept x only if it lies within [y, 2 * y].
++ * Anything outside that range - too small or too large - yields y.
++ */
++static inline unsigned int constrain2x(unsigned int x, unsigned int y)
++{
++      if (x < y || x > y * 2)
++              return y;
++
++      return x;
++}
++
++/*
++ * Normalise a requested source (bitstream) format in place.
++ * Only V4L2_PIX_FMT_HEVC_SLICE is supported; anything else gives -EINVAL
++ * and leaves *pix_fmt untouched. Returns 0 on success.
++ */
++int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt)
++{
++      if (pix_fmt->pixelformat == V4L2_PIX_FMT_HEVC_SLICE) {
++              pix_fmt->field = V4L2_FIELD_NONE;
++              /* Encoded data has no meaningful line stride */
++              pix_fmt->bytesperline = 0;
++              /* Size may not be 0, so enforce a 1K floor */
++              pix_fmt->sizeimage = max_t(u32, pix_fmt->sizeimage, SZ_1K);
++              return 0;
++      }
++
++      return -EINVAL;
++}
++
++/*
++ * Normalise a requested destination (decoded frame) format in place.
++ *
++ * Supports the two 128-byte column formats. Width and height are capped
++ * at RPIVID_MAX_WIDTH / RPIVID_MAX_HEIGHT and aligned; bytesperline (the
++ * column height for these formats) and sizeimage keep the caller's value
++ * when it is between the computed minimum and twice that, otherwise they
++ * are set to the minimum.
++ *
++ * Returns 0 on success or -EINVAL for an unsupported pixel format, in
++ * which case *pix_fmt is left untouched.
++ */
++int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt)
++{
++      unsigned int width = pix_fmt->width;
++      unsigned int height = pix_fmt->height;
++      unsigned int sizeimage = pix_fmt->sizeimage;
++      unsigned int bytesperline = pix_fmt->bytesperline;
++
++      switch (pix_fmt->pixelformat) {
++      /* For column formats set bytesperline to column height (stride2) */
++      case V4L2_PIX_FMT_NV12_COL128:
++              /* Width rounds up to columns */
++              width = ALIGN(min(width, RPIVID_MAX_WIDTH), 128);
++
++              /* 16 aligned height - not sure we even need that
++               * Capped first so the size arithmetic below cannot wrap
++               */
++              height = ALIGN(min(height, RPIVID_MAX_HEIGHT), 16);
++              /* column height
++               * Accept suggested shape if at least min & < 2 * min
++               */
++              bytesperline = constrain2x(bytesperline, height * 3 / 2);
++
++              /* image size
++               * Again allow plausible variation in case added padding is
++               * required
++               */
++              sizeimage = constrain2x(sizeimage, bytesperline * width);
++              break;
++
++      case V4L2_PIX_FMT_NV12_10_COL128:
++              /* width in pixels (3 pels = 4 bytes) rounded to 128 byte
++               * columns
++               */
++              width = ALIGN(((min(width, RPIVID_MAX_WIDTH) + 2) / 3), 32) * 3;
++
++              /* 16-aligned height.
++               * Capped first so the size arithmetic below cannot wrap
++               */
++              height = ALIGN(min(height, RPIVID_MAX_HEIGHT), 16);
++
++              /* column height
++               * Accept suggested shape if at least min & < 2 * min
++               */
++              bytesperline = constrain2x(bytesperline, height * 3 / 2);
++
++              /* image size
++               * Again allow plausible variation in case added padding is
++               * required
++               */
++              sizeimage = constrain2x(sizeimage,
++                                      bytesperline * width * 4 / 3);
++              break;
++
++      default:
++              return -EINVAL;
++      }
++
++      pix_fmt->width = width;
++      pix_fmt->height = height;
++
++      pix_fmt->field = V4L2_FIELD_NONE;
++      pix_fmt->bytesperline = bytesperline;
++      pix_fmt->sizeimage = sizeimage;
++      return 0;
++}
++
++/*
++ * Query-capabilities handler: fills the driver and card names with
++ * RPIVID_NAME and bus_info with "platform:" followed by RPIVID_NAME.
++ * Always returns 0.
++ */
++static int rpivid_querycap(struct file *file, void *priv,
++                         struct v4l2_capability *cap)
++{
++      strscpy(cap->driver, RPIVID_NAME, sizeof(cap->driver));
++      strscpy(cap->card, RPIVID_NAME, sizeof(cap->card));
++      snprintf(cap->bus_info, sizeof(cap->bus_info),
++               "platform:%s", RPIVID_NAME);
++
++      return 0;
++}
++
++/*
++ * Enumerate the input (bitstream) formats.
++ * Index 0 reports V4L2_PIX_FMT_HEVC_SLICE, the only format offered;
++ * every other index returns -EINVAL.
++ */
++static int rpivid_enum_fmt_vid_out(struct file *file, void *priv,
++                                 struct v4l2_fmtdesc *f)
++{
++      // H.265 Slice is the sole input format for now
++      if (f->index != 0)
++              return -EINVAL;
++
++      f->pixelformat = V4L2_PIX_FMT_HEVC_SLICE;
++      return 0;
++}
++
++/*
++ * Check an SPS against what this driver accepts.
++ * Returns 1 if every check passes and 0 as soon as one fails.
++ */
++static int rpivid_hevc_validate_sps(const struct v4l2_ctrl_hevc_sps * const sps)
++{
++      /* log2 sizes derived from the SPS fields */
++      const unsigned int ctb_log2_size_y =
++                      sps->log2_min_luma_coding_block_size_minus3 + 3 +
++                      sps->log2_diff_max_min_luma_coding_block_size;
++      const unsigned int min_tb_log2_size_y =
++                      sps->log2_min_luma_transform_block_size_minus2 + 2;
++      const unsigned int max_tb_log2_size_y = min_tb_log2_size_y +
++                      sps->log2_diff_max_min_luma_transform_block_size;
++
++      /* Local limitations */
++      /* Picture must be between 32 and 4096 samples in each dimension */
++      if (sps->pic_width_in_luma_samples < 32 ||
++          sps->pic_width_in_luma_samples > 4096)
++              return 0;
++      if (sps->pic_height_in_luma_samples < 32 ||
++          sps->pic_height_in_luma_samples > 4096)
++              return 0;
++      /* Only 8-bit or 10-bit, with luma and chroma depth equal */
++      if (!(sps->bit_depth_luma_minus8 == 0 ||
++            sps->bit_depth_luma_minus8 == 2))
++              return 0;
++      if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
++              return 0;
++      /* Only chroma_format_idc 1 is accepted */
++      if (sps->chroma_format_idc != 1)
++              return 0;
++
++      /*  Limits from H.265 7.4.3.2.1 */
++      if (sps->log2_max_pic_order_cnt_lsb_minus4 > 12)
++              return 0;
++      if (sps->sps_max_dec_pic_buffering_minus1 > 15)
++              return 0;
++      if (sps->sps_max_num_reorder_pics >
++                              sps->sps_max_dec_pic_buffering_minus1)
++              return 0;
++      if (ctb_log2_size_y > 6)
++              return 0;
++      if (max_tb_log2_size_y > 5)
++              return 0;
++      if (max_tb_log2_size_y > ctb_log2_size_y)
++              return 0;
++      /* The two checks above keep the unsigned subtractions below from
++       * wrapping: min_tb <= max_tb <= ctb
++       */
++      if (sps->max_transform_hierarchy_depth_inter >
++                              (ctb_log2_size_y - min_tb_log2_size_y))
++              return 0;
++      if (sps->max_transform_hierarchy_depth_intra >
++                              (ctb_log2_size_y - min_tb_log2_size_y))
++              return 0;
++      /* Check pcm stuff */
++      /* NOTE(review): no PCM fields are actually tested here - this reads
++       * like a placeholder for checks still to be written; confirm
++       */
++      if (sps->num_short_term_ref_pic_sets > 64)
++              return 0;
++      if (sps->num_long_term_ref_pics_sps > 32)
++              return 0;
++      return 1;
++}
++
++/* Non-zero when @sps is non-NULL and carries a non-zero picture width. */
++static inline int is_sps_set(const struct v4l2_ctrl_hevc_sps * const sps)
++{
++      if (!sps)
++              return 0;
++
++      return sps->pic_width_in_luma_samples != 0;
++}
++
++/*
++ * Return the @index'th capture pixel format on offer for @sps, or 0 when
++ * the list has no such entry.
++ *
++ * An SPS that is not set (see is_sps_set()) offers both column formats.
++ * A set SPS offers a single entry, chosen by its luma bit depth, and only
++ * if it passes rpivid_hevc_validate_sps().
++ */
++static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps,
++                              const int index)
++{
++      if (!is_sps_set(sps)) {
++              /* Treat this as an error? For now return both */
++              switch (index) {
++              case 0:
++                      return V4L2_PIX_FMT_NV12_COL128;
++              case 1:
++                      return V4L2_PIX_FMT_NV12_10_COL128;
++              default:
++                      return 0;
++              }
++      }
++
++      if (index != 0 || !rpivid_hevc_validate_sps(sps))
++              return 0;
++
++      switch (sps->bit_depth_luma_minus8) {
++      case 0:
++              return V4L2_PIX_FMT_NV12_COL128;
++      case 2:
++              return V4L2_PIX_FMT_NV12_10_COL128;
++      default:
++              return 0;
++      }
++}
++
++/*
++ * Build the default capture format from the current SPS control.
++ *
++ * Width and height come from the SPS and the pixel format is the first
++ * entry offered by pixelformat_from_sps(); rpivid_prepare_dst_format()
++ * then completes the structure, which is returned by value.
++ *
++ * The SPS pointer is checked before it is read: is_sps_set() and
++ * pixelformat_from_sps() already allow for a NULL SPS, so a missing
++ * control yields 0x0 here rather than a NULL dereference.
++ */
++static struct v4l2_pix_format
++rpivid_hevc_default_dst_fmt(struct rpivid_ctx * const ctx)
++{
++      const struct v4l2_ctrl_hevc_sps * const sps =
++              rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
++      struct v4l2_pix_format pix_fmt = {
++              .width = sps ? sps->pic_width_in_luma_samples : 0,
++              .height = sps ? sps->pic_height_in_luma_samples : 0,
++              .pixelformat = pixelformat_from_sps(sps, 0)
++      };
++
++      rpivid_prepare_dst_format(&pix_fmt);
++      return pix_fmt;
++}
++
++/*
++ * Look up the context's SPS control and return the @index'th capture
++ * pixel format it offers (0 when there is none).
++ */
++static u32 rpivid_hevc_get_dst_pixelformat(struct rpivid_ctx * const ctx,
++                                         const int index)
++{
++      return pixelformat_from_sps(
++              rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS),
++              index);
++}
++
++/*
++ * Format enumeration for the CAPTURE queue.  The list is whatever
++ * rpivid_hevc_get_dst_pixelformat() offers; a zero result marks the end
++ * of the list and is reported as -EINVAL.
++ */
++static int rpivid_enum_fmt_vid_cap(struct file *file, void *priv,
++                                 struct v4l2_fmtdesc *f)
++{
++      const u32 fourcc =
++              rpivid_hevc_get_dst_pixelformat(rpivid_file2ctx(file),
++                                              f->index);
++
++      if (!fourcc)
++              return -EINVAL;
++
++      f->pixelformat = fourcc;
++      return 0;
++}
++
++/*
++ * Report the CAPTURE format.  While no capture format has been set
++ * explicitly the stored one is first regenerated from the current SPS;
++ * doing so does not mark the format as set.
++ */
++static int rpivid_g_fmt_vid_cap(struct file *file, void *priv,
++                              struct v4l2_format *f)
++{
++      struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
++      struct v4l2_pix_format * const stored = &ctx->dst_fmt;
++
++      if (!ctx->dst_fmt_set)
++              *stored = rpivid_hevc_default_dst_fmt(ctx);
++
++      f->fmt.pix = *stored;
++      return 0;
++}
++
++/* Report the OUTPUT format exactly as stored in the context. */
++static int rpivid_g_fmt_vid_out(struct file *file, void *priv,
++                              struct v4l2_format *f)
++{
++      f->fmt.pix = rpivid_file2ctx(file)->src_fmt;
++
++      return 0;
++}
++
++/*
++ * Copy the four colour-description fields (colorspace, transfer function,
++ * YCbCr encoding, quantization) from @s to @d; nothing else is touched.
++ */
++static inline void copy_color(struct v4l2_pix_format *d,
++                            const struct v4l2_pix_format *s)
++{
++      d->quantization = s->quantization;
++      d->ycbcr_enc    = s->ycbcr_enc;
++      d->xfer_func    = s->xfer_func;
++      d->colorspace   = s->colorspace;
++}
++
++/*
++ * Try a CAPTURE format.
++ *
++ * The requested pixel format is kept if pixelformat_from_sps() offers it
++ * for the current SPS; otherwise the first offered format is substituted,
++ * and -EINVAL is returned when nothing is offered at all.  The remaining
++ * fields are left to rpivid_prepare_dst_format(), whose result is
++ * returned.
++ */
++static int rpivid_try_fmt_vid_cap(struct file *file, void *priv,
++                                struct v4l2_format *f)
++{
++      struct rpivid_ctx *ctx = rpivid_file2ctx(file);
++      const struct v4l2_ctrl_hevc_sps * const sps =
++              rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
++      struct v4l2_pix_format * const pix = &f->fmt.pix;
++      u32 chosen = 0;
++      int idx = 0;
++
++      /* Reject format types we don't support */
++      if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
++              return -EINVAL;
++
++      /* Walk the offered formats until the list ends or one matches */
++      for (;;) {
++              const u32 offered = pixelformat_from_sps(sps, idx++);
++
++              if (offered == 0)
++                      break;
++              if (offered == pix->pixelformat) {
++                      chosen = offered;
++                      break;
++              }
++      }
++
++      /* No match: fall back to the default, give up if there is none */
++      if (chosen == 0) {
++              chosen = pixelformat_from_sps(sps, 0);
++              if (chosen == 0)
++                      return -EINVAL;
++      }
++
++      /*
++       * The colourspace cannot be discovered here, so whatever the caller
++       * supplies is believed; an unspecified one defaults to the values
++       * held in the source format.
++       */
++      if (pix->colorspace == V4L2_COLORSPACE_DEFAULT)
++              copy_color(pix, &ctx->src_fmt);
++
++      pix->pixelformat = chosen;
++      return rpivid_prepare_dst_format(pix);
++}
++
++/*
++ * Try an OUTPUT format.  If rpivid_prepare_src_format() rejects the
++ * request, the pixel format is replaced by the driver default and the
++ * preparation is repeated; the result of that second attempt is not
++ * examined and 0 is returned either way.
++ */
++static int rpivid_try_fmt_vid_out(struct file *file, void *priv,
++                                struct v4l2_format *f)
++{
++      struct v4l2_pix_format * const pix = &f->fmt.pix;
++
++      if (f->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
++              return -EINVAL;
++
++      if (rpivid_prepare_src_format(pix) == 0)
++              return 0;
++
++      // Set default src format
++      pix->pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
++      rpivid_prepare_src_format(pix);
++
++      return 0;
++}
++
++/*
++ * Set the CAPTURE format.  Refused with -EBUSY while the queue is busy;
++ * otherwise the request goes through rpivid_try_fmt_vid_cap() and, on
++ * success, is stored in the context and flagged as explicitly set.
++ */
++static int rpivid_s_fmt_vid_cap(struct file *file, void *priv,
++                              struct v4l2_format *f)
++{
++      struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
++      int err;
++
++      if (vb2_is_busy(v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type)))
++              return -EBUSY;
++
++      err = rpivid_try_fmt_vid_cap(file, priv, f);
++      if (err)
++              return err;
++
++      ctx->dst_fmt = f->fmt.pix;
++      ctx->dst_fmt_set = 1;
++
++      return 0;
++}
++
++/*
++ * Set the OUTPUT format.  Refused with -EBUSY while the queue is busy.
++ * On success the format is stored, any explicitly set capture format is
++ * invalidated, the queue is flagged as supporting held capture buffers
++ * and the colour description is copied into the stored capture format.
++ */
++static int rpivid_s_fmt_vid_out(struct file *file, void *priv,
++                              struct v4l2_format *f)
++{
++      struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
++      struct vb2_queue * const src_q =
++              v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
++      int err;
++
++      if (vb2_is_busy(src_q))
++              return -EBUSY;
++
++      err = rpivid_try_fmt_vid_out(file, priv, f);
++      if (err)
++              return err;
++
++      ctx->src_fmt = f->fmt.pix;
++      /* Setting src invalidates dst */
++      ctx->dst_fmt_set = 0;
++
++      src_q->subsystem_flags |= VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF;
++
++      /* Propagate colorspace information to capture. */
++      copy_color(&ctx->dst_fmt, &f->fmt.pix);
++
++      return 0;
++}
++
++const struct v4l2_ioctl_ops rpivid_ioctl_ops = { /* V4L2 ioctl dispatch table */
++      .vidioc_querycap                = rpivid_querycap,
++      /* CAPTURE queue format handling, implemented by this driver */
++      .vidioc_enum_fmt_vid_cap        = rpivid_enum_fmt_vid_cap,
++      .vidioc_g_fmt_vid_cap           = rpivid_g_fmt_vid_cap,
++      .vidioc_try_fmt_vid_cap         = rpivid_try_fmt_vid_cap,
++      .vidioc_s_fmt_vid_cap           = rpivid_s_fmt_vid_cap,
++      /* OUTPUT queue format handling, implemented by this driver */
++      .vidioc_enum_fmt_vid_out        = rpivid_enum_fmt_vid_out,
++      .vidioc_g_fmt_vid_out           = rpivid_g_fmt_vid_out,
++      .vidioc_try_fmt_vid_out         = rpivid_try_fmt_vid_out,
++      .vidioc_s_fmt_vid_out           = rpivid_s_fmt_vid_out,
++      /* Buffer ioctls are delegated to the v4l2_m2m_ioctl_* helpers */
++      .vidioc_reqbufs                 = v4l2_m2m_ioctl_reqbufs,
++      .vidioc_querybuf                = v4l2_m2m_ioctl_querybuf,
++      .vidioc_qbuf                    = v4l2_m2m_ioctl_qbuf,
++      .vidioc_dqbuf                   = v4l2_m2m_ioctl_dqbuf,
++      .vidioc_prepare_buf             = v4l2_m2m_ioctl_prepare_buf,
++      .vidioc_create_bufs             = v4l2_m2m_ioctl_create_bufs,
++      .vidioc_expbuf                  = v4l2_m2m_ioctl_expbuf,
++      /* Stream on/off, likewise delegated */
++      .vidioc_streamon                = v4l2_m2m_ioctl_streamon,
++      .vidioc_streamoff               = v4l2_m2m_ioctl_streamoff,
++      /* Decoder commands use the stateless m2m helpers */
++      .vidioc_try_decoder_cmd         = v4l2_m2m_ioctl_stateless_try_decoder_cmd,
++      .vidioc_decoder_cmd             = v4l2_m2m_ioctl_stateless_decoder_cmd,
++      /* Event (un)subscription */
++      .vidioc_subscribe_event         = v4l2_ctrl_subscribe_event,
++      .vidioc_unsubscribe_event       = v4l2_event_unsubscribe,
++};
++
++/*
++ * vb2 queue_setup callback.
++ *
++ * With *nplanes == 0 a single plane of the current format's sizeimage is
++ * requested.  With *nplanes already set, the caller's first plane size is
++ * only checked: -EINVAL if it is smaller than sizeimage.  The format used
++ * is src_fmt for an OUTPUT queue and dst_fmt otherwise.
++ */
++static int rpivid_queue_setup(struct vb2_queue *vq, unsigned int *nbufs,
++                            unsigned int *nplanes, unsigned int sizes[],
++                            struct device *alloc_devs[])
++{
++      struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
++      const struct v4l2_pix_format * const fmt =
++              V4L2_TYPE_IS_OUTPUT(vq->type) ? &ctx->src_fmt : &ctx->dst_fmt;
++
++      if (*nplanes == 0) {
++              *nplanes = 1;
++              sizes[0] = fmt->sizeimage;
++              return 0;
++      }
++
++      return sizes[0] < fmt->sizeimage ? -EINVAL : 0;
++}
++
++/*
++ * Drain every buffer still held by the m2m context for this queue (source
++ * buffers for an OUTPUT queue, destination buffers otherwise).  For each
++ * one the attached control request is completed and the buffer is handed
++ * back in @state.
++ */
++static void rpivid_queue_cleanup(struct vb2_queue *vq, u32 state)
++{
++      struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
++      const bool from_src = V4L2_TYPE_IS_OUTPUT(vq->type);
++      struct vb2_v4l2_buffer *buf;
++
++      while ((buf = from_src ?
++                      v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx) :
++                      v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx)) != NULL) {
++              v4l2_ctrl_request_complete(buf->vb2_buf.req_obj.req,
++                                         &ctx->hdl);
++              v4l2_m2m_buf_done(buf, state);
++      }
++}
++
++/*
++ * vb2 buf_out_validate callback: the field order is forced to
++ * V4L2_FIELD_NONE and the buffer is always accepted.
++ */
++static int rpivid_buf_out_validate(struct vb2_buffer *vb)
++{
++      to_vb2_v4l2_buffer(vb)->field = V4L2_FIELD_NONE;
++
++      return 0;
++}
++
++/*
++ * vb2 buf_prepare callback: reject (-EINVAL) a buffer whose first plane
++ * is smaller than the current format's sizeimage, otherwise set the
++ * plane payload to sizeimage.  The format is src_fmt for an OUTPUT queue
++ * and dst_fmt otherwise.
++ */
++static int rpivid_buf_prepare(struct vb2_buffer *vb)
++{
++      struct vb2_queue * const q = vb->vb2_queue;
++      struct rpivid_ctx * const ctx = vb2_get_drv_priv(q);
++      const struct v4l2_pix_format * const fmt =
++              V4L2_TYPE_IS_OUTPUT(q->type) ? &ctx->src_fmt : &ctx->dst_fmt;
++
++      if (vb2_plane_size(vb, 0) < fmt->sizeimage)
++              return -EINVAL;
++
++      vb2_set_plane_payload(vb, 0, fmt->sizeimage);
++      return 0;
++}
++
++/*
++ * vb2 start_streaming callback.
++ *
++ * Requires the source format to be HEVC slice data.  For the OUTPUT queue
++ * the decoder's optional start hook is run first; then, for either queue,
++ * the clock is set to 500 MHz and enabled (rpivid_stop_streaming()
++ * disables it again per queue).
++ *
++ * Returns 0 on success or a negative errno.  Every failure path hands the
++ * queued buffers back in the QUEUED state, as vb2 requires when
++ * start_streaming fails.  A failing start hook is reported rather than
++ * being overwritten by the clock calls, and a clock failure after a
++ * successful start undoes that start through the stop hook.
++ */
++static int rpivid_start_streaming(struct vb2_queue *vq, unsigned int count)
++{
++      struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
++      struct rpivid_dev *dev = ctx->dev;
++      bool started = false;
++      int ret;
++
++      if (ctx->src_fmt.pixelformat != V4L2_PIX_FMT_HEVC_SLICE) {
++              ret = -EINVAL;
++              goto out_cleanup;
++      }
++
++      if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->start) {
++              ret = dev->dec_ops->start(ctx);
++              if (ret)
++                      goto out_cleanup;
++              started = true;
++      }
++
++      ret = clk_set_rate(dev->clock, 500 * 1000 * 1000);
++      if (ret) {
++              dev_err(dev->dev, "Failed to set clock rate\n");
++              goto out_stop;
++      }
++
++      ret = clk_prepare_enable(dev->clock);
++      if (ret) {
++              dev_err(dev->dev, "Failed to enable clock\n");
++              goto out_stop;
++      }
++
++      return 0;
++
++out_stop:
++      /* Only undo a start that this call actually performed */
++      if (started && dev->dec_ops->stop)
++              dev->dec_ops->stop(ctx);
++out_cleanup:
++      rpivid_queue_cleanup(vq, VB2_BUF_STATE_QUEUED);
++
++      return ret;
++}
++
++/*
++ * vb2 stop_streaming callback: run the decoder's optional stop hook (for
++ * the OUTPUT queue only), return all remaining buffers in the ERROR
++ * state, then disable and unprepare the clock.
++ */
++static void rpivid_stop_streaming(struct vb2_queue *vq)
++{
++      struct rpivid_ctx * const ctx = vb2_get_drv_priv(vq);
++      struct rpivid_dev * const dev = ctx->dev;
++
++      if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
++              if (dev->dec_ops->stop)
++                      dev->dec_ops->stop(ctx);
++      }
++
++      rpivid_queue_cleanup(vq, VB2_BUF_STATE_ERROR);
++      clk_disable_unprepare(dev->clock);
++}
++
++/* vb2 buf_queue callback: pass the buffer on to the context's m2m queue. */
++static void rpivid_buf_queue(struct vb2_buffer *vb)
++{
++      struct rpivid_ctx * const ctx = vb2_get_drv_priv(vb->vb2_queue);
++
++      v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, to_vb2_v4l2_buffer(vb));
++}
++
++/*
++ * vb2 buf_request_complete callback: complete the buffer's request
++ * against the context's control handler.
++ */
++static void rpivid_buf_request_complete(struct vb2_buffer *vb)
++{
++      struct rpivid_ctx * const owner = vb2_get_drv_priv(vb->vb2_queue);
++
++      v4l2_ctrl_request_complete(vb->req_obj.req, &owner->hdl);
++}
++
++/*
++ * vb2 queue operations shared by the OUTPUT and CAPTURE queues.  The
++ * table is only ever read, so it is declared const.
++ */
++static const struct vb2_ops rpivid_qops = {
++      .queue_setup            = rpivid_queue_setup,
++      .buf_prepare            = rpivid_buf_prepare,
++      .buf_queue              = rpivid_buf_queue,
++      .buf_out_validate       = rpivid_buf_out_validate,
++      .buf_request_complete   = rpivid_buf_request_complete,
++      .start_streaming        = rpivid_start_streaming,
++      .stop_streaming         = rpivid_stop_streaming,
++      .wait_prepare           = vb2_ops_wait_prepare,
++      .wait_finish            = vb2_ops_wait_finish,
++};
++
++/* Apply the vb2 settings that both queues of a context have in common. */
++static void rpivid_queue_fill_common(struct vb2_queue *vq,
++                                   struct rpivid_ctx *ctx,
++                                   unsigned int type)
++{
++      vq->type = type;
++      vq->io_modes = VB2_MMAP | VB2_DMABUF;
++      vq->drv_priv = ctx;
++      vq->buf_struct_size = sizeof(struct rpivid_buffer);
++      vq->min_buffers_needed = 1;
++      vq->ops = &rpivid_qops;
++      vq->mem_ops = &vb2_dma_contig_memops;
++      vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
++      vq->lock = &ctx->dev->dev_mutex;
++      vq->dev = ctx->dev->dev;
++}
++
++/*
++ * Initialise the source (OUTPUT) and destination (CAPTURE) vb2 queues of
++ * the context passed in @priv.  Both queues share the same settings; the
++ * source queue additionally supports, and requires, requests.
++ *
++ * Returns 0 on success or the error from vb2_queue_init().  The
++ * destination queue is not touched if the source queue fails to
++ * initialise.
++ */
++int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
++                    struct vb2_queue *dst_vq)
++{
++      struct rpivid_ctx * const ctx = priv;
++      int err;
++
++      rpivid_queue_fill_common(src_vq, ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
++      src_vq->supports_requests = true;
++      src_vq->requires_requests = true;
++
++      err = vb2_queue_init(src_vq);
++      if (err)
++              return err;
++
++      rpivid_queue_fill_common(dst_vq, ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
++
++      return vb2_queue_init(dst_vq);
++}
+--- /dev/null
++++ b/drivers/staging/media/rpivid/rpivid_video.h
+@@ -0,0 +1,30 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Raspberry Pi HEVC driver
++ *
++ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
++ *
++ * Based on the Cedrus VPU driver, that is:
++ *
++ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ * Copyright (C) 2018 Bootlin
++ */
++
++#ifndef _RPIVID_VIDEO_H_
++#define _RPIVID_VIDEO_H_
++
++struct rpivid_format { /* NOTE(review): no user of this struct is visible here */
++      u32             pixelformat; /* presumably a V4L2_PIX_FMT_* fourcc - confirm */
++      u32             directions; /* meaning not visible here - TODO confirm */
++      unsigned int    capabilities; /* meaning not visible here - TODO confirm */
++};
++
++extern const struct v4l2_ioctl_ops rpivid_ioctl_ops; /* V4L2 ioctl table */
++      /* priv is the struct rpivid_ctx owning both queues; 0 or an error */
++int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
++                    struct vb2_queue *dst_vq);
++int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt); /* non-zero is treated as failure by callers */
++int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt); /* result is returned as the try_fmt status */
++
++#endif
diff --git a/target/linux/bcm27xx/patches-5.4/950-0514-dtoverlays-Add-overlay-to-enable-the-HEVC-V4L2-drive.patch b/target/linux/bcm27xx/patches-5.4/950-0514-dtoverlays-Add-overlay-to-enable-the-HEVC-V4L2-drive.patch
new file mode 100644 (file)
index 0000000..ee92ada
--- /dev/null
@@ -0,0 +1,102 @@
+From b1d6499e00b6061ecc7061335199acf86f54d31a Mon Sep 17 00:00:00 2001
+From: Dave Stevenson <dave.stevenson@raspberrypi.com>
+Date: Fri, 13 Mar 2020 16:52:55 +0000
+Subject: [PATCH] dtoverlays: Add overlay to enable the HEVC V4L2
+ driver
+
+This replaces the rpivid_mem register mapping driver.
+When the driver is complete, these DT changes should be
+merged into the base DT instead of being an overlay.
+
+Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
+---
+ arch/arm/boot/dts/overlays/Makefile           |  1 +
+ arch/arm/boot/dts/overlays/README             |  7 +++
+ .../boot/dts/overlays/rpivid-v4l2-overlay.dts | 55 +++++++++++++++++++
+ 4 files changed, 63 insertions(+), 2 deletions(-)
+ create mode 100644 arch/arm/boot/dts/overlays/rpivid-v4l2-overlay.dts
+
+--- a/arch/arm/boot/dts/overlays/Makefile
++++ b/arch/arm/boot/dts/overlays/Makefile
+@@ -140,6 +140,7 @@ dtbo-$(CONFIG_ARCH_BCM2835) += \
+       rpi-proto.dtbo \
+       rpi-sense.dtbo \
+       rpi-tv.dtbo \
++      rpivid-v4l2.dtbo \
+       rra-digidac1-wm8741-audio.dtbo \
+       sc16is750-i2c.dtbo \
+       sc16is752-i2c.dtbo \
+--- a/arch/arm/boot/dts/overlays/README
++++ b/arch/arm/boot/dts/overlays/README
+@@ -2064,6 +2064,13 @@ Load:   dtoverlay=rpi-tv
+ Params: <None>
++Name:   rpivid-v4l2
++Info:   Load the V4L2 stateless video decoder driver for the HEVC block,
++        disabling the memory mapped devices in the process.
++Load:   dtoverlay=rpivid-v4l2
++Params: <None>
++
++
+ Name:   rra-digidac1-wm8741-audio
+ Info:   Configures the Red Rocks Audio DigiDAC1 soundcard
+ Load:   dtoverlay=rra-digidac1-wm8741-audio
+--- /dev/null
++++ b/arch/arm/boot/dts/overlays/rpivid-v4l2-overlay.dts
+@@ -0,0 +1,55 @@
++// SPDX-License-Identifier: GPL-2.0-only
++// Definitions for Raspberry Pi video decode engine
++/dts-v1/;
++/plugin/;
++
++#include <dt-bindings/interrupt-controller/arm-gic.h>
++
++/{
++      compatible = "brcm,bcm2711";
++
++      fragment@0 { /* adds the V4L2 decoder node under &scb */
++              target = <&scb>;
++              __overlay__ {
++                      /* needed to avoid dtc warning */
++                      #address-cells = <2>;
++                      #size-cells = <1>;
++                      codec@7eb10000 {
++                              compatible = "raspberrypi,rpivid-vid-decoder";
++                              reg = <0x0 0x7eb10000 0x1000>,  /* INTC */
++                                    <0x0 0x7eb00000 0x10000>; /* HEVC */
++                              reg-names = "intc",
++                                          "hevc";
++
++                              interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
++
++                              clocks = <&hevc_clk>;
++                              clock-names = "hevc";
++
++                              hevc_clk: hevc_clk { /* fixed 500000000 Hz clock referenced by "clocks" above */
++                                      compatible = "fixed-clock";
++                                      #clock-cells = <0>;
++                                      clock-frequency = <500000000>;
++                              };
++                      };
++              };
++      };
++
++      fragment@1 { /* disables four existing decoder-related nodes under &scb */
++              target = <&scb>;
++              __overlay__ {
++                      hevc-decoder@7eb00000 { /* same address as the "hevc" reg entry in fragment@0 */
++                              status = "disabled";
++                      };
++                      rpivid-local-intc@7eb10000 { /* same address as the "intc" reg entry in fragment@0 */
++                              status = "disabled";
++                      };
++                      h264-decoder@7eb20000 {
++                              status = "disabled";
++                      };
++                      vp9-decoder@7eb30000 {
++                              status = "disabled";
++                      };
++              };
++      };
++};