git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
media: iris: implement power scaling for vpu2 and vpu3
author Vedang Nagar <quic_vnagar@quicinc.com>
Fri, 7 Feb 2025 07:55:05 +0000 (13:25 +0530)
committer Hans Verkuil <hverkuil@xs4all.nl>
Fri, 7 Feb 2025 10:51:55 +0000 (11:51 +0100)
Implement power scaling including a specific vpu2 and vpu3 calculation
for clock and bus bandwidth, which depends on the hardware
configuration, codec format, resolution and frame rate.

Signed-off-by: Vedang Nagar <quic_vnagar@quicinc.com>
Tested-by: Stefan Schmidt <stefan.schmidt@linaro.org> # x1e80100 (Dell XPS 13 9345)
Reviewed-by: Stefan Schmidt <stefan.schmidt@linaro.org>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8550-QRD
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8550-HDK
Signed-off-by: Dikshita Agarwal <quic_dikshita@quicinc.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
12 files changed:
drivers/media/platform/qcom/iris/Makefile
drivers/media/platform/qcom/iris/iris_buffer.c
drivers/media/platform/qcom/iris/iris_instance.h
drivers/media/platform/qcom/iris/iris_platform_common.h
drivers/media/platform/qcom/iris/iris_platform_sm8550.c
drivers/media/platform/qcom/iris/iris_power.c [new file with mode: 0644]
drivers/media/platform/qcom/iris/iris_power.h [new file with mode: 0644]
drivers/media/platform/qcom/iris/iris_vb2.c
drivers/media/platform/qcom/iris/iris_vdec.c
drivers/media/platform/qcom/iris/iris_vpu2.c
drivers/media/platform/qcom/iris/iris_vpu3.c
drivers/media/platform/qcom/iris/iris_vpu_common.h

index ab16189aa9e6594adc772c8b67376f630c38e58f..ca31db847273246c76bdfa11363477d847b47a0f 100644 (file)
@@ -10,6 +10,7 @@ iris-objs += iris_buffer.o \
              iris_hfi_gen2_response.o \
              iris_hfi_queue.o \
              iris_platform_sm8550.o \
+             iris_power.o \
              iris_probe.o \
              iris_resources.o \
              iris_state.o \
index dc096e5e95bf0c02b272384e986a677174d73d07..e5c5a564fcb81e77746df8c4797a10a07f2ae946 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "iris_buffer.h"
 #include "iris_instance.h"
+#include "iris_power.h"
 #include "iris_vpu_buffer.h"
 
 #define PIXELS_4K 4096
@@ -500,6 +501,8 @@ int iris_queue_deferred_buffers(struct iris_inst *inst, enum iris_buffer_type bu
        struct iris_buffer *buf;
        int ret;
 
+       iris_scale_power(inst);
+
        if (buf_type == BUF_INPUT) {
                v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buffer, n) {
                        buf = to_iris_buffer(&buffer->vb);
index 89fb636443110a8b8d47395fad6e5f8b013fcc57..caa3c65070061b3a9a2facd5b7b297c9f11b07d9 100644 (file)
@@ -33,6 +33,9 @@
  * @state: instance state
  * @sub_state: instance sub state
  * @once_per_session_set: boolean to set once per session property
+ * @max_input_data_size: max size of input data
+ * @power: structure of power info
+ * @icc_data: structure of interconnect data
  * @m2m_dev:   a reference to m2m device structure
  * @m2m_ctx:   a reference to m2m context structure
  * @sequence_cap: a sequence counter for capture queue
@@ -60,6 +63,9 @@ struct iris_inst {
        enum iris_inst_state            state;
        enum iris_inst_sub_state        sub_state;
        bool                            once_per_session_set;
+       size_t                          max_input_data_size;
+       struct iris_inst_power          power;
+       struct icc_vote_data            icc_data;
        struct v4l2_m2m_dev             *m2m_dev;
        struct v4l2_m2m_ctx             *m2m_ctx;
        u32                             sequence_cap;
index a5a7d6838d1677c93c82905da3e6bfa1bc6daffb..189dd081ad0a471769d081a747d98f20817bccd3 100644 (file)
@@ -20,6 +20,8 @@ struct iris_inst;
 #define CODED_FRAMES_PROGRESSIVE               0x0
 #define DEFAULT_MAX_HOST_BUF_COUNT             64
 #define DEFAULT_MAX_HOST_BURST_BUF_COUNT       256
+#define DEFAULT_FPS                            30
+
 enum stage_type {
        STAGE_1 = 1,
        STAGE_2 = 2,
@@ -67,6 +69,10 @@ struct platform_inst_caps {
        u32 min_frame_height;
        u32 max_frame_height;
        u32 max_mbpf;
+       u32 mb_cycles_vsp;
+       u32 mb_cycles_vpp;
+       u32 mb_cycles_fw;
+       u32 mb_cycles_fw_vpp;
        u32 num_comv;
 };
 
@@ -106,11 +112,26 @@ struct platform_inst_fw_cap {
                   enum platform_inst_fw_cap_type cap_id);
 };
 
+/*
+ * One row of a bandwidth lookup table: a macroblocks-per-second load
+ * (mbs_per_sec) paired with the DDR bandwidth value (bw_ddr) that
+ * iris_calc_bw() returns for it. Units of bw_ddr are not stated here.
+ */
+struct bw_info {
+       u32 mbs_per_sec;
+       u32 bw_ddr;
+};
+
 struct iris_core_power {
        u64 clk_freq;
        u64 icc_bw;
 };
 
+/*
+ * Per-instance power requirements. iris_power.c sums min_freq and icc_bw
+ * over the core's instances to obtain the core clock rate and the
+ * interconnect bandwidth vote.
+ */
+struct iris_inst_power {
+       u64 min_freq;
+       u32 icc_bw;
+};
+
+/*
+ * Inputs to the bandwidth lookup: frame dimensions and frame rate.
+ * iris_vote_interconnects() fills width/height from the source format
+ * and fps from DEFAULT_FPS.
+ */
+struct icc_vote_data {
+       u32 height, width;
+       u32 fps;
+};
+
 enum platform_pm_domain_type {
        IRIS_CTRL_POWER_DOMAIN,
        IRIS_HW_POWER_DOMAIN,
@@ -124,6 +145,8 @@ struct iris_platform_data {
        void (*set_preset_registers)(struct iris_core *core);
        const struct icc_info *icc_tbl;
        unsigned int icc_tbl_size;
+       const struct bw_info *bw_tbl_dec;
+       unsigned int bw_tbl_dec_size;
        const char * const *pmdomain_tbl;
        unsigned int pmdomain_tbl_size;
        const char * const *opp_pd_tbl;
index 8d23978f5ceeb3789c582c2e680666800064efdf..35d278996c430f2856d0fe59586930061a271c3e 100644 (file)
@@ -126,6 +126,9 @@ static struct platform_inst_caps platform_inst_cap_sm8550 = {
        .min_frame_height = 96,
        .max_frame_height = 8192,
        .max_mbpf = (8192 * 4352) / 256,
+       .mb_cycles_vpp = 200,
+       .mb_cycles_fw = 489583,
+       .mb_cycles_fw_vpp = 66234,
        .num_comv = 0,
 };
 
@@ -141,6 +144,13 @@ static const struct icc_info sm8550_icc_table[] = {
 
 static const char * const sm8550_clk_reset_table[] = { "bus" };
 
+/*
+ * Bandwidth table installed as bw_tbl_dec for sm8550 (the _dec suffix
+ * suggests decoder use). Each row is { macroblocks per second, bw_ddr },
+ * where the first column is (width * height / 256) * fps, i.e. the number
+ * of 16x16 macroblocks per second. Rows are listed from the highest load
+ * to the lowest; iris_calc_bw() scans them in that order.
+ */
+static const struct bw_info sm8550_bw_table_dec[] = {
+       { ((4096 * 2160) / 256) * 60, 1608000 },
+       { ((4096 * 2160) / 256) * 30,  826000 },
+       { ((1920 * 1080) / 256) * 60,  567000 },
+       { ((1920 * 1080) / 256) * 30,  294000 },
+};
+
 static const char * const sm8550_pmdomain_table[] = { "venus", "vcodec0" };
 
 static const char * const sm8550_opp_pd_table[] = { "mxc", "mmcx" };
@@ -214,6 +224,8 @@ struct iris_platform_data sm8550_data = {
        .icc_tbl_size = ARRAY_SIZE(sm8550_icc_table),
        .clk_rst_tbl = sm8550_clk_reset_table,
        .clk_rst_tbl_size = ARRAY_SIZE(sm8550_clk_reset_table),
+       .bw_tbl_dec = sm8550_bw_table_dec,
+       .bw_tbl_dec_size = ARRAY_SIZE(sm8550_bw_table_dec),
        .pmdomain_tbl = sm8550_pmdomain_table,
        .pmdomain_tbl_size = ARRAY_SIZE(sm8550_pmdomain_table),
        .opp_pd_tbl = sm8550_opp_pd_table,
diff --git a/drivers/media/platform/qcom/iris/iris_power.c b/drivers/media/platform/qcom/iris/iris_power.c
new file mode 100644 (file)
index 0000000..dbca42d
--- /dev/null
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/pm_opp.h>
+#include <linux/pm_runtime.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "iris_buffer.h"
+#include "iris_instance.h"
+#include "iris_power.h"
+#include "iris_resources.h"
+#include "iris_vpu_common.h"
+
+/*
+ * iris_calc_bw() - look up the bandwidth value for one instance.
+ * @inst: instance whose core supplies the platform bandwidth table
+ * @data: frame height, width and fps describing the load
+ *
+ * Converts the frame size to 16x16 macroblocks (each dimension rounded
+ * up), multiplies by fps and scans bw_tbl_dec in order. Row 0 is always
+ * accepted; the scan stops at the first later row whose mbs_per_sec is
+ * below the computed load, so the result is the bw_ddr of the last row
+ * accepted before that point.
+ *
+ * Return: the selected bw_ddr value, or 0 when the computed load is 0 or
+ * the table has no rows.
+ */
+static u32 iris_calc_bw(struct iris_inst *inst, struct icc_vote_data *data)
+{
+       const struct bw_info *bw_tbl = NULL;
+       struct iris_core *core = inst->core;
+       u32 num_rows, i, mbs, mbps;
+       u32 icc_bw = 0;
+
+       /* Macroblocks per frame, then macroblocks per second. */
+       mbs = DIV_ROUND_UP(data->height, 16) * DIV_ROUND_UP(data->width, 16);
+       mbps = mbs * data->fps;
+       if (mbps == 0)
+               goto exit;
+
+       bw_tbl = core->iris_platform_data->bw_tbl_dec;
+       num_rows = core->iris_platform_data->bw_tbl_dec_size;
+
+       for (i = 0; i < num_rows; i++) {
+               /*
+                * The i != 0 test keeps row 0 from being skipped, so a load
+                * larger than every row still gets row 0's bandwidth.
+                */
+               if (i != 0 && mbps > bw_tbl[i].mbs_per_sec)
+                       break;
+
+               icc_bw = bw_tbl[i].bw_ddr;
+       }
+
+exit:
+       return icc_bw;
+}
+
+/*
+ * iris_set_interconnects() - apply the summed bandwidth of all instances.
+ * @inst: any instance of the core; only inst->core is used
+ *
+ * With core->lock held, adds up power.icc_bw of every instance on
+ * core->instances that has a non-zero max_input_data_size and hands the
+ * total to iris_set_icc_bw().
+ *
+ * Return: the value returned by iris_set_icc_bw().
+ */
+static int iris_set_interconnects(struct iris_inst *inst)
+{
+       struct iris_core *core = inst->core;
+       struct iris_inst *instance;
+       u64 total_bw_ddr = 0;
+       int ret;
+
+       mutex_lock(&core->lock);
+       list_for_each_entry(instance, &core->instances, list) {
+               /* Instances with no recorded input data do not contribute. */
+               if (!instance->max_input_data_size)
+                       continue;
+
+               total_bw_ddr += instance->power.icc_bw;
+       }
+
+       ret = iris_set_icc_bw(core, total_bw_ddr);
+
+       mutex_unlock(&core->lock);
+
+       return ret;
+}
+
+/*
+ * iris_vote_interconnects() - recompute this instance's bandwidth and
+ * apply the core-wide total.
+ * @inst: instance to evaluate
+ *
+ * Refreshes inst->icc_data from the source format (width, height) with a
+ * fixed DEFAULT_FPS frame rate, stores the looked-up value in
+ * inst->power.icc_bw and then calls iris_set_interconnects().
+ *
+ * Return: the value returned by iris_set_interconnects().
+ */
+static int iris_vote_interconnects(struct iris_inst *inst)
+{
+       struct icc_vote_data *vote_data = &inst->icc_data;
+       struct v4l2_format *inp_f = inst->fmt_src;
+
+       vote_data->width = inp_f->fmt.pix_mp.width;
+       vote_data->height = inp_f->fmt.pix_mp.height;
+       vote_data->fps = DEFAULT_FPS;
+
+       inst->power.icc_bw = iris_calc_bw(inst, vote_data);
+
+       return iris_set_interconnects(inst);
+}
+
+/*
+ * iris_set_clocks() - apply the summed frequency of all instances.
+ * @inst: any instance of the core; only inst->core is used
+ *
+ * With core->lock held, adds up power.min_freq of every instance on
+ * core->instances that has a non-zero max_input_data_size, records the
+ * total in core->power.clk_freq and requests it via dev_pm_opp_set_rate().
+ *
+ * Return: the value returned by dev_pm_opp_set_rate().
+ */
+static int iris_set_clocks(struct iris_inst *inst)
+{
+       struct iris_core *core = inst->core;
+       struct iris_inst *instance;
+       u64 freq = 0;
+       int ret;
+
+       mutex_lock(&core->lock);
+       list_for_each_entry(instance, &core->instances, list) {
+               /* Instances with no recorded input data do not contribute. */
+               if (!instance->max_input_data_size)
+                       continue;
+
+               freq += instance->power.min_freq;
+       }
+
+       /* clk_freq is updated before the result of the rate request is known. */
+       core->power.clk_freq = freq;
+       ret = dev_pm_opp_set_rate(core->dev, freq);
+       mutex_unlock(&core->lock);
+
+       return ret;
+}
+
+/*
+ * iris_scale_clocks() - recompute this instance's frequency need and apply
+ * the core-wide total.
+ * @inst: instance to evaluate
+ *
+ * Takes the largest data_size among the source buffers of the m2m context
+ * and stores it in inst->max_input_data_size. When that value is 0 the
+ * function returns without changing inst->power.min_freq or the clock.
+ * Otherwise the platform's calc_freq hook computes min_freq and
+ * iris_set_clocks() applies the sum over all instances.
+ *
+ * Return: 0 when there is no input data, otherwise the value returned by
+ * iris_set_clocks().
+ */
+static int iris_scale_clocks(struct iris_inst *inst)
+{
+       const struct vpu_ops *vpu_ops = inst->core->iris_platform_data->vpu_ops;
+       struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
+       struct v4l2_m2m_buffer *buffer, *n;
+       struct iris_buffer *buf;
+       size_t data_size = 0;
+
+       /* Largest data_size over the source buffers in the m2m context. */
+       v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buffer, n) {
+               buf = to_iris_buffer(&buffer->vb);
+               data_size = max(data_size, buf->data_size);
+       }
+
+       inst->max_input_data_size = data_size;
+       if (!inst->max_input_data_size)
+               return 0;
+
+       inst->power.min_freq = vpu_ops->calc_freq(inst, inst->max_input_data_size);
+
+       return iris_set_clocks(inst);
+}
+
+/*
+ * iris_scale_power() - update clock rate and interconnect bandwidth.
+ * @inst: instance whose requirements are re-evaluated
+ *
+ * If the core device is runtime-suspended it is resumed first, and the
+ * usage reference taken for that is dropped again with an autosuspend put.
+ * Clocks are scaled before the interconnect vote; a failure in clock
+ * scaling skips the interconnect vote.
+ *
+ * Return: 0 on success, otherwise the first error reported by the resume,
+ * iris_scale_clocks() or iris_vote_interconnects().
+ */
+int iris_scale_power(struct iris_inst *inst)
+{
+       struct iris_core *core = inst->core;
+       int ret;
+
+       if (pm_runtime_suspended(core->dev)) {
+               ret = pm_runtime_resume_and_get(core->dev);
+               if (ret < 0)
+                       return ret;
+
+               /* Drop the reference taken by the resume above. */
+               pm_runtime_put_autosuspend(core->dev);
+       }
+
+       ret = iris_scale_clocks(inst);
+       if (ret)
+               return ret;
+
+       return iris_vote_interconnects(inst);
+}
diff --git a/drivers/media/platform/qcom/iris/iris_power.h b/drivers/media/platform/qcom/iris/iris_power.h
new file mode 100644 (file)
index 0000000..5521266
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __IRIS_POWER_H__
+#define __IRIS_POWER_H__
+
+struct iris_inst;
+
+/*
+ * Re-evaluate the instance's clock and interconnect needs and apply the
+ * core-wide totals. Returns 0 on success or a non-zero error value.
+ */
+int iris_scale_power(struct iris_inst *inst);
+
+#endif
index 712d37723ec3feaf2a78c95c371675fc9878fe22..cdf11feb590b5cb7804db3fcde7282fb1f9f1a1e 100644 (file)
@@ -10,6 +10,7 @@
 #include "iris_instance.h"
 #include "iris_vb2.h"
 #include "iris_vdec.h"
+#include "iris_power.h"
 
 static int iris_check_core_mbpf(struct iris_inst *inst)
 {
@@ -187,6 +188,8 @@ int iris_vb2_start_streaming(struct vb2_queue *q, unsigned int count)
                goto error;
        }
 
+       iris_scale_power(inst);
+
        ret = iris_check_session_supported(inst);
        if (ret)
                goto error;
index 076e3ee7969f857eda911c88288e3e6f5a338d06..4143acedfc5744f16f026ab662e2eb7f714580ac 100644 (file)
@@ -9,6 +9,7 @@
 #include "iris_buffer.h"
 #include "iris_ctrls.h"
 #include "iris_instance.h"
+#include "iris_power.h"
 #include "iris_vdec.h"
 #include "iris_vpu_buffer.h"
 
@@ -360,6 +361,8 @@ static int iris_vdec_process_streamon_input(struct iris_inst *inst)
        enum iris_inst_sub_state set_sub_state = 0;
        int ret;
 
+       iris_scale_power(inst);
+
        ret = hfi_ops->session_start(inst, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
        if (ret)
                return ret;
@@ -427,6 +430,8 @@ static int iris_vdec_process_streamon_output(struct iris_inst *inst)
        enum iris_inst_sub_state clear_sub_state = 0;
        int ret = 0;
 
+       iris_scale_power(inst);
+
        drain_active = inst->sub_state & IRIS_INST_SUB_DRAIN &&
                inst->sub_state & IRIS_INST_SUB_DRAIN_LAST;
 
@@ -573,6 +578,8 @@ int iris_vdec_qbuf(struct iris_inst *inst, struct vb2_v4l2_buffer *vbuf)
                return 0;
        }
 
+       iris_scale_power(inst);
+
        return iris_queue_buffer(inst, buf);
 }
 
index bd842741157600f4fa247118e1863f2cc59a9ba2..8f502aed43ce2fa6a272a2ce14ff1ca54d3e63a2 100644 (file)
@@ -6,6 +6,33 @@
 #include "iris_instance.h"
 #include "iris_vpu_common.h"
 
+/*
+ * iris_vpu2_calc_freq() - estimate the frequency one instance needs.
+ * @inst: instance being evaluated
+ * @data_size: input data size; iris_scale_clocks() passes the largest
+ *             data_size among the queued source buffers
+ *
+ * Uses the larger of the source-format and crop dimensions, a fixed
+ * DEFAULT_FPS frame rate and the platform's per-macroblock cycle counts
+ * (mb_cycles_vpp, mb_cycles_vsp).
+ *
+ * Return: the larger of the VPP and VSP estimates.
+ */
+static u64 iris_vpu2_calc_freq(struct iris_inst *inst, size_t data_size)
+{
+       struct platform_inst_caps *caps = inst->core->iris_platform_data->inst_caps;
+       struct v4l2_format *inp_f = inst->fmt_src;
+       u32 mbs_per_second, mbpf, height, width;
+       unsigned long vpp_freq, vsp_freq;
+       u32 fps = DEFAULT_FPS;
+
+       width = max(inp_f->fmt.pix_mp.width, inst->crop.width);
+       height = max(inp_f->fmt.pix_mp.height, inst->crop.height);
+
+       mbpf = NUM_MBS_PER_FRAME(height, width);
+       mbs_per_second = mbpf * fps;
+
+       /*
+        * NOTE(review): both operands are u32, so this product (and the
+        * mb_cycles_vsp one below) is computed in 32 bits before being
+        * stored in an unsigned long - confirm it cannot wrap for the
+        * supported resolutions and cycle counts.
+        */
+       vpp_freq = mbs_per_second * caps->mb_cycles_vpp;
+
+       /* 21 / 20 is overhead factor */
+       vpp_freq += vpp_freq / 20;
+       vsp_freq = mbs_per_second * caps->mb_cycles_vsp;
+
+       /* 10 / 7 is overhead factor */
+       /* The factor is applied to the bit-rate term (fps * data_size * 8) only. */
+       vsp_freq += ((fps * data_size * 8) * 10) / 7;
+
+       return max(vpp_freq, vsp_freq);
+}
+
 const struct vpu_ops iris_vpu2_ops = {
        .power_off_hw = iris_vpu_power_off_hw,
+       /* Frequency estimate invoked through vpu_ops->calc_freq. */
+       .calc_freq = iris_vpu2_calc_freq,
 };
index 10599f1fa7894d72fc7bffc1477eb42c6c46966d..b484638e6105a69319232f667ee7ae95e3853698 100644 (file)
@@ -79,6 +79,44 @@ disable_power:
        iris_vpu_power_off_hw(core);
 }
 
+/*
+ * iris_vpu3_calculate_frequency() - estimate the cycles one instance needs.
+ * @inst: instance being evaluated
+ * @data_size: input data size; iris_scale_clocks() passes the largest
+ *             data_size among the queued source buffers
+ *
+ * Uses the larger of the source-format and crop dimensions, a fixed
+ * DEFAULT_FPS frame rate, the platform's per-macroblock cycle counts and
+ * the instance's PIPE and STAGE capability values.
+ *
+ * Return: the largest of the VPP, VSP and firmware cycle estimates.
+ */
+static u64 iris_vpu3_calculate_frequency(struct iris_inst *inst, size_t data_size)
+{
+       struct platform_inst_caps *caps = inst->core->iris_platform_data->inst_caps;
+       struct v4l2_format *inp_f = inst->fmt_src;
+       u32 height, width, mbs_per_second, mbpf;
+       u64 fw_cycles, fw_vpp_cycles;
+       u64 vsp_cycles, vpp_cycles;
+       u32 fps = DEFAULT_FPS;
+
+       width = max(inp_f->fmt.pix_mp.width, inst->crop.width);
+       height = max(inp_f->fmt.pix_mp.height, inst->crop.height);
+
+       mbpf = NUM_MBS_PER_FRAME(height, width);
+       mbs_per_second = mbpf * fps;
+
+       fw_cycles = fps * caps->mb_cycles_fw;
+       fw_vpp_cycles = fps * caps->mb_cycles_fw_vpp;
+
+       /*
+        * VPP load scaled by mb_cycles_vpp and divided by the PIPE value.
+        * NOTE(review): presumably PIPE is always >= 1; a value of 0 would
+        * be used as a divisor here - confirm against the capability table.
+        */
+       vpp_cycles = mult_frac(mbs_per_second, caps->mb_cycles_vpp, (u32)inst->fw_caps[PIPE].value);
+       /* 21 / 20 is minimum overhead factor */
+       vpp_cycles += max(div_u64(vpp_cycles, 20), fw_vpp_cycles);
+
+       /* 1.059 is multi-pipe overhead */
+       if (inst->fw_caps[PIPE].value > 1)
+               vpp_cycles += div_u64(vpp_cycles * 59, 1000);
+
+       /* Bits per second (fps * data_size * 8), halved. */
+       vsp_cycles = fps * data_size * 8;
+       vsp_cycles = div_u64(vsp_cycles, 2);
+       /* VSP FW overhead 1.05 */
+       vsp_cycles = div_u64(vsp_cycles * 21, 20);
+
+       /* The VSP estimate is tripled when the STAGE capability is STAGE_1. */
+       if (inst->fw_caps[STAGE].value == STAGE_1)
+               vsp_cycles = vsp_cycles * 3;
+
+       return max3(vpp_cycles, vsp_cycles, fw_cycles);
+}
+
 const struct vpu_ops iris_vpu3_ops = {
        .power_off_hw = iris_vpu3_power_off_hardware,
+       /* Frequency estimate invoked through vpu_ops->calc_freq. */
+       .calc_freq = iris_vpu3_calculate_frequency,
 };
index d3efa7c0ce9a3500d28f133b7146a3067bd0ea49..63fa1fa5a4989e48aebdb6c7619c140000c0b44c 100644 (file)
@@ -13,6 +13,7 @@ extern const struct vpu_ops iris_vpu3_ops;
 
 struct vpu_ops {
        void (*power_off_hw)(struct iris_core *core);
+       /*
+        * Returns the frequency requirement of @inst for the given input
+        * data size; the result is stored in inst->power.min_freq.
+        */
+       u64 (*calc_freq)(struct iris_inst *inst, size_t data_size);
 };
 
 int iris_vpu_boot_firmware(struct iris_core *core);