riscv: Implement HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
Author:     Puranjay Mohan <puranjay12@gmail.com>
AuthorDate: Mon, 7 Apr 2025 18:08:34 +0000 (02:08 +0800)
Commit:     Palmer Dabbelt <palmer@dabbelt.com>
CommitDate: Thu, 5 Jun 2025 18:09:30 +0000 (11:09 -0700)
This patch enables support for DYNAMIC_FTRACE_WITH_CALL_OPS on RISC-V.
Each ftrace callsite now provides an ftrace_ops to the common ftrace
trampoline, so each callsite can invoke a distinct tracer function
without falling back to list processing or allocating a custom
trampoline per callsite. This significantly speeds up cases where
multiple distinct trace functions are used and callsites are mostly
traced by a single tracer.
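
As an illustration (not part of this patch: tracer_a, ops_a and
attach_tracer_a are hypothetical names, while ftrace_set_filter_ip()
and register_ftrace_function() are the standard ftrace API), the kind
of setup that benefits is several tracers, each filtered to its own
function:

  #include <linux/ftrace.h>

  /*
   * Sketch: one tracer attached to a single function. A second tracer
   * (ops_b/tracer_b) would be set up the same way on another function;
   * with CALL_OPS each callsite then calls its own tracer directly
   * instead of walking the shared ops list.
   */
  static void tracer_a(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *op, struct ftrace_regs *fregs)
  {
          /* per-callsite tracing work for function A */
  }

  static struct ftrace_ops ops_a = { .func = tracer_a };

  static int attach_tracer_a(unsigned long addr_a)
  {
          int ret;

          /* Restrict ops_a to the single callsite at addr_a... */
          ret = ftrace_set_filter_ip(&ops_a, addr_a, 0, 0);
          if (ret)
                  return ret;

          /* ...then enable it. */
          return register_ftrace_function(&ops_a);
  }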

The idea and most of the implementation are taken from arm64's
implementation of the same feature: a pointer to the ftrace_ops is
placed as a literal at a fixed offset from the function entry point,
where the common ftrace trampoline can recover it.

We use -fpatchable-function-entry to reserve 8 bytes before the
function entry by emitting two 4-byte NOPs or four 2-byte NOPs,
depending on the presence of CONFIG_RISCV_ISA_C. These 8 bytes are
patched at runtime with a pointer to the associated ftrace_ops for that
callsite. Functions are aligned to 8 bytes to make sure that accesses
to this literal are atomic.
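
Concretely, with CONFIG_RISCV_ISA_C=y this means
-fpatchable-function-entry=8,4 (eight 2-byte NOPs in total, four of
them, i.e. 8 bytes, before the entry point; without RVC it is =4,2
with 4-byte NOPs), so a traced function's entry looks roughly like
this sketch:

  /*
   * func - 8:  8 bytes of NOPs, patched at runtime to hold the
   *            ftrace_ops pointer; the 8-byte function alignment
   *            keeps loads/stores of this literal atomic.
   * func + 0:  nop   <- patched to "auipc t0" when tracing is enabled
   * func + 4:  nop   <- patched to "jalr t0" when tracing is enabled
   */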

This approach allows ftrace_ops::func to be invoked directly, even for
ftrace_ops that are dynamically allocated (or part of a module), without
going via ftrace_ops_list_func.
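
In C terms, the dispatch performed by the common trampoline is roughly
the following (an illustrative sketch only: the real dispatch is the
assembly in mcount-dyn.S below, and trampoline_dispatch() is a
hypothetical name):

  #include <linux/ftrace.h>

  static void trampoline_dispatch(unsigned long entry,     /* traced function entry */
                                  unsigned long parent_ip, /* return address in caller */
                                  struct ftrace_regs *fregs)
  {
          /* The ftrace_ops pointer was patched into the 8-byte literal
           * sitting just before the function entry. */
          struct ftrace_ops *op = *(struct ftrace_ops **)(entry - 8);

          /* Call the tracer directly: no ops-list walk, no custom
           * per-callsite trampoline. (The real assembly computes the
           * callsite ip from t0; see PREPARE_ARGS below.) */
          op->func(entry, parent_ip, op, fregs);
  }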

We've benchmarked this with the ftrace_ops sample module on a SpacemiT
K1 Jupiter:

Without this patch:

baseline (Linux rivos 6.14.0-09584-g7d06015d936c #3 SMP Sat Mar 29
+-----------------------+-----------------+----------------------------+
|  Number of tracers    | Total time (ns) | Per-call average time      |
|-----------------------+-----------------+----------------------------|
| Relevant | Irrelevant |    100000 calls | Total (ns) | Overhead (ns) |
|----------+------------+-----------------+------------+---------------|
|        0 |          0 |         1357958 |         13 |             - |
|        0 |          1 |         1302375 |         13 |             - |
|        0 |          2 |         1302375 |         13 |             - |
|        0 |         10 |         1379084 |         13 |             - |
|        0 |        100 |         1302458 |         13 |             - |
|        0 |        200 |         1302333 |         13 |             - |
|----------+------------+-----------------+------------+---------------|
|        1 |          0 |        13677833 |        136 |           123 |
|        1 |          1 |        18500916 |        185 |           172 |
|        1 |          2 |        22856459 |        228 |           215 |
|        1 |         10 |        58824709 |        588 |           575 |
|        1 |        100 |       505141584 |       5051 |          5038 |
|        1 |        200 |      1580473126 |      15804 |         15791 |
|----------+------------+-----------------+------------+---------------|
|        1 |          0 |        13561000 |        135 |           122 |
|        2 |          0 |        19707292 |        197 |           184 |
|       10 |          0 |        67774750 |        677 |           664 |
|      100 |          0 |       714123125 |       7141 |          7128 |
|      200 |          0 |      1918065668 |      19180 |         19167 |
+----------+------------+-----------------+------------+---------------+

Note: per-call overhead is estimated relative to the baseline case with
0 relevant tracers and 0 irrelevant tracers.

With this patch:

v4-rc4 (Linux rivos 6.14.0-09598-gd75747611c93 #4 SMP Sat Mar 29
+-----------------------+-----------------+----------------------------+
|  Number of tracers    | Total time (ns) | Per-call average time      |
|-----------------------+-----------------+----------------------------|
| Relevant | Irrelevant |    100000 calls | Total (ns) | Overhead (ns) |
|----------+------------+-----------------+------------+---------------|
|        0 |          0 |         1459917 |         14 |             - |
|        0 |          1 |         1408000 |         14 |             - |
|        0 |          2 |         1383792 |         13 |             - |
|        0 |         10 |         1430709 |         14 |             - |
|        0 |        100 |         1383791 |         13 |             - |
|        0 |        200 |         1383750 |         13 |             - |
|----------+------------+-----------------+------------+---------------|
|        1 |          0 |         5238041 |         52 |            38 |
|        1 |          1 |         5228542 |         52 |            38 |
|        1 |          2 |         5325917 |         53 |            40 |
|        1 |         10 |         5299667 |         52 |            38 |
|        1 |        100 |         5245250 |         52 |            39 |
|        1 |        200 |         5238459 |         52 |            39 |
|----------+------------+-----------------+------------+---------------|
|        1 |          0 |         5239083 |         52 |            38 |
|        2 |          0 |        19449417 |        194 |           181 |
|       10 |          0 |        67718584 |        677 |           663 |
|      100 |          0 |       709840708 |       7098 |          7085 |
|      200 |          0 |      2203580626 |      22035 |         22022 |
+----------+------------+-----------------+------------+---------------+

Note: per-call overhead is estimated relative to the baseline case with
0 relevant tracers and 0 irrelevant tracers.

As can be seen from the above:

 a) Whenever there is a single relevant tracer function associated with a
    tracee, the overhead of invoking the tracer is constant, and does not
    scale with the number of tracers which are *not* associated with that
    tracee.

 b) The overhead for a single relevant tracer has dropped to ~1/3 of the
    overhead prior to this series (from 122ns to 38ns). This is largely
    due to permitting calls to dynamically-allocated ftrace_ops without
    going through ftrace_ops_list_func.

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
[update kconfig, asm, refactor]
Signed-off-by: Andy Chiu <andybnac@gmail.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Link: https://lore.kernel.org/r/20250407180838.42877-10-andybnac@gmail.com
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
arch/riscv/Kconfig
arch/riscv/Makefile
arch/riscv/kernel/asm-offsets.c
arch/riscv/kernel/ftrace.c
arch/riscv/kernel/mcount-dyn.S

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index dc0fc11b6e96270ca8517a0b076a5e4fec91c7ca..ec986c9120e3e9945703229d7db4c524256fedf4 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -99,6 +99,7 @@ config RISCV
        select EDAC_SUPPORT
        select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
        select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
+       select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS
        select GENERIC_ARCH_TOPOLOGY
        select GENERIC_ATOMIC64 if !64BIT
        select GENERIC_CLOCKEVENTS_BROADCAST if SMP
@@ -152,6 +153,7 @@ config RISCV
        select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
        select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C
        select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+       select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG)
        select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE
        select HAVE_FTRACE_GRAPH_FUNC
        select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 539d2aef5cab98c72289b8e3d153689a32fea595..df57654a615e0010f9ec3cf2843751f592357692 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -15,9 +15,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
        LDFLAGS_vmlinux += --no-relax
        KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
 ifeq ($(CONFIG_RISCV_ISA_C),y)
-       CC_FLAGS_FTRACE := -fpatchable-function-entry=4
+       CC_FLAGS_FTRACE := -fpatchable-function-entry=8,4
 else
-       CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+       CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2
 endif
 endif
 
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 7c43c8e26ae7f72a8445195bd785c5c5875f7cd5..2d96197a8abfb50ec552cc2b2e27a035311833f2 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -493,6 +493,9 @@ void asm_offsets(void)
        DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
        OFFSET(STACKFRAME_FP, stackframe, fp);
        OFFSET(STACKFRAME_RA, stackframe, ra);
+#ifdef CONFIG_FUNCTION_TRACER
+       DEFINE(FTRACE_OPS_FUNC,         offsetof(struct ftrace_ops, func));
+#endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
        DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN));
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index b133c60808fe0141b028b818a4d4d91f5f5305b9..d56fc6e9fba091e53da095ab9f684ac5e6083cc5 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -16,6 +16,9 @@
 #ifdef CONFIG_DYNAMIC_FTRACE
 unsigned long ftrace_call_adjust(unsigned long addr)
 {
+       if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+               return addr + 8;
+
        return addr + MCOUNT_AUIPC_SIZE;
 }
 
@@ -64,9 +67,52 @@ static int __ftrace_modify_call(unsigned long source, unsigned long target, bool
        return 0;
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec)
+{
+       const struct ftrace_ops *ops = NULL;
+
+       if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+               ops = ftrace_find_unique_ops(rec);
+               WARN_ON_ONCE(!ops);
+       }
+
+       if (!ops)
+               ops = &ftrace_list_ops;
+
+       return ops;
+}
+
+static int ftrace_rec_set_ops(const struct dyn_ftrace *rec,
+                             const struct ftrace_ops *ops)
+{
+       unsigned long literal = rec->ip - 8;
+
+       return patch_text_nosync((void *)literal, &ops, sizeof(ops));
+}
+
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec)
+{
+       return ftrace_rec_set_ops(rec, &ftrace_nop_ops);
+}
+
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec)
+{
+       return ftrace_rec_set_ops(rec, riscv64_rec_get_ops(rec));
+}
+#else
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; }
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; }
+#endif
+
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
        unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE;
+       int ret;
+
+       ret = ftrace_rec_update_ops(rec);
+       if (ret)
+               return ret;
 
        orig_addr = (unsigned long)&ftrace_caller;
        distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr;
@@ -79,6 +125,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
 {
        u32 nop4 = RISCV_INSN_NOP4;
+       int ret;
+
+       ret = ftrace_rec_set_nop_ops(rec);
+       if (ret)
+               return ret;
 
        if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE))
                return -EPERM;
@@ -99,6 +150,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
        unsigned int nops[2], offset;
        int ret;
 
+       ret = ftrace_rec_set_nop_ops(rec);
+       if (ret)
+               return ret;
+
        offset = (unsigned long) &ftrace_caller - pc;
        nops[0] = to_auipc_t0(offset);
        nops[1] = RISCV_INSN_NOP4;
@@ -113,6 +168,13 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
 ftrace_func_t ftrace_call_dest = ftrace_stub;
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
+       /*
+        * When using CALL_OPS, the function to call is associated with the
+        * call site, and we don't have a global function pointer to update.
+        */
+       if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+               return 0;
+
        WRITE_ONCE(ftrace_call_dest, func);
        /*
         * The data fence ensure that the update to ftrace_call_dest happens
@@ -143,8 +205,13 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 {
        unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE;
        unsigned int call[2];
+       int ret;
 
        make_call_t0(caller, old_addr, call);
+       ret = ftrace_rec_update_ops(rec);
+       if (ret)
+               return ret;
+
        return __ftrace_modify_call(caller, addr, true);
 }
 #endif
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 8aa554d5609626e1c7d77c89f64df7b39ae8cd24..699684eea7f0b16c9aab56a135d9350d3cdce97a 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
 
        .macro PREPARE_ARGS
        addi    a0, t0, -MCOUNT_JALR_SIZE       // ip (callsite's jalr insn)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+       /*
+        * When CALL_OPS is enabled (2 or 4) nops [8B] are placed before the
+        * function entry, these are later overwritten with the pointer to the
+        * associated struct ftrace_ops.
+        *
+        * -8: &ftrace_ops of the associated tracer function.
+        *<ftrace enable>:
+        *  0: auipc  t0/ra, 0x?
+        *  4: jalr   t0/ra, ?(t0/ra)
+        *
+        * -8: &ftrace_nop_ops
+        *<ftrace disable>:
+        *  0: nop
+        *  4: nop
+        *
+        * t0 is set to ip+8 after the jalr is executed at the callsite,
+        * so we find the associated op at t0-16.
+        */
+       mv      a1, ra                          // parent_ip
+       REG_L   a2, -16(t0)                     // op
+       REG_L   ra, FTRACE_OPS_FUNC(a2)         // op->func
+#else
        la      a1, function_trace_op
-       REG_L   a2, 0(a1)
-       mv      a1, ra
-       mv      a3, sp
+       REG_L   a2, 0(a1)                       // op
+       mv      a1, ra                          // parent_ip
+#endif
+       mv      a3, sp                          // regs
        .endm
 
 SYM_FUNC_START(ftrace_caller)
@@ -150,10 +174,13 @@ SYM_FUNC_START(ftrace_caller)
        SAVE_ABI_REGS
        PREPARE_ARGS
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+       jalr    ra
+#else
 SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
        REG_L   ra, ftrace_call_dest
        jalr    ra, 0(ra)
-
+#endif
        RESTORE_ABI_REGS
        bnez    t1, .Ldirect
        jr      t0