Patch series "Fix calculations in trace_balance_dirty_pages() for cgwb", v2.
In my experiment, I found that the output of trace_balance_dirty_pages()
in the cgroup writeback scenario was strange because
trace_balance_dirty_pages() always uses global_wb_domain.dirty_limit for
related calculations instead of the dirty_limit of the corresponding
memcg's wb_domain.
The basic idea of the fix is to store the hard dirty limit value computed
in wb_position_ratio() into struct dirty_throttle_control and use it for
calculations in trace_balance_dirty_pages().
This patch (of 3):
Currently, trace_balance_dirty_pages() already has 12 parameters. In the
patch #3, I initially attempted to introduce an additional parameter.
However, in include/linux/trace_events.h, bpf_trace_run12() only supports
up to 12 parameters and bpf_trace_run13() does not exist.
To reduce the number of parameters in trace_balance_dirty_pages(), we can
make it accept a pointer to struct dirty_throttle_control as a parameter.
To achieve this, we need to move the definition of struct
dirty_throttle_control from mm/page-writeback.c to
include/linux/writeback.h.
Link: https://lkml.kernel.org/r/20250304110318.159567-1-yizhou.tang@shopee.com
Link: https://lkml.kernel.org/r/20250304110318.159567-2-yizhou.tang@shopee.com
Signed-off-by: Tang Yizhou <yizhou.tang@shopee.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jan Kara <jack@suse.cz>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Matthew Wilcow (Oracle) <willy@infradead.org>
Cc: Tang Yizhou <yizhou.tang@shopee.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/*
* mm/page-writeback.c
*/
+/* consolidated parameters for balance_dirty_pages() and its subroutines */
+struct dirty_throttle_control {
+#ifdef CONFIG_CGROUP_WRITEBACK
+ struct wb_domain *dom;
+ struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */
+#endif
+ struct bdi_writeback *wb;
+ struct fprop_local_percpu *wb_completions;
+
+ unsigned long avail; /* dirtyable */
+ unsigned long dirty; /* file_dirty + write + nfs */
+ unsigned long thresh; /* dirty threshold */
+ unsigned long bg_thresh; /* dirty background threshold */
+
+ unsigned long wb_dirty; /* per-wb counterparts */
+ unsigned long wb_thresh;
+ unsigned long wb_bg_thresh;
+
+ unsigned long pos_ratio;
+ bool freerun;
+ bool dirty_exceeded;
+};
+
void laptop_io_completion(struct backing_dev_info *info);
void laptop_sync_completion(void);
void laptop_mode_timer_fn(struct timer_list *t);
TRACE_EVENT(balance_dirty_pages,
TP_PROTO(struct bdi_writeback *wb,
- unsigned long thresh,
- unsigned long bg_thresh,
- unsigned long dirty,
- unsigned long bdi_thresh,
- unsigned long bdi_dirty,
+ struct dirty_throttle_control *dtc,
unsigned long dirty_ratelimit,
unsigned long task_ratelimit,
unsigned long dirtied,
long pause,
unsigned long start_time),
- TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
+ TP_ARGS(wb, dtc,
dirty_ratelimit, task_ratelimit,
dirtied, period, pause, start_time),
),
TP_fast_assign(
- unsigned long freerun = (thresh + bg_thresh) / 2;
+ unsigned long freerun = (dtc->thresh + dtc->bg_thresh) / 2;
strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
freerun) / 2;
- __entry->dirty = dirty;
+ __entry->dirty = dtc->dirty;
__entry->bdi_setpoint = __entry->setpoint *
- bdi_thresh / (thresh + 1);
- __entry->bdi_dirty = bdi_dirty;
+ dtc->wb_thresh / (dtc->thresh + 1);
+ __entry->bdi_dirty = dtc->wb_dirty;
__entry->dirty_ratelimit = KBps(dirty_ratelimit);
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->dirtied = dirtied;
struct wb_domain global_wb_domain;
-/* consolidated parameters for balance_dirty_pages() and its subroutines */
-struct dirty_throttle_control {
-#ifdef CONFIG_CGROUP_WRITEBACK
- struct wb_domain *dom;
- struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */
-#endif
- struct bdi_writeback *wb;
- struct fprop_local_percpu *wb_completions;
-
- unsigned long avail; /* dirtyable */
- unsigned long dirty; /* file_dirty + write + nfs */
- unsigned long thresh; /* dirty threshold */
- unsigned long bg_thresh; /* dirty background threshold */
-
- unsigned long wb_dirty; /* per-wb counterparts */
- unsigned long wb_thresh;
- unsigned long wb_bg_thresh;
-
- unsigned long pos_ratio;
- bool freerun;
- bool dirty_exceeded;
-};
-
/*
* Length of period for aging writeout fractions of bdis. This is an
* arbitrarily chosen number. The longer the period, the slower fractions will
*/
if (pause < min_pause) {
trace_balance_dirty_pages(wb,
- sdtc->thresh,
- sdtc->bg_thresh,
- sdtc->dirty,
- sdtc->wb_thresh,
- sdtc->wb_dirty,
+ sdtc,
dirty_ratelimit,
task_ratelimit,
pages_dirtied,
pause:
trace_balance_dirty_pages(wb,
- sdtc->thresh,
- sdtc->bg_thresh,
- sdtc->dirty,
- sdtc->wb_thresh,
- sdtc->wb_dirty,
+ sdtc,
dirty_ratelimit,
task_ratelimit,
pages_dirtied,