This closely resembles helpers added for the global cgroup_rstat_lock in
commit fc29e04ae1 ("cgroup/rstat: add cgroup_rstat_lock helpers and
tracepoints"). This is for the per CPU lock cgroup_rstat_cpu_lock.
Based on production workloads, we observe the fast-path "update" function
cgroup_rstat_updated() is invoked around 3 million times per sec, while the
"flush" function cgroup_rstat_flush_locked(), walking each possible CPU,
can see periodic spikes of 700 invocations/sec.
For this reason, the tracepoints are split into normal and fastpath
versions for this per-CPU lock. This makes it feasible for production
systems to continuously monitor the non-fastpath tracepoint to detect lock
contention issues. Monitoring matters because the lock disables IRQs,
which can disturb e.g. softirq processing on the local CPUs involved. When
the global cgroup_rstat_lock stops disabling IRQs (e.g. when converted to
a mutex), this per-CPU lock becomes the next bottleneck that can introduce
latency variations.
A practical bpftrace script for monitoring contention latency:
bpftrace -e '
tracepoint:cgroup:cgroup_rstat_cpu_lock_contended {
@start[tid]=nsecs; @cnt[probe]=count()}
tracepoint:cgroup:cgroup_rstat_cpu_locked {
if (args->contended) {
@wait_ns=hist(nsecs-@start[tid]); delete(@start[tid]);}
@cnt[probe]=count()}
interval:s:1 {time("%H:%M:%S "); print(@wait_ns); print(@cnt); clear(@cnt);}'
Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
303 lines · 6.3 KiB · C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#undef TRACE_SYSTEM
|
|
#define TRACE_SYSTEM cgroup
|
|
|
|
#if !defined(_TRACE_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ)
|
|
#define _TRACE_CGROUP_H
|
|
|
|
#include <linux/cgroup.h>
|
|
#include <linux/tracepoint.h>
|
|
|
|
DECLARE_EVENT_CLASS(cgroup_root,
|
|
|
|
TP_PROTO(struct cgroup_root *root),
|
|
|
|
TP_ARGS(root),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, root )
|
|
__field( u16, ss_mask )
|
|
__string( name, root->name )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->root = root->hierarchy_id;
|
|
__entry->ss_mask = root->subsys_mask;
|
|
__assign_str(name, root->name);
|
|
),
|
|
|
|
TP_printk("root=%d ss_mask=%#x name=%s",
|
|
__entry->root, __entry->ss_mask, __get_str(name))
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_root, cgroup_setup_root,
|
|
|
|
TP_PROTO(struct cgroup_root *root),
|
|
|
|
TP_ARGS(root)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_root, cgroup_destroy_root,
|
|
|
|
TP_PROTO(struct cgroup_root *root),
|
|
|
|
TP_ARGS(root)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_root, cgroup_remount,
|
|
|
|
TP_PROTO(struct cgroup_root *root),
|
|
|
|
TP_ARGS(root)
|
|
);
|
|
|
|
DECLARE_EVENT_CLASS(cgroup,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
TP_ARGS(cgrp, path),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, root )
|
|
__field( int, level )
|
|
__field( u64, id )
|
|
__string( path, path )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->root = cgrp->root->hierarchy_id;
|
|
__entry->id = cgroup_id(cgrp);
|
|
__entry->level = cgrp->level;
|
|
__assign_str(path, path);
|
|
),
|
|
|
|
TP_printk("root=%d id=%llu level=%d path=%s",
|
|
__entry->root, __entry->id, __entry->level, __get_str(path))
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_mkdir,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
TP_ARGS(cgrp, path)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_rmdir,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
TP_ARGS(cgrp, path)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_release,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
TP_ARGS(cgrp, path)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_rename,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
TP_ARGS(cgrp, path)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_freeze,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
TP_ARGS(cgrp, path)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_unfreeze,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
TP_ARGS(cgrp, path)
|
|
);
|
|
|
|
DECLARE_EVENT_CLASS(cgroup_migrate,
|
|
|
|
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
|
|
struct task_struct *task, bool threadgroup),
|
|
|
|
TP_ARGS(dst_cgrp, path, task, threadgroup),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, dst_root )
|
|
__field( int, dst_level )
|
|
__field( u64, dst_id )
|
|
__field( int, pid )
|
|
__string( dst_path, path )
|
|
__string( comm, task->comm )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->dst_root = dst_cgrp->root->hierarchy_id;
|
|
__entry->dst_id = cgroup_id(dst_cgrp);
|
|
__entry->dst_level = dst_cgrp->level;
|
|
__assign_str(dst_path, path);
|
|
__entry->pid = task->pid;
|
|
__assign_str(comm, task->comm);
|
|
),
|
|
|
|
TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s",
|
|
__entry->dst_root, __entry->dst_id, __entry->dst_level,
|
|
__get_str(dst_path), __entry->pid, __get_str(comm))
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
|
|
|
|
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
|
|
struct task_struct *task, bool threadgroup),
|
|
|
|
TP_ARGS(dst_cgrp, path, task, threadgroup)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
|
|
|
|
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
|
|
struct task_struct *task, bool threadgroup),
|
|
|
|
TP_ARGS(dst_cgrp, path, task, threadgroup)
|
|
);
|
|
|
|
DECLARE_EVENT_CLASS(cgroup_event,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
|
|
|
|
TP_ARGS(cgrp, path, val),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, root )
|
|
__field( int, level )
|
|
__field( u64, id )
|
|
__string( path, path )
|
|
__field( int, val )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->root = cgrp->root->hierarchy_id;
|
|
__entry->id = cgroup_id(cgrp);
|
|
__entry->level = cgrp->level;
|
|
__assign_str(path, path);
|
|
__entry->val = val;
|
|
),
|
|
|
|
TP_printk("root=%d id=%llu level=%d path=%s val=%d",
|
|
__entry->root, __entry->id, __entry->level, __get_str(path),
|
|
__entry->val)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_event, cgroup_notify_populated,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
|
|
|
|
TP_ARGS(cgrp, path, val)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
|
|
|
|
TP_ARGS(cgrp, path, val)
|
|
);
|
|
|
|
DECLARE_EVENT_CLASS(cgroup_rstat,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, root )
|
|
__field( int, level )
|
|
__field( u64, id )
|
|
__field( int, cpu )
|
|
__field( bool, contended )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->root = cgrp->root->hierarchy_id;
|
|
__entry->id = cgroup_id(cgrp);
|
|
__entry->level = cgrp->level;
|
|
__entry->cpu = cpu;
|
|
__entry->contended = contended;
|
|
),
|
|
|
|
TP_printk("root=%d id=%llu level=%d cpu=%d lock contended:%d",
|
|
__entry->root, __entry->id, __entry->level,
|
|
__entry->cpu, __entry->contended)
|
|
);
|
|
|
|
/* Related to global: cgroup_rstat_lock */
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_locked,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
/* Related to per CPU: cgroup_rstat_cpu_lock */
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended_fastpath,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked_fastpath,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
|
|
|
|
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
|
|
|
|
TP_ARGS(cgrp, cpu, contended)
|
|
);
|
|
|
|
#endif /* _TRACE_CGROUP_H */
|
|
|
|
/* This part must be outside protection */
|
|
#include <trace/define_trace.h>
|