From c763ba06bd9b5db2c46c36276c89103d92d2c604 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:22 +1100 Subject: tracing/syscalls: Make arch_syscall_addr weak Some architectures use non-trivial system call tables and will not work with the generic arch_syscall_addr code. For example, PowerPC64 uses a table of twin long longs. This patch makes the generic arch_syscall_addr weak to allow architectures with non-trivial system call tables to override it. Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-4-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation/trace') diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index dc52bd442c92..6fca17beee2f 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -247,6 +247,9 @@ You need very few things to get the syscalls tracing in an arch. - Support the TIF_SYSCALL_TRACEPOINT thread flags. - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace in the ptrace syscalls tracing path. +- If the system call table on this arch is more complicated than a simple array + of addresses of the system calls, implement an arch_syscall_addr to return + the address of a given system call. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. -- cgit v1.2.3 From b2d55496818d64310b9f5486d4eea76ea614d7f8 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:23 +1100 Subject: tracing/syscalls: Allow arch specific syscall symbol matching Some architectures have unusual symbol names and the generic code to match the symbol name with the function name for the syscall metadata will fail. For example, symbols on PPC64 start with a period and the generic code will fail to match them. 
This patch moves the match logic out into a separate function which an arch can override by defining ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and implementing arch_syscall_match_sym_name. Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-5-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 4 ++++ kernel/trace/trace_syscalls.c | 21 ++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'Documentation/trace') diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 6fca17beee2f..79fcafc7fd64 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -250,6 +250,10 @@ You need very few things to get the syscalls tracing in an arch. - If the system call table on this arch is more complicated than a simple array of addresses of the system calls, implement an arch_syscall_addr to return the address of a given system call. +- If the symbol names of the system calls do not match the function names on + this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and + implement arch_syscall_match_sym_name with the appropriate logic to return + true if the function name corresponds with the symbol name. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index af831545f656..86a23e7de031 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -60,6 +60,19 @@ extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; +#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. 
+ */ + return !strcmp(sym + 3, name + 3); +} +#endif + static __init struct syscall_metadata * find_syscall_meta(unsigned long syscall) { @@ -73,13 +86,7 @@ find_syscall_meta(unsigned long syscall) kallsyms_lookup(syscall, NULL, NULL, NULL, str); for ( ; start < stop; start++) { - /* - * Only compare after the "sys" prefix. Archs that use - * syscall wrappers may have syscalls symbols aliases prefixed - * with "SyS" instead of "sys", leading to an unwanted - * mismatch. - */ - if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) + if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name)) return *start; } return NULL; -- cgit v1.2.3 From 87d80de2800d087ea833cb79bc13f85ff34ed49f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Feb 2011 13:19:49 -0500 Subject: tracing: Remove obsolete sched_switch tracer The trace events sched_switch and sched_wakeup do the same thing as the stand alone sched_switch tracer does. It is no longer needed. Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 110 -------------------------------------- kernel/trace/trace_sched_switch.c | 48 ----------------- 2 files changed, 158 deletions(-) (limited to 'Documentation/trace') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 557c1edeccaf..65eddb7cfa02 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -202,10 +202,6 @@ Here is the list of current tracers that may be configured. to draw a graph of function calls similar to C code source. - "sched_switch" - - Traces the context switches and wakeups between tasks. - "irqsoff" Traces the areas that disable interrupts and saves @@ -273,39 +269,6 @@ format, the function name that was traced "path_put" and the parent function that called this function "path_walk". The timestamp is the time at which the function was entered. -The sched_switch tracer also includes tracing of task wakeups -and context switches. 
- - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S - ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R - events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R - kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R - ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R - -Wake ups are represented by a "+" and the context switches are -shown as "==>". The format is: - - Context switches: - - Previous task Next Task - - <pid>:<prio>:<state> ==> <pid>:<prio>:<state> - - Wake ups: - - Current task Task waking up - - <pid>:<prio>:<state> + <pid>:<prio>:<state> - -The prio is the internal kernel priority, which is the inverse -of the priority that is usually displayed by user-space tools. -Zero represents the highest priority (99). Prio 100 starts the -"nice" priorities with 100 being equal to nice -20 and 139 being -nice 19. The prio "140" is reserved for the idle task which is -the lowest priority thread (pid 0). - - Latency trace format -------------------- @@ -491,79 +454,6 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] latencies, as described in "Latency trace format". -sched_switch ------------- - -This tracer simply records schedule switches. Here is an example -of how to use it. 
- - # echo sched_switch > current_tracer - # echo 1 > tracing_enabled - # sleep 1 - # echo 0 > tracing_enabled - # cat trace - -# tracer: sched_switch -# -# TASK-PID CPU# TIMESTAMP FUNCTION -# | | | | | - bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R - bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R - sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R - bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S - bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R - sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R - bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D - bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R - <idle>-0 [00] 240.132589: 0:140:R + 4:115:S - <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R - ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R - <idle>-0 [00] 240.132598: 0:140:R + 4:115:S - <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R - ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R - sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R - [...] - - -As we have discussed previously about this format, the header -shows the name of the trace and points to the options. The -"FUNCTION" is a misnomer since here it represents the wake ups -and context switches. - -The sched_switch file only lists the wake ups (represented with -'+') and context switches ('==>') with the previous task or -current task first followed by the next task or task waking up. -The format for both of these is PID:KERNEL-PRIO:TASK-STATE. -Remember that the KERNEL-PRIO is the inverse of the actual -priority with zero (0) being the highest priority and the nice -values starting at 100 (nice -20). Below is a quick chart to map -the kernel priority to user land priorities. 
- - Kernel Space User Space - =============================================================== - 0(high) to 98(low) user RT priority 99(high) to 1(low) - with SCHED_RR or SCHED_FIFO - --------------------------------------------------------------- - 99 sched_priority is not used in scheduling - decisions(it must be specified as 0) - --------------------------------------------------------------- - 100(high) to 139(low) user nice -20(high) to 19(low) - --------------------------------------------------------------- - 140 idle task priority - --------------------------------------------------------------- - -The task states are: - - R - running : wants to run, may not actually be running - S - sleep : process is waiting to be woken up (handles signals) - D - disk sleep (uninterruptible sleep) : process must be woken up - (ignores signals) - T - stopped : process suspended - t - traced : process is being traced (with something like gdb) - Z - zombie : process waiting to be cleaned up - X - unknown - - ftrace_enabled -------------- diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8f758d070c43..7e62c0a18456 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -247,51 +247,3 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr) ctx_trace = tr; } -static void stop_sched_trace(struct trace_array *tr) -{ - tracing_stop_sched_switch_record(); -} - -static int sched_switch_trace_init(struct trace_array *tr) -{ - ctx_trace = tr; - tracing_reset_online_cpus(tr); - tracing_start_sched_switch_record(); - return 0; -} - -static void sched_switch_trace_reset(struct trace_array *tr) -{ - if (sched_ref) - stop_sched_trace(tr); -} - -static void sched_switch_trace_start(struct trace_array *tr) -{ - sched_stopped = 0; -} - -static void sched_switch_trace_stop(struct trace_array *tr) -{ - sched_stopped = 1; -} - -static struct tracer sched_switch_trace __read_mostly = -{ - .name = "sched_switch", - 
.init = sched_switch_trace_init, - .reset = sched_switch_trace_reset, - .start = sched_switch_trace_start, - .stop = sched_switch_trace_stop, - .wait_pipe = poll_wait_pipe, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_sched_switch, -#endif -}; - -__init static int init_sched_switch_trace(void) -{ - return register_tracer(&sched_switch_trace); -} -device_initcall(init_sched_switch_trace); - -- cgit v1.2.3 From 6752ab4a9c30d5411b2dfdb251a3f1cb18aae487 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Feb 2011 13:54:06 -0500 Subject: tracing: Deprecate tracing_enabled for tracing_on tracing_enabled should not be used, it is heavy weight and does not do much in helping lower the overhead. tracing_on should be used instead. Warn users to use tracing_on when tracing_enabled is used as it will soon be removed from the tracing directory. Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 38 +++++++++++++++++++------------------- kernel/trace/trace.c | 4 ++++ 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'Documentation/trace') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 65eddb7cfa02..67f1cc473257 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -80,11 +80,11 @@ of ftrace. Here is a list of some of the key files: tracers listed here can be configured by echoing their name into current_tracer. - tracing_enabled: + tracing_on: - This sets or displays whether the current_tracer - is activated and tracing or not. Echo 0 into this - file to disable the tracer or 1 to enable it. + This sets or displays whether writing to the trace + ring buffer is enabled. Echo 0 into this file to disable + the tracer or 1 to enable it. trace: @@ -497,10 +497,10 @@ an example: # echo irqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] 
- # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: irqsoff # @@ -605,10 +605,10 @@ is much like the irqsoff tracer. # echo preemptoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: preemptoff # @@ -753,10 +753,10 @@ tracers. # echo preemptirqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: preemptirqsoff # @@ -916,9 +916,9 @@ Instead of performing an 'ls', we will run 'sleep 1' under # echo wakeup > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # chrt -f 5 sleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: wakeup # @@ -1030,9 +1030,9 @@ ftrace_enabled is set; otherwise this tracer is a nop. # sysctl kernel.ftrace_enabled=1 # echo function > current_tracer - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: function # @@ -1070,7 +1070,7 @@ int trace_fd; [...] int main(int argc, char *argv[]) { [...] - trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); + trace_fd = open(tracing_file("tracing_on"), O_WRONLY); [...] if (condition_hit()) { write(trace_fd, "0", 1); @@ -1521,9 +1521,9 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt: # echo sys_nanosleep hrtimer_interrupt \ > set_ftrace_filter # echo function > current_tracer - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: ftrace # @@ -1769,9 +1769,9 @@ different. The trace is live. 
# echo function > current_tracer # cat trace_pipe > /tmp/trace.out & [1] 4153 - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: function # diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dc53ecb80589..8dc8da6733f9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2710,6 +2710,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); if (tracer_enabled ^ val) { + + /* Only need to warn if this is used to change the state */ + WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on"); + if (val) { tracer_enabled = 1; if (current_trace->start) -- cgit v1.2.3