From cf7905165fee15fe26ac4a91ee0a35b966249430 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Tue, 9 Oct 2018 17:36:24 +0300 Subject: perf record: Encode -k clockid frequency into Perf trace Store -k clockid frequency into Perf trace to enable timestamps derived metrics conversion into wall clock time on reporting stage. Below is the example of perf report output: tools/perf/perf record -k raw -- ../../matrix/linux/matrix.gcc ... [ perf record: Captured and wrote 31.222 MB perf.data (818054 samples) ] tools/perf/perf report --header # ======== ... # event : name = cycles:ppp, , size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|PERIOD, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, enable_on_exec = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1, use_clockid = 1, clockid = 4 ... # clockid frequency: 1000 MHz ... # ======== Signed-off-by: Alexey Budankov Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/23a4a1dc-b160-85a0-347d-40a2ed6d007b@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 24 ++++++++++++++++++++++-- tools/perf/perf.h | 1 + tools/perf/util/env.h | 1 + tools/perf/util/header.c | 23 +++++++++++++++++++++++ tools/perf/util/header.h | 1 + 5 files changed, 48 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0980dfe3396b1..10cf889c6d75d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -592,6 +592,9 @@ static void record__init_features(struct record *rec) if (!rec->opts.full_auxtrace) perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) + perf_header__clear_feat(&session->header, HEADER_CLOCKID); + perf_header__clear_feat(&session->header, HEADER_STAT); } @@ -897,6 +900,9 @@ static int __cmd_record(struct record 
*rec, int argc, const char **argv) record__init_features(rec); + if (rec->opts.use_clockid && rec->opts.clockid_res_ns) + session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; + if (forks) { err = perf_evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, @@ -1337,6 +1343,19 @@ static const struct clockid_map clockids[] = { CLOCKID_END, }; +static int get_clockid_res(clockid_t clk_id, u64 *res_ns) +{ + struct timespec res; + + *res_ns = 0; + if (!clock_getres(clk_id, &res)) + *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; + else + pr_warning("WARNING: Failed to determine specified clock resolution.\n"); + + return 0; +} + static int parse_clockid(const struct option *opt, const char *str, int unset) { struct record_opts *opts = (struct record_opts *)opt->value; @@ -1360,7 +1379,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) /* if its a number, we're done */ if (sscanf(str, "%d", &opts->clockid) == 1) - return 0; + return get_clockid_res(opts->clockid, &opts->clockid_res_ns); /* allow a "CLOCK_" prefix to the name */ if (!strncasecmp(str, "CLOCK_", 6)) @@ -1369,7 +1388,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) for (cm = clockids; cm->name; cm++) { if (!strcasecmp(str, cm->name)) { opts->clockid = cm->clockid; - return 0; + return get_clockid_res(opts->clockid, + &opts->clockid_res_ns); } } diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 21bf7f5a3cf51..0ed4a34c74c4b 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -81,6 +81,7 @@ struct record_opts { unsigned initial_delay; bool use_clockid; clockid_t clockid; + u64 clockid_res_ns; unsigned int proc_map_timeout; }; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1f3ccc3685303..d01b8355f4cab 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -63,6 +63,7 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long 
memory_bsize; + u64 clockid_res_ns; }; extern struct perf_env perf_env; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1ec1d9bc2d635..4fd45be95a433 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff, return err; } +static int write_clockid(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + return do_write(ff, &ff->ph->env.clockid_res_ns, + sizeof(ff->ph->env.clockid_res_ns)); +} + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) fprintf(fp, "# Core ID and Socket ID information is not available\n"); } +static void print_clockid(struct feat_fd *ff, FILE *fp) +{ + fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n", + ff->ph->env.clockid_res_ns * 1000); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2531,6 +2544,15 @@ out: return ret; } +static int process_clockid(struct feat_fd *ff, + void *data __maybe_unused) +{ + if (do_read_u64(ff, &ff->ph->env.clockid_res_ns)) + return -1; + + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(CACHE, cache, true), FEAT_OPR(SAMPLE_TIME, sample_time, false), FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), + FEAT_OPR(CLOCKID, clockid, false) }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index e17903caa71da..0d553ddca0a30 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -38,6 +38,7 @@ enum { HEADER_CACHE, HEADER_SAMPLE_TIME, HEADER_MEM_TOPOLOGY, + HEADER_CLOCKID, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; -- cgit v1.2.3 From 
0ab41886648bb75b951bd41d8b5cecaca8e0ad66 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 16 Oct 2018 20:55:55 -0700 Subject: perf annotate: Add Sparc support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E.g.: $ perf annotate --stdio2 Samples: 7K of event 'cycles:ppp', 4000 Hz, Event count (approx.): 3086733887 __gettimeofday /lib32/libc-2.27.so [Percent: local period] Percent│ │ │ │ Disassembly of section .text: │ │ 000a6fa0 <__gettimeofday@@GLIBC_2.0>: 0.47 │ save %sp, -96, %sp 0.73 │ sethi %hi(0xe9000), %l7 │ → call __frame_state_for@@GLIBC_2.0+0x480 0.30 │ add %l7, 0x58, %l7 ! e9058 1.33 │ mov %i0, %o0 │ mov %i1, %o1 0.43 │ mov 0x74, %g1 │ ta 0x10 88.92 │ ↓ bcc 30 2.95 │ clr %g1 │ neg %o0 │ mov 1, %g1 0.31 │30: cmp %g1, 0 │ bne,pn %icc, a6fe4 <__gettimeofday@@GLIBC_2.0+0x44> │ mov %o0, %i0 1.96 │ ← return %i7 + 8 2.62 │ nop │ sethi %hi(0), %g1 │ neg %o0, %g2 │ add %g1, 0x160, %g1 │ ld [ %l7 + %g1 ], %g1 │ st %g2, [ %g7 + %g1 ] │ ← return %i7 + 8 │ mov -1, %o0 Signed-off-by: David S. 
Miller Link: http://lkml.kernel.org/r/20181016.205555.1070918198627611771.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/sparc/annotate/instructions.c | 169 ++++++++++++++++++++++++++ tools/perf/util/annotate.c | 8 ++ 2 files changed, 177 insertions(+) create mode 100644 tools/perf/arch/sparc/annotate/instructions.c diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c new file mode 100644 index 0000000000000..2614c010c2352 --- /dev/null +++ b/tools/perf/arch/sparc/annotate/instructions.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +static int is_branch_cond(const char *cond) +{ + if (cond[0] == '\0') + return 1; + + if (cond[0] == 'a' && cond[1] == '\0') + return 1; + + if (cond[0] == 'c' && + (cond[1] == 'c' || cond[1] == 's') && + cond[2] == '\0') + return 1; + + if (cond[0] == 'e' && + (cond[1] == '\0' || + (cond[1] == 'q' && cond[2] == '\0'))) + return 1; + + if (cond[0] == 'g' && + (cond[1] == '\0' || + (cond[1] == 't' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'l' && + (cond[1] == '\0' || + (cond[1] == 't' && cond[2] == '\0') || + (cond[1] == 'u' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'n' && + (cond[1] == '\0' || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'z' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'b' && + cond[1] == 'p' && + cond[2] == 'o' && + cond[3] == 's' && + cond[4] == '\0') + return 1; + + if (cond[0] == 'v' && + (cond[1] == 'c' || cond[1] == 's') && + cond[2] == '\0') + return 1; + + if (cond[0] == 'b' && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + return 0; +} + +static int is_branch_reg_cond(const char *cond) +{ + if ((cond[0] == 'n' || 
cond[0] == 'l') && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + if (cond[0] == 'z' && + cond[1] == '\0') + return 1; + + if ((cond[0] == 'g' || cond[0] == 'l') && + cond[1] == 'e' && + cond[2] == 'z' && + cond[3] == '\0') + return 1; + + if (cond[0] == 'g' && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + return 0; +} + +static int is_branch_float_cond(const char *cond) +{ + if (cond[0] == '\0') + return 1; + + if ((cond[0] == 'a' || cond[0] == 'e' || + cond[0] == 'z' || cond[0] == 'g' || + cond[0] == 'l' || cond[0] == 'n' || + cond[0] == 'o' || cond[0] == 'u') && + cond[1] == '\0') + return 1; + + if (((cond[0] == 'g' && cond[1] == 'e') || + (cond[0] == 'l' && (cond[1] == 'e' || + cond[1] == 'g')) || + (cond[0] == 'n' && (cond[1] == 'e' || + cond[1] == 'z')) || + (cond[0] == 'u' && (cond[1] == 'e' || + cond[1] == 'g' || + cond[1] == 'l'))) && + cond[2] == '\0') + return 1; + + if (cond[0] == 'u' && + (cond[1] == 'g' || cond[1] == 'l') && + cond[2] == 'e' && + cond[3] == '\0') + return 1; + + return 0; +} + +static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strcmp(name, "call") || + !strcmp(name, "jmp") || + !strcmp(name, "jmpl")) { + ops = &call_ops; + } else if (!strcmp(name, "ret") || + !strcmp(name, "retl") || + !strcmp(name, "return")) { + ops = &ret_ops; + } else if (!strcmp(name, "mov")) { + ops = &mov_ops; + } else { + if (name[0] == 'c' && + (name[1] == 'w' || name[1] == 'x')) + name += 2; + + if (name[0] == 'b') { + const char *cond = name + 1; + + if (cond[0] == 'r') { + if (is_branch_reg_cond(cond + 1)) + ops = &jump_ops; + } else if (is_branch_cond(cond)) { + ops = &jump_ops; + } + } else if (name[0] == 'f' && name[1] == 'b') { + if (is_branch_float_cond(name + 2)) + ops = &jump_ops; + } + } + + if (ops) + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + 
if (!arch->initialized) { + arch->initialized = true; + arch->associate_instruction_ops = sparc__associate_instruction_ops; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 28cd6a17491b2..6936daf89dddc 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" +#include "arch/sparc/annotate/instructions.c" static struct arch architectures[] = { { @@ -170,6 +171,13 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "sparc", + .init = sparc__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) -- cgit v1.2.3 From d87b9790b3deb7601872d34fd05af3da78147583 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 16 Oct 2018 21:15:45 -0700 Subject: perf jitdump: Add Sparc support. Signed-off-by: David S. 
Miller Link: http://lkml.kernel.org/r/20181016.211545.1487970139012324624.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/sparc/Makefile | 2 ++ tools/perf/util/genelf.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 7fbca175099ec..275dea7ff59a0 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,3 +1,5 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index de322d51c7fe2..b72440bf9a796 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__powerpc__) #define GEN_ELF_ARCH EM_PPC #define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__sparc__) && defined(__arch64__) +#define GEN_ELF_ARCH EM_SPARCV9 +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__sparc__) +#define GEN_ELF_ARCH EM_SPARC +#define GEN_ELF_CLASS ELFCLASS32 #else #error "unsupported architecture" #endif -- cgit v1.2.3 From d6afa561e1471ccfdaf7191230c0c59a37e45a5b Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 17 Oct 2018 12:08:59 -0700 Subject: perf symbols: Set PLT entry/header sizes properly on Sparc Using the sh_entsize for both values isn't correct. It happens to be correct on x86... For both 32-bit and 64-bit sparc, there are four PLT entries in the PLT section. Signed-off-by: David S. 
Miller Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: David Tolnay Cc: Hanjun Guo Cc: Hemant Kumar Cc: Li Bin Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Fixes: b2f7605076d6 ("perf symbols: Fix plt entry calculation for ARM and AARCH64") Link: http://lkml.kernel.org/r/20181017.120859.2268840244308635255.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 29770ea61768b..6e70cc00c1618 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) plt_entry_size = 16; break; - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + case EM_SPARC: + plt_header_size = 48; + plt_entry_size = 12; + break; + + case EM_SPARCV9: + plt_header_size = 128; + plt_entry_size = 32; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ plt_header_size = shdr_plt.sh_entsize; plt_entry_size = shdr_plt.sh_entsize; break; -- cgit v1.2.3 From 389373d3306553896a9e218493e5b6175c844eb0 Mon Sep 17 00:00:00 2001 From: Hongxu Jia Date: Thu, 18 Oct 2018 16:26:13 +0800 Subject: perf arm64: Fix generate system call table failed with /tmp mounted with noexec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When /tmp is mounted with noexec, mksyscalltbl fails. [snip] |perf-1.0/tools/perf/arch/arm64/entry/syscalls//mksyscalltbl: /tmp/create-table-6VGPSt: Permission denied [snip] Add variable TMPDIR as prefix dir of the temporary file, if it is set, replace default /tmp. 
Signed-off-by: Hongxu Jia Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Kim Phillips Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sébastien Boisvert Cc: Thomas Richter Fixes: 2b5882435606 ("perf arm64: Generate system call table from asm/unistd.h") LPU-Reference: 1539851173-14959-1-git-send-email-hongxu.jia@windriver.com Link: https://lkml.kernel.org/n/tip-1qrgq840ci0c5cy4oww957ge@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/entry/syscalls/mksyscalltbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index 2dbb8cade048f..c88fd32563ebc 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -23,7 +23,7 @@ create_table_from_c() { local sc nr last_sc - create_table_exe=`mktemp /tmp/create-table-XXXXXX` + create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX` { -- cgit v1.2.3 From 4ba8b3ebf4f8f583c2c01da20e4d110a5881ffdd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Oct 2018 15:24:07 -0300 Subject: tools lib subcmd: Introduce OPTION_ULONG For completeness, will be used in 'perf trace --max-events'. 
Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Kim Phillips Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-glaj3pwespxfj2fdjs9a20b6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/parse-options.c | 19 +++++++++++++++++++ tools/lib/subcmd/parse-options.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index cb7154eccbdc1..dbb9efbf718a0 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_INTEGER: case OPTION_UINTEGER: case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: default: break; @@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_INTEGER: case OPTION_UINTEGER: case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: default: break; @@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "expects a numerical value", flags); return 0; + case OPTION_ULONG: + if (unset) { + *(unsigned long *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(unsigned long *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + *(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + case OPTION_U64: if (unset) { *(u64 *)opt->value = 0; @@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full) case OPTION_ARGUMENT: break; case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: case OPTION_INTEGER: case OPTION_UINTEGER: diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 92fdbe1519f6d..6ca2a8bfe716b 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -25,6 +25,7 @@ enum parse_opt_type { 
OPTION_STRING, OPTION_INTEGER, OPTION_LONG, + OPTION_ULONG, OPTION_CALLBACK, OPTION_U64, OPTION_UINTEGER, @@ -133,6 +134,7 @@ struct option { #define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } #define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) } #define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } +#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) } #define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } #define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) } #define OPT_STRING_OPTARG(s, l, v, a, h, d) \ -- cgit v1.2.3 From 5067a8cdd4ce3588fca2e0ee554f0f081650de8f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Oct 2018 16:38:27 -0300 Subject: perf trace: Introduce --max-events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow stopping tracing after a number of events take place, considering strace-like syscalls formatting as one event per enter/exit pair or when in a multi-process tracing session a syscall is interrupted and printed ending with '...'. 
Examples included in the documentation: Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here): $ perf trace -e open* --max-events 4 [root@jouet perf]# trace -e open* --max-events 4 2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31 2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 $ Trace the first minor page fault when running a workload: # perf trace -F min --max-stack=7 --max-events 1 sleep 1 0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k) __clear_user ([kernel.kallsyms]) load_elf_binary ([kernel.kallsyms]) search_binary_handler ([kernel.kallsyms]) __do_execve_file.isra.33 ([kernel.kallsyms]) __x64_sys_execve ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) # Trace the next minor page fault to take place on the first CPU: # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0 0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.) 
js::gc::FreeSpan::initAsEmpty (inlined) js::gc::Arena::setAsNotAllocated (inlined) js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so) js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so) js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so) js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so) js::gc::GCRuntime::tryNewTenuredThing (inlined) js::AllocateString (/usr/lib64/firefox/libxul.so) js::Allocate (inlined) JSThinInlineString::new_<(js::AllowGC)1> (inlined) AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined) js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so) [0x18b26e6bc2bd] (/tmp/perf-17136.map) Tracing the next four ext4 operations on a specific CPU: # perf trace -e ext4:*/call-graph=fp/ --max-events 4 --cpu 3 0.000 mutt/3849 ext4:ext4_es_lookup_extent_enter:dev 253,2 ino 57277 lblk 0 ext4_es_lookup_extent ([kernel.kallsyms]) read (/usr/lib64/libc-2.26.so) 0.097 mutt/3849 ext4:ext4_es_lookup_extent_exit:dev 253,2 ino 57277 found 0 [0/0) 0 ext4_es_lookup_extent ([kernel.kallsyms]) read (/usr/lib64/libc-2.26.so) 0.141 mutt/3849 ext4:ext4_ext_map_blocks_enter:dev 253,2 ino 57277 lblk 0 len 1 flags ext4_ext_map_blocks ([kernel.kallsyms]) read (/usr/lib64/libc-2.26.so) 0.184 mutt/3849 ext4:ext4_ext_load_extent:dev 253,2 ino 57277 lblk 1516511 pblk 18446744071750013657 __read_extent_tree_block ([kernel.kallsyms]) __read_extent_tree_block ([kernel.kallsyms]) ext4_find_extent ([kernel.kallsyms]) ext4_ext_map_blocks ([kernel.kallsyms]) ext4_map_blocks ([kernel.kallsyms]) ext4_mpage_readpages ([kernel.kallsyms]) read_pages ([kernel.kallsyms]) __do_page_cache_readahead ([kernel.kallsyms]) ondemand_readahead ([kernel.kallsyms]) generic_file_read_iter ([kernel.kallsyms]) __vfs_read ([kernel.kallsyms]) vfs_read ([kernel.kallsyms]) ksys_read ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) read (/usr/lib64/libc-2.26.so) # Cc: Adrian Hunter Cc: David Ahern 
Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Rudá Moura Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-sweh107bs7ol5bzls0m4tqdz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 47 +++++++++++++++++++++++++++++++++ tools/perf/builtin-trace.c | 21 +++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 115db9e06ecd8..0d1a1cd4d3281 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --kernel-syscall-graph:: Show the kernel callchains on the syscall exit path. +--max-events=N:: + Stop after processing N events. Note that strace-like events are considered + only at exit time or when a syscall is interrupted, i.e. in those cases this + option is equivalent to the number of lines printed. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. Note that at this point @@ -238,6 +243,48 @@ Trace syscalls, major and minor pagefaults: As you can see, there was major pagefault in python process, from CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so. 
+Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here): + + $ perf trace -e open* --max-events 4 + [root@jouet perf]# trace -e open* --max-events 4 + 2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31 + 2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 + 3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 + 4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 + $ + +Trace the first minor page fault when running a workload: + + # perf trace -F min --max-stack=7 --max-events 1 sleep 1 + 0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k) + __clear_user ([kernel.kallsyms]) + load_elf_binary ([kernel.kallsyms]) + search_binary_handler ([kernel.kallsyms]) + __do_execve_file.isra.33 ([kernel.kallsyms]) + __x64_sys_execve ([kernel.kallsyms]) + do_syscall_64 ([kernel.kallsyms]) + entry_SYSCALL_64 ([kernel.kallsyms]) + # + +Trace the next minor page fault to take place on the first CPU: + + # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0 + 0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.) 
+ js::gc::FreeSpan::initAsEmpty (inlined) + js::gc::Arena::setAsNotAllocated (inlined) + js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so) + js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so) + js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so) + js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so) + js::gc::GCRuntime::tryNewTenuredThing (inlined) + js::AllocateString (/usr/lib64/firefox/libxul.so) + js::Allocate (inlined) + JSThinInlineString::new_<(js::AllowGC)1> (inlined) + AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined) + js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so) + [0x18b26e6bc2bd] (/tmp/perf-17136.map) + # + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 90289f31dd87c..74638034861ca 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -89,6 +89,8 @@ struct trace { u64 base_time; FILE *output; unsigned long nr_events; + unsigned long nr_events_printed; + unsigned long max_events; struct strlist *ev_qualifier; struct { size_t nr; @@ -1664,6 +1666,8 @@ static int trace__printf_interrupted_entry(struct trace *trace) printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; + ++trace->nr_events_printed; + return printed; } @@ -1940,6 +1944,13 @@ errno_print: { fputc('\n', trace->output); + /* + * We only consider an 'event' for the sake of --max-events a non-filtered + * sys_enter + sys_exit and other tracepoint events. 
+ */ + if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX) + interrupted = true; + if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) @@ -2072,6 +2083,7 @@ static void bpf_output__fprintf(struct trace *trace, { binary__fprintf(sample->raw_data, sample->raw_size, 8, bpf_output__printer, NULL, trace->output); + ++trace->nr_events_printed; } static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, @@ -2127,6 +2139,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output); + ++trace->nr_events_printed; } } @@ -2225,6 +2238,8 @@ static int trace__pgfault(struct trace *trace, trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + + ++trace->nr_events_printed; out: err = 0; out_put: @@ -2402,6 +2417,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st tracepoint_handler handler = evsel->handler; handler(trace, evsel, event, sample); } + + if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX) + interrupted = true; } static int trace__add_syscall_newtp(struct trace *trace) @@ -3249,6 +3267,7 @@ int cmd_trace(int argc, const char **argv) .trace_syscalls = false, .kernel_syscallchains = false, .max_stack = UINT_MAX, + .max_events = ULONG_MAX, }; const char *output_name = NULL; const struct option trace_options[] = { @@ -3301,6 +3320,8 @@ int cmd_trace(int argc, const char **argv) &record_parse_callchain_opt), OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, "Show the kernel callchains on the syscall exit path"), + OPT_ULONG(0, "max-events", &trace.max_events, + "Set the maximum number of events to print, exit after that is reached. 
"), OPT_UINTEGER(0, "min-stack", &trace.min_stack, "Set the minimum stack depth when parsing the callchain, " "anything below the specified depth will be ignored."), -- cgit v1.2.3 From 2fda5ada07f36f6cde39a52e7f05d86ea8ffdc33 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Oct 2018 15:47:34 -0300 Subject: perf evsel: Introduce per event max_events property This simply adds the field to 'struct perf_evsel' and allows setting it via the event parser, to test it lets trace trace: First look at where in a function that receives an evsel we can put a probe to read how evsel->max_events was setup: # perf probe -x ~/bin/perf -L trace__event_handler 0 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) 3 { 4 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 5 int callchain_ret = 0; 7 if (sample->callchain) { 8 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 9 if (callchain_ret == 0) { 10 if (callchain_cursor.nr < trace->min_stack) 11 goto out; 12 callchain_ret = 1; } } See what variables we can probe at line 7: # perf probe -x ~/bin/perf -V trace__event_handler:7 Available variables at trace__event_handler:7 @ int callchain_ret struct perf_evsel* evsel struct perf_sample* sample struct thread* thread struct trace* trace union perf_event* event Add a probe at that line asking for evsel->max_events to be collected and named as "max_events": # perf probe -x ~/bin/perf trace__event_handler:7 'max_events=evsel->max_events' Added new event: probe_perf:trace__event_handler (on trace__event_handler:7 in /home/acme/bin/perf with max_events=evsel->max_events) You can now use it in all perf tools, such as: perf record -e probe_perf:trace__event_handler -aR sleep 1 Now use 'perf trace', here aliased to just 'trace' and trace trace, i.e. 
the first 'trace' is tracing just that 'probe_perf:trace__event_handler' event, while the traced trace is tracing all scheduler tracepoints, will stop at two events (--max-events 2) and will just set evsel->max_events for all the sched tracepoints to 9, we will see the output of both traces intermixed: # trace -e *perf:*event_handler trace --max-events 2 -e sched:*/nr=9/ 0.000 :0/0 sched:sched_waking:comm=rcu_sched pid=10 prio=120 target_cpu=000 0.009 :0/0 sched:sched_wakeup:comm=rcu_sched pid=10 prio=120 target_cpu=000 0.000 trace/23949 probe_perf:trace__event_handler:(48c34a) max_events=0x9 0.046 trace/23949 probe_perf:trace__event_handler:(48c34a) max_events=0x9 # Now, if the traced trace sends its output to /dev/null, we'll see just what the first level trace outputs: that evsel->max_events is indeed being set to 9: # trace -e *perf:*event_handler trace -o /dev/null --max-events 2 -e sched:*/nr=9/ 0.000 trace/23961 probe_perf:trace__event_handler:(48c34a) max_events=0x9 0.030 trace/23961 probe_perf:trace__event_handler:(48c34a) max_events=0x9 # Now that we can set evsel->max_events, we can go to the next step, honour that per-event property in 'perf trace'. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-og00yasj276joem6e14l1eas@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 4 ++++ tools/perf/util/evsel.h | 3 +++ tools/perf/util/parse-events.c | 8 ++++++++ tools/perf/util/parse-events.h | 1 + tools/perf/util/parse-events.l | 1 + 5 files changed, 17 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 29d7b97f66fbc..7e95ec1c19a82 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; + evsel->max_events = ULONG_MAX; evsel->evlist = NULL; evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->node); @@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_MAX_STACK: max_stack = term->val.max_stack; break; + case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS: + evsel->max_events = term->val.max_events; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4107c39f4a54a..ad5d615c6db64 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -46,6 +46,7 @@ enum term_type { PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, @@ -65,6 +66,7 @@ struct perf_evsel_config_term { bool inherit; bool overwrite; char *branch; + unsigned long max_events; } val; bool weak; }; @@ -99,6 +101,7 @@ struct perf_evsel { struct perf_counts *prev_raw_counts; int idx; u32 ids; + unsigned long max_events; char *name; double scale; const char *unit; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c 
index f8cd3e7c91866..59be3466d64d3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", + [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr", [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", @@ -1037,6 +1038,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: return config_term_common(attr, term, err); @@ -1162,6 +1167,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num); + break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 
1 : 0); break; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 4473dac27aee2..5ed035cbcbb72 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ enum { PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, PARSE_EVENTS__TERM_TYPE_MAX_STACK, + PARSE_EVENTS__TERM_TYPE_MAX_EVENTS, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, PARSE_EVENTS__TERM_TYPE_OVERWRITE, PARSE_EVENTS__TERM_TYPE_DRV_CFG, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 5f761f3ed0f33..7805c71aaae2e 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -269,6 +269,7 @@ time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } +nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } -- cgit v1.2.3 From b7e8452b860c299f342a012922bdd9ab8f2bb722 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 20 Oct 2018 09:04:41 -0300 Subject: perf evsel: Mark a evsel as disabled when asking the kernel do disable it Because there may be more such events in the ring buffer that should be discarded when an app decides to stop considering them. At some point we'll do this with eBPF, this way we stop them at origin, before they are placed in the ring buffer. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-uzufuxws4hufigx07ue1dpv6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/evsel.c | 23 +++++++++++++++++------ tools/perf/util/evsel.h | 1 + 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index be440df296150..e88e6f9b1463f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist) struct perf_evsel *pos; evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->fd) + if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd) continue; perf_evsel__disable(pos); } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7e95ec1c19a82..6d187059a3736 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1207,16 +1207,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter) int perf_evsel__enable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_ENABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0); + + if (!err) + evsel->disabled = false; + + return err; } int perf_evsel__disable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_DISABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0); + /* + * We mark it disabled here so that tools that disable a event can + * ignore events after they disable it. I.e. the ring buffer may have + * already a few more events queued up before the kernel got the stop + * request. 
+ */ + if (!err) + evsel->disabled = true; + + return err; } int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ad5d615c6db64..4ef50f157b50b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -122,6 +122,7 @@ struct perf_evsel { bool snapshot; bool supported; bool needs_swap; + bool disabled; bool no_aux_samples; bool immediate; bool system_wide; -- cgit v1.2.3 From 4291bf5cb93918232f88a3a70d8f70a72fbb6ab0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 20 Oct 2018 09:18:50 -0300 Subject: perf trace: Drop addr_location refcounts When we use machine__resolve() we grab a reference to addr_location.thread (and in the future to other elements there) via machine__findnew_thread(), so we must pair that with addr_location__put(), else we'll never drop that thread when it exits and no other remaining data structures have pointers to it. Fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ivg9hifzeuokb1f5jxc2wob4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 74638034861ca..77b8748ad5edb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1814,12 +1814,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse int max_stack = evsel->attr.sample_max_stack ? 
evsel->attr.sample_max_stack : trace->max_stack; + int err; - if (machine__resolve(trace->host, &al, sample) < 0 || - thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack)) + if (machine__resolve(trace->host, &al, sample) < 0) return -1; - return 0; + err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack); + addr_location__put(&al); + return err; } static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) -- cgit v1.2.3 From a937c6658b8e77e1f65cde2be9970811752121bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 20 Oct 2018 09:27:52 -0300 Subject: perf trace: Drop thread refcount in trace__event_handler() We must pair: thread = machine__findnew_thread(); with thread__put(thread). Fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: c4191e55b874 ("perf trace: Show comm and tid for tracepoint events") Link: https://lkml.kernel.org/n/tip-dkxsb8cwg87rmkrzrbns1o4z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 77b8748ad5edb..589e0412652a5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2152,8 +2152,8 @@ newline: trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); - thread__put(thread); out: + thread__put(thread); return 0; } -- cgit v1.2.3 From cbb5df7e96070f1f728ff7885443646ebba703d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 22 Oct 2018 11:30:15 +0200 Subject: perf stat: Poll for monitored tasks being alive Adding the check for tasks we monitor via -p/-t options, and finish stat if there's no longer task to monitor. 
Requested-by: Stephane Eranian Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20181022093015.9106-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b86aba1c8028f..d1028d7755bbc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -409,6 +409,28 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) return leader; } +static bool is_target_alive(struct target *_target, + struct thread_map *threads) +{ + struct stat st; + int i; + + if (!target__has_task(_target)) + return true; + + for (i = 0; i < threads->nr; i++) { + char path[PATH_MAX]; + + scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(), + threads->map[i].pid); + + if (!stat(path, &st)) + return true; + } + + return false; +} + static int __run_perf_stat(int argc, const char **argv, int run_idx) { int interval = stat_config.interval; @@ -579,6 +601,8 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); + if (!is_target_alive(&target, evsel_list->threads)) + break; if (timeout) break; if (interval) { -- cgit v1.2.3 From c1c9b9695cc8868048f45c7e2559f65bc0be7382 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Sun, 21 Oct 2018 21:14:23 +0200 Subject: perf script: Allow extended console debug output The script tool isn't using a browser, yet use_browser wasn't set explicitly to zero. This in turn led to confusing output such as: ``` $ perf script -vvv ... ... overlapping maps in /home/milian/foobar (disable tui for more info) ... ``` Explicitly set use_browser to 0 now, which gives us the extended debug information now in perf script as expected. 
Signed-off-by: Milian Wolff Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20181021191424.16183-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 4da5e32b9e035..bd468b90801b2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3417,8 +3417,10 @@ int cmd_script(int argc, const char **argv) exit(-1); } - if (!script_name) + if (!script_name) { setup_pager(); + use_browser = 0; + } session = perf_session__new(&data, false, &script.tool); if (session == NULL) -- cgit v1.2.3 From 7ee40678af935fb489b0c6cf0f75808175214cd7 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Sun, 21 Oct 2018 21:14:24 +0200 Subject: perf script: Flush output stream after events in verbose mode When the perf script output is written to a terminal stream, the normal output of `perf script` would get buffered, but its debug output would be written directly. This made it quite hard to figure out where a given debug output is coming from. We can improve on this by flushing the output buffer after processing an event. To see the value, compare the following output for a `perf script -v` run: Before this patch: ``` unwind: reg 16, val 7faf7dfdc000 unwind: reg 7, val 7ffc80811e30 unwind: find_proc_info dso /usr/lib/ld-2.28.so unwind: reg 6, val 0 unwind: _start:ip = 0x7faf7dfdc000 (0x2000) unwind: reg 16, val 7faf7dfdc000 unwind: reg 7, val 7ffc80811e30 unwind: find_proc_info dso /usr/lib/ld-2.28.so unwind: reg 6, val 0 unwind: _start:ip = 0x7faf7dfdc000 (0x2000) unwind: reg 16, val 7faf7dfdc000 unwind: reg 7, val 7ffc80811e30 unwind: find_proc_info dso /usr/lib/ld-2.28.so unwind: reg 6, val 0 unwind: _start:ip = 0x7faf7dfdc000 (0x2000) unwind: reg 16, val 7faf7dfdc000 unwind: reg 7, val 7ffc80811e30 ... 
lots and lots of verbose debug output cpp-inlining 24617 90229.122036534: 1 cycles:uppp: 7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so) cpp-inlining 24617 90229.122043974: 1 cycles:uppp: 7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so) ... ``` After this patch: ``` ... unwind: reg 16, val 7faf7dfdc000 unwind: reg 7, val 7ffc80811e30 unwind: find_proc_info dso /usr/lib/ld-2.28.so unwind: reg 6, val 0 unwind: _start:ip = 0x7faf7dfdc000 (0x2000) cpp-inlining 24617 90229.122036534: 1 cycles:uppp: 7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so) unwind: reg 16, val 7faf7dfdc000 unwind: reg 7, val 7ffc80811e30 unwind: find_proc_info dso /usr/lib/ld-2.28.so unwind: reg 6, val 0 unwind: _start:ip = 0x7faf7dfdc000 (0x2000) cpp-inlining 24617 90229.122043974: 1 cycles:uppp: 7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so) ... ``` This new output format makes it much easier to use perf script output for debugging purposes, e.g. to investigate broken dwarf unwinding. Signed-off-by: Milian Wolff Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20181021191424.16183-2-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bd468b90801b2..ca09b7d2adb7e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1737,6 +1737,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(METRIC)) perf_sample__fprint_metric(script, thread, evsel, sample, fp); + + if (verbose) + fflush(fp); } static struct scripting_ops *scripting_ops; -- cgit v1.2.3 From a9c5e6c1e9bff42ca5f01ceb3092a27a010755fb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Oct 2018 14:14:16 -0300 Subject: perf trace: Introduce per-event maximum number of events property Call it 'nr', as in this context it should be expressive enough, i.e.: # perf trace -e sched:*waking/nr=8,call-graph=fp/ 0.000 :0/0 
sched:sched_waking:comm=rcu_sched pid=10 prio=120 target_cpu=001 try_to_wake_up ([kernel.kallsyms]) sched_clock ([kernel.kallsyms]) 3.933 :0/0 sched:sched_waking:comm=rcu_sched pid=10 prio=120 target_cpu=001 try_to_wake_up ([kernel.kallsyms]) sched_clock ([kernel.kallsyms]) 3.970 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003 try_to_wake_up ([kernel.kallsyms]) __libc_write (/usr/lib64/libpthread-2.26.so) 20.069 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003 try_to_wake_up ([kernel.kallsyms]) __libc_write (/usr/lib64/libpthread-2.26.so) 37.170 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003 try_to_wake_up ([kernel.kallsyms]) __libc_write (/usr/lib64/libpthread-2.26.so) 53.267 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003 try_to_wake_up ([kernel.kallsyms]) __libc_write (/usr/lib64/libpthread-2.26.so) 70.365 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003 try_to_wake_up ([kernel.kallsyms]) __libc_write (/usr/lib64/libpthread-2.26.so) 75.781 Web Content/3649 sched:sched_waking:comm=JS Helper pid=3670 prio=120 target_cpu=000 try_to_wake_up ([kernel.kallsyms]) try_to_wake_up ([kernel.kallsyms]) wake_up_q ([kernel.kallsyms]) futex_wake ([kernel.kallsyms]) do_futex ([kernel.kallsyms]) __x64_sys_futex ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) pthread_cond_signal@@GLIBC_2.3.2 (/usr/lib64/libpthread-2.26.so) # # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/ 0.000 :0/0 sched:sched_switch:swapper/0:0 [120] S ==> trace:3367 [120] 0.046 :0/0 sched:sched_switch:swapper/1:0 [120] S ==> kworker/u16:58:2722 [120] 570.670 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ef00 len=66 __dev_queue_xmit ([kernel.kallsyms]) 
1106.141 jbd2/dm-0-8/476 block:block_plug:[jbd2/dm-0-8] 1106.175 jbd2/dm-0-8/476 block:block_unplug:[jbd2/dm-0-8] 1 1618.088 kworker/u16:30/2694 block:block_plug:[kworker/u16:30] 1810.000 :0/0 net:net_dev_queue:dev=vnet0 skbaddr=0xffff93498051ef00 len=52 __dev_queue_xmit ([kernel.kallsyms]) 3857.974 :0/0 net:net_dev_queue:dev=vnet0 skbaddr=0xffff93498051f900 len=52 __dev_queue_xmit ([kernel.kallsyms]) 4790.277 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8] 4790.448 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8] # The global --max-events has precedence: # trace --max-events 3 -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/ 0.000 :0/0 sched:sched_switch:swapper/0:0 [120] S ==> qemu-system-x86:2252 [120] 0.029 qemu-system-x8/2252 sched:sched_switch:qemu-system-x86:2252 [120] D ==> swapper/0:0 [120] 58.047 DNS Res~er #14/31661 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff9346966af100 len=84 __dev_queue_xmit ([kernel.kallsyms]) __libc_send (/usr/lib64/libpthread-2.26.so) # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-s4jswltvh660ughvg9nwngah@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 20 ++++++++++++++++++++ tools/perf/builtin-trace.c | 19 +++++++++++++++++-- tools/perf/util/evsel.h | 1 + 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 0d1a1cd4d3281..e113450503d2f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -285,6 +285,26 @@ Trace the next min page page fault to take place on the first CPU: [0x18b26e6bc2bd] (/tmp/perf-17136.map) # +Trace the next two sched:sched_switch events, four block:*_plug events, the +next block:*_unplug and the next three net:*dev_queue events, this last one +with a backtrace of at most 16 entries, system wide: + + # 
perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/ + 0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120] + 0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120] + 254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66 + __dev_queue_xmit ([kernel.kallsyms]) + 273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78 + __dev_queue_xmit ([kernel.kallsyms]) + 274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78 + __dev_queue_xmit ([kernel.kallsyms]) + 2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58] + 2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1 + 4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8] + 8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30] + 8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30] + # + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 589e0412652a5..7081d7ea12e5d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2092,8 +2092,18 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { - struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + struct thread *thread; int callchain_ret = 0; + /* + * Check if we called perf_evsel__disable(evsel) due to, for instance, + * this event's max_events having been hit and this is an entry coming + * from the ring buffer that we should discard, since the max events + * have already been considered/printed. 
+ */ + if (evsel->disabled) + return 0; + + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); if (sample->callchain) { callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); @@ -2142,6 +2152,11 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, sample->raw_data, sample->raw_size, trace->output); ++trace->nr_events_printed; + + if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { + perf_evsel__disable(evsel); + perf_evsel__close(evsel); + } } } @@ -2726,7 +2741,7 @@ next_event: int timeout = done ? 100 : -1; if (!draining && perf_evlist__poll(evlist, timeout) > 0) { - if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) + if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) draining = true; goto again; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4ef50f157b50b..3147ca76c6fc2 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -102,6 +102,7 @@ struct perf_evsel { int idx; u32 ids; unsigned long max_events; + unsigned long nr_events_printed; char *name; double scale; const char *unit; -- cgit v1.2.3 From 3e71c70c946b5d5e7b21397c621b14951e5c0fcf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:35 +0300 Subject: perf scripts python: call-graph-from-sql.py: Use SPDX license identifier Use SPDX license identifier in call-graph-from-sql.py. 
Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index b494a67a1c679..ce1b91fcd6b88 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -1,15 +1,7 @@ #!/usr/bin/python2 -# call-graph-from-sql.py: create call-graph from sql database -# Copyright (c) 2014-2017, Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. +# SPDX-License-Identifier: GPL-2.0 +# exported-sql-viewer.py: view data from sql database +# Copyright (c) 2014-2018, Intel Corporation. # To use this script you will need to have exported data using either the # export-to-sqlite.py or the export-to-postgresql.py script. Refer to those -- cgit v1.2.3 From 1d865c06f5715df94528f76d6bb7f6f98975e04e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:36 +0300 Subject: perf scripts python: call-graph-from-sql.py: Provide better default column sizes Set initial column sizes to improve initial display. 
Committer testing: Extended instructions on testing this, using the sqlite variant: Make sure you have the SQLite glue for python+Qt installed, on fedora 27 I used: # dnf install python-pyside Collect some PT samples, say 5-secs worth, system wide: # perf record -r 10 -e intel_pt//u -a sleep 5 [ perf record: Woken up 49 times to write data ] [ perf record: Captured and wrote 96.131 MB perf.data ] This results in this perf.data file: # ls -larth perf.data -rw-------. 1 root root 97M Oct 23 10:11 perf.data With the following attributes: # perf evlist -v intel_pt//u: type: 8, size: 112, config: 0x300e601, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, exclude_hv: 1, sample_id_all: 1 dummy:u: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, context_switch: 1 # Then generate the "pt_example" tables using: # perf script -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py pt_example branches calls 2018-10-23 10:56:59.177711 Creating database... 2018-10-23 10:56:59.195842 Writing records... 
instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x263984516750 code 5: Failed to get instruction instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x7f26e116fd20 code 6: Trace doesn't match instruction instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x7f26e162c9ee code 6: Trace doesn't match instruction instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x7f26e9ce831a code 6: Trace doesn't match instruction instruction trace error type 1 cpu 0 pid 1644 tid 1644 ip 0x7f26e13d07b4 code 6: Trace doesn't match instruction Warning: 132 instruction trace errors 2018-10-23 11:25:25.015717 Adding indexes 2018-10-23 11:25:28.788061 Done # In my example, that perf.data file generated this db: # file pt_example pt_example: SQLite 3.x database, last written using SQLite version 3020001 [root@seventh perf]# ls -lah pt_example -rw-r--r--. 1 root root 6.6G Oct 23 11:25 pt_example # Then use this python script to use that db and provide a GUI: $ python tools/perf/scripts/python/call-graph-from-sql.py pt_example branches calls I compared the column widths before this patch and after applying it, the visual results match the patch intent. The following patches will refer to this set of instructions in the "Committer Testing" section. 
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index ce1b91fcd6b88..e1014f2628a7d 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -280,6 +280,9 @@ class MainWindow(QMainWindow): self.view = QTreeView() self.view.setModel(self.model) + for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): + self.view.setColumnWidth(c, w) + self.setCentralWidget(self.view) if __name__ == '__main__': -- cgit v1.2.3 From 3c4ef451506897a15aff76ff141c995c6cd32f4d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:37 +0300 Subject: perf scripts python: call-graph-from-sql.py: Set a minimum window size Prevent weirdly small window size. 
Committer testing: Seems to work, but even before this patch, on my system, it always started with: xwininfo: Window id: 0x1e00002 "Call Graph: pt_example" Width: 800 Height: 600 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index e1014f2628a7d..68153fa1b4d12 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -274,6 +274,7 @@ class MainWindow(QMainWindow): style = self.style() icon = style.standardIcon(QStyle.SP_MessageBoxInformation) self.setWindowIcon(icon); + self.setMinimumSize(200, 100) self.model = TreeModel(db) -- cgit v1.2.3 From 99a097c987c26c8c82293fcb92908d07009c925e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:38 +0300 Subject: perf scripts python: call-graph-from-sql.py: Change icon There are not many standard icons, but the computer icon looks slightly better than the information icon. Committer testing: Noticed the change on the icon on the gnome menu right next to the "Activities" menu, looks nicer indeed. 
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index 68153fa1b4d12..2e33540f3de01 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -271,9 +271,7 @@ class MainWindow(QMainWindow): self.setWindowTitle("Call Graph: " + dbname) self.move(100, 100) self.resize(800, 600) - style = self.style() - icon = style.standardIcon(QStyle.SP_MessageBoxInformation) - self.setWindowIcon(icon); + self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) self.setMinimumSize(200, 100) self.model = TreeModel(db) -- cgit v1.2.3 From 7e4fc93e2ade2b0c453a97e307203ffe3f930c98 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:39 +0300 Subject: perf scripts python: call-graph-from-sql.py: Make a "Main" function Make a "Main" function so that the variables used do not pollute the global namespace. 
Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index 2e33540f3de01..2b74b94eecccd 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -284,7 +284,9 @@ class MainWindow(QMainWindow): self.setCentralWidget(self.view) -if __name__ == '__main__': +# Main + +def Main(): if (len(sys.argv) < 2): print >> sys.stderr, "Usage is: call-graph-from-sql.py " raise Exception("Too few arguments") @@ -331,3 +333,6 @@ if __name__ == '__main__': err = app.exec_() db.close() sys.exit(err) + +if __name__ == "__main__": + Main() -- cgit v1.2.3 From b2556c46a69b4c0e6bbf690ac4ca2913cbe90e1e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:40 +0300 Subject: perf scripts python: call-graph-from-sql.py: Separate the database details into a class Separate the database details into a class that can provide different connections using the same connection information. That paves the way for sub-processes that require their own connection. 
Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 63 ++++++++++++++---------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index 2b74b94eecccd..9d056deab2b19 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -284,6 +284,42 @@ class MainWindow(QMainWindow): self.setCentralWidget(self.view) +# Database reference + +class DBRef(): + + def __init__(self, is_sqlite3, dbname): + self.is_sqlite3 = is_sqlite3 + self.dbname = dbname + + def Open(self, connection_name): + dbname = self.dbname + if self.is_sqlite3: + db = QSqlDatabase.addDatabase("QSQLITE", connection_name) + else: + db = QSqlDatabase.addDatabase("QPSQL", connection_name) + opts = dbname.split() + for opt in opts: + if "=" in opt: + opt = opt.split("=") + if opt[0] == "hostname": + db.setHostName(opt[1]) + elif opt[0] == "port": + db.setPort(int(opt[1])) + elif opt[0] == "username": + db.setUserName(opt[1]) + elif opt[0] == "password": + db.setPassword(opt[1]) + elif opt[0] == "dbname": + dbname = opt[1] + else: + dbname = opt + + db.setDatabaseName(dbname) + if not db.open(): + raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) + return db, dbname + # Main def Main(): @@ -302,31 +338,8 @@ def Main(): except: pass - if is_sqlite3: - db = QSqlDatabase.addDatabase('QSQLITE') - else: - db = QSqlDatabase.addDatabase('QPSQL') - opts = dbname.split() - for opt in opts: - if '=' in opt: - opt = opt.split('=') - if opt[0] == 'hostname': - db.setHostName(opt[1]) - elif opt[0] == 'port': - db.setPort(int(opt[1])) - elif opt[0] == 'username': - db.setUserName(opt[1]) - elif opt[0] == 'password': - db.setPassword(opt[1]) 
- elif opt[0] == 'dbname': - dbname = opt[1] - else: - dbname = opt - - db.setDatabaseName(dbname) - if not db.open(): - raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) - + dbref = DBRef(is_sqlite3, dbname) + db, dbname = dbref.Open("main") app = QApplication(sys.argv) window = MainWindow(db, dbname) window.show() -- cgit v1.2.3 From 5f9dfef1bb7fadfb2d001244ef23359982fedd06 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:41 +0300 Subject: perf scripts python: call-graph-from-sql.py: Add a class for global data Keep global data in a single object that is easy to pass around as needed, without polluting the global namespace. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 26 +++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index 9d056deab2b19..0a4dc13d4818a 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -264,17 +264,19 @@ class TreeModel(QAbstractItemModel): class MainWindow(QMainWindow): - def __init__(self, db, dbname, parent=None): + def __init__(self, glb, parent=None): super(MainWindow, self).__init__(parent) + self.glb = glb + self.setObjectName("MainWindow") - self.setWindowTitle("Call Graph: " + dbname) + self.setWindowTitle("Call Graph: " + glb.dbname) self.move(100, 100) self.resize(800, 600) self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) self.setMinimumSize(200, 100) - self.model = TreeModel(db) + self.model = TreeModel(glb.db) self.view = QTreeView() self.view.setModel(self.model) @@ -284,6 +286,17 @@ class MainWindow(QMainWindow): self.setCentralWidget(self.view) +# Global data + 
+class Glb(): + + def __init__(self, dbref, db, dbname): + self.dbref = dbref + self.db = db + self.dbname = dbname + self.app = None + self.mainwindow = None + # Database reference class DBRef(): @@ -340,9 +353,12 @@ def Main(): dbref = DBRef(is_sqlite3, dbname) db, dbname = dbref.Open("main") + glb = Glb(dbref, db, dbname) app = QApplication(sys.argv) - window = MainWindow(db, dbname) - window.show() + glb.app = app + mainwindow = MainWindow(glb) + glb.mainwindow = mainwindow + mainwindow.show() err = app.exec_() db.close() sys.exit(err) -- cgit v1.2.3 From e99ef8141a6d97abaf47647cfd0034769144d080 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:42 +0300 Subject: perf scripts python: call-graph-from-sql.py: Remove use of setObjectName() The object name is never used, so don't bother setting it. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index 0a4dc13d4818a..65c18e351bc40 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -269,7 +269,6 @@ class MainWindow(QMainWindow): self.glb = glb - self.setObjectName("MainWindow") self.setWindowTitle("Call Graph: " + glb.dbname) self.move(100, 100) self.resize(800, 600) -- cgit v1.2.3 From 70d831e85c1bdd87d193e85666bf3aa39aab7f21 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:43 +0300 Subject: perf scripts python: call-graph-from-sql.py: Factor out CallGraphModel from TreeModel Factor out CallGraphModel from TreeModel, which paves the way to reuse TreeModel in future reports. 
Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 90 ++++++++++++++++-------- 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index 65c18e351bc40..ada486048ad8d 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -201,42 +201,47 @@ class TreeItem(): self.selectCalls() return self.child_count - def columnCount(self): - return 7 - - def columnHeader(self, column): - headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] - return headers[column] + def hasChildren(self): + if not self.query_done: + return True + return self.child_count > 0 def getData(self, column): return self.data[column] +# Tree data model + class TreeModel(QAbstractItemModel): - def __init__(self, db, parent=None): + def __init__(self, root, parent=None): super(TreeModel, self).__init__(parent) - self.db = db - self.root = TreeItem(db, 0, None) + self.root = root + self.last_row_read = 0 - def columnCount(self, parent): - return self.root.columnCount() - - def rowCount(self, parent): + def Item(self, parent): if parent.isValid(): - parent_item = parent.internalPointer() + return parent.internalPointer() else: - parent_item = self.root - return parent_item.childCount() + return self.root + + def rowCount(self, parent): + result = self.Item(parent).childCount() + if result < 0: + result = 0 + self.dataChanged.emit(parent, parent) + return result + + def hasChildren(self, parent): + return self.Item(parent).hasChildren() def headerData(self, section, orientation, role): if role == Qt.TextAlignmentRole: - if section > 1: - return Qt.AlignRight + return self.columnAlignment(section) if role 
!= Qt.DisplayRole: return None if orientation != Qt.Horizontal: return None - return self.root.columnHeader(section) + return self.columnHeader(section) def parent(self, child): child_item = child.internalPointer() @@ -246,21 +251,48 @@ class TreeModel(QAbstractItemModel): return self.createIndex(parent_item.getRow(), 0, parent_item) def index(self, row, column, parent): - if parent.isValid(): - parent_item = parent.internalPointer() - else: - parent_item = self.root - child_item = parent_item.getChildItem(row) + child_item = self.Item(parent).getChildItem(row) return self.createIndex(row, column, child_item) + def DisplayData(self, item, index): + return item.getData(index.column()) + + def columnAlignment(self, column): + return Qt.AlignLeft + + def columnFont(self, column): + return None + def data(self, index, role): if role == Qt.TextAlignmentRole: - if index.column() > 1: - return Qt.AlignRight + return self.columnAlignment(index.column()) + if role == Qt.FontRole: + return self.columnFont(index.column()) if role != Qt.DisplayRole: return None - index_item = index.internalPointer() - return index_item.getData(index.column()) + item = index.internalPointer() + return self.DisplayData(item, index) + +# Context-sensitive call graph data model + +class CallGraphModel(TreeModel): + + def __init__(self, glb, parent=None): + super(CallGraphModel, self).__init__(TreeItem(glb.db, 0, None), parent) + self.glb = glb + + def columnCount(self, parent=None): + return 7 + + def columnHeader(self, column): + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + return headers[column] + + def columnAlignment(self, column): + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + return alignment[column] + +# Main window class MainWindow(QMainWindow): @@ -275,7 +307,7 @@ class MainWindow(QMainWindow): 
self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) self.setMinimumSize(200, 100) - self.model = TreeModel(glb.db) + self.model = CallGraphModel(glb) self.view = QTreeView() self.view.setModel(self.model) -- cgit v1.2.3 From 4be9ace7e1cdcb44c1fba1fb41ec2b92dda06732 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:44 +0300 Subject: perf scripts python: call-graph-from-sql.py: Add data helper functions Add helper functions for a few common cases. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-11-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 54 +++++++++++++----------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index ada486048ad8d..7f2eabe7dacdc 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -52,6 +52,28 @@ from PySide.QtGui import * from PySide.QtSql import * from decimal import * +# Data formatting helpers + +def dsoname(name): + if name == "[kernel.kallsyms]": + return "[kernel]" + return name + +# Percent to one decimal place + +def PercentToOneDP(n, d): + if not d: + return "0.0" + x = (n * Decimal(100)) / d + return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP)) + +# Helper for queries that must not fail + +def QueryExec(query, stmt): + ret = query.exec_(stmt) + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + class TreeItem(): def __init__(self, db, row, parent_item): @@ -73,9 +95,7 @@ class TreeItem(): def setUpRoot(self): self.query_done = True query = QSqlQuery(self.db) - ret = query.exec_('SELECT id, comm FROM comms') - if not ret: - raise Exception("Query failed: " + query.lastError().text()) + QueryExec(query, 'SELECT id, comm FROM comms') while 
query.next(): if not query.value(0): continue @@ -91,9 +111,7 @@ class TreeItem(): self.child_items = [] self.child_count = 0 query = QSqlQuery(self.db) - ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) - if not ret: - raise Exception("Query failed: " + query.lastError().text()) + QueryExec(query, 'SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) while query.next(): child_item = TreeItem(self.db, self.child_count, self) self.child_items.append(child_item) @@ -114,18 +132,6 @@ class TreeItem(): def getRow(self): return self.row - def timePercent(self, b): - if not self.time: - return "0.0" - x = (b * Decimal(100)) / self.time - return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) - - def branchPercent(self, b): - if not self.branch_count: - return "0.0" - x = (b * Decimal(100)) / self.branch_count - return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) - def addChild(self, call_path_id, name, dso, count, time, branch_count): child_item = TreeItem(self.db, self.child_count, self) child_item.comm_id = self.comm_id @@ -134,14 +140,12 @@ class TreeItem(): child_item.branch_count = branch_count child_item.time = time child_item.data[0] = name - if dso == "[kernel.kallsyms]": - dso = "[kernel]" - child_item.data[1] = dso + child_item.data[1] = dsoname(dso) child_item.data[2] = str(count) child_item.data[3] = str(time) - child_item.data[4] = self.timePercent(time) + child_item.data[4] = PercentToOneDP(time, self.time) child_item.data[5] = str(branch_count) - child_item.data[6] = self.branchPercent(branch_count) + child_item.data[6] = PercentToOneDP(branch_count, self.branch_count) self.child_items.append(child_item) self.child_count += 1 @@ -189,12 +193,12 @@ class TreeItem(): self.branch_count = 
total_branch_count if self.branch_count: for child_item in self.child_items: - child_item.data[6] = self.branchPercent(child_item.branch_count) + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) if total_time > self.time: self.time = total_time if self.time: for child_item in self.child_items: - child_item.data[4] = self.timePercent(child_item.time) + child_item.data[4] = PercentToOneDP(child_item.time, self.time) def childCount(self): if not self.query_done: -- cgit v1.2.3 From 341e73cbd3019d350d1271803b45d84af88f2408 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:45 +0300 Subject: perf scripts python: call-graph-from-sql.py: Refactor TreeItem class class TreeItem represents items at all levels of the call-graph tree. However, not all the levels represent the same data i.e. the top-level is comms, the next level is threads, and subsequent levels are functions. Consequently it is simpler to have separate classes for different levels with commonality in a base class. Refactor TreeItem class accordingly. 
Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-12-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-sql.py | 273 +++++++++++------------ 1 file changed, 133 insertions(+), 140 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index 7f2eabe7dacdc..ee1085169a3ed 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -74,145 +74,6 @@ def QueryExec(query, stmt): if not ret: raise Exception("Query failed: " + query.lastError().text()) -class TreeItem(): - - def __init__(self, db, row, parent_item): - self.db = db - self.row = row - self.parent_item = parent_item - self.query_done = False; - self.child_count = 0 - self.child_items = [] - self.data = ["", "", "", "", "", "", ""] - self.comm_id = 0 - self.thread_id = 0 - self.call_path_id = 1 - self.branch_count = 0 - self.time = 0 - if not parent_item: - self.setUpRoot() - - def setUpRoot(self): - self.query_done = True - query = QSqlQuery(self.db) - QueryExec(query, 'SELECT id, comm FROM comms') - while query.next(): - if not query.value(0): - continue - child_item = TreeItem(self.db, self.child_count, self) - self.child_items.append(child_item) - self.child_count += 1 - child_item.setUpLevel1(query.value(0), query.value(1)) - - def setUpLevel1(self, comm_id, comm): - self.query_done = True; - self.comm_id = comm_id - self.data[0] = comm - self.child_items = [] - self.child_count = 0 - query = QSqlQuery(self.db) - QueryExec(query, 'SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) - while query.next(): - child_item = TreeItem(self.db, self.child_count, self) - self.child_items.append(child_item) - self.child_count += 1 - 
child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2)) - - def setUpLevel2(self, comm_id, thread_id, pid, tid): - self.comm_id = comm_id - self.thread_id = thread_id - self.data[0] = str(pid) + ":" + str(tid) - - def getChildItem(self, row): - return self.child_items[row] - - def getParentItem(self): - return self.parent_item - - def getRow(self): - return self.row - - def addChild(self, call_path_id, name, dso, count, time, branch_count): - child_item = TreeItem(self.db, self.child_count, self) - child_item.comm_id = self.comm_id - child_item.thread_id = self.thread_id - child_item.call_path_id = call_path_id - child_item.branch_count = branch_count - child_item.time = time - child_item.data[0] = name - child_item.data[1] = dsoname(dso) - child_item.data[2] = str(count) - child_item.data[3] = str(time) - child_item.data[4] = PercentToOneDP(time, self.time) - child_item.data[5] = str(branch_count) - child_item.data[6] = PercentToOneDP(branch_count, self.branch_count) - self.child_items.append(child_item) - self.child_count += 1 - - def selectCalls(self): - self.query_done = True; - query = QSqlQuery(self.db) - ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, ' - '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), ' - '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), ' - '( SELECT ip FROM call_paths where id = call_path_id ) ' - 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) + - ' ORDER BY call_path_id') - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - last_call_path_id = 0 - name = "" - dso = "" - count = 0 - branch_count = 0 - total_branch_count = 0 - time = 0 - total_time = 0 - while query.next(): - if query.value(1) == 
last_call_path_id: - count += 1 - branch_count += query.value(2) - time += query.value(4) - query.value(3) - else: - if count: - self.addChild(last_call_path_id, name, dso, count, time, branch_count) - last_call_path_id = query.value(1) - name = query.value(5) - dso = query.value(6) - count = 1 - total_branch_count += branch_count - total_time += time - branch_count = query.value(2) - time = query.value(4) - query.value(3) - if count: - self.addChild(last_call_path_id, name, dso, count, time, branch_count) - total_branch_count += branch_count - total_time += time - # Top level does not have time or branch count, so fix that here - if total_branch_count > self.branch_count: - self.branch_count = total_branch_count - if self.branch_count: - for child_item in self.child_items: - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) - if total_time > self.time: - self.time = total_time - if self.time: - for child_item in self.child_items: - child_item.data[4] = PercentToOneDP(child_item.time, self.time) - - def childCount(self): - if not self.query_done: - self.selectCalls() - return self.child_count - - def hasChildren(self): - if not self.query_done: - return True - return self.child_count > 0 - - def getData(self, column): - return self.data[column] - # Tree data model class TreeModel(QAbstractItemModel): @@ -277,12 +138,144 @@ class TreeModel(QAbstractItemModel): item = index.internalPointer() return self.DisplayData(item, index) +# Context-sensitive call graph data model item base + +class CallGraphLevelItemBase(object): + + def __init__(self, glb, row, parent_item): + self.glb = glb + self.row = row + self.parent_item = parent_item + self.query_done = False; + self.child_count = 0 + self.child_items = [] + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def childCount(self): + if not self.query_done: + self.Select() + if not 
self.child_count: + return -1 + return self.child_count + + def hasChildren(self): + if not self.query_done: + return True + return self.child_count > 0 + + def getData(self, column): + return self.data[column] + +# Context-sensitive call graph data model level 2+ item base + +class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + self.comm_id = comm_id + self.thread_id = thread_id + self.call_path_id = call_path_id + self.branch_count = branch_count + self.time = time + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" + " FROM calls" + " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" + " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" + " INNER JOIN dsos ON symbols.dso_id = dsos.id" + " WHERE parent_call_path_id = " + str(self.call_path_id) + + " AND comm_id = " + str(self.comm_id) + + " AND thread_id = " + str(self.thread_id) + + " GROUP BY call_path_id, name, short_name" + " ORDER BY call_path_id") + while query.next(): + child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model level three item + +class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + dso = dsoname(dso) + self.data = [ name, dso, 
str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + self.dbid = call_path_id + +# Context-sensitive call graph data model level two item + +class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): + super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + self.dbid = thread_id + + def Select(self): + super(CallGraphLevelTwoItem, self).Select() + for child_item in self.child_items: + self.time += child_item.time + self.branch_count += child_item.branch_count + for child_item in self.child_items: + child_item.data[4] = PercentToOneDP(child_item.time, self.time) + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + +# Context-sensitive call graph data model level one item + +class CallGraphLevelOneItem(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, comm, parent_item): + super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) + self.data = [comm, "", "", "", "", "", ""] + self.dbid = comm_id + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT thread_id, pid, tid" + " FROM comm_threads" + " INNER JOIN threads ON thread_id = threads.id" + " WHERE comm_id = " + str(self.dbid)) + while query.next(): + child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model root item + +class CallGraphRootItem(CallGraphLevelItemBase): + + def __init__(self, glb): + super(CallGraphRootItem, self).__init__(glb, 0, None) + self.dbid = 0 + self.query_done = True; + query = QSqlQuery(glb.db) + QueryExec(query, "SELECT id, comm FROM 
comms") + while query.next(): + if not query.value(0): + continue + child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) + self.child_items.append(child_item) + self.child_count += 1 + # Context-sensitive call graph data model class CallGraphModel(TreeModel): def __init__(self, glb, parent=None): - super(CallGraphModel, self).__init__(TreeItem(glb.db, 0, None), parent) + super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent) self.glb = glb def columnCount(self, parent=None): -- cgit v1.2.3 From 031c2a004ba75a4f8f2a6d0a7ca6f2fe5912de22 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:46 +0300 Subject: perf scripts python: call-graph-from-sql.py: Rename to exported-sql-viewer.py Additional reports will be added to the script so rename to reflect the more general purpose. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-13-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 2 +- tools/perf/scripts/python/call-graph-from-sql.py | 395 ---------------------- tools/perf/scripts/python/export-to-postgresql.py | 2 +- tools/perf/scripts/python/export-to-sqlite.py | 2 +- tools/perf/scripts/python/exported-sql-viewer.py | 395 ++++++++++++++++++++++ 5 files changed, 398 insertions(+), 398 deletions(-) delete mode 100644 tools/perf/scripts/python/call-graph-from-sql.py create mode 100755 tools/perf/scripts/python/exported-sql-viewer.py diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 76971d2e41645..115eaacc455fd 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -106,7 +106,7 @@ in transaction, respectively. While it is possible to create scripts to analyze the data, an alternative approach is available to export the data to a sqlite or postgresql database. 
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, -and to script call-graph-from-sql.py for an example of using the database. +and to script exported-sql-viewer.py for an example of using the database. There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py deleted file mode 100644 index ee1085169a3ed..0000000000000 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ /dev/null @@ -1,395 +0,0 @@ -#!/usr/bin/python2 -# SPDX-License-Identifier: GPL-2.0 -# exported-sql-viewer.py: view data from sql database -# Copyright (c) 2014-2018, Intel Corporation. - -# To use this script you will need to have exported data using either the -# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those -# scripts for details. -# -# Following on from the example in the export scripts, a -# call-graph can be displayed for the pt_example database like this: -# -# python tools/perf/scripts/python/call-graph-from-sql.py pt_example -# -# Note that for PostgreSQL, this script supports connecting to remote databases -# by setting hostname, port, username, password, and dbname e.g. -# -# python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" -# -# The result is a GUI window with a tree representing a context-sensitive -# call-graph. 
Expanding a couple of levels of the tree and adjusting column -# widths to suit will display something like: -# -# Call Graph: pt_example -# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) -# v- ls -# v- 2638:2638 -# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 -# |- unknown unknown 1 13198 0.1 1 0.0 -# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 -# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 -# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 -# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 -# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 -# >- __libc_csu_init ls 1 10354 0.1 10 0.0 -# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 -# v- main ls 1 8182043 99.6 180254 99.9 -# -# Points to note: -# The top level is a command name (comm) -# The next level is a thread (pid:tid) -# Subsequent levels are functions -# 'Count' is the number of calls -# 'Time' is the elapsed time until the function returns -# Percentages are relative to the level above -# 'Branch Count' is the total number of branches for that function and all -# functions that it calls - -import sys -from PySide.QtCore import * -from PySide.QtGui import * -from PySide.QtSql import * -from decimal import * - -# Data formatting helpers - -def dsoname(name): - if name == "[kernel.kallsyms]": - return "[kernel]" - return name - -# Percent to one decimal place - -def PercentToOneDP(n, d): - if not d: - return "0.0" - x = (n * Decimal(100)) / d - return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP)) - -# Helper for queries that must not fail - -def QueryExec(query, stmt): - ret = query.exec_(stmt) - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - -# Tree data model - -class TreeModel(QAbstractItemModel): - - def __init__(self, root, parent=None): - super(TreeModel, self).__init__(parent) - self.root = root - self.last_row_read = 0 - - def Item(self, parent): - if parent.isValid(): - return parent.internalPointer() - else: - return 
self.root - - def rowCount(self, parent): - result = self.Item(parent).childCount() - if result < 0: - result = 0 - self.dataChanged.emit(parent, parent) - return result - - def hasChildren(self, parent): - return self.Item(parent).hasChildren() - - def headerData(self, section, orientation, role): - if role == Qt.TextAlignmentRole: - return self.columnAlignment(section) - if role != Qt.DisplayRole: - return None - if orientation != Qt.Horizontal: - return None - return self.columnHeader(section) - - def parent(self, child): - child_item = child.internalPointer() - if child_item is self.root: - return QModelIndex() - parent_item = child_item.getParentItem() - return self.createIndex(parent_item.getRow(), 0, parent_item) - - def index(self, row, column, parent): - child_item = self.Item(parent).getChildItem(row) - return self.createIndex(row, column, child_item) - - def DisplayData(self, item, index): - return item.getData(index.column()) - - def columnAlignment(self, column): - return Qt.AlignLeft - - def columnFont(self, column): - return None - - def data(self, index, role): - if role == Qt.TextAlignmentRole: - return self.columnAlignment(index.column()) - if role == Qt.FontRole: - return self.columnFont(index.column()) - if role != Qt.DisplayRole: - return None - item = index.internalPointer() - return self.DisplayData(item, index) - -# Context-sensitive call graph data model item base - -class CallGraphLevelItemBase(object): - - def __init__(self, glb, row, parent_item): - self.glb = glb - self.row = row - self.parent_item = parent_item - self.query_done = False; - self.child_count = 0 - self.child_items = [] - - def getChildItem(self, row): - return self.child_items[row] - - def getParentItem(self): - return self.parent_item - - def getRow(self): - return self.row - - def childCount(self): - if not self.query_done: - self.Select() - if not self.child_count: - return -1 - return self.child_count - - def hasChildren(self): - if not self.query_done: - return True 
- return self.child_count > 0 - - def getData(self, column): - return self.data[column] - -# Context-sensitive call graph data model level 2+ item base - -class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): - - def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): - super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) - self.comm_id = comm_id - self.thread_id = thread_id - self.call_path_id = call_path_id - self.branch_count = branch_count - self.time = time - - def Select(self): - self.query_done = True; - query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" - " FROM calls" - " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" - " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" - " INNER JOIN dsos ON symbols.dso_id = dsos.id" - " WHERE parent_call_path_id = " + str(self.call_path_id) + - " AND comm_id = " + str(self.comm_id) + - " AND thread_id = " + str(self.thread_id) + - " GROUP BY call_path_id, name, short_name" - " ORDER BY call_path_id") - while query.next(): - child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) - self.child_items.append(child_item) - self.child_count += 1 - -# Context-sensitive call graph data model level three item - -class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): - - def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): - super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) - dso = dsoname(dso) - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, 
parent_item.branch_count) ] - self.dbid = call_path_id - -# Context-sensitive call graph data model level two item - -class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): - - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): - super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) - self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] - self.dbid = thread_id - - def Select(self): - super(CallGraphLevelTwoItem, self).Select() - for child_item in self.child_items: - self.time += child_item.time - self.branch_count += child_item.branch_count - for child_item in self.child_items: - child_item.data[4] = PercentToOneDP(child_item.time, self.time) - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) - -# Context-sensitive call graph data model level one item - -class CallGraphLevelOneItem(CallGraphLevelItemBase): - - def __init__(self, glb, row, comm_id, comm, parent_item): - super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) - self.data = [comm, "", "", "", "", "", ""] - self.dbid = comm_id - - def Select(self): - self.query_done = True; - query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT thread_id, pid, tid" - " FROM comm_threads" - " INNER JOIN threads ON thread_id = threads.id" - " WHERE comm_id = " + str(self.dbid)) - while query.next(): - child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) - self.child_items.append(child_item) - self.child_count += 1 - -# Context-sensitive call graph data model root item - -class CallGraphRootItem(CallGraphLevelItemBase): - - def __init__(self, glb): - super(CallGraphRootItem, self).__init__(glb, 0, None) - self.dbid = 0 - self.query_done = True; - query = QSqlQuery(glb.db) - QueryExec(query, "SELECT id, comm FROM comms") - while query.next(): - if not query.value(0): - continue - child_item = CallGraphLevelOneItem(glb, 
self.child_count, query.value(0), query.value(1), self) - self.child_items.append(child_item) - self.child_count += 1 - -# Context-sensitive call graph data model - -class CallGraphModel(TreeModel): - - def __init__(self, glb, parent=None): - super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent) - self.glb = glb - - def columnCount(self, parent=None): - return 7 - - def columnHeader(self, column): - headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] - return headers[column] - - def columnAlignment(self, column): - alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] - return alignment[column] - -# Main window - -class MainWindow(QMainWindow): - - def __init__(self, glb, parent=None): - super(MainWindow, self).__init__(parent) - - self.glb = glb - - self.setWindowTitle("Call Graph: " + glb.dbname) - self.move(100, 100) - self.resize(800, 600) - self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) - self.setMinimumSize(200, 100) - - self.model = CallGraphModel(glb) - - self.view = QTreeView() - self.view.setModel(self.model) - - for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): - self.view.setColumnWidth(c, w) - - self.setCentralWidget(self.view) - -# Global data - -class Glb(): - - def __init__(self, dbref, db, dbname): - self.dbref = dbref - self.db = db - self.dbname = dbname - self.app = None - self.mainwindow = None - -# Database reference - -class DBRef(): - - def __init__(self, is_sqlite3, dbname): - self.is_sqlite3 = is_sqlite3 - self.dbname = dbname - - def Open(self, connection_name): - dbname = self.dbname - if self.is_sqlite3: - db = QSqlDatabase.addDatabase("QSQLITE", connection_name) - else: - db = QSqlDatabase.addDatabase("QPSQL", connection_name) - opts = dbname.split() - for opt in opts: - if "=" in opt: - opt = opt.split("=") - if opt[0] == "hostname": - db.setHostName(opt[1]) 
- elif opt[0] == "port": - db.setPort(int(opt[1])) - elif opt[0] == "username": - db.setUserName(opt[1]) - elif opt[0] == "password": - db.setPassword(opt[1]) - elif opt[0] == "dbname": - dbname = opt[1] - else: - dbname = opt - - db.setDatabaseName(dbname) - if not db.open(): - raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) - return db, dbname - -# Main - -def Main(): - if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: call-graph-from-sql.py " - raise Exception("Too few arguments") - - dbname = sys.argv[1] - - is_sqlite3 = False - try: - f = open(dbname) - if f.read(15) == "SQLite format 3": - is_sqlite3 = True - f.close() - except: - pass - - dbref = DBRef(is_sqlite3, dbname) - db, dbname = dbref.Open("main") - glb = Glb(dbref, db, dbname) - app = QApplication(sys.argv) - glb.app = app - mainwindow = MainWindow(glb) - glb.mainwindow = mainwindow - mainwindow.show() - err = app.exec_() - db.close() - sys.exit(err) - -if __name__ == "__main__": - Main() diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index e46f51b175131..0564dd7377f22 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -59,7 +59,7 @@ import datetime # pt_example=# \q # # An example of using the database is provided by the script -# call-graph-from-sql.py. Refer to that script for details. +# exported-sql-viewer.py. Refer to that script for details. # # Tables: # diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index e4bb82c8aba9e..245caf2643ed1 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -40,7 +40,7 @@ import datetime # sqlite> .quit # # An example of using the database is provided by the script -# call-graph-from-sql.py. Refer to that script for details. +# exported-sql-viewer.py. 
Refer to that script for details. # # The database structure is practically the same as created by the script # export-to-postgresql.py. Refer to that script for details. A notable diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py new file mode 100755 index 0000000000000..03e7a1de7f31d --- /dev/null +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -0,0 +1,395 @@ +#!/usr/bin/python2 +# SPDX-License-Identifier: GPL-2.0 +# exported-sql-viewer.py: view data from sql database +# Copyright (c) 2014-2018, Intel Corporation. + +# To use this script you will need to have exported data using either the +# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those +# scripts for details. +# +# Following on from the example in the export scripts, a +# call-graph can be displayed for the pt_example database like this: +# +# python tools/perf/scripts/python/exported-sql-viewer.py pt_example +# +# Note that for PostgreSQL, this script supports connecting to remote databases +# by setting hostname, port, username, password, and dbname e.g. +# +# python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" +# +# The result is a GUI window with a tree representing a context-sensitive +# call-graph. 
Expanding a couple of levels of the tree and adjusting column +# widths to suit will display something like: +# +# Call Graph: pt_example +# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +# v- ls +# v- 2638:2638 +# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 +# |- unknown unknown 1 13198 0.1 1 0.0 +# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 +# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 +# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 +# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 +# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 +# >- __libc_csu_init ls 1 10354 0.1 10 0.0 +# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 +# v- main ls 1 8182043 99.6 180254 99.9 +# +# Points to note: +# The top level is a command name (comm) +# The next level is a thread (pid:tid) +# Subsequent levels are functions +# 'Count' is the number of calls +# 'Time' is the elapsed time until the function returns +# Percentages are relative to the level above +# 'Branch Count' is the total number of branches for that function and all +# functions that it calls + +import sys +from PySide.QtCore import * +from PySide.QtGui import * +from PySide.QtSql import * +from decimal import * + +# Data formatting helpers + +def dsoname(name): + if name == "[kernel.kallsyms]": + return "[kernel]" + return name + +# Percent to one decimal place + +def PercentToOneDP(n, d): + if not d: + return "0.0" + x = (n * Decimal(100)) / d + return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP)) + +# Helper for queries that must not fail + +def QueryExec(query, stmt): + ret = query.exec_(stmt) + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + +# Tree data model + +class TreeModel(QAbstractItemModel): + + def __init__(self, root, parent=None): + super(TreeModel, self).__init__(parent) + self.root = root + self.last_row_read = 0 + + def Item(self, parent): + if parent.isValid(): + return parent.internalPointer() + else: + return 
self.root + + def rowCount(self, parent): + result = self.Item(parent).childCount() + if result < 0: + result = 0 + self.dataChanged.emit(parent, parent) + return result + + def hasChildren(self, parent): + return self.Item(parent).hasChildren() + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(section) + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def parent(self, child): + child_item = child.internalPointer() + if child_item is self.root: + return QModelIndex() + parent_item = child_item.getParentItem() + return self.createIndex(parent_item.getRow(), 0, parent_item) + + def index(self, row, column, parent): + child_item = self.Item(parent).getChildItem(row) + return self.createIndex(row, column, child_item) + + def DisplayData(self, item, index): + return item.getData(index.column()) + + def columnAlignment(self, column): + return Qt.AlignLeft + + def columnFont(self, column): + return None + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(index.column()) + if role == Qt.FontRole: + return self.columnFont(index.column()) + if role != Qt.DisplayRole: + return None + item = index.internalPointer() + return self.DisplayData(item, index) + +# Context-sensitive call graph data model item base + +class CallGraphLevelItemBase(object): + + def __init__(self, glb, row, parent_item): + self.glb = glb + self.row = row + self.parent_item = parent_item + self.query_done = False; + self.child_count = 0 + self.child_items = [] + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def childCount(self): + if not self.query_done: + self.Select() + if not self.child_count: + return -1 + return self.child_count + + def hasChildren(self): + if not self.query_done: + return True 
+ return self.child_count > 0 + + def getData(self, column): + return self.data[column] + +# Context-sensitive call graph data model level 2+ item base + +class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + self.comm_id = comm_id + self.thread_id = thread_id + self.call_path_id = call_path_id + self.branch_count = branch_count + self.time = time + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" + " FROM calls" + " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" + " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" + " INNER JOIN dsos ON symbols.dso_id = dsos.id" + " WHERE parent_call_path_id = " + str(self.call_path_id) + + " AND comm_id = " + str(self.comm_id) + + " AND thread_id = " + str(self.thread_id) + + " GROUP BY call_path_id, name, short_name" + " ORDER BY call_path_id") + while query.next(): + child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model level three item + +class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + dso = dsoname(dso) + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, 
parent_item.branch_count) ] + self.dbid = call_path_id + +# Context-sensitive call graph data model level two item + +class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): + super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + self.dbid = thread_id + + def Select(self): + super(CallGraphLevelTwoItem, self).Select() + for child_item in self.child_items: + self.time += child_item.time + self.branch_count += child_item.branch_count + for child_item in self.child_items: + child_item.data[4] = PercentToOneDP(child_item.time, self.time) + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + +# Context-sensitive call graph data model level one item + +class CallGraphLevelOneItem(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, comm, parent_item): + super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) + self.data = [comm, "", "", "", "", "", ""] + self.dbid = comm_id + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT thread_id, pid, tid" + " FROM comm_threads" + " INNER JOIN threads ON thread_id = threads.id" + " WHERE comm_id = " + str(self.dbid)) + while query.next(): + child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model root item + +class CallGraphRootItem(CallGraphLevelItemBase): + + def __init__(self, glb): + super(CallGraphRootItem, self).__init__(glb, 0, None) + self.dbid = 0 + self.query_done = True; + query = QSqlQuery(glb.db) + QueryExec(query, "SELECT id, comm FROM comms") + while query.next(): + if not query.value(0): + continue + child_item = CallGraphLevelOneItem(glb, 
self.child_count, query.value(0), query.value(1), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model + +class CallGraphModel(TreeModel): + + def __init__(self, glb, parent=None): + super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent) + self.glb = glb + + def columnCount(self, parent=None): + return 7 + + def columnHeader(self, column): + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + return headers[column] + + def columnAlignment(self, column): + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + return alignment[column] + +# Main window + +class MainWindow(QMainWindow): + + def __init__(self, glb, parent=None): + super(MainWindow, self).__init__(parent) + + self.glb = glb + + self.setWindowTitle("Call Graph: " + glb.dbname) + self.move(100, 100) + self.resize(800, 600) + self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) + self.setMinimumSize(200, 100) + + self.model = CallGraphModel(glb) + + self.view = QTreeView() + self.view.setModel(self.model) + + for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): + self.view.setColumnWidth(c, w) + + self.setCentralWidget(self.view) + +# Global data + +class Glb(): + + def __init__(self, dbref, db, dbname): + self.dbref = dbref + self.db = db + self.dbname = dbname + self.app = None + self.mainwindow = None + +# Database reference + +class DBRef(): + + def __init__(self, is_sqlite3, dbname): + self.is_sqlite3 = is_sqlite3 + self.dbname = dbname + + def Open(self, connection_name): + dbname = self.dbname + if self.is_sqlite3: + db = QSqlDatabase.addDatabase("QSQLITE", connection_name) + else: + db = QSqlDatabase.addDatabase("QPSQL", connection_name) + opts = dbname.split() + for opt in opts: + if "=" in opt: + opt = opt.split("=") + if opt[0] == "hostname": + db.setHostName(opt[1]) 
+ elif opt[0] == "port": + db.setPort(int(opt[1])) + elif opt[0] == "username": + db.setUserName(opt[1]) + elif opt[0] == "password": + db.setPassword(opt[1]) + elif opt[0] == "dbname": + dbname = opt[1] + else: + dbname = opt + + db.setDatabaseName(dbname) + if not db.open(): + raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) + return db, dbname + +# Main + +def Main(): + if (len(sys.argv) < 2): + print >> sys.stderr, "Usage is: exported-sql-viewer.py " + raise Exception("Too few arguments") + + dbname = sys.argv[1] + + is_sqlite3 = False + try: + f = open(dbname) + if f.read(15) == "SQLite format 3": + is_sqlite3 = True + f.close() + except: + pass + + dbref = DBRef(is_sqlite3, dbname) + db, dbname = dbref.Open("main") + glb = Glb(dbref, db, dbname) + app = QApplication(sys.argv) + glb.app = app + mainwindow = MainWindow(glb) + glb.mainwindow = mainwindow + mainwindow.show() + err = app.exec_() + db.close() + sys.exit(err) + +if __name__ == "__main__": + Main() -- cgit v1.2.3 From 1beb5c7b07040b70975a2ae0e90b87d412fabf06 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:47 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add support for multiple sub-windows Use Qt MDI (multiple document interface) to support multiple sub-windows. Put the data model in a cache so that each sub-window can share the same data. This allows multiple views of the call-graph at the same time and paves the way to add more reports. Committer testing: Starts with a "File Reports Windows" main menu, from the "Reports" I can get what was available up to now, the "Context-Sensitive Call Graph" option.
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-14-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 182 +++++++++++++++++++++-- 1 file changed, 173 insertions(+), 9 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 03e7a1de7f31d..c2f44351821ea 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -47,6 +47,8 @@ # functions that it calls import sys +import weakref +import threading from PySide.QtCore import * from PySide.QtGui import * from PySide.QtSql import * @@ -138,6 +140,23 @@ class TreeModel(QAbstractItemModel): item = index.internalPointer() return self.DisplayData(item, index) +# Model cache + +model_cache = weakref.WeakValueDictionary() +model_cache_lock = threading.Lock() + +def LookupCreateModel(model_name, create_fn): + model_cache_lock.acquire() + try: + model = model_cache[model_name] + except: + model = None + if model is None: + model = create_fn() + model_cache[model_name] = model + model_cache_lock.release() + return model + # Context-sensitive call graph data model item base class CallGraphLevelItemBase(object): @@ -289,6 +308,144 @@ class CallGraphModel(TreeModel): alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] return alignment[column] +# Context-sensitive call graph window + +class CallGraphWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(CallGraphWindow, self).__init__(parent) + + self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x)) + + self.view = QTreeView() + self.view.setModel(self.model) + + for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): + self.view.setColumnWidth(c, w) 
+ + self.setWidget(self.view) + + AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph") + +# Action Definition + +def CreateAction(label, tip, callback, parent=None, shortcut=None): + action = QAction(label, parent) + if shortcut != None: + action.setShortcuts(shortcut) + action.setStatusTip(tip) + action.triggered.connect(callback) + return action + +# Typical application actions + +def CreateExitAction(app, parent=None): + return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit) + +# Typical MDI actions + +def CreateCloseActiveWindowAction(mdi_area): + return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area) + +def CreateCloseAllWindowsAction(mdi_area): + return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area) + +def CreateTileWindowsAction(mdi_area): + return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area) + +def CreateCascadeWindowsAction(mdi_area): + return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area) + +def CreateNextWindowAction(mdi_area): + return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild) + +def CreatePreviousWindowAction(mdi_area): + return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild) + +# Typical MDI window menu + +class WindowMenu(): + + def __init__(self, mdi_area, menu): + self.mdi_area = mdi_area + self.window_menu = menu.addMenu("&Windows") + self.close_active_window = CreateCloseActiveWindowAction(mdi_area) + self.close_all_windows = CreateCloseAllWindowsAction(mdi_area) + self.tile_windows = CreateTileWindowsAction(mdi_area) + self.cascade_windows = CreateCascadeWindowsAction(mdi_area) + self.next_window = CreateNextWindowAction(mdi_area) + 
self.previous_window = CreatePreviousWindowAction(mdi_area) + self.window_menu.aboutToShow.connect(self.Update) + + def Update(self): + self.window_menu.clear() + sub_window_count = len(self.mdi_area.subWindowList()) + have_sub_windows = sub_window_count != 0 + self.close_active_window.setEnabled(have_sub_windows) + self.close_all_windows.setEnabled(have_sub_windows) + self.tile_windows.setEnabled(have_sub_windows) + self.cascade_windows.setEnabled(have_sub_windows) + self.next_window.setEnabled(have_sub_windows) + self.previous_window.setEnabled(have_sub_windows) + self.window_menu.addAction(self.close_active_window) + self.window_menu.addAction(self.close_all_windows) + self.window_menu.addSeparator() + self.window_menu.addAction(self.tile_windows) + self.window_menu.addAction(self.cascade_windows) + self.window_menu.addSeparator() + self.window_menu.addAction(self.next_window) + self.window_menu.addAction(self.previous_window) + if sub_window_count == 0: + return + self.window_menu.addSeparator() + nr = 1 + for sub_window in self.mdi_area.subWindowList(): + label = str(nr) + " " + sub_window.name + if nr < 10: + label = "&" + label + action = self.window_menu.addAction(label) + action.setCheckable(True) + action.setChecked(sub_window == self.mdi_area.activeSubWindow()) + action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x)) + self.window_menu.addAction(action) + nr += 1 + + def setActiveSubWindow(self, nr): + self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1]) + +# Unique name for sub-windows + +def NumberedWindowName(name, nr): + if nr > 1: + name += " <" + str(nr) + ">" + return name + +def UniqueSubWindowName(mdi_area, name): + nr = 1 + while True: + unique_name = NumberedWindowName(name, nr) + ok = True + for sub_window in mdi_area.subWindowList(): + if sub_window.name == unique_name: + ok = False + break + if ok: + return unique_name + nr += 1 + +# Add a sub-window + +def AddSubWindow(mdi_area, sub_window, name): + 
unique_name = UniqueSubWindowName(mdi_area, name) + sub_window.setMinimumSize(200, 100) + sub_window.resize(800, 600) + sub_window.setWindowTitle(unique_name) + sub_window.setAttribute(Qt.WA_DeleteOnClose) + sub_window.setWindowIcon(sub_window.style().standardIcon(QStyle.SP_FileIcon)) + sub_window.name = unique_name + mdi_area.addSubWindow(sub_window) + sub_window.show() + # Main window class MainWindow(QMainWindow): @@ -298,21 +455,28 @@ class MainWindow(QMainWindow): self.glb = glb - self.setWindowTitle("Call Graph: " + glb.dbname) - self.move(100, 100) - self.resize(800, 600) + self.setWindowTitle("Exported SQL Viewer: " + glb.dbname) self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) self.setMinimumSize(200, 100) - self.model = CallGraphModel(glb) + self.mdi_area = QMdiArea() + self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded) + self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded) - self.view = QTreeView() - self.view.setModel(self.model) + self.setCentralWidget(self.mdi_area) - for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): - self.view.setColumnWidth(c, w) + menu = self.menuBar() + + file_menu = menu.addMenu("&File") + file_menu.addAction(CreateExitAction(glb.app, self)) + + reports_menu = menu.addMenu("&Reports") + reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) + + self.window_menu = WindowMenu(self.mdi_area, menu) - self.setCentralWidget(self.view) + def NewCallGraph(self): + CallGraphWindow(self.glb, self) # Global data -- cgit v1.2.3 From ebd70c7dc2f5f57315e19d959ddc9cb05e9d48e1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:48 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add ability to find symbols in the call-graph Add a Find bar that appears at the bottom of the call-graph window. 
Committer testing: Using: python tools/perf/scripts/python/exported-sql-viewer.py pt_example branches calls Using the database built in the first "Committer Testing" section in this patch series I was able to: "Reports" "Context-Sensitive Call Graphs" Control+F or select "Edit" in the top menu then "Find" __poll and find the first place where the "__poll" function appears, then press the down arrow in the lower right corner and go to the next, etc. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-15-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 306 ++++++++++++++++++++++- 1 file changed, 305 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index c2f44351821ea..0386a600ffc73 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -49,6 +49,7 @@ import sys import weakref import threading +import string from PySide.QtCore import * from PySide.QtGui import * from PySide.QtSql import * @@ -76,6 +77,27 @@ def QueryExec(query, stmt): if not ret: raise Exception("Query failed: " + query.lastError().text()) +# Background thread + +class Thread(QThread): + + done = Signal(object) + + def __init__(self, task, param=None, parent=None): + super(Thread, self).__init__(parent) + self.task = task + self.param = param + + def run(self): + while True: + if self.param is None: + done, result = self.task() + else: + done, result = self.task(self.param) + self.done.emit(result) + if done: + break + # Tree data model class TreeModel(QAbstractItemModel): @@ -157,6 +179,125 @@ def LookupCreateModel(model_name, create_fn): model_cache_lock.release() return model +# Find bar + +class FindBar(): + + def __init__(self, parent, finder, is_reg_expr=False): + self.finder = 
finder + self.context = [] + self.last_value = None + self.last_pattern = None + + label = QLabel("Find:") + label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.textbox = QComboBox() + self.textbox.setEditable(True) + self.textbox.currentIndexChanged.connect(self.ValueChanged) + + self.progress = QProgressBar() + self.progress.setRange(0, 0) + self.progress.hide() + + if is_reg_expr: + self.pattern = QCheckBox("Regular Expression") + else: + self.pattern = QCheckBox("Pattern") + self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.next_button = QToolButton() + self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown)) + self.next_button.released.connect(lambda: self.NextPrev(1)) + + self.prev_button = QToolButton() + self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp)) + self.prev_button.released.connect(lambda: self.NextPrev(-1)) + + self.close_button = QToolButton() + self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton)) + self.close_button.released.connect(self.Deactivate) + + self.hbox = QHBoxLayout() + self.hbox.setContentsMargins(0, 0, 0, 0) + + self.hbox.addWidget(label) + self.hbox.addWidget(self.textbox) + self.hbox.addWidget(self.progress) + self.hbox.addWidget(self.pattern) + self.hbox.addWidget(self.next_button) + self.hbox.addWidget(self.prev_button) + self.hbox.addWidget(self.close_button) + + self.bar = QWidget() + self.bar.setLayout(self.hbox); + self.bar.hide() + + def Widget(self): + return self.bar + + def Activate(self): + self.bar.show() + self.textbox.setFocus() + + def Deactivate(self): + self.bar.hide() + + def Busy(self): + self.textbox.setEnabled(False) + self.pattern.hide() + self.next_button.hide() + self.prev_button.hide() + self.progress.show() + + def Idle(self): + self.textbox.setEnabled(True) + self.progress.hide() + self.pattern.show() + self.next_button.show() + self.prev_button.show() + + def Find(self, direction): + value = 
self.textbox.currentText() + pattern = self.pattern.isChecked() + self.last_value = value + self.last_pattern = pattern + self.finder.Find(value, direction, pattern, self.context) + + def ValueChanged(self): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + index = self.textbox.currentIndex() + data = self.textbox.itemData(index) + # Store the pattern in the combo box to keep it with the text value + if data == None: + self.textbox.setItemData(index, pattern) + else: + self.pattern.setChecked(data) + self.Find(0) + + def NextPrev(self, direction): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + if value != self.last_value: + index = self.textbox.findText(value) + # Allow for a button press before the value has been added to the combo box + if index < 0: + index = self.textbox.count() + self.textbox.addItem(value, pattern) + self.textbox.setCurrentIndex(index) + return + else: + self.textbox.setItemData(index, pattern) + elif pattern != self.last_pattern: + # Keep the pattern recorded in the combo box up to date + index = self.textbox.currentIndex() + self.textbox.setItemData(index, pattern) + self.Find(direction) + + def NotFound(self): + QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found") + # Context-sensitive call graph data model item base class CallGraphLevelItemBase(object): @@ -308,6 +449,123 @@ class CallGraphModel(TreeModel): alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] return alignment[column] + def FindSelect(self, value, pattern, query): + if pattern: + # postgresql and sqlite pattern patching differences: + # postgresql LIKE is case sensitive but sqlite LIKE is not + # postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not + # postgresql supports ILIKE which is case insensitive + # sqlite supports GLOB (text only) which uses * and ? 
and is case sensitive + if not self.glb.dbref.is_sqlite3: + # Escape % and _ + s = value.replace("%", "\%") + s = s.replace("_", "\_") + # Translate * and ? into SQL LIKE pattern characters % and _ + trans = string.maketrans("*?", "%_") + match = " LIKE '" + str(s).translate(trans) + "'" + else: + match = " GLOB '" + str(value) + "'" + else: + match = " = '" + str(value) + "'" + QueryExec(query, "SELECT call_path_id, comm_id, thread_id" + " FROM calls" + " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" + " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" + " WHERE symbols.name" + match + + " GROUP BY comm_id, thread_id, call_path_id" + " ORDER BY comm_id, thread_id, call_path_id") + + def FindPath(self, query): + # Turn the query result into a list of ids that the tree view can walk + # to open the tree at the right place. + ids = [] + parent_id = query.value(0) + while parent_id: + ids.insert(0, parent_id) + q2 = QSqlQuery(self.glb.db) + QueryExec(q2, "SELECT parent_id" + " FROM call_paths" + " WHERE id = " + str(parent_id)) + if not q2.next(): + break + parent_id = q2.value(0) + # The call path root is not used + if ids[0] == 1: + del ids[0] + ids.insert(0, query.value(2)) + ids.insert(0, query.value(1)) + return ids + + def Found(self, query, found): + if found: + return self.FindPath(query) + return [] + + def FindValue(self, value, pattern, query, last_value, last_pattern): + if last_value == value and pattern == last_pattern: + found = query.first() + else: + self.FindSelect(value, pattern, query) + found = query.next() + return self.Found(query, found) + + def FindNext(self, query): + found = query.next() + if not found: + found = query.first() + return self.Found(query, found) + + def FindPrev(self, query): + found = query.previous() + if not found: + found = query.last() + return self.Found(query, found) + + def FindThread(self, c): + if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern: + ids = 
self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern) + elif c.direction > 0: + ids = self.FindNext(c.query) + else: + ids = self.FindPrev(c.query) + return (True, ids) + + def Find(self, value, direction, pattern, context, callback): + class Context(): + def __init__(self, *x): + self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x + def Update(self, *x): + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern) + if len(context): + context[0].Update(value, direction, pattern) + else: + context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None)) + # Use a thread so the UI is not blocked during the SELECT + thread = Thread(self.FindThread, context[0]) + thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection) + thread.start() + + def FindDone(self, thread, callback, ids): + callback(ids) + +# Vertical widget layout + +class VBox(): + + def __init__(self, w1, w2, w3=None): + self.vbox = QWidget() + self.vbox.setLayout(QVBoxLayout()); + + self.vbox.layout().setContentsMargins(0, 0, 0, 0) + + self.vbox.layout().addWidget(w1) + self.vbox.layout().addWidget(w2) + if w3: + self.vbox.layout().addWidget(w3) + + def Widget(self): + return self.vbox + # Context-sensitive call graph window class CallGraphWindow(QMdiSubWindow): @@ -323,10 +581,45 @@ class CallGraphWindow(QMdiSubWindow): for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): self.view.setColumnWidth(c, w) - self.setWidget(self.view) + self.find_bar = FindBar(self, self) + + self.vbox = VBox(self.view, self.find_bar.Widget()) + + self.setWidget(self.vbox.Widget()) AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph") + def DisplayFound(self, ids): + if not len(ids): + return False + parent = QModelIndex() + for dbid in ids: + found = False + n = self.model.rowCount(parent) + for row in 
xrange(n): + child = self.model.index(row, 0, parent) + if child.internalPointer().dbid == dbid: + found = True + self.view.setCurrentIndex(child) + parent = child + break + if not found: + break + return found + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.model.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, ids): + found = True + if not self.DisplayFound(ids): + found = False + self.find_bar.Idle() + if not found: + self.find_bar.NotFound() + # Action Definition def CreateAction(label, tip, callback, parent=None, shortcut=None): @@ -470,11 +763,22 @@ class MainWindow(QMainWindow): file_menu = menu.addMenu("&File") file_menu.addAction(CreateExitAction(glb.app, self)) + edit_menu = menu.addMenu("&Edit") + edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find)) + reports_menu = menu.addMenu("&Reports") reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) self.window_menu = WindowMenu(self.mdi_area, menu) + def Find(self): + win = self.mdi_area.activeSubWindow() + if win: + try: + win.find_bar.Activate() + except: + pass + def NewCallGraph(self): CallGraphWindow(self.glb, self) -- cgit v1.2.3 From 82f68e2898e634b8b0efc7ddd57e037ef75ea114 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:49 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add ability to shrink / enlarge font Shrinking the font allows more information to display. Committer testing: Works, tested with the convenient Control+Shift+'+' and Control+'-' as well with the more cumbersome top menu "Edit" + "Enlarge/Shrink font" options. 
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-16-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 0386a600ffc73..310ba71475830 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -706,6 +706,20 @@ class WindowMenu(): def setActiveSubWindow(self, nr): self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1]) +# Font resize + +def ResizeFont(widget, diff): + font = widget.font() + sz = font.pointSize() + font.setPointSize(sz + diff) + widget.setFont(font) + +def ShrinkFont(widget): + ResizeFont(widget, -1) + +def EnlargeFont(widget): + ResizeFont(widget, 1) + # Unique name for sub-windows def NumberedWindowName(name, nr): @@ -765,6 +779,8 @@ class MainWindow(QMainWindow): edit_menu = menu.addMenu("&Edit") edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find)) + edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")])) + edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")])) reports_menu = menu.addMenu("&Reports") reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) @@ -779,6 +795,14 @@ class MainWindow(QMainWindow): except: pass + def ShrinkFont(self): + win = self.mdi_area.activeSubWindow() + ShrinkFont(win.view) + + def EnlargeFont(self): + win = self.mdi_area.activeSubWindow() + EnlargeFont(win.view) + def NewCallGraph(self): CallGraphWindow(self.glb, self) -- cgit 
v1.2.3 From 8392b74b575c38fa5d50d1fe07fa9a4bcea93862 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 1 Oct 2018 09:28:50 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add ability to display all the database tables Displaying all the database tables can help make the database easier to understand. Committer testing: Opened all the tables, even the sqlite master table, which I selected everything and used control+C, lets see if it works... CREATE VIEW threads_view AS SELECT id,machine_id,(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,process_id,pid,tid FROM threads Humm, nope, just one of the cells got copied, even with everything selected :-) Anyway, works as advertised, useful for perusing the data. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181001062853.28285-17-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 694 +++++++++++++++++++++++ 1 file changed, 694 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 310ba71475830..ef822d8501093 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -50,10 +50,15 @@ import sys import weakref import threading import string +import cPickle +import re +import os from PySide.QtCore import * from PySide.QtGui import * from PySide.QtSql import * from decimal import * +from ctypes import * +from multiprocessing import Process, Array, Value, Event # Data formatting helpers @@ -146,6 +151,68 @@ class TreeModel(QAbstractItemModel): def DisplayData(self, item, index): return item.getData(index.column()) + def FetchIfNeeded(self, row): + if row > self.last_row_read: + self.last_row_read = row + if row + 10 >= self.root.child_count: + self.fetcher.Fetch(glb_chunk_sz) + + def 
columnAlignment(self, column): + return Qt.AlignLeft + + def columnFont(self, column): + return None + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(index.column()) + if role == Qt.FontRole: + return self.columnFont(index.column()) + if role != Qt.DisplayRole: + return None + item = index.internalPointer() + return self.DisplayData(item, index) + +# Table data model + +class TableModel(QAbstractTableModel): + + def __init__(self, parent=None): + super(TableModel, self).__init__(parent) + self.child_count = 0 + self.child_items = [] + self.last_row_read = 0 + + def Item(self, parent): + if parent.isValid(): + return parent.internalPointer() + else: + return self + + def rowCount(self, parent): + return self.child_count + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(section) + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def index(self, row, column, parent): + return self.createIndex(row, column, self.child_items[row]) + + def DisplayData(self, item, index): + return item.getData(index.column()) + + def FetchIfNeeded(self, row): + if row > self.last_row_read: + self.last_row_read = row + if row + 10 >= self.child_count: + self.fetcher.Fetch(glb_chunk_sz) + def columnAlignment(self, column): return Qt.AlignLeft @@ -620,6 +687,601 @@ class CallGraphWindow(QMdiSubWindow): if not found: self.find_bar.NotFound() +# Child data item finder + +class ChildDataItemFinder(): + + def __init__(self, root): + self.root = root + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5 + self.rows = [] + self.pos = 0 + + def FindSelect(self): + self.rows = [] + if self.pattern: + pattern = re.compile(self.value) + for child in self.root.child_items: + for column_data in child.data: + if re.search(pattern, str(column_data)) is not None: + 
self.rows.append(child.row) + break + else: + for child in self.root.child_items: + for column_data in child.data: + if self.value in str(column_data): + self.rows.append(child.row) + break + + def FindValue(self): + self.pos = 0 + if self.last_value != self.value or self.pattern != self.last_pattern: + self.FindSelect() + if not len(self.rows): + return -1 + return self.rows[self.pos] + + def FindThread(self): + if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern: + row = self.FindValue() + elif len(self.rows): + if self.direction > 0: + self.pos += 1 + if self.pos >= len(self.rows): + self.pos = 0 + else: + self.pos -= 1 + if self.pos < 0: + self.pos = len(self.rows) - 1 + row = self.rows[self.pos] + else: + row = -1 + return (True, row) + + def Find(self, value, direction, pattern, context, callback): + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern) + # Use a thread so the UI is not blocked + thread = Thread(self.FindThread) + thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection) + thread.start() + + def FindDone(self, thread, callback, row): + callback(row) + +# Number of database records to fetch in one go + +glb_chunk_sz = 10000 + +# size of pickled integer big enough for record size + +glb_nsz = 8 + +# Background process for SQL data fetcher + +class SQLFetcherProcess(): + + def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep): + # Need a unique connection name + conn_name = "SQLFetcher" + str(os.getpid()) + self.db, dbname = dbref.Open(conn_name) + self.sql = sql + self.buffer = buffer + self.head = head + self.tail = tail + self.fetch_count = fetch_count + self.fetching_done = fetching_done + self.process_target = process_target + self.wait_event = wait_event + self.fetched_event = fetched_event + self.prep = 
prep + self.query = QSqlQuery(self.db) + self.query_limit = 0 if "$$last_id$$" in sql else 2 + self.last_id = -1 + self.fetched = 0 + self.more = True + self.local_head = self.head.value + self.local_tail = self.tail.value + + def Select(self): + if self.query_limit: + if self.query_limit == 1: + return + self.query_limit -= 1 + stmt = self.sql.replace("$$last_id$$", str(self.last_id)) + QueryExec(self.query, stmt) + + def Next(self): + if not self.query.next(): + self.Select() + if not self.query.next(): + return None + self.last_id = self.query.value(0) + return self.prep(self.query) + + def WaitForTarget(self): + while True: + self.wait_event.clear() + target = self.process_target.value + if target > self.fetched or target < 0: + break + self.wait_event.wait() + return target + + def HasSpace(self, sz): + if self.local_tail <= self.local_head: + space = len(self.buffer) - self.local_head + if space > sz: + return True + if space >= glb_nsz: + # Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer + nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL) + self.buffer[self.local_head : self.local_head + len(nd)] = nd + self.local_head = 0 + if self.local_tail - self.local_head > sz: + return True + return False + + def WaitForSpace(self, sz): + if self.HasSpace(sz): + return + while True: + self.wait_event.clear() + self.local_tail = self.tail.value + if self.HasSpace(sz): + return + self.wait_event.wait() + + def AddToBuffer(self, obj): + d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL) + n = len(d) + nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL) + sz = n + glb_nsz + self.WaitForSpace(sz) + pos = self.local_head + self.buffer[pos : pos + len(nd)] = nd + self.buffer[pos + glb_nsz : pos + sz] = d + self.local_head += sz + + def FetchBatch(self, batch_size): + fetched = 0 + while batch_size > fetched: + obj = self.Next() + if obj is None: + self.more = False + break + self.AddToBuffer(obj) + fetched += 1 + if fetched: + self.fetched += fetched + 
with self.fetch_count.get_lock(): + self.fetch_count.value += fetched + self.head.value = self.local_head + self.fetched_event.set() + + def Run(self): + while self.more: + target = self.WaitForTarget() + if target < 0: + break + batch_size = min(glb_chunk_sz, target - self.fetched) + self.FetchBatch(batch_size) + self.fetching_done.value = True + self.fetched_event.set() + +def SQLFetcherFn(*x): + process = SQLFetcherProcess(*x) + process.Run() + +# SQL data fetcher + +class SQLFetcher(QObject): + + done = Signal(object) + + def __init__(self, glb, sql, prep, process_data, parent=None): + super(SQLFetcher, self).__init__(parent) + self.process_data = process_data + self.more = True + self.target = 0 + self.last_target = 0 + self.fetched = 0 + self.buffer_size = 16 * 1024 * 1024 + self.buffer = Array(c_char, self.buffer_size, lock=False) + self.head = Value(c_longlong) + self.tail = Value(c_longlong) + self.local_tail = 0 + self.fetch_count = Value(c_longlong) + self.fetching_done = Value(c_bool) + self.last_count = 0 + self.process_target = Value(c_longlong) + self.wait_event = Event() + self.fetched_event = Event() + glb.AddInstanceToShutdownOnExit(self) + self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep)) + self.process.start() + self.thread = Thread(self.Thread) + self.thread.done.connect(self.ProcessData, Qt.QueuedConnection) + self.thread.start() + + def Shutdown(self): + # Tell the thread and process to exit + self.process_target.value = -1 + self.wait_event.set() + self.more = False + self.fetching_done.value = True + self.fetched_event.set() + + def Thread(self): + if not self.more: + return True, 0 + while True: + self.fetched_event.clear() + fetch_count = self.fetch_count.value + if fetch_count != self.last_count: + break + if self.fetching_done.value: + self.more = False + return True, 0 + 
self.fetched_event.wait() + count = fetch_count - self.last_count + self.last_count = fetch_count + self.fetched += count + return False, count + + def Fetch(self, nr): + if not self.more: + # -1 inidcates there are no more + return -1 + result = self.fetched + extra = result + nr - self.target + if extra > 0: + self.target += extra + # process_target < 0 indicates shutting down + if self.process_target.value >= 0: + self.process_target.value = self.target + self.wait_event.set() + return result + + def RemoveFromBuffer(self): + pos = self.local_tail + if len(self.buffer) - pos < glb_nsz: + pos = 0 + n = cPickle.loads(self.buffer[pos : pos + glb_nsz]) + if n == 0: + pos = 0 + n = cPickle.loads(self.buffer[0 : glb_nsz]) + pos += glb_nsz + obj = cPickle.loads(self.buffer[pos : pos + n]) + self.local_tail = pos + n + return obj + + def ProcessData(self, count): + for i in xrange(count): + obj = self.RemoveFromBuffer() + self.process_data(obj) + self.tail.value = self.local_tail + self.wait_event.set() + self.done.emit(count) + +# Fetch more records bar + +class FetchMoreRecordsBar(): + + def __init__(self, model, parent): + self.model = model + + self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:") + self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.fetch_count = QSpinBox() + self.fetch_count.setRange(1, 1000000) + self.fetch_count.setValue(10) + self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.fetch = QPushButton("Go!") + self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + self.fetch.released.connect(self.FetchMoreRecords) + + self.progress = QProgressBar() + self.progress.setRange(0, 100) + self.progress.hide() + + self.done_label = QLabel("All records fetched") + self.done_label.hide() + + self.spacer = QLabel("") + + self.close_button = QToolButton() + self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton)) + 
self.close_button.released.connect(self.Deactivate) + + self.hbox = QHBoxLayout() + self.hbox.setContentsMargins(0, 0, 0, 0) + + self.hbox.addWidget(self.label) + self.hbox.addWidget(self.fetch_count) + self.hbox.addWidget(self.fetch) + self.hbox.addWidget(self.spacer) + self.hbox.addWidget(self.progress) + self.hbox.addWidget(self.done_label) + self.hbox.addWidget(self.close_button) + + self.bar = QWidget() + self.bar.setLayout(self.hbox); + self.bar.show() + + self.in_progress = False + self.model.progress.connect(self.Progress) + + self.done = False + + if not model.HasMoreRecords(): + self.Done() + + def Widget(self): + return self.bar + + def Activate(self): + self.bar.show() + self.fetch.setFocus() + + def Deactivate(self): + self.bar.hide() + + def Enable(self, enable): + self.fetch.setEnabled(enable) + self.fetch_count.setEnabled(enable) + + def Busy(self): + self.Enable(False) + self.fetch.hide() + self.spacer.hide() + self.progress.show() + + def Idle(self): + self.in_progress = False + self.Enable(True) + self.progress.hide() + self.fetch.show() + self.spacer.show() + + def Target(self): + return self.fetch_count.value() * glb_chunk_sz + + def Done(self): + self.done = True + self.Idle() + self.label.hide() + self.fetch_count.hide() + self.fetch.hide() + self.spacer.hide() + self.done_label.show() + + def Progress(self, count): + if self.in_progress: + if count: + percent = ((count - self.start) * 100) / self.Target() + if percent >= 100: + self.Idle() + else: + self.progress.setValue(percent) + if not count: + # Count value of zero means no more records + self.Done() + + def FetchMoreRecords(self): + if self.done: + return + self.progress.setValue(0) + self.Busy() + self.in_progress = True + self.start = self.model.FetchMoreRecords(self.Target()) + +# SQL data preparation + +def SQLTableDataPrep(query, count): + data = [] + for i in xrange(count): + data.append(query.value(i)) + return data + +# SQL table data model item + +class SQLTableItem(): + + def 
__init__(self, row, data): + self.row = row + self.data = data + + def getData(self, column): + return self.data[column] + +# SQL table data model + +class SQLTableModel(TableModel): + + progress = Signal(object) + + def __init__(self, glb, sql, column_count, parent=None): + super(SQLTableModel, self).__init__(parent) + self.glb = glb + self.more = True + self.populated = 0 + self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample) + self.fetcher.done.connect(self.Update) + self.fetcher.Fetch(glb_chunk_sz) + + def DisplayData(self, item, index): + self.FetchIfNeeded(item.row) + return item.getData(index.column()) + + def AddSample(self, data): + child = SQLTableItem(self.populated, data) + self.child_items.append(child) + self.populated += 1 + + def Update(self, fetched): + if not fetched: + self.more = False + self.progress.emit(0) + child_count = self.child_count + count = self.populated - child_count + if count > 0: + parent = QModelIndex() + self.beginInsertRows(parent, child_count, child_count + count - 1) + self.insertRows(child_count, count, parent) + self.child_count += count + self.endInsertRows() + self.progress.emit(self.child_count) + + def FetchMoreRecords(self, count): + current = self.child_count + if self.more: + self.fetcher.Fetch(count) + else: + self.progress.emit(0) + return current + + def HasMoreRecords(self): + return self.more + +# SQL automatic table data model + +class SQLAutoTableModel(SQLTableModel): + + def __init__(self, glb, table_name, parent=None): + sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz) + if table_name == "comm_threads_view": + # For now, comm_threads_view has no id column + sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz) + self.column_headers = [] + query = QSqlQuery(glb.db) + if glb.dbref.is_sqlite3: + QueryExec(query, "PRAGMA table_info(" + table_name + 
")") + while query.next(): + self.column_headers.append(query.value(1)) + if table_name == "sqlite_master": + sql = "SELECT * FROM " + table_name + else: + if table_name[:19] == "information_schema.": + sql = "SELECT * FROM " + table_name + select_table_name = table_name[19:] + schema = "information_schema" + else: + select_table_name = table_name + schema = "public" + QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") + while query.next(): + self.column_headers.append(query.value(0)) + super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent) + + def columnCount(self, parent=None): + return len(self.column_headers) + + def columnHeader(self, column): + return self.column_headers[column] + +# Base class for custom ResizeColumnsToContents + +class ResizeColumnsToContentsBase(QObject): + + def __init__(self, parent=None): + super(ResizeColumnsToContentsBase, self).__init__(parent) + + def ResizeColumnToContents(self, column, n): + # Using the view's resizeColumnToContents() here is extrememly slow + # so implement a crude alternative + font = self.view.font() + metrics = QFontMetrics(font) + max = 0 + for row in xrange(n): + val = self.data_model.child_items[row].data[column] + len = metrics.width(str(val) + "MM") + max = len if len > max else max + val = self.data_model.columnHeader(column) + len = metrics.width(str(val) + "MM") + max = len if len > max else max + self.view.setColumnWidth(column, max) + + def ResizeColumnsToContents(self): + n = min(self.data_model.child_count, 100) + if n < 1: + # No data yet, so connect a signal to notify when there is + self.data_model.rowsInserted.connect(self.UpdateColumnWidths) + return + columns = self.data_model.columnCount() + for i in xrange(columns): + self.ResizeColumnToContents(i, n) + + def UpdateColumnWidths(self, *x): + # This only needs to be done once, so disconnect the signal now + 
self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths) + self.ResizeColumnsToContents() + +# Table window + +class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): + + def __init__(self, glb, table_name, parent=None): + super(TableWindow, self).__init__(parent) + + self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name)) + + self.model = QSortFilterProxyModel() + self.model.setSourceModel(self.data_model) + + self.view = QTableView() + self.view.setModel(self.model) + self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) + self.view.verticalHeader().setVisible(False) + self.view.sortByColumn(-1, Qt.AscendingOrder) + self.view.setSortingEnabled(True) + + self.ResizeColumnsToContents() + + self.find_bar = FindBar(self, self, True) + + self.finder = ChildDataItemFinder(self.data_model) + + self.fetch_bar = FetchMoreRecordsBar(self.data_model, self) + + self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget()) + + self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table") + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.finder.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, row): + self.find_bar.Idle() + if row >= 0: + self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + else: + self.find_bar.NotFound() + +# Table list + +def GetTableList(glb): + tables = [] + query = QSqlQuery(glb.db) + if glb.dbref.is_sqlite3: + QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name") + else: + QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name") + while query.next(): + tables.append(query.value(0)) + if glb.dbref.is_sqlite3: + tables.append("sqlite_master") + else: + 
tables.append("information_schema.tables") + tables.append("information_schema.views") + tables.append("information_schema.columns") + return tables + # Action Definition def CreateAction(label, tip, callback, parent=None, shortcut=None): @@ -779,12 +1441,15 @@ class MainWindow(QMainWindow): edit_menu = menu.addMenu("&Edit") edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find)) + edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)])) edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")])) edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")])) reports_menu = menu.addMenu("&Reports") reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) + self.TableMenu(GetTableList(glb), menu) + self.window_menu = WindowMenu(self.mdi_area, menu) def Find(self): @@ -795,6 +1460,14 @@ class MainWindow(QMainWindow): except: pass + def FetchMoreRecords(self): + win = self.mdi_area.activeSubWindow() + if win: + try: + win.fetch_bar.Activate() + except: + pass + def ShrinkFont(self): win = self.mdi_area.activeSubWindow() ShrinkFont(win.view) @@ -803,9 +1476,17 @@ class MainWindow(QMainWindow): win = self.mdi_area.activeSubWindow() EnlargeFont(win.view) + def TableMenu(self, tables, menu): + table_menu = menu.addMenu("&Tables") + for table in tables: + table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self)) + def NewCallGraph(self): CallGraphWindow(self.glb, self) + def NewTableView(self, table_name): + TableWindow(self.glb, table_name, self) + # Global data class Glb(): @@ -816,6 +1497,18 @@ class Glb(): self.dbname = dbname self.app = None self.mainwindow = 
None + self.instances_to_shutdown_on_exit = weakref.WeakSet() + + def AddInstanceToShutdownOnExit(self, instance): + self.instances_to_shutdown_on_exit.add(instance) + + # Shutdown any background processes or threads + def ShutdownInstances(self): + for x in self.instances_to_shutdown_on_exit: + try: + x.Shutdown() + except: + pass # Database reference @@ -880,6 +1573,7 @@ def Main(): glb.mainwindow = mainwindow mainwindow.show() err = app.exec_() + glb.ShutdownInstances() db.close() sys.exit(err) -- cgit v1.2.3 From 76099f98aea4606f7c96b8d2366b46840529d08f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 23 Oct 2018 10:59:49 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add All branches report Add a report to display branches in a similar fashion to perf script. The main purpose of this report is to display disassembly, however, presently, the only supported disassembler is Intel XED, and additionally the object code must be present in perf build ID cache. To use Intel XED, libxed.so must be present. 
To build and install libxed.so: git clone https://github.com/intelxed/mbuild.git mbuild git clone https://github.com/intelxed/xed cd xed ./mfile.py --share sudo ./mfile.py --prefix=/usr/local install sudo ldconfig Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181023075949.18920-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 547 +++++++++++++++++++++++ 1 file changed, 547 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index ef822d8501093..24cb0bd56afa5 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -46,6 +46,48 @@ # 'Branch Count' is the total number of branches for that function and all # functions that it calls +# There is also an "All branches" report, which displays branches and +# possibly disassembly. However, presently, the only supported disassembler is +# Intel XED, and additionally the object code must be present in perf build ID +# cache. To use Intel XED, libxed.so must be present. 
To build and install +# libxed.so: +# git clone https://github.com/intelxed/mbuild.git mbuild +# git clone https://github.com/intelxed/xed +# cd xed +# ./mfile.py --share +# sudo ./mfile.py --prefix=/usr/local install +# sudo ldconfig +# +# Example report: +# +# Time CPU Command PID TID Branch Type In Tx Branch +# 8107675239590 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so) +# 7fab593ea260 48 89 e7 mov %rsp, %rdi +# 8107675239899 2 ls 22011 22011 hardware interrupt No 7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +# 8107675241900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so) +# 7fab593ea260 48 89 e7 mov %rsp, %rdi +# 7fab593ea263 e8 c8 06 00 00 callq 0x7fab593ea930 +# 8107675241900 2 ls 22011 22011 call No 7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so) +# 7fab593ea930 55 pushq %rbp +# 7fab593ea931 48 89 e5 mov %rsp, %rbp +# 7fab593ea934 41 57 pushq %r15 +# 7fab593ea936 41 56 pushq %r14 +# 7fab593ea938 41 55 pushq %r13 +# 7fab593ea93a 41 54 pushq %r12 +# 7fab593ea93c 53 pushq %rbx +# 7fab593ea93d 48 89 fb mov %rdi, %rbx +# 7fab593ea940 48 83 ec 68 sub $0x68, %rsp +# 7fab593ea944 0f 31 rdtsc +# 7fab593ea946 48 c1 e2 20 shl $0x20, %rdx +# 7fab593ea94a 89 c0 mov %eax, %eax +# 7fab593ea94c 48 09 c2 or %rax, %rdx +# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax +# 8107675242232 2 ls 22011 22011 hardware interrupt No 7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +# 8107675242900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so) +# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax +# 7fab593ea956 48 89 15 3b 13 22 00 movq %rdx, 0x22133b(%rip) +# 8107675243232 2 ls 22011 22011 hardware interrupt No 
7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) + import sys import weakref import threading @@ -62,6 +104,16 @@ from multiprocessing import Process, Array, Value, Event # Data formatting helpers +def tohex(ip): + if ip < 0: + ip += 1 << 64 + return "%x" % ip + +def offstr(offset): + if offset: + return "+0x%x" % offset + return "" + def dsoname(name): if name == "[kernel.kallsyms]": return "[kernel]" @@ -1077,6 +1129,351 @@ class FetchMoreRecordsBar(): self.in_progress = True self.start = self.model.FetchMoreRecords(self.Target()) +# Branch data model level two item + +class BranchLevelTwoItem(): + + def __init__(self, row, text, parent_item): + self.row = row + self.parent_item = parent_item + self.data = [""] * 8 + self.data[7] = text + self.level = 2 + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def childCount(self): + return 0 + + def hasChildren(self): + return False + + def getData(self, column): + return self.data[column] + +# Branch data model level one item + +class BranchLevelOneItem(): + + def __init__(self, glb, row, data, parent_item): + self.glb = glb + self.row = row + self.parent_item = parent_item + self.child_count = 0 + self.child_items = [] + self.data = data[1:] + self.dbid = data[0] + self.level = 1 + self.query_done = False + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def Select(self): + self.query_done = True + + if not self.glb.have_disassembler: + return + + query = QSqlQuery(self.glb.db) + + QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip" + " FROM samples" + " INNER JOIN dsos ON samples.to_dso_id = dsos.id" + " INNER JOIN symbols ON samples.to_symbol_id = symbols.id" + " WHERE samples.id = " + str(self.dbid)) + if not query.next(): + return + cpu = query.value(0) 
+ dso = query.value(1) + sym = query.value(2) + if dso == 0 or sym == 0: + return + off = query.value(3) + short_name = query.value(4) + long_name = query.value(5) + build_id = query.value(6) + sym_start = query.value(7) + ip = query.value(8) + + QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start" + " FROM samples" + " INNER JOIN symbols ON samples.symbol_id = symbols.id" + " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) + + " ORDER BY samples.id" + " LIMIT 1") + if not query.next(): + return + if query.value(0) != dso: + # Cannot disassemble from one dso to another + return + bsym = query.value(1) + boff = query.value(2) + bsym_start = query.value(3) + if bsym == 0: + return + tot = bsym_start + boff + 1 - sym_start - off + if tot <= 0 or tot > 16384: + return + + inst = self.glb.disassembler.Instruction() + f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id) + if not f: + return + mode = 0 if Is64Bit(f) else 1 + self.glb.disassembler.SetMode(inst, mode) + + buf_sz = tot + 16 + buf = create_string_buffer(tot + 16) + f.seek(sym_start + off) + buf.value = f.read(buf_sz) + buf_ptr = addressof(buf) + i = 0 + while tot > 0: + cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip) + if cnt: + byte_str = tohex(ip).rjust(16) + for k in xrange(cnt): + byte_str += " %02x" % ord(buf[i]) + i += 1 + while k < 15: + byte_str += " " + k += 1 + self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self)) + self.child_count += 1 + else: + return + buf_ptr += cnt + tot -= cnt + buf_sz -= cnt + ip += cnt + + def childCount(self): + if not self.query_done: + self.Select() + if not self.child_count: + return -1 + return self.child_count + + def hasChildren(self): + if not self.query_done: + return True + return self.child_count > 0 + + def getData(self, column): + return self.data[column] + +# Branch data model root item + +class BranchRootItem(): + + def __init__(self): + self.child_count 
= 0 + self.child_items = [] + self.level = 0 + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return None + + def getRow(self): + return 0 + + def childCount(self): + return self.child_count + + def hasChildren(self): + return self.child_count > 0 + + def getData(self, column): + return "" + +# Branch data preparation + +def BranchDataPrep(query): + data = [] + for i in xrange(0, 8): + data.append(query.value(i)) + data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + + " (" + dsoname(query.value(11)) + ")" + " -> " + + tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + + " (" + dsoname(query.value(15)) + ")") + return data + +# Branch data model + +class BranchModel(TreeModel): + + progress = Signal(object) + + def __init__(self, glb, event_id, where_clause, parent=None): + super(BranchModel, self).__init__(BranchRootItem(), parent) + self.glb = glb + self.event_id = event_id + self.more = True + self.populated = 0 + sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name," + " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END," + " ip, symbols.name, sym_offset, dsos.short_name," + " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name" + " FROM samples" + " INNER JOIN comms ON comm_id = comms.id" + " INNER JOIN threads ON thread_id = threads.id" + " INNER JOIN branch_types ON branch_type = branch_types.id" + " INNER JOIN symbols ON symbol_id = symbols.id" + " INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id" + " INNER JOIN dsos ON samples.dso_id = dsos.id" + " INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id" + " WHERE samples.id > $$last_id$$" + where_clause + + " AND evsel_id = " + str(self.event_id) + + " ORDER BY samples.id" + " LIMIT " + str(glb_chunk_sz)) + self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample) + self.fetcher.done.connect(self.Update) + self.fetcher.Fetch(glb_chunk_sz) + + def 
columnCount(self, parent=None): + return 8 + + def columnHeader(self, column): + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] + + def columnFont(self, column): + if column != 7: + return None + return QFont("Monospace") + + def DisplayData(self, item, index): + if item.level == 1: + self.FetchIfNeeded(item.row) + return item.getData(index.column()) + + def AddSample(self, data): + child = BranchLevelOneItem(self.glb, self.populated, data, self.root) + self.root.child_items.append(child) + self.populated += 1 + + def Update(self, fetched): + if not fetched: + self.more = False + self.progress.emit(0) + child_count = self.root.child_count + count = self.populated - child_count + if count > 0: + parent = QModelIndex() + self.beginInsertRows(parent, child_count, child_count + count - 1) + self.insertRows(child_count, count, parent) + self.root.child_count += count + self.endInsertRows() + self.progress.emit(self.root.child_count) + + def FetchMoreRecords(self, count): + current = self.root.child_count + if self.more: + self.fetcher.Fetch(count) + else: + self.progress.emit(0) + return current + + def HasMoreRecords(self): + return self.more + +# Branch window + +class BranchWindow(QMdiSubWindow): + + def __init__(self, glb, event_id, name, where_clause, parent=None): + super(BranchWindow, self).__init__(parent) + + model_name = "Branch Events " + str(event_id) + if len(where_clause): + model_name = where_clause + " " + model_name + + self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause)) + + self.view = QTreeView() + self.view.setUniformRowHeights(True) + self.view.setModel(self.model) + + self.ResizeColumnsToContents() + + self.find_bar = FindBar(self, self, True) + + self.finder = ChildDataItemFinder(self.model.root) + + self.fetch_bar = FetchMoreRecordsBar(self.model, self) + + self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget()) + + 
self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events") + + def ResizeColumnToContents(self, column, n): + # Using the view's resizeColumnToContents() here is extremely slow + # so implement a crude alternative + mm = "MM" if column else "MMMM" + font = self.view.font() + metrics = QFontMetrics(font) + max = 0 + for row in xrange(n): + val = self.model.root.child_items[row].data[column] + len = metrics.width(str(val) + mm) + max = len if len > max else max + val = self.model.columnHeader(column) + len = metrics.width(str(val) + mm) + max = len if len > max else max + self.view.setColumnWidth(column, max) + + def ResizeColumnsToContents(self): + n = min(self.model.root.child_count, 100) + if n < 1: + # No data yet, so connect a signal to notify when there is + self.model.rowsInserted.connect(self.UpdateColumnWidths) + return + columns = self.model.columnCount() + for i in xrange(columns): + self.ResizeColumnToContents(i, n) + + def UpdateColumnWidths(self, *x): + # This only needs to be done once, so disconnect the signal now + self.model.rowsInserted.disconnect(self.UpdateColumnWidths) + self.ResizeColumnsToContents() + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.finder.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, row): + self.find_bar.Idle() + if row >= 0: + self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + else: + self.find_bar.NotFound() + +# Event list + +def GetEventList(db): + events = [] + query = QSqlQuery(db) + QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id") + while query.next(): + events.append(query.value(0)) + return events + # SQL data preparation def SQLTableDataPrep(query, count): @@ -1448,6 +1845,8 @@ class MainWindow(QMainWindow): reports_menu = menu.addMenu("&Reports") reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new 
window containing a context-sensitive call graph", self.NewCallGraph, self)) + self.EventMenu(GetEventList(glb.db), reports_menu) + self.TableMenu(GetTableList(glb), menu) self.window_menu = WindowMenu(self.mdi_area, menu) @@ -1476,6 +1875,20 @@ class MainWindow(QMainWindow): win = self.mdi_area.activeSubWindow() EnlargeFont(win.view) + def EventMenu(self, events, reports_menu): + branches_events = 0 + for event in events: + event = event.split(":")[0] + if event == "branches": + branches_events += 1 + dbid = 0 + for event in events: + dbid += 1 + event = event.split(":")[0] + if event == "branches": + label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") for table in tables: @@ -1484,9 +1897,112 @@ class MainWindow(QMainWindow): def NewCallGraph(self): CallGraphWindow(self.glb, self) + def NewBranchView(self, event_id): + BranchWindow(self.glb, event_id, "", "", self) + def NewTableView(self, table_name): TableWindow(self.glb, table_name, self) +# XED Disassembler + +class xed_state_t(Structure): + + _fields_ = [ + ("mode", c_int), + ("width", c_int) + ] + +class XEDInstruction(): + + def __init__(self, libxed): + # Current xed_decoded_inst_t structure is 192 bytes. 
Use 512 to allow for future expansion + xedd_t = c_byte * 512 + self.xedd = xedd_t() + self.xedp = addressof(self.xedd) + libxed.xed_decoded_inst_zero(self.xedp) + self.state = xed_state_t() + self.statep = addressof(self.state) + # Buffer for disassembled instruction text + self.buffer = create_string_buffer(256) + self.bufferp = addressof(self.buffer) + +class LibXED(): + + def __init__(self): + self.libxed = CDLL("libxed.so") + + self.xed_tables_init = self.libxed.xed_tables_init + self.xed_tables_init.restype = None + self.xed_tables_init.argtypes = [] + + self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero + self.xed_decoded_inst_zero.restype = None + self.xed_decoded_inst_zero.argtypes = [ c_void_p ] + + self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode + self.xed_operand_values_set_mode.restype = None + self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ] + + self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode + self.xed_decoded_inst_zero_keep_mode.restype = None + self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ] + + self.xed_decode = self.libxed.xed_decode + self.xed_decode.restype = c_int + self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ] + + self.xed_format_context = self.libxed.xed_format_context + self.xed_format_context.restype = c_uint + self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ] + + self.xed_tables_init() + + def Instruction(self): + return XEDInstruction(self) + + def SetMode(self, inst, mode): + if mode: + inst.state.mode = 4 # 32-bit + inst.state.width = 4 # 4 bytes + else: + inst.state.mode = 1 # 64-bit + inst.state.width = 8 # 8 bytes + self.xed_operand_values_set_mode(inst.xedp, inst.statep) + + def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip): + self.xed_decoded_inst_zero_keep_mode(inst.xedp) + err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt) + if err: + return 0, "" + 
# Use AT&T mode (2), alternative is Intel (3) + ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0) + if not ok: + return 0, "" + # Return instruction length and the disassembled instruction text + # For now, assume the length is in byte 166 + return inst.xedd[166], inst.buffer.value + +def TryOpen(file_name): + try: + return open(file_name, "rb") + except: + return None + +def Is64Bit(f): + result = sizeof(c_void_p) + # ELF support only + pos = f.tell() + f.seek(0) + header = f.read(7) + f.seek(pos) + magic = header[0:4] + eclass = ord(header[4]) + encoding = ord(header[5]) + version = ord(header[6]) + if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1: + result = True if eclass == 2 else False + return result + # Global data class Glb(): @@ -1495,9 +2011,40 @@ class Glb(): self.dbref = dbref self.db = db self.dbname = dbname + self.home_dir = os.path.expanduser("~") + self.buildid_dir = os.getenv("PERF_BUILDID_DIR") + if self.buildid_dir: + self.buildid_dir += "/.build-id/" + else: + self.buildid_dir = self.home_dir + "/.debug/.build-id/" self.app = None self.mainwindow = None self.instances_to_shutdown_on_exit = weakref.WeakSet() + try: + self.disassembler = LibXED() + self.have_disassembler = True + except: + self.have_disassembler = False + + def FileFromBuildId(self, build_id): + file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf" + return TryOpen(file_name) + + def FileFromNamesAndBuildId(self, short_name, long_name, build_id): + # Assume current machine i.e. 
no support for virtualization + if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore": + file_name = os.getenv("PERF_KCORE") + f = TryOpen(file_name) if file_name else None + if f: + return f + # For now, no special handling if long_name is /proc/kcore + f = TryOpen(long_name) + if f: + return f + f = self.FileFromBuildId(build_id) + if f: + return f + return None def AddInstanceToShutdownOnExit(self, instance): self.instances_to_shutdown_on_exit.add(instance) -- cgit v1.2.3 From b585ebdb5912cf1438d4822f79aaebe36a2d123a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 20 Sep 2018 11:05:36 -0700 Subject: perf script: Add --insn-trace for instruction decoding Add a --insn-trace short hand option for decoding and disassembling instruction streams for intel_pt. This automatically pipes the output into the xed disassembler to generate disassembled instructions. This just makes this use model much nicer to use. Before % perf record -e intel_pt// ... % perf script --itrace=i0ns --ns -F +insn,-event,-period | xed -F insn: -A -64 swapper 0 [000] 17276.429606186: ffffffff81010486 pt_config ([kernel.kallsyms]) nopl %eax, (%rax,%rax,1) swapper 0 [000] 17276.429606186: ffffffff8101048b pt_config ([kernel.kallsyms]) add $0x10, %rsp swapper 0 [000] 17276.429606186: ffffffff8101048f pt_config ([kernel.kallsyms]) popq %rbx swapper 0 [000] 17276.429606186: ffffffff81010490 pt_config ([kernel.kallsyms]) popq %rbp swapper 0 [000] 17276.429606186: ffffffff81010491 pt_config ([kernel.kallsyms]) popq %r12 swapper 0 [000] 17276.429606186: ffffffff81010493 pt_config ([kernel.kallsyms]) popq %r13 swapper 0 [000] 17276.429606186: ffffffff81010495 pt_config ([kernel.kallsyms]) popq %r14 swapper 0 [000] 17276.429606186: ffffffff81010497 pt_config ([kernel.kallsyms]) popq %r15 swapper 0 [000] 17276.429606186: ffffffff81010499 pt_config ([kernel.kallsyms]) retq swapper 0 [000] 17276.429606186: ffffffff8101063e pt_event_add ([kernel.kallsyms]) cmpl $0x1, 0x1b0(%rbx) 
swapper 0 [000] 17276.429606186: ffffffff81010645 pt_event_add ([kernel.kallsyms]) mov $0xffffffea, %eax swapper 0 [000] 17276.429606186: ffffffff8101064a pt_event_add ([kernel.kallsyms]) mov $0x0, %edx swapper 0 [000] 17276.429606186: ffffffff8101064f pt_event_add ([kernel.kallsyms]) popq %rbx swapper 0 [000] 17276.429606186: ffffffff81010650 pt_event_add ([kernel.kallsyms]) cmovnz %edx, %eax swapper 0 [000] 17276.429606186: ffffffff81010653 pt_event_add ([kernel.kallsyms]) jmp 0xffffffff81010635 swapper 0 [000] 17276.429606186: ffffffff81010635 pt_event_add ([kernel.kallsyms]) retq swapper 0 [000] 17276.429606186: ffffffff8115e687 event_sched_in.isra.107 ([kernel.kallsyms]) test %eax, %eax Now: % perf record -e intel_pt// ... % perf script --insn-trace --xed ... same output ... XED needs to be installed with: $ git clone https://github.com/intelxed/mbuild.git mbuild $ git clone https://github.com/intelxed/xed $ cd xed $ ./mfile.py $ ./mfile.py examples $ sudo ./mfile.py --prefix=/usr/local install $ sudo cp obj/examples/xed /usr/local/bin $ xed | head -3 ERROR: required argument(s) were missing Copyright (C) 2017, Intel Corporation. All rights reserved. XED version: [v10.0-328-g7d62c8c49b7b] $ Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20180920180540.14039-2-andi@firstfloor.org [ Fixed up whitespace damage, added the 'mfile.py examples + cp obj/examples/xed ... 
] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/build-xed.txt | 19 +++++++++++++++++++ tools/perf/Documentation/perf-script.txt | 7 +++++++ tools/perf/builtin-script.c | 23 +++++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 tools/perf/Documentation/build-xed.txt diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt new file mode 100644 index 0000000000000..6222c1e7231fb --- /dev/null +++ b/tools/perf/Documentation/build-xed.txt @@ -0,0 +1,19 @@ + +For --xed the xed tool is needed. Here is how to install it: + + $ git clone https://github.com/intelxed/mbuild.git mbuild + $ git clone https://github.com/intelxed/xed + $ cd xed + $ ./mfile.py --share + $ ./mfile.py examples + $ sudo ./mfile.py --prefix=/usr/local install + $ sudo ldconfig + $ sudo cp obj/examples/xed /usr/local/bin + +Basic xed testing: + + $ xed | head -3 + ERROR: required argument(s) were missing + Copyright (C) 2017, Intel Corporation. All rights reserved. + XED version: [v10.0-328-g7d62c8c49b7b] + $ diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index afdafe2110a17..00c655ab4968e 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -383,6 +383,13 @@ include::itrace.txt[] will be printed. Each entry has function name and file/line. Enabled by default, disable with --no-inline. +--insn-trace:: + Show instruction stream for intel_pt traces. Combine with --xed to + show disassembly. + +--xed:: + Run xed disassembler on output. Requires installing the xed disassembler. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ca09b7d2adb7e..411ea175bcaf0 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "sane_ctype.h" @@ -3103,6 +3104,24 @@ static int perf_script__process_auxtrace_info(struct perf_session *session, #define perf_script__process_auxtrace_info 0 #endif +static int parse_insn_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "+insn,-event,-period", 0); + itrace_parse_synth_opts(opt, "i0ns", 0); + nanosecs = true; + return 0; +} + +static int parse_xed(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + force_pager("xed -F insn: -A -64 | less"); + return 0; +} + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -3187,6 +3206,10 @@ int cmd_script(int argc, const char **argv) "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL, + "Decode instructions from itrace", parse_insn_trace), + OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL, + "Run xed disassembler on output", parse_xed), OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]", "Stop display of callgraph at these symbols"), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), -- cgit v1.2.3 From 4eb068157121939f4bc16256a37bcd88f5554123 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 20 Sep 2018 11:05:37 -0700 Subject: perf script: Make itrace script default to all calls By default 'perf script' for itrace outputs sampled instructions or branches. 
In my experience this is confusing to users because it's hard to correlate with real program behavior. The sampling makes sense for tools like 'perf report' that actually sample to reduce the run time, but run time is normally not a problem for 'perf script'. It's better to give an accurate representation of the program flow. Default 'perf script' to output all calls for itrace. That's a much saner default. The old behavior can be still requested with 'perf script' --itrace=ibxwpe100000 v2: Fix ETM build failure v3: Really fix ETM build failure (Kim Phillips) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Kim Phillips Cc: Leo Yan Link: http://lkml.kernel.org/r/20180920180540.14039-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 7 ++++--- tools/perf/builtin-script.c | 5 ++++- tools/perf/util/auxtrace.c | 17 ++++++++++++----- tools/perf/util/auxtrace.h | 5 ++++- tools/perf/util/cs-etm.c | 3 ++- tools/perf/util/intel-bts.c | 3 ++- tools/perf/util/intel-pt.c | 3 ++- 7 files changed, 30 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index a3abe04c779d0..c2182cbabde3a 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -11,10 +11,11 @@ l synthesize last branch entries (use with i or x) s skip initial number of events - The default is all events i.e. the same as --itrace=ibxwpe + The default is all events i.e. 
the same as --itrace=ibxwpe, + except for perf script where it is --itrace=ce - In addition, the period (default 100000) for instructions events - can be specified in units of: + In addition, the period (default 100000, except for perf script where it is 1) + for instructions events can be specified in units of: i instructions t ticks diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 411ea175bcaf0..6099c722a6796 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3131,7 +3131,10 @@ int cmd_script(int argc, const char **argv) char *rec_script_path = NULL; char *rep_script_path = NULL; struct perf_session *session; - struct itrace_synth_opts itrace_synth_opts = { .set = false, }; + struct itrace_synth_opts itrace_synth_opts = { + .set = false, + .default_no_sample = true, + }; char *script_path = NULL; const char **__argv; int i, j, err = 0; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c4617bcfd521f..72d5ba2479bf1 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -962,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session, #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample) { - synth_opts->instructions = true; synth_opts->branches = true; synth_opts->transactions = true; synth_opts->ptwrites = true; synth_opts->pwr_events = true; synth_opts->errors = true; - synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + if (no_sample) { + synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + synth_opts->period = 1; + synth_opts->calls = true; + } else { + synth_opts->instructions = true; + synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; + synth_opts->period = 
PERF_ITRACE_DEFAULT_PERIOD; + } synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; synth_opts->initial_skip = 0; @@ -999,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } if (!str) { - itrace_synth_opts__set_default(synth_opts); + itrace_synth_opts__set_default(synth_opts, false); return 0; } diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index d88f6e9eb4611..8e50f96d4b23d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -58,6 +58,7 @@ enum itrace_period_type { /** * struct itrace_synth_opts - AUX area tracing synthesis options. * @set: indicates whether or not options have been set + * @default_no_sample: Default to no sampling. * @inject: indicates the event (not just the sample) must be fully synthesized * because 'perf inject' will write it out * @instructions: whether to synthesize 'instructions' events @@ -82,6 +83,7 @@ enum itrace_period_type { */ struct itrace_synth_opts { bool set; + bool default_no_sample; bool inject; bool instructions; bool branches; @@ -528,7 +530,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session, union perf_event *event); int itrace_parse_synth_opts(const struct option *opt, const char *str, int unset); -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts); +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample); size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp); void perf_session__auxtrace_error_inc(struct perf_session *session, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2ae640257fdbb..3b37d66dc5337 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1432,7 +1432,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { etm->synth_opts = 
*session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&etm->synth_opts); + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); etm->synth_opts.callchain = false; } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 7f0c83b6332bf..3b3a3d55dca18 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -910,7 +910,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { bts->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&bts->synth_opts); + itrace_synth_opts__set_default(&bts->synth_opts, + session->itrace_synth_opts->default_no_sample); if (session->itrace_synth_opts) bts->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 48c1d415c6b06..ffa385a029b3f 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2559,7 +2559,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&pt->synth_opts); + itrace_synth_opts__set_default(&pt->synth_opts, + session->itrace_synth_opts->default_no_sample); if (use_browser != -1) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; -- cgit v1.2.3 From d1b1552e15d41297abcaf3812378e3391d44fa6b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 20 Sep 2018 11:05:38 -0700 Subject: tools script: Add --call-trace and --call-ret-trace Add short cut options to print PT call trace and call-ret-trace, for calls and call and returns. Roughly corresponds to ftrace function tracer and function graph tracer. Just makes these common use cases nicer to use. 
% perf record -a -e intel_pt// sleep 1 % perf script --call-trace perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_enable perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 [000] 194167.205652203: ([kernel.kallsyms]) event_filter_match perf 900 [000] 194167.205652203: ([kernel.kallsyms]) group_sched_in perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 [000] 194167.205652203: ([kernel.kallsyms]) event_sched_in.isra.107 perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_set_state.part.71 perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_time perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_disable perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_log_itrace_start perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_userpage % perf script --call-ret-trace perf 900 [000] 194167.205652203: tr strt ([unknown]) pt_config perf 900 [000] 194167.205652203: return ([kernel.kallsyms]) pt_config perf 900 [000] 194167.205652203: return ([kernel.kallsyms]) pt_event_add perf 900 [000] 194167.205652203: call ([kernel.kallsyms]) perf_pmu_enable perf 900 [000] 194167.205652203: return ([kernel.kallsyms]) perf_pmu_nop_void perf 900 [000] 194167.205652203: return ([kernel.kallsyms]) event_sched_in.isra.107 perf 900 [000] 194167.205652203: call ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 [000] 194167.205652203: return ([kernel.kallsyms]) perf_pmu_nop_int perf 900 [000] 194167.205652203: return ([kernel.kallsyms]) group_sched_in perf 900 [000] 194167.205652203: call ([kernel.kallsyms]) event_filter_match perf 900 [000] 194167.205652203: return ([kernel.kallsyms]) event_filter_match perf 900 [000] 194167.205652203: call ([kernel.kallsyms]) group_sched_in perf 900 [000] 194167.205652203: call ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 
[000] 194167.205652203: return ([kernel.kallsyms]) perf_pmu_nop_txn perf 900 [000] 194167.205652203: call ([kernel.kallsyms]) event_sched_in.isra.107 perf 900 [000] 194167.205652203: call ([kernel.kallsyms]) perf_event_set_state.part.71 Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Tested-by: Leo Yan Acked-by: Jiri Olsa Cc: Kim Phillips Link: http://lkml.kernel.org/r/20180920180540.14039-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 7 +++++++ tools/perf/builtin-script.c | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 00c655ab4968e..805baabd238eb 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -390,6 +390,13 @@ include::itrace.txt[] --xed:: Run xed disassembler on output. Requires installing the xed disassembler. +--call-trace:: + Show call stream for intel_pt traces. The CPUs are interleaved, but + can be filtered with -C. + +--call-ret-trace:: + Show call and return stream for intel_pt traces. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6099c722a6796..566e1450898a2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3122,6 +3122,26 @@ static int parse_xed(const struct option *opt __maybe_unused, return 0; } +static int parse_call_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); + itrace_parse_synth_opts(opt, "cewp", 0); + nanosecs = true; + return 0; +} + +static int parse_callret_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0); + itrace_parse_synth_opts(opt, "crewp", 0); + nanosecs = true; + return 0; +} + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -3213,6 +3233,10 @@ int cmd_script(int argc, const char **argv) "Decode instructions from itrace", parse_insn_trace), OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL, "Run xed disassembler on output", parse_xed), + OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL, + "Decode calls from from itrace", parse_call_trace), + OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL, + "Decode calls and returns from itrace", parse_callret_trace), OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]", "Stop display of callgraph at these symbols"), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), -- cgit v1.2.3 From 99f753f048b3f02f31a56951781672021af6cd0d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 20 Sep 2018 11:05:39 -0700 Subject: perf script: Implement --graph-function Add a ftrace style --graph-function argument to 'perf script' that allows to print itrace function calls only below a given 
function. This makes it easier to find the code of interest in a large trace. % perf record -e intel_pt//k -a sleep 1 % perf script --graph-function group_sched_in --call-trace perf 900 [000] 194167.205652203: ([kernel.kallsyms]) group_sched_in perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 [000] 194167.205652203: ([kernel.kallsyms]) event_sched_in.isra.107 perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_set_state.part.71 perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_time perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_disable perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_log_itrace_start perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_userpage perf 900 [000] 194167.205652203: ([kernel.kallsyms]) calc_timer_values perf 900 [000] 194167.205652203: ([kernel.kallsyms]) sched_clock_cpu perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax perf 900 [000] 194167.205652203: ([kernel.kallsyms]) arch_perf_update_userpage perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __fentry__ perf 900 [000] 194167.205652203: ([kernel.kallsyms]) using_native_sched_clock perf 900 [000] 194167.205652203: ([kernel.kallsyms]) sched_clock_stable perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_enable perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) group_sched_in swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) event_sched_in.isra.107 swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_set_state.part.71 swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_update_time swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_pmu_disable swapper 0 [001] 
194167.205660693: ([kernel.kallsyms]) perf_log_itrace_start swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_update_userpage swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) calc_timer_values swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) sched_clock_cpu swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) arch_perf_update_userpage swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __fentry__ swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) using_native_sched_clock swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) sched_clock_stable Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Tested-by: Leo Yan Acked-by: Jiri Olsa Cc: Kim Phillips Link: http://lkml.kernel.org/r/20180920180540.14039-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 4 ++ tools/perf/builtin-script.c | 96 ++++++++++++++++++++++++++------ tools/perf/util/symbol.h | 3 +- tools/perf/util/thread.h | 2 + 4 files changed, 86 insertions(+), 19 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 805baabd238eb..a2b37ce48094d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -397,6 +397,10 @@ include::itrace.txt[] --call-ret-trace:: Show call and return stream for intel_pt traces. +--graph-function:: + For itrace only show specified functions and their callees for + itrace. Multiple functions can be separated by comma. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 566e1450898a2..9d2249ea75e3c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1105,6 +1105,35 @@ out: return printed; } +static const char *resolve_branch_sym(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al, + u64 *ip) +{ + struct addr_location addr_al; + struct perf_event_attr *attr = &evsel->attr; + const char *name = NULL; + + if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { + if (sample_addr_correlates_sym(attr)) { + thread__resolve(thread, &addr_al, sample); + if (addr_al.sym) + name = addr_al.sym->name; + else + *ip = sample->addr; + } else { + *ip = sample->addr; + } + } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { + if (al->sym) + name = al->sym->name; + else + *ip = sample->ip; + } + return name; +} + static int perf_sample__fprintf_callindent(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -1112,7 +1141,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, { struct perf_event_attr *attr = &evsel->attr; size_t depth = thread_stack__depth(thread); - struct addr_location addr_al; const char *name = NULL; static int spacing; int len = 0; @@ -1126,22 +1154,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN) depth += 1; - if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { - if (sample_addr_correlates_sym(attr)) { - thread__resolve(thread, &addr_al, sample); - if (addr_al.sym) - name = addr_al.sym->name; - else - ip = sample->addr; - } else { - ip = sample->addr; - } - } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { - if (al->sym) - name = al->sym->name; - else - ip = sample->ip; - } + name = 
resolve_branch_sym(sample, evsel, thread, al, &ip); if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) { dlen += fprintf(fp, "("); @@ -1647,6 +1660,47 @@ static void perf_sample__fprint_metric(struct perf_script *script, } } +static bool show_event(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al) +{ + int depth = thread_stack__depth(thread); + + if (!symbol_conf.graph_function) + return true; + + if (thread->filter) { + if (depth <= thread->filter_entry_depth) { + thread->filter = false; + return false; + } + return true; + } else { + const char *s = symbol_conf.graph_function; + u64 ip; + const char *name = resolve_branch_sym(sample, evsel, thread, al, + &ip); + unsigned nlen; + + if (!name) + return false; + nlen = strlen(name); + while (*s) { + unsigned len = strcspn(s, ","); + if (nlen == len && !strncmp(name, s, len)) { + thread->filter = true; + thread->filter_entry_depth = depth; + return true; + } + s += len; + if (*s == ',') + s++; + } + return false; + } +} + static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, @@ -1661,6 +1715,9 @@ static void process_event(struct perf_script *script, if (output[type].fields == 0) return; + if (!show_event(sample, evsel, thread, al)) + return; + ++es->samples; perf_sample__fprintf_start(sample, thread, evsel, @@ -3237,6 +3294,8 @@ int cmd_script(int argc, const char **argv) "Decode calls from from itrace", parse_call_trace), OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL, "Decode calls and returns from itrace", parse_callret_trace), + OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]", + "Only print symbols and callees with --call-trace/--call-ret-trace"), OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]", "Stop display of callgraph at these symbols"), OPT_STRING('C', "cpu", &cpu_list, 
"cpu", "list of cpus to profile"), @@ -3494,7 +3553,8 @@ int cmd_script(int argc, const char **argv) script.session = session; script__setup_sample_type(&script); - if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) + if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) || + symbol_conf.graph_function) itrace_synth_opts.thread_stack = true; session->itrace_synth_opts = &itrace_synth_opts; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f25fae4b5743c..d726a8a7bb1b0 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -123,7 +123,8 @@ struct symbol_conf { const char *vmlinux_name, *kallsyms_name, *source_prefix, - *field_sep; + *field_sep, + *graph_function; const char *default_guest_vmlinux_name, *default_guest_kallsyms, *default_guest_modules; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 07606aa6998d9..36c09a9904e66 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -42,6 +42,8 @@ struct thread { void *addr_space; struct unwind_libunwind_ops *unwind_libunwind_ops; #endif + bool filter; + int filter_entry_depth; }; struct machine; -- cgit v1.2.3 From fe57120e18a1f9124ca758c89cc54f91333d1847 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 24 Sep 2018 10:07:32 -0700 Subject: perf script: Support total cycles count For 'perf script' brstackinsn also print a running cycles count. This makes it easier to calculate cycle deltas for code sections measured with LBRs. % perf record -b -a sleep 1 % perf script -F +brstackinsn ... 
00007f73ecc41083 insn: 74 06 # PRED 9 cycles [17] 1.11 IPC 00007f73ecc4108b insn: a8 10 00007f73ecc4108d insn: 74 71 # PRED 1 cycles [18] 1.00 IPC 00007f73ecc41100 insn: 48 8b 46 10 00007f73ecc41104 insn: 4c 8b 38 00007f73ecc41107 insn: 4d 85 ff 00007f73ecc4110a insn: 0f 84 b0 00 00 00 00007f73ecc41110 insn: 83 43 58 01 00007f73ecc41114 insn: 48 89 df 00007f73ecc41117 insn: e8 94 73 04 00 # PRED 6 cycles [24] 1.00 IPC Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Andi Kleen Link: http://lkml.kernel.org/r/20180924170732.GA28040@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9d2249ea75e3c..b5bc85bd0bbea 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -913,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct perf_insn *x, u8 *inbuf, int len, - int insn, FILE *fp) + int insn, FILE *fp, int *total_cycles) { int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip, dump_insn(x, ip, inbuf, len, NULL), @@ -922,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, en->flags.in_tx ? " INTX" : "", en->flags.abort ? 
" ABORT" : ""); if (en->flags.cycles) { - printed += fprintf(fp, " %d cycles", en->flags.cycles); + *total_cycles += en->flags.cycles; + printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles); if (insn) printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles); } @@ -979,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, u8 buffer[MAXBB]; unsigned off; struct symbol *lastsym = NULL; + int total_cycles = 0; if (!(br && br->nr)) return 0; @@ -999,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, x.cpumode, x.cpu, &lastsym, attr, fp); printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], - &x, buffer, len, 0, fp); + &x, buffer, len, 0, fp, &total_cycles); } /* Print all blocks */ @@ -1027,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp); + printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp, + &total_cycles); break; } else { printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip, -- cgit v1.2.3 From 28fa741c27e6d57f6bf594ba3c444ce79e671e09 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 29 Oct 2018 23:32:11 +0000 Subject: perf/core: Clean up inconsisent indentation Replace a bunch of spaces with tab, cleans up indentation Signed-off-by: Colin Ian King Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20181029233211.21475-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/kernel/events/core.c b/kernel/events/core.c index 5a97f34bc14c8..65e90c752a91e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -750,7 +750,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) /* * Do not update time when cgroup is not active */ - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) + if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) __update_cgrp_time(event->cgrp); } -- cgit v1.2.3 From f443f38c5789ece6ebe59ae21c27bf861e61c4e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 24 Oct 2018 14:31:12 -0300 Subject: tools include uapi: Grab a copy of linux/fs.h We'll use it to create tables for the 'flags' argument to the 'mount' and 'umount' syscalls. Add it to check_headers.sh so that when a new protocol gets added we get a notification during the build process. Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yacf9jvkwfwg2g95r2us3xb3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fs.h | 393 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 394 insertions(+) create mode 100644 tools/include/uapi/linux/fs.h diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h new file mode 100644 index 0000000000000..73e01918f9963 --- /dev/null +++ b/tools/include/uapi/linux/fs.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FS_H +#define _UAPI_LINUX_FS_H + +/* + * This file has definitions for some important file table structures + * and constants and structures used by various generic file system + * ioctl's. Please do not make any changes in this file before + * sending patches for review to linux-fsdevel@vger.kernel.org and + * linux-api@vger.kernel.org. 
+ */ + +#include +#include +#include + +/* + * It's silly to have NR_OPEN bigger than NR_FILE, but you can change + * the file limit at runtime and only root can increase the per-process + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. + * + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. + */ + +/* Fixed constants first: */ +#undef NR_OPEN +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ + +#define BLOCK_SIZE_BITS 10 +#define BLOCK_SIZE (1< Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-mgutbbkmip9gfnmd28ikg7xt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mount_flags.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100755 tools/perf/trace/beauty/mount_flags.sh diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh new file mode 100755 index 0000000000000..45547573a1dbb --- /dev/null +++ b/tools/perf/trace/beauty/mount_flags.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ + +printf "static const char *mount_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*' +egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \ + sed -r "s/$regex/\2 \2 \1/g" | sort -n | \ + xargs printf "\t[%s ? 
(ilog2(%s) + 1) : 0] = \"%s\",\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*' +egrep $regex ${header_dir}/fs.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s + 1] = \"%s\",\n" +printf "};\n" -- cgit v1.2.3 From 794f594e0c3be6199e0b3e2324280e8785806fb6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 24 Oct 2018 15:54:23 -0300 Subject: perf beauty: Switch from GPL v2.0 to LGPL v2.1 The intention is to have this as a library, since it is not perf specific at all. I did the switch for the files where I'm the only contributor, with the exception of a few lines changed by Jiri Olsa. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-a04q6chdyjknm1hr305ulx8h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/clone.c | 3 +-- tools/perf/trace/beauty/drm_ioctl.sh | 1 + tools/perf/trace/beauty/eventfd.c | 2 +- tools/perf/trace/beauty/fcntl.c | 3 +-- tools/perf/trace/beauty/flock.c | 2 +- tools/perf/trace/beauty/futex_op.c | 2 +- tools/perf/trace/beauty/futex_val3.c | 2 +- tools/perf/trace/beauty/ioctl.c | 3 +-- tools/perf/trace/beauty/kcmp.c | 3 +-- tools/perf/trace/beauty/kcmp_type.sh | 1 + tools/perf/trace/beauty/kvm_ioctl.sh | 1 + tools/perf/trace/beauty/madvise_behavior.sh | 1 + tools/perf/trace/beauty/mmap.c | 2 +- tools/perf/trace/beauty/mode_t.c | 2 +- tools/perf/trace/beauty/msg_flags.c | 2 +- tools/perf/trace/beauty/open_flags.c | 2 +- tools/perf/trace/beauty/perf_event_open.c | 2 +- tools/perf/trace/beauty/perf_ioctl.sh | 1 + tools/perf/trace/beauty/pid.c | 3 ++- tools/perf/trace/beauty/pkey_alloc.c | 3 +-- tools/perf/trace/beauty/pkey_alloc_access_rights.sh | 1 + tools/perf/trace/beauty/prctl.c | 3 +-- tools/perf/trace/beauty/prctl_option.sh | 1 + tools/perf/trace/beauty/sched_policy.c | 2 +- tools/perf/trace/beauty/seccomp.c | 2 +- 
tools/perf/trace/beauty/signum.c | 2 +- tools/perf/trace/beauty/sndrv_ctl_ioctl.sh | 1 + tools/perf/trace/beauty/sndrv_pcm_ioctl.sh | 1 + tools/perf/trace/beauty/sockaddr.c | 2 +- tools/perf/trace/beauty/socket.c | 2 +- tools/perf/trace/beauty/socket_ipproto.sh | 1 + tools/perf/trace/beauty/socket_type.c | 2 +- tools/perf/trace/beauty/statx.c | 3 +-- tools/perf/trace/beauty/vhost_virtio_ioctl.sh | 1 + tools/perf/trace/beauty/waitid_options.c | 2 +- 35 files changed, 36 insertions(+), 31 deletions(-) diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c index d64d049ab9915..010406500c304 100644 --- a/tools/perf/trace/beauty/clone.c +++ b/tools/perf/trace/beauty/clone.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/cone.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh index 9d3816815e60f..9aa94fd523a9c 100755 --- a/tools/perf/trace/beauty/drm_ioctl.sh +++ b/tools/perf/trace/beauty/drm_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/ diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c index 5d6a477a64002..db5b9b4921137 100644 --- a/tools/perf/trace/beauty/eventfd.c +++ b/tools/perf/trace/beauty/eventfd.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef EFD_SEMAPHORE #define EFD_SEMAPHORE 1 #endif diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 9e8900c13cb13..e6de31674e246 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/fcntl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo - * - * 
Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c index c4ff6ad30b062..cf02ae5f0ba66 100644 --- a/tools/perf/trace/beauty/flock.c +++ b/tools/perf/trace/beauty/flock.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include "trace/beauty/beauty.h" #include diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c index 61850fbc85ff3..1136bde56406e 100644 --- a/tools/perf/trace/beauty/futex_op.c +++ b/tools/perf/trace/beauty/futex_op.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include #ifndef FUTEX_WAIT_BITSET diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c index 26f6b3253511e..138b7d588a708 100644 --- a/tools/perf/trace/beauty/futex_val3.c +++ b/tools/perf/trace/beauty/futex_val3.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include #ifndef FUTEX_BITSET_MATCH_ANY diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 1be3b4cf08270..5d2a7fd8d4077 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/ioctl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c index f62040eb9d5c5..b276a274f2030 100644 --- a/tools/perf/trace/beauty/kcmp.c +++ b/tools/perf/trace/beauty/kcmp.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/kcmp.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh index a3c304caa3365..df8b17486d575 100755 --- a/tools/perf/trace/beauty/kcmp_type.sh +++ b/tools/perf/trace/beauty/kcmp_type.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh index c4699fd46bb64..4ce54f5bf7564 100755 --- a/tools/perf/trace/beauty/kvm_ioctl.sh +++ b/tools/perf/trace/beauty/kvm_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh index 431639eb4d29a..4527d290cdfc6 100755 --- a/tools/perf/trace/beauty/madvise_behavior.sh +++ b/tools/perf/trace/beauty/madvise_behavior.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/ diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 9f68077b241b9..0605593552c6d 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c index d929ad7dd97be..6879d36d30048 100644 --- a/tools/perf/trace/beauty/mode_t.c +++ b/tools/perf/trace/beauty/mode_t.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include #include #include diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c index c064d6aae6597..1b9d6306d2749 100644 --- 
a/tools/perf/trace/beauty/msg_flags.c +++ b/tools/perf/trace/beauty/msg_flags.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include #include diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c index 6aec6178a99dc..cc673fec9184d 100644 --- a/tools/perf/trace/beauty/open_flags.c +++ b/tools/perf/trace/beauty/open_flags.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include #include #include diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c index 2bafd7c995fff..981185c1974ba 100644 --- a/tools/perf/trace/beauty/perf_event_open.c +++ b/tools/perf/trace/beauty/perf_event_open.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef PERF_FLAG_FD_NO_GROUP # define PERF_FLAG_FD_NO_GROUP (1UL << 0) #endif diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh index 6492c74df928d..9aabd9743ef6e 100755 --- a/tools/perf/trace/beauty/perf_ioctl.sh +++ b/tools/perf/trace/beauty/perf_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c index 0313df3428304..1a6acc46807bc 100644 --- a/tools/perf/trace/beauty/pid.c +++ b/tools/perf/trace/beauty/pid.c @@ -1,4 +1,5 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 + size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg) { int pid = arg->val; diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c index 2ba784a3734ad..d3e5188a9befa 100644 --- a/tools/perf/trace/beauty/pkey_alloc.c +++ b/tools/perf/trace/beauty/pkey_alloc.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/pkey_alloc.c * * Copyright (C) 2017, 
Red Hat Inc, Arnaldo Carvalho de Melo - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh index e0a51aeb20b21..f8f1b560cf8a4 100755 --- a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh +++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/ diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c index 246130dad6c41..be7a5d3959757 100644 --- a/tools/perf/trace/beauty/prctl.c +++ b/tools/perf/trace/beauty/prctl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/prctl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index f24722146ebef..d32f8f1124af0 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c index ba5096ae76b60..48f2b5c9aa3ed 100644 --- a/tools/perf/trace/beauty/sched_policy.c +++ b/tools/perf/trace/beauty/sched_policy.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include /* diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c index b7097fd5fed9e..e36156b19c708 100644 --- a/tools/perf/trace/beauty/seccomp.c +++ b/tools/perf/trace/beauty/seccomp.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 
#ifndef SECCOMP_SET_MODE_STRICT #define SECCOMP_SET_MODE_STRICT 0 #endif diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c index bde18a53f0909..587fec545b8a6 100644 --- a/tools/perf/trace/beauty/signum.c +++ b/tools/perf/trace/beauty/signum.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh index eb511bb5fbd32..e0803b9575932 100755 --- a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh +++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/ diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh index 6818392968b24..7a464a7bf9139 100755 --- a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh +++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/ diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c index 71a79f72d9d92..9410ad230f101 100644 --- a/tools/perf/trace/beauty/sockaddr.c +++ b/tools/perf/trace/beauty/sockaddr.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 // Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c index 65227269384b1..d971a25964174 100644 --- a/tools/perf/trace/beauty/socket.c +++ b/tools/perf/trace/beauty/socket.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/socket.c * diff --git a/tools/perf/trace/beauty/socket_ipproto.sh 
b/tools/perf/trace/beauty/socket_ipproto.sh index a3cc24633bec5..de0f2f29017f2 100755 --- a/tools/perf/trace/beauty/socket_ipproto.sh +++ b/tools/perf/trace/beauty/socket_ipproto.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c index bca26aef4a77a..a63a9a332aa0f 100644 --- a/tools/perf/trace/beauty/socket_type.c +++ b/tools/perf/trace/beauty/socket_type.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include #include diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c index 5643b692af4cf..630f2760dd666 100644 --- a/tools/perf/trace/beauty/statx.c +++ b/tools/perf/trace/beauty/statx.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/statx.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh index 0f6a5197d0bed..439773daaf77d 100755 --- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh +++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c index 8465281a093de..42ff58ad613b8 100644 --- a/tools/perf/trace/beauty/waitid_options.c +++ b/tools/perf/trace/beauty/waitid_options.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include #include -- cgit v1.2.3 From 579e5ff629b17744f62473f314768de8b6f1e66c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Oct 2018 14:21:31 -0300 Subject: perf beauty: Introduce strarray__scnprintf_flags() Generalizing pkey_alloc__scnprintf_access_rights(), so that we can use it with other flags-like arguments, such as mount's mountflags argument. 
Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-o3ymi3104m8moaz9865g09w9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/beauty.h | 1 + tools/perf/trace/beauty/pkey_alloc.c | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 2570152d39097..a1806c4f3ccd8 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -24,6 +24,7 @@ struct strarray { } size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val); +size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags); struct trace; struct thread; diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c index d3e5188a9befa..1b8ed4cac8153 100644 --- a/tools/perf/trace/beauty/pkey_alloc.c +++ b/tools/perf/trace/beauty/pkey_alloc.c @@ -9,31 +9,28 @@ #include #include -static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags) { int i, printed = 0; -#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" - static DEFINE_STRARRAY(pkey_alloc_access_rights); - - if (access_rights == 0) { - const char *s = strarray__pkey_alloc_access_rights.entries[0]; + if (flags == 0) { + const char *s = sa->entries[0]; if (s) return scnprintf(bf, size, "%s", s); return scnprintf(bf, size, "%d", 0); } - for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) { - int bit = 1 << (i - 1); + for (i = 1; i < sa->nr_entries; ++i) { + unsigned long bit = 1UL << (i - 1); - if (!(access_rights & bit)) + if (!(flags & bit)) continue; if (printed != 0) printed += scnprintf(bf + printed, size - printed, "|"); - if 
(strarray__pkey_alloc_access_rights.entries[i] != NULL) - printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]); + if (sa->entries[i] != NULL) + printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]); else printed += scnprintf(bf + printed, size - printed, "0x%#", bit); } @@ -41,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s return printed; } +static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +{ +#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" + static DEFINE_STRARRAY(pkey_alloc_access_rights); + + return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights); +} + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg) { unsigned long cmd = arg->val; -- cgit v1.2.3 From 496fd346b71ffa0ff35623e9ec79df0bad47bd66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Oct 2018 16:09:47 -0300 Subject: perf trace beauty: Allow syscalls to mask an argument before considering it Take mount's 'flags' arg, to cope with this semantic, as defined in do_mount in fs/namespace.c: /* * Pre-0.97 versions of mount() didn't have a flags word. When the * flags word was introduced its top half was required to have the * magic value 0xC0ED, and this remained so until 2.4.0-test9. * Therefore, if this magic number is present, it carries no * information and must be discarded. */ We need to mask this arg, and then see if it is zero, when we simply don't print the arg name and value. The next patch will use this for mount's 'flag' arg. 
Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-btue14k5jemayuykfrwsnh85@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7081d7ea12e5d..14fb63e17de56 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -614,6 +614,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, struct syscall_arg_fmt { size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); void *parm; const char *name; bool show_zero; @@ -1487,6 +1488,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, return scnprintf(bf, size, "arg%d: ", arg->idx); } +/* + * Check if the value is in fact zero, i.e. mask whatever needs masking, such + * as mount 'flags' argument that needs ignoring some magic flag, see comment + * in tools/perf/trace/beauty/mount_flags.c + */ +static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val) +{ + if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val) + return sc->arg_fmt[arg->idx].mask_val(arg, val); + + return val; +} + static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, struct syscall_arg *arg, unsigned long val) { @@ -1535,6 +1549,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, continue; val = syscall_arg__val(&arg, arg.idx); + /* + * Some syscall args need some mask, most don't and + * return val untouched. 
+ */ + val = syscall__mask_val(sc, &arg, val); /* * Suppress this argument if its value is zero and -- cgit v1.2.3 From 73d141adcea66de656d0c8336811f2b0bbd9700c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Oct 2018 15:18:06 -0300 Subject: perf trace beauty: Beautify mount/umount's 'flags' argument # trace -e mount mount -o ro -t debugfs nodev /mnt 0.000 ( 1.040 ms): mount/27235 mount(dev_name: 0x5601cc8c64e0, dir_name: 0x5601cc8c6500, type: 0x5601cc8c6480, flags: RDONLY) = 0 # trace -e mount mount -o remount,relatime -t debugfs nodev /mnt 0.000 ( 2.946 ms): mount/27262 mount(dev_name: 0x55f4a73d64e0, dir_name: 0x55f4a73d6500, type: 0x55f4a73d6480, flags: REMOUNT|RELATIME) = 0 # trace -e mount mount -o remount,strictatime -t debugfs nodev /mnt 0.000 ( 2.934 ms): mount/27265 mount(dev_name: 0x5617f71d94e0, dir_name: 0x5617f71d9500, type: 0x5617f71d9480, flags: REMOUNT|STRICTATIME) = 0 # trace -e mount mount -o remount,suid,silent -t debugfs nodev /mnt 0.000 ( 0.049 ms): mount/27273 mount(dev_name: 0x55ad65df24e0, dir_name: 0x55ad65df2500, type: 0x55ad65df2480, flags: REMOUNT|SILENT) = 0 # trace -e mount mount -o remount,rw,sync,lazytime -t debugfs nodev /mnt 0.000 ( 2.684 ms): mount/27281 mount(dev_name: 0x561216055530, dir_name: 0x561216055550, type: 0x561216055510, flags: SYNCHRONOUS|REMOUNT|LAZYTIME) = 0 # trace -e mount mount -o remount,dirsync -t debugfs nodev /mnt 0.000 ( 3.512 ms): mount/27314 mount(dev_name: 0x55c4e7188480, dir_name: 0x55c4e7188530, type: 0x55c4e71884a0, flags: REMOUNT|DIRSYNC, data: 0x55c4e71884e0) = 0 # Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-i5ncao73c0bd02qprgrq6wb9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 +++++++ tools/perf/builtin-trace.c | 3 +++ tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 6 +++++ tools/perf/trace/beauty/mount_flags.c | 43 
+++++++++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+) create mode 100644 tools/perf/trace/beauty/mount_flags.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 2f3bf025e3050..a31c5c29d53ab 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -460,6 +460,12 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl) $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@ +mount_flags_array := $(beauty_outdir)/mount_flags_array.c +mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh + +$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl) + $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@ + prctl_option_array := $(beauty_outdir)/prctl_option_array.c prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh @@ -577,6 +583,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(socket_ipproto_array) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ + $(mount_flags_array) \ $(perf_ioctl_array) \ $(prctl_option_array) \ $(arch_errno_name_array) @@ -863,6 +870,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(madvise_behavior_array) \ + $(OUTPUT)$(mount_flags_array) \ $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 14fb63e17de56..76c14c0129fca 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -726,6 +726,9 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, [3] = { .scnprintf = 
SCA_MMAP_FLAGS, /* flags */ }, }, }, + { .name = "mount", + .arg = { [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ + .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index c3b0afd67760a..3043130732427 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -5,6 +5,7 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif libperf-y += kcmp.o +libperf-y += mount_flags.o libperf-y += pkey_alloc.o libperf-y += prctl.o libperf-y += sockaddr.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index a1806c4f3ccd8..039c29039b2c4 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -123,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg); #define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx +unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags); +#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags + +size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c new file mode 100644 index 0000000000000..712935c6620a5 --- /dev/null +++ b/tools/perf/trace/beauty/mount_flags.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/mount_flags.c + * + * Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho 
de Melo + */ + +#include "trace/beauty/beauty.h" +#include +#include +#include +#include + +static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size) +{ +#include "trace/beauty/generated/mount_flags_array.c" + static DEFINE_STRARRAY(mount_flags); + + return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags); +} + +unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags) +{ + // do_mount in fs/namespace.c: + /* + * Pre-0.97 versions of mount() didn't have a flags word. When the + * flags word was introduced its top half was required to have the + * magic value 0xC0ED, and this remained so until 2.4.0-test9. + * Therefore, if this magic number is present, it carries no + * information and must be discarded. + */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; + + return flags; +} + +size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return mount__scnprintf_flags(flags, bf, size); +} -- cgit v1.2.3 From f932184e282f574cfd34afee917a10b782fd3e76 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Oct 2018 17:24:45 -0300 Subject: perf trace: Consider syscall aliases too When trying to trace the 'umount' syscall on x86_64 I noticed that it was failing: # trace -e umount umount /mnt event syntax error: 'umount' \___ parser error Run 'perf list' for a list of valid events Usage: perf trace [] [] or: perf trace [] -- [] or: perf trace record [] [] or: perf trace record [] -- [] -e, --event event/syscall selector. 
use 'perf list' to list available events # This is because on x86-64 we have it just as 'umount2': $ grep umount arch/x86/entry/syscalls/syscall_64.tbl 166 common umount2 __x64_sys_umount $ So if the lookup by syscall name fails, fall back to looking at the aliases we have in the syscall_fmts table and then redo the lookup. Now: # trace -e umount umount -f /mnt umount: /mnt: not mounted. 1.759 ( 0.004 ms): umount/18365 umount2(name: 0x55fbfcbc4480, flags: 1) = -1 EINVAL Invalid argument # Time to beautify the flags arg :-) Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ukweodgzbmjd25lfkgryeft1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 76c14c0129fca..db8711061ca3b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -862,6 +862,18 @@ static struct syscall_fmt *syscall_fmt__find(const char *name) return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); } +static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) +{ + int i, nmemb = ARRAY_SIZE(syscall_fmts); + + for (i = 0; i < nmemb; ++i) { + if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0) + return &syscall_fmts[i]; + } + + return NULL; +} + /* * is_exit: is this "exit" or "exit_group"? * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
@@ -3195,6 +3207,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, int len = strlen(str) + 1, err = -1, list, idx; char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); char group_name[PATH_MAX]; + struct syscall_fmt *fmt; if (strace_groups_dir == NULL) return -1; @@ -3212,12 +3225,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str, if (syscalltbl__id(trace->sctbl, s) >= 0 || syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { list = 1; + goto do_concat; + } + + fmt = syscall_fmt__find_by_alias(s); + if (fmt != NULL) { + list = 1; + s = fmt->name; } else { path__join(group_name, sizeof(group_name), strace_groups_dir, s); if (access(group_name, R_OK) == 0) list = 1; } - +do_concat: if (lists[list]) { sprintf(lists[list] + strlen(lists[list]), ",%s", s); } else { -- cgit v1.2.3 From 476c92cacf383c83584ba02d06c88cf18f062afb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Oct 2018 13:23:25 -0300 Subject: perf trace: Beautify the umount's 'name' argument By using the SCA_FILENAME beautifier, that works when either the probe:vfs_getname probe is in place or with the eBPF program tools/perf/examples/bpf/augmented_syscalls.c: # perf probe -l probe:vfs_getname (on getname_flags:73@acme/git/linux/fs/namei.c with pathname) # perf trace -e umount 9630.332 ( 9.521 ms): umount/8082 umount2(name: /mnt) = 0 # The augmented syscalls one will be done in the next patch. 
Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-hegbzlpd2nrn584l5jxn7sy2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index db8711061ca3b..d286f73ef2e5b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -838,7 +838,8 @@ static struct syscall_fmt { .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "tkill", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "umount2", .alias = "umount", }, + { .name = "umount2", .alias = "umount", + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, }, { .name = "uname", .alias = "newuname", }, { .name = "unlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, -- cgit v1.2.3 From 23c07a23cbed389d0b9c7a06486574621df8d1a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Oct 2018 13:51:45 -0300 Subject: perf trace: Beautify mount's first pathname arg The pathname beautifiers so far support just one augmented pathname per syscall, so do it just for mount's first arg, later this will get fixed. 
With: # perf probe -l probe:vfs_getname (on getname_flags:73@acme/git/linux/fs/namei.c with pathname) # Later this will get added to augmented_syscalls.c (eBPF): In one xterm: # perf trace -e mount,umount 2687.331 ( 3.544 ms): mount/8892 mount(dev_name: /mnt, dir_name: 0x561f9ac184a0, type: 0x561f9ac1b170, flags: BIND) = 0 3912.126 ( 8.807 ms): umount/8895 umount2(name: /mnt) = 0 ^C# In the other: $ sudo mount --bind /proc /mnt $ sudo umount /mnt Cc: Adrian Hunter Cc: Benjamin Peterson Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qsvhrm2es635cl4zicqjeth2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d286f73ef2e5b..dc8a6c4986ce2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -727,7 +727,8 @@ static struct syscall_fmt { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, { .name = "mount", - .arg = { [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, + [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, -- cgit v1.2.3 From 4e303fbe2d95806c875f5ebfcb3d980e20b4bd83 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Oct 2018 15:55:23 -0300 Subject: perf top: Allow disabling the overwrite mode In ebebbf082357 ("perf top: Switch default mode to overwrite mode") we forgot to leave a way to disable that new default. Add a --overwrite option that can be disabled using --no-overwrite, since the code is already structured in such a way that we can readily disable this mode.
This is useful when investigating bugs with this mode like the recent report from David Miller where lots of unknown symbols appear due to disabling the events while processing them which disables all record types, not just PERF_RECORD_SAMPLE, which makes it impossible to resolve maps when we lose PERF_RECORD_MMAP records. This can be easily seen while building a kernel, when there are lots of short lived processes. Reported-by: David Miller Acked-by: Kan Liang Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Fixes: ebebbf082357 ("perf top: Switch default mode to overwrite mode") Link: https://lkml.kernel.org/n/tip-oqgsz2bq4kgrnnajrafcdhie@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 5 +++++ tools/perf/builtin-top.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 114fda12aa490..d4be6061fe1c1 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -242,6 +242,11 @@ Default is to monitor all CPUS. --hierarchy:: Enable hierarchy output. +--overwrite:: + This is the default, but for investigating problems with it or any other strange + behaviour like lots of unknown samples, we may want to disable this mode by using + --no-overwrite. + --force:: Don't do ownership validation. 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d21d8751e7491..214fad747b041 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1372,6 +1372,8 @@ int cmd_top(int argc, const char **argv) "Show raw trace event output (do not use print fmt or plugins)"), OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, "Show entries in a hierarchy"), + OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite, + "Use a backward ring buffer, default: yes"), OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, "number of thread to run event synthesize"), -- cgit v1.2.3 From 218d61110f69632974034b6e27686ce482a1c455 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 29 Oct 2018 09:47:00 -0300 Subject: perf top: Do not use overwrite mode by default Enabling --overwrite mode allows us to use just the most recent records, which helps in high core count machines such as Knights Landing/Mill, but right now is being disabled by default as the pausing used in this technique is leading to loss of metadata events such as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading to lots of unknown samples appearing on the UI. Enabling this may be useful if you are on such machines and profiling a workload that doesn't create short lived threads and/or doesn't use many executable mmap operations. Work is being planned to solve this situation; until then, this will remain disabled by default.
Reported-by: David Miller Acked-by: Kan Liang Link: https://lkml.kernel.org/r/4f84468f-37d9-cf1b-12c1-514ef74b6a48@linux.intel.com Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: ebebbf082357 ("perf top: Switch default mode to overwrite mode") Link: https://lkml.kernel.org/n/tip-ehvf77vi1si9409r7p4wx788@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 11 ++++++++--- tools/perf/builtin-top.c | 11 +++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index d4be6061fe1c1..808b664343c9f 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -243,9 +243,14 @@ Default is to monitor all CPUS. Enable hierarchy output. --overwrite:: - This is the default, but for investigating problems with it or any other strange - behaviour like lots of unknown samples, we may want to disable this mode by using - --no-overwrite. + Enable this to use just the most recent records, which helps in high core count + machines such as Knights Landing/Mill, but right now is disabled by default as + the pausing used in this technique is leading to loss of metadata events such + as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading + to lots of unknown samples appearing on the UI. Enable this if you are in such + machines and profiling a workload that doesn't creates short lived threads and/or + doesn't uses many executable mmap operations. Work is being planed to solve + this situation, till then, this will remain disabled by default. --force:: Don't do ownership validation. 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 214fad747b041..8e29e0cc86263 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1257,7 +1257,14 @@ int cmd_top(int argc, const char **argv) .uses_mmap = true, }, .proc_map_timeout = 500, - .overwrite = 1, + /* + * FIXME: This will lose PERF_RECORD_MMAP and other metadata + * when we pause, fix that and reenable. Probably using a + * separate evlist with a dummy event, i.e. a non-overwrite + * ring buffer just for metadata events, while PERF_RECORD_SAMPLE + * stays in overwrite mode. -acme + * */ + .overwrite = 0, }, .max_stack = sysctl__max_stack(), .annotation_opts = annotation__default_options, @@ -1373,7 +1380,7 @@ int cmd_top(int argc, const char **argv) OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, "Show entries in a hierarchy"), OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite, - "Use a backward ring buffer, default: yes"), + "Use a backward ring buffer, default: no"), OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, "number of thread to run event synthesize"), -- cgit v1.2.3 From 1fe627da30331024f453faef04d500079b901107 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 29 Oct 2018 15:16:44 +0100 Subject: perf unwind: Take pgoff into account when reporting elf to libdwfl libdwfl parses an ELF file itself and creates mappings for the individual sections. perf on the other hand sees raw mmap events which represent individual sections. When we encounter an address pointing into a mapping with pgoff != 0, we must take that into account and report the file at the non-offset base address. This fixes unwinding with libdwfl in some cases. E.g. 
for a file like: ``` using namespace std; mutex g_mutex; double worker() { lock_guard guard(g_mutex); uniform_real_distribution uniform(-1E5, 1E5); default_random_engine engine; double s = 0; for (int i = 0; i < 1000; ++i) { s += norm(complex(uniform(engine), uniform(engine))); } cout << s << endl; return s; } int main() { vector> results; for (int i = 0; i < 10000; ++i) { results.push_back(async(launch::async, worker)); } return 0; } ``` Compile it with `g++ -g -O2 -lpthread cpp-locking.cpp -o cpp-locking`, then record it with `perf record --call-graph dwarf -e sched:sched_switch`. When you analyze it with `perf script` and libunwind, you should see: ``` cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120 ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux) 7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so) 7f38e4252d0b arena_get2.part.4+0x2fb (/usr/lib/libc-2.28.so) 7f38e4255b1c tcache_init.part.6+0xec (/usr/lib/libc-2.28.so) 7f38e42569e5 __GI___libc_malloc+0x115 (inlined) 7f38e4241790 __GI__IO_file_doallocate+0x90 (inlined) 7f38e424fbbf __GI__IO_doallocbuf+0x4f (inlined) 7f38e424ee47 __GI__IO_file_overflow+0x197 (inlined) 7f38e424df36 _IO_new_file_xsputn+0x116 (inlined) 7f38e4242bfb __GI__IO_fwrite+0xdb (inlined) 7f38e463fa6d std::basic_streambuf 
>::sputn(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator >::_M_put(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::__write(std::ostreambuf_iterator >, char const*, int)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::num_put > >::_M_insert_float(std::ostreambuf_iterator 7f38e464bd70 std::num_put > >::put(std::ostreambuf_iterator >, std::ios_base&, char, double) const+0x90 (inl> 7f38e464bd70 std::ostream& std::ostream::_M_insert(double)+0x90 (/usr/lib/libstdc++.so.6.0.25) 563b9cb502f7 std::ostream::operator<<(double)+0xb7 (inlined) 563b9cb502f7 worker()+0xb7 (/ssd/milian/projects/kdab/rnd/hotspot/build/tests/test-clients/cpp-locking/cpp-locking) 563b9cb506fb double std::__invoke_impl(std::__invoke_other, double (*&&)())+0x2b (inlined) 563b9cb506fb std::__invoke_result::type std::__invoke(double (*&&)())+0x2b (inlined) 563b9cb506fb decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker >::_M_invoke<0ul>(std::_Index_tuple<0ul>)+0x2b (inlined) 563b9cb506fb std::thread::_Invoker >::operator()()+0x2b (inlined) 563b9cb506fb std::__future_base::_Task_setter, std::__future_base::_Result_base::_Deleter>, std::thread::_Invoker >, dou> 563b9cb506fb std::_Function_handler (), std::__future_base::_Task_setter 563b9cb507e8 std::function ()>::operator()() const+0x28 (inlined) 563b9cb507e8 std::__future_base::_State_baseV2::_M_do_set(std::function ()>*, bool*)+0x28 (/ssd/milian/> 7f38e46d24fe __pthread_once_slow+0xbe (/usr/lib/libpthread-2.28.so) 563b9cb51149 __gthread_once+0xe9 (inlined) 563b9cb51149 void std::call_once ()>*, bool*)> 563b9cb51149 std::__future_base::_State_baseV2::_M_set_result(std::function ()>, bool)+0xe9 (inlined) 563b9cb51149 std::__future_base::_Async_state_impl >, double>::_Async_state_impl(std::thread::_Invoker >&&)::{lambda()#1}::op> 563b9cb51149 void std::__invoke_impl >, double>::_Async_state_impl(std::thread::_Invoker 563b9cb51149 std::__invoke_result >, 
double>::_Async_state_impl(std::thread::_Invoker >> 563b9cb51149 decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker >, double>::_Async_state_> 563b9cb51149 std::thread::_Invoker >, double>::_Async_state_impl(std::thread::_Invoker 563b9cb51149 std::thread::_State_impl >, double>::_Async_state_impl(std::thread> 7f38e45f0062 execute_native_thread_routine+0x12 (/usr/lib/libstdc++.so.6.0.25) 7f38e46caa9c start_thread+0xfc (/usr/lib/libpthread-2.28.so) 7f38e42ccb22 __GI___clone+0x42 (inlined) ``` Before this patch, using libdwfl, you would see: ``` cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120 ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux) 7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so) a041161e77950c5c [unknown] ([unknown]) ``` With this patch applied, we get a bit further in unwinding: ``` cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120 ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb16737cc 
rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux) ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux) 7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so) 7f38e4252d0b arena_get2.part.4+0x2fb (/usr/lib/libc-2.28.so) 7f38e4255b1c tcache_init.part.6+0xec (/usr/lib/libc-2.28.so) 7f38e42569e5 __GI___libc_malloc+0x115 (inlined) 7f38e4241790 __GI__IO_file_doallocate+0x90 (inlined) 7f38e424fbbf __GI__IO_doallocbuf+0x4f (inlined) 7f38e424ee47 __GI__IO_file_overflow+0x197 (inlined) 7f38e424df36 _IO_new_file_xsputn+0x116 (inlined) 7f38e4242bfb __GI__IO_fwrite+0xdb (inlined) 7f38e463fa6d std::basic_streambuf >::sputn(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator >::_M_put(char const*, long)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::__write(std::ostreambuf_iterator >, char const*, int)+0x1cd (inlined) 7f38e463fa6d std::ostreambuf_iterator > std::num_put > >::_M_insert_float(std::ostreambuf_iterator 7f38e464bd70 std::num_put > >::put(std::ostreambuf_iterator >, std::ios_base&, char, double) const+0x90 (inl> 7f38e464bd70 std::ostream& std::ostream::_M_insert(double)+0x90 (/usr/lib/libstdc++.so.6.0.25) 563b9cb502f7 std::ostream::operator<<(double)+0xb7 (inlined) 563b9cb502f7 worker()+0xb7 (/ssd/milian/projects/kdab/rnd/hotspot/build/tests/test-clients/cpp-locking/cpp-locking) 6eab825c1ee3e4ff [unknown] ([unknown]) ``` Note that the backtrace is still stopping too early, when compared to the nice results obtained via libunwind. It's unclear so far what the reason for that is. 
Committer note: Further comment by Milian on the thread started on the Link: tag below: --- The remaining issue is due to a bug in elfutils: https://sourceware.org/ml/elfutils-devel/2018-q4/msg00089.html With both patches applied, libunwind and elfutils produce the same output for the above scenario. --- Signed-off-by: Milian Wolff Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20181029141644.3907-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind-libdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 6f318b15950e8..5eff9bfc57583 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip, Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != al->map->start) + if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; -- cgit v1.2.3 From d6c9c05fe1eb4b213b183d8a1e79416256dc833a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 30 Oct 2018 15:18:28 +0800 Subject: perf cs-etm: Correct CPU mode for samples Since commit edeb0c90df35 ("perf tools: Stop fallbacking to kallsyms for vdso symbols lookup"), the kernel address cannot be properly parsed to kernel symbol with command 'perf script -k vmlinux'. 
The reason is that CoreSight samples always set the CPU mode to PERF_RECORD_MISC_USER, thus it fails to find the corresponding map/dso in the flow below: process_sample_event() `-> machine__resolve() `-> thread__find_map(thread, sample->cpumode, sample->ip, al); In this flow the argument 'sample->cpumode' is needed to tell what the CPU mode is; before, PERF_RECORD_MISC_USER was always passed, but this caused no failure until commit edeb0c90df35 ("perf tools: Stop fallbacking to kallsyms for vdso symbols lookup") was merged. The reason is that, even with the wrong CPU mode, the function thread__find_map() first failed to find the map but then fell back to finding the kernel map for vdso symbols lookup. The latest code has removed the fallback, thus if the CPU mode is PERF_RECORD_MISC_USER then it can no longer find the map for a kernel address. This patch corrects the samples' CPU mode setting: it creates a new helper function cs_etm__cpu_mode() to tell what the CPU mode is, based on the address and the info from the machine structure; the helper also goes a bit further and checks not only kernel and user mode, but also host/guest and hypervisor mode. Finally this patch uses the function in instruction and branch samples and also applies it in cs_etm__mem_access() as a minor polish.
Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Miller Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: stable@kernel.org # v4.19 Link: http://lkml.kernel.org/r/1540883908-17018-1-git-send-email-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 3b37d66dc5337..73430b73570d5 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) +{ + struct machine *machine; + + machine = etmq->etm->machine; + + if (address >= etmq->etm->kernel_start) { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_KERNEL; + else + return PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_USER; + else if (perf_guest) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_HYPERVISOR; + } +} + static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, size_t size, u8 *buffer) { @@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, return -1; machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = cs_etm__cpu_mode(etmq, address); thread = etmq->thread; if (!thread) { @@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct perf_sample sample = {.ip = 0,}; event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = 
sizeof(struct perf_event_header); sample.ip = addr; @@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.insn_len = 1; - sample.cpumode = event->header.misc; + sample.cpumode = event->sample.header.misc; if (etm->synth_opts.last_branch) { cs_etm__copy_last_branch_rb(etmq); @@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) u64 nr; struct branch_entry entries; } dummy_bs; + u64 ip; + + ip = cs_etm__last_executed_instr(etmq->prev_packet); event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); + sample.ip = ip; sample.pid = etmq->pid; sample.tid = etmq->tid; sample.addr = cs_etm__first_executed_instr(etmq->packet); @@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) sample.period = 1; sample.cpu = etmq->packet->cpu; sample.flags = 0; - sample.cpumode = PERF_RECORD_MISC_USER; + sample.cpumode = event->sample.header.misc; /* * perf report cannot handle events without a branch stack -- cgit v1.2.3 From e9024d519d892b38176cafd46f68a7cdddd77412 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Oct 2018 12:12:26 -0300 Subject: perf callchain: Honour the ordering of PERF_CONTEXT_{USER,KERNEL,etc} When processing using 'perf report -g caller', which is the default, we ended up reverting the callchain entries received from the kernel, but simply reverting throws away the information that tells that from a point onwards the addresses are for userspace, kernel, guest kernel, guest user, hypervisor. The idea is that if we are walking backwards, for each cluster of non-cpumode entries we have to first scan backwards for the next one and use that for the cluster. 
This seems silly and more expensive than it needs to be but it is enough for a initial fix. The code here is really complicated because it is intimately intertwined with the lbr and branch handling, as well as this callchain order, further fixes will be needed to properly take into account the cpumode in those cases. Another problem with ORDER_CALLER is that the NULL "0" IP that is at the end of most callchains shows up at the top of the histogram because every callchain contains it and with ORDER_CALLER it is the first entry. Signed-off-by: David S. Miller Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Souvik Banerjee Cc: Wang Nan Cc: stable@vger.kernel.org # 4.19 Link: https://lkml.kernel.org/n/tip-2wt3ayp6j2y2f2xowixa8y6y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 111ae858cbcbd..8ee8ab39d8acf 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2140,6 +2140,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread, return 0; } +static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, + struct callchain_cursor *cursor, + struct symbol **parent, + struct addr_location *root_al, + u8 *cpumode, int ent) +{ + int err = 0; + + while (--ent >= 0) { + u64 ip = chain->ips[ent]; + + if (ip >= PERF_CONTEXT_MAX) { + err = add_callchain_ip(thread, cursor, parent, + root_al, cpumode, ip, + false, NULL, NULL, 0); + break; + } + } + return err; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct perf_evsel *evsel, @@ -2246,6 +2267,12 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: + if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, 
cursor, parent, root_al, + &cpumode, chain->nr - first_call); + if (err) + return (err < 0) ? err : 0; + } for (i = first_call, nr_entries = 0; i < chain_nr && nr_entries < max_stack; i++) { u64 ip; @@ -2260,9 +2287,15 @@ check_calls: continue; #endif ip = chain->ips[j]; - if (ip < PERF_CONTEXT_MAX) ++nr_entries; + else if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, cursor, parent, + root_al, &cpumode, j); + if (err) + return (err < 0) ? err : 0; + continue; + } err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, -- cgit v1.2.3 From 733ac4f9935c4b257aff2ec476e6c20665daf6c2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 12:27:37 -0300 Subject: tools include uapi: Update linux/fs.h copy To silence this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fs.h' differs from latest version at 'include/uapi/linux/fs.h' diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h Due to just two comments added by: Fixes: 578bdaabd015 ("crypto: speck - remove Speck") So nothing that entails changes in tools/, that so far uses fs.h to generate the mount and umount syscalls 'flags' argument integer->string tables with: $ tools/perf/trace/beauty/mount_flags.sh static const char *mount_flags[] = { [4096 ? (ilog2(4096) + 1) : 0] = "BIND", [30 + 1] = "ACTIVE", [31 + 1] = "NOUSER", }; $ # trace -e mount,umount mount --bind /proc /mnt 1.228 ( 2.581 ms): mount/1068 mount(dev_name: /mnt, dir_name: 0x55f011c354a0, type: 0x55f011c38170, flags: BIND) = 0 # trace -e mount,umount umount /proc /mnt umount: /proc: target is busy. 1.587 ( 0.010 ms): umount/1070 umount2(name: /proc) = -1 EBUSY Device or resource busy 1.799 (12.660 ms): umount/1070 umount2(name: /mnt) = 0 # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Jason A. 
Donenfeld Cc: Herbert Xu Link: https://lkml.kernel.org/n/tip-c00bqzclscgah26z2g5zxm73@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 73e01918f9963..a441ea1bfe6d9 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -279,8 +279,8 @@ struct fsxattr { #define FS_ENCRYPTION_MODE_AES_256_CTS 4 #define FS_ENCRYPTION_MODE_AES_128_CBC 5 #define FS_ENCRYPTION_MODE_AES_128_CTS 6 -#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 -#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 +#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ struct fscrypt_policy { __u8 version; -- cgit v1.2.3 From 8dd4c0f68c0db4c0f01af60a99a7ed34fd3dee2b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 13:10:50 -0300 Subject: tools arch uapi: Update asm-generic/unistd.h and arm64 unistd.h copies To get the changes in: 82b355d161c9 ("y2038: Remove newstat family from default syscall set") Which will make the syscall table used by 'perf trace' for arm64 to be updated from the changes in that patch. 
This silences these perf build warnings: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/unistd.h' differs from latest version at 'arch/arm64/include/uapi/asm/unistd.h' diff -u tools/arch/arm64/include/uapi/asm/unistd.h arch/arm64/include/uapi/asm/unistd.h Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Cc: Kim Phillips Cc: Arnd Bergmann Cc: Geert Uytterhoeven Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-3euy7c4yy5mvnp5bm16t9vqg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/unistd.h | 1 + tools/include/uapi/asm-generic/unistd.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 5072cbd15c829..dae1584cf017f 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -16,5 +16,6 @@ */ #define __ARCH_WANT_RENAMEAT +#define __ARCH_WANT_NEW_STAT #include diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index df4bedb9b01c2..538546edbfbd2 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -242,10 +242,12 @@ __SYSCALL(__NR_tee, sys_tee) /* fs/stat.c */ #define __NR_readlinkat 78 __SYSCALL(__NR_readlinkat, sys_readlinkat) +#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64) #define __NR3264_fstatat 79 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) #define __NR3264_fstat 80 __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat) +#endif /* fs/sync.c */ #define __NR_sync 81 -- cgit v1.2.3 From 89eb1f3b7f2a9156ce6f78713d7924c1bb2fab9f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 14:20:07 -0300 Subject: tools include uapi: Update 
asound.h copy To silence this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/sound/asound.h' differs from latest version at 'include/uapi/sound/asound.h' diff -u tools/include/uapi/sound/asound.h include/uapi/sound/asound.h Due to this cset: a98401518def ("ALSA: timer: fix wrong comment to refer to 'SNDRV_TIMER_PSFLG_*'") Cc: Jiri Olsa Cc: Namhyung Kim Cc: Takashi Sakamoto Cc: Takashi Iwai Link: https://lkml.kernel.org/n/tip-76gsvs0w2g0x723ivqa2xua3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index ed0a120d4f084..404d4b9ffe764 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -752,7 +752,7 @@ struct snd_timer_info { #define SNDRV_TIMER_PSFLG_EARLY_EVENT (1<<2) /* write early event to the poll queue */ struct snd_timer_params { - unsigned int flags; /* flags - SNDRV_MIXER_PSFLG_* */ + unsigned int flags; /* flags - SNDRV_TIMER_PSFLG_* */ unsigned int ticks; /* requested resolution in ticks */ unsigned int queue_size; /* total size of queue (32-1024) */ unsigned int reserved0; /* reserved, was: failure locations */ -- cgit v1.2.3 From 80ee5668b8a755039ca6180c6272a6e8efee5aad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 15:26:47 -0300 Subject: perf beauty: Add a generator for MAP_ mmap's flag constants It'll use tools/{arch}/*,include copies of mman.h to generate a table to be used by tools, initially by the 'mmap' beautifiers in 'perf trace', but that could also be used to translate from a string constant to the integer value to be used in a eBPF or tracefs tracepoint filter. 
Tested for all archs using: $ for arch in `ls tools/arch/` ; \ do echo $arch ; tools/perf/trace/beauty/mmap_flags.sh $arch ; \ done | less Example for alpha, an oddball, doesn't include any header, defines all its stuff: $ tools/perf/trace/beauty/mmap_flags.sh alpha static const char *mmap_flags[] = { [ilog2(0x10) + 1] = "ANONYMOUS", [ilog2(0x02000) + 1] = "DENYWRITE", [ilog2(0x04000) + 1] = "EXECUTABLE", [ilog2(0x100) + 1] = "FIXED", [ilog2(0x01000) + 1] = "GROWSDOWN", [ilog2(0x100000) + 1] = "HUGETLB", [ilog2(0x08000) + 1] = "LOCKED", [ilog2(0x40000) + 1] = "NONBLOCK", [ilog2(0x10000) + 1] = "NORESERVE", [ilog2(0x20000) + 1] = "POPULATE", [ilog2(0x02) + 1] = "PRIVATE", [ilog2(0x01) + 1] = "SHARED", [ilog2(0x80000) + 1] = "STACK", }; $ Common case, my workstation, defines one entry (MAP_32BIT), then includes mman.h, which gets it to include mman-common.h too: $ tools/perf/trace/beauty/mmap_flags.sh static const char *mmap_flags[] = { [ilog2(0x40) + 1] = "32BIT", [ilog2(0x01) + 1] = "SHARED", [ilog2(0x02) + 1] = "PRIVATE", [ilog2(0x10) + 1] = "FIXED", [ilog2(0x20) + 1] = "ANONYMOUS", [ilog2(0x100000) + 1] = "FIXED_NOREPLACE", [ilog2(0x0100) + 1] = "GROWSDOWN", [ilog2(0x0800) + 1] = "DENYWRITE", [ilog2(0x1000) + 1] = "EXECUTABLE", [ilog2(0x2000) + 1] = "LOCKED", [ilog2(0x4000) + 1] = "NORESERVE", [ilog2(0x8000) + 1] = "POPULATE", [ilog2(0x10000) + 1] = "NONBLOCK", [ilog2(0x20000) + 1] = "STACK", [ilog2(0x40000) + 1] = "HUGETLB", [ilog2(0x80000) + 1] = "SYNC", }; $ uname -m x86_64 $ Sparc, that defines a bunch then includes just mman-common.h: $ tools/perf/trace/beauty/mmap_flags.sh sparc static const char *mmap_flags[] = { [ilog2(0x0800) + 1] = "DENYWRITE", [ilog2(0x1000) + 1] = "EXECUTABLE", [ilog2(0x0200) + 1] = "GROWSDOWN", [ilog2(0x40000) + 1] = "HUGETLB", [ilog2(0x100) + 1] = "LOCKED", [ilog2(0x10000) + 1] = "NONBLOCK", [ilog2(0x40) + 1] = "NORESERVE", [ilog2(0x8000) + 1] = "POPULATE", [ilog2(0x20000) + 1] = "STACK", [ilog2(0x01) + 1] = "SHARED", [ilog2(0x02) + 
1] = "PRIVATE", [ilog2(0x10) + 1] = "FIXED", [ilog2(0x20) + 1] = "ANONYMOUS", [ilog2(0x100000) + 1] = "FIXED_NOREPLACE", }; [acme@jouet perf]$ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-xydeh491z8fkgglcmqnl5thj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap_flags.sh | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 tools/perf/trace/beauty/mmap_flags.sh diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh new file mode 100755 index 0000000000000..22c3fdca89751 --- /dev/null +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 2 ] ; then + [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/` + header_dir=tools/include/uapi/asm-generic + arch_header_dir=tools/arch/${hostarch}/include/uapi/asm +else + header_dir=$1 + arch_header_dir=$2 +fi + +arch_mman=${arch_header_dir}/mman.h + +# those in egrep -vw are flags, we want just the bits + +printf "static const char *mmap_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +egrep -q $regex ${arch_mman} && \ +(egrep $regex ${arch_mman} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +egrep -q '#[[:space:]]*include[[:space:]]+.*' ${arch_mman} && +(egrep $regex ${header_dir}/mman.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +printf "};\n" -- cgit v1.2.3 From fbd7458db75790e37903c75be68513a4c087737c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 16:11:59 -0300 Subject: perf beauty: Wire up the mmap flags table generator to the Makefile Now when we run 'make -C tools/perf O=/tmp/build/perf' we end up with: $ cat 
/tmp/build/perf/trace/beauty/generated/mmap_flags_array.c static const char *mmap_flags[] = { [ilog2(0x40) + 1] = "32BIT", [ilog2(0x01) + 1] = "SHARED", [ilog2(0x02) + 1] = "PRIVATE", [ilog2(0x10) + 1] = "FIXED", [ilog2(0x20) + 1] = "ANONYMOUS", [ilog2(0x100000) + 1] = "FIXED_NOREPLACE", [ilog2(0x0100) + 1] = "GROWSDOWN", [ilog2(0x0800) + 1] = "DENYWRITE", [ilog2(0x1000) + 1] = "EXECUTABLE", [ilog2(0x2000) + 1] = "LOCKED", [ilog2(0x4000) + 1] = "NORESERVE", [ilog2(0x8000) + 1] = "POPULATE", [ilog2(0x10000) + 1] = "NONBLOCK", [ilog2(0x20000) + 1] = "STACK", [ilog2(0x40000) + 1] = "HUGETLB", [ilog2(0x80000) + 1] = "SYNC", }; $ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-t3fn7u3tjsupio6e6vkufx9m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a31c5c29d53ab..3ccb4f0bf0883 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1,4 +1,5 @@ include ../scripts/Makefile.include +include ../scripts/Makefile.arch # The default target of this Makefile is... 
all: @@ -385,6 +386,8 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) linux_uapi_dir := $(srctree)/tools/include/uapi/linux +asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic +arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/ beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl @@ -460,6 +463,12 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl) $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@ +mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c +mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh + +$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl) + $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ + mount_flags_array := $(beauty_outdir)/mount_flags_array.c mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh @@ -583,6 +592,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(socket_ipproto_array) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ + $(mmap_flags_array) \ $(mount_flags_array) \ $(perf_ioctl_array) \ $(prctl_option_array) \ @@ -870,6 +880,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(madvise_behavior_array) \ + $(OUTPUT)$(mmap_flags_array) \ $(OUTPUT)$(mount_flags_array) \ $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(pkey_alloc_access_rights_array) \ -- cgit v1.2.3 From 2f967f1dbbc1f746ad0deda8352bcf563d59e1c7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 16:30:38 -0300 Subject: perf trace beauty: Use the mmap flags table generated from headers Instead of requiring us to go 
on and edit sources to add new flag. # perf trace -e *mmap sleep 0.1 0.025 ( 0.005 ms): sleep/29876 mmap(len: 163746, prot: READ, flags: PRIVATE, fd: 3) = 0x7faa68ad1000 0.059 ( 0.004 ms): sleep/29876 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7faa68acf000 0.069 ( 0.006 ms): sleep/29876 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7faa6851f000 0.086 ( 0.009 ms): sleep/29876 mmap(addr: 0x7faa688cb000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7faa688cb000 0.101 ( 0.005 ms): sleep/29876 mmap(addr: 0x7faa688d1000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7faa688d1000 0.348 ( 0.005 ms): sleep/29876 mmap(len: 111950656, prot: READ, flags: PRIVATE, fd: 3) = 0x7faa61a5b000 # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ggmoy6vxoygh5yim890ht0kf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap.c | 48 ++++++++++-------------------------------- 1 file changed, 11 insertions(+), 37 deletions(-) diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 0605593552c6d..c534bd96ef5c9 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 #include +#include static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) @@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot +static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size) +{ +#include "trace/beauty/generated/mmap_flags_array.c" + static DEFINE_STRARRAY(mmap_flags); + + return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags); +} + static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, struct syscall_arg *arg) { - int printed 
= 0, flags = arg->val; + unsigned long flags = arg->val; if (flags & MAP_ANONYMOUS) arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */ -#define P_MMAP_FLAG(n) \ - if (flags & MAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MAP_##n; \ - } - - P_MMAP_FLAG(SHARED); - P_MMAP_FLAG(PRIVATE); -#ifdef MAP_32BIT - P_MMAP_FLAG(32BIT); -#endif - P_MMAP_FLAG(ANONYMOUS); - P_MMAP_FLAG(DENYWRITE); - P_MMAP_FLAG(EXECUTABLE); - P_MMAP_FLAG(FILE); - P_MMAP_FLAG(FIXED); -#ifdef MAP_FIXED_NOREPLACE - P_MMAP_FLAG(FIXED_NOREPLACE); -#endif - P_MMAP_FLAG(GROWSDOWN); - P_MMAP_FLAG(HUGETLB); - P_MMAP_FLAG(LOCKED); - P_MMAP_FLAG(NONBLOCK); - P_MMAP_FLAG(NORESERVE); - P_MMAP_FLAG(POPULATE); - P_MMAP_FLAG(STACK); - P_MMAP_FLAG(UNINITIALIZED); -#ifdef MAP_SYNC - P_MMAP_FLAG(SYNC); -#endif -#undef P_MMAP_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; + return mmap__scnprintf_flags(flags, bf, size); } #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags -- cgit v1.2.3 From 685626dc26bd9cead850d06520708acbd16bcfda Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 16:50:08 -0300 Subject: tools include uapi: Update linux/mmap.h copy To pick up the changes from: 20916d4636a9 ("mm/hugetlb: add mmap() encodings for 32MB and 512MB page sizes") That do not entail changes in in tools, this just shows that we have to consider bits [26:31] of flags to beautify that in tools like 'perf trace' This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h' diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h Cc: Anshuman Khandual Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-3rvc39lon93kgt5pl31d8g4x@git.kernel.org Signed-off-by: Arnaldo 
Carvalho de Melo --- tools/include/uapi/linux/mman.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index bfd5938fede6c..d0f515d53299e 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -28,7 +28,9 @@ #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB -- cgit v1.2.3 From 827758129a0f84fbd0b2dda15e14a77a7604803d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 17:01:46 -0300 Subject: tools headers: Sync the various kvm.h header copies For powerpc, s390, x86 and the main uapi linux/kvm.h header, none of them entail changes in tooling. 
Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-avn7iy8f4tcm2y40sbsdk31m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/kvm.h | 1 + tools/arch/s390/include/uapi/asm/kvm.h | 2 ++ tools/arch/x86/include/uapi/asm/kvm.h | 6 ++---- tools/include/uapi/linux/kvm.h | 21 +++++++++++++++++++-- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1b32b56a03d34..8c876c166ef27 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) +#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 9a50f02b98946..16511d97e8dc0 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc { #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 +#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4 +#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5 /* kvm attributes for migration mode */ #define KVM_S390_VM_MIGRATION_STOP 0 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 8a6eff9c27f3f..dabfcf7c3941a 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -300,10 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - union { - __u8 pad; - __u8 pending; - }; + __u8 pending; __u32 error_code; } exception; struct { @@ -387,6 +384,7 @@ struct 
kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 2875ce85b3226..2b7a652c9fa46 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -751,6 +757,15 @@ struct kvm_ppc_resize_hpt { #define KVM_S390_SIE_PAGE_OFFSET 1 +/* + * On arm64, machine type can be used to request the physical + * address size for the VM. Bits[7-0] are reserved for the guest + * PA size shift (i.e, log2(PA_Size)). For backward compatibility, + * value 0 implies the default IPA size, 40bits. 
+ */ +#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL +#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ + ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) /* * ioctls for /dev/kvm fds: */ @@ -958,6 +973,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SEND_IPI 161 #define KVM_CAP_COALESCED_PIO 162 #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 +#define KVM_CAP_ARM_VM_IPA_SIZE 165 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From d45a57fff0a657045a77b395ae713ffae0cb4e46 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 17:04:47 -0300 Subject: tools headers uapi: Update linux/netlink.h header copy Picking the changes from: 89d35528d17d ("netlink: Add new socket option to enable strict checking on dumps") To silence this build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h' Cc: Alexei Starovoitov Cc: David Ahern Cc: David S. Miller Cc: Eric Leblond Link: https://lkml.kernel.org/n/tip-1xymkfjpmhxfzrs46t8z8mjw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h index 776bc92e91180..486ed1f0c0bc1 100644 --- a/tools/include/uapi/linux/netlink.h +++ b/tools/include/uapi/linux/netlink.h @@ -155,6 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 +#define NETLINK_DUMP_STRICT_CHK 12 struct nl_pktinfo { __u32 group; -- cgit v1.2.3 From 76b0b801782b34b3028dcef3de36cb634e3908a8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Oct 2018 17:06:57 -0300 Subject: tools headers uapi: Update linux/if_link.h header copy To pick the changes from: 9163a0fc1f0c ("net: bridge: add support for per-port vlan stats") And silence this build warning: Warning: Kernel ABI header at 
'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h' Cc: Alexei Starovoitov Cc: David S. Miller Cc: Eric Leblond Cc: Nikolay Aleksandrov Link: https://lkml.kernel.org/n/tip-7p53ghippywz7fqkwo3nkzet@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 58faab897201f..1debfa42cba1a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -287,6 +287,7 @@ enum { IFLA_BR_MCAST_STATS_ENABLED, IFLA_BR_MCAST_IGMP_VERSION, IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, __IFLA_BR_MAX, }; -- cgit v1.2.3 From ff27a06af6ffd3f49b9e193eb68f487ad76651e1 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 30 Oct 2018 22:30:03 -0700 Subject: perf top: Start display thread earlier If events are coming in at a rate such that the event processing thread can barely keep up, our initial run of the event ring will almost never terminate and this delays the starting of the display thread. The screen basically stays black until the event thread can get out of its endless loop. Therefore, start the display thread before we start processing the ring buffer. This also makes sure that we always have the user requested real time setting engaged when processing the ring. Signed-off-by: David S.
Miller Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20181030.223003.2242527041807905962.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8e29e0cc86263..b2838de13de02 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1134,11 +1134,6 @@ static int __cmd_top(struct perf_top *top) if (!target__none(&opts->target)) perf_evlist__enable(top->evlist); - /* Wait for a minimal set of events before starting the snapshot */ - perf_evlist__poll(top->evlist, 100); - - perf_top__mmap_read(top); - ret = -1; if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : display_thread), top)) { @@ -1156,6 +1151,11 @@ static int __cmd_top(struct perf_top *top) } } + /* Wait for a minimal set of events before starting the snapshot */ + perf_evlist__poll(top->evlist, 100); + + perf_top__mmap_read(top); + while (!done) { u64 hits = top->samples; -- cgit v1.2.3 From 4f8f382e635707ddaddf8269a116e4f8cc8835c0 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 30 Oct 2018 22:24:04 -0700 Subject: perf tools: Don't clone maps from parent when synthesizing forks When synthesizing FORK events, we are trying to create thread objects for the already running tasks on the machine. Normally, for a kernel FORK event, we want to clone the parent's maps because that is what the kernel just did. But when synthesizing, this should not be done. If we do, we end up with overlapping maps as we process the synthesized MMAP2 events that get delivered shortly thereafter. Use the FORK event misc flags in an internal way to signal this situation, so we can elide the map clone when appropriate. Signed-off-by: David S.
Miller Cc: Don Zickus Cc: Jiri Olsa Cc: Joe Mario Link: http://lkml.kernel.org/r/20181030.222404.2085088822877051075.davem@davemloft.net [ Added comment about flag use in machine__process_fork_event(), use ternary op in thread__clone_map_groups() as suggested by Jiri ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 2 ++ tools/include/uapi/linux/perf_event.h | 2 ++ tools/perf/util/event.c | 1 + tools/perf/util/machine.c | 19 ++++++++++++++++++- tools/perf/util/thread.c | 13 +++++-------- tools/perf/util/thread.h | 2 +- 6 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index f35eb72739c09..9de8780ac8d97 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index f35eb72739c09..9de8780ac8d97 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) 
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bc646185f8d91..e9c108a6b1c34 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8ee8ab39d8acf..8f36ce813bc5b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); + bool do_maps_clone = true; int err = 0; if (dump_trace) @@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + /* + * When synthesizing FORK events, we are trying to create thread + * objects for the already running tasks on the machine. + * + * Normally, for a kernel FORK event, we want to clone the parent's + * maps because that is what the kernel just did. + * + * But when synthesizing, this should not be done. If we do, we end up + * with overlapping maps as we process the sythesized MMAP2 events that + * get delivered shortly thereafter. + * + * Use the FORK event misc flags in an internal way to signal this + * situation, so we can elide the map clone when appropriate. 
+ */ + if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC) + do_maps_clone = false; if (thread == NULL || parent == NULL || - thread__fork(thread, parent, sample->time) < 0) { + thread__fork(thread, parent, sample->time, do_maps_clone) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); err = -1; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 2048d393ece6f..3d9ed7d0e2818 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread) } static int thread__clone_map_groups(struct thread *thread, - struct thread *parent) + struct thread *parent, + bool do_maps_clone) { /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) @@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread, thread->pid_, thread->tid, parent->pid_, parent->tid); return 0; } - /* But this one is new process, copy maps. */ - if (map_groups__clone(thread, parent->mg) < 0) - return -ENOMEM; - - return 0; + return do_maps_clone ? 
map_groups__clone(thread, parent->mg) : 0; } -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) { if (parent->comm_set) { const char *comm = thread__comm_str(parent); @@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } thread->ppid = parent->tid; - return thread__clone_map_groups(thread, parent); + return thread__clone_map_groups(thread, parent, do_maps_clone); } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 36c09a9904e66..30e2b4c165fe7 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -89,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); size_t thread__fprintf(struct thread *thread, FILE *fp); struct thread *thread__main_thread(struct machine *machine, struct thread *thread); -- cgit v1.2.3 From 242483068b4b9ad02f1653819b6e683577681e0e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 31 Oct 2018 11:10:42 +0200 Subject: perf intel-pt: Insert callchain context into synthesized callchains In the absence of a fallback, callchains must encode also the callchain context. Do that now there is no fallback. Signed-off-by: Adrian Hunter Reviewed-by: Jiri Olsa Cc: Andi Kleen Cc: David S. 
Miller Cc: Leo Yan Cc: Mathieu Poirier Cc: stable@vger.kernel.org # 4.19 Link: http://lkml.kernel.org/r/100ea2ec-ed14-b56d-d810-e0a6d2f4b069@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 6 ++++-- tools/perf/util/thread-stack.c | 44 +++++++++++++++++++++++++++++++++--------- tools/perf/util/thread-stack.h | 2 +- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index ffa385a029b3f..60732213d16a1 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -759,7 +759,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (pt->synth_opts.callchain) { size_t sz = sizeof(struct ip_callchain); - sz += pt->synth_opts.callchain_sz * sizeof(u64); + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); ptq->chain = zalloc(sz); if (!ptq->chain) goto out_free; @@ -1160,7 +1161,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt, if (pt->synth_opts.callchain) { thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample->ip); + pt->synth_opts.callchain_sz + 1, + sample->ip, pt->kernel_start); sample->callchain = ptq->chain; } diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index c091635bf7dcb..61a4286a74dc9 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -310,20 +310,46 @@ void thread_stack__free(struct thread *thread) } } +static inline u64 callchain_context(u64 ip, u64 kernel_start) +{ + return ip < kernel_start ? 
PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; +} + void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip) + size_t sz, u64 ip, u64 kernel_start) { - size_t i; + u64 context = callchain_context(ip, kernel_start); + u64 last_context; + size_t i, j; - if (!thread || !thread->ts) - chain->nr = 1; - else - chain->nr = min(sz, thread->ts->cnt + 1); + if (sz < 2) { + chain->nr = 0; + return; + } - chain->ips[0] = ip; + chain->ips[0] = context; + chain->ips[1] = ip; + + if (!thread || !thread->ts) { + chain->nr = 2; + return; + } + + last_context = context; + + for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) { + ip = thread->ts->stack[thread->ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (i >= sz - 1) + break; + chain->ips[i++] = context; + last_context = context; + } + chain->ips[i] = ip; + } - for (i = 1; i < chain->nr; i++) - chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; + chain->nr = i; } struct call_return_processor * diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index b7e41c4ebfdd9..f97c00a8c2514 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr); void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip); + size_t sz, u64 ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); size_t thread_stack__depth(struct thread *thread); -- cgit v1.2.3 From 5d4f0edaa3ac4f1844ed7c64cd2bae6f1912bac5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 31 Oct 2018 11:10:43 +0200 Subject: perf intel-pt/bts: Calculate cpumode for synthesized samples In the absence of a fallback, samples must 
provide a correct cpumode for the 'ip'. Do that now there is no fallback. Signed-off-by: Adrian Hunter Reviewed-by: Jiri Olsa Cc: Andi Kleen Cc: David S. Miller Cc: Leo Yan Cc: Mathieu Poirier Cc: stable@vger.kernel.org # 4.19 Link: http://lkml.kernel.org/r/20181031091043.23465-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-bts.c | 17 ++++++++++++----- tools/perf/util/intel-pt.c | 22 +++++++++++++--------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 3b3a3d55dca18..7b27d77306c22 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, return 0; } +static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip) +{ + return machine__kernel_ip(bts->machine, ip) ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, struct branch *branch) { @@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, bts->num_events++ <= bts->synth_opts.initial_skip) return 0; - event.sample.header.type = PERF_RECORD_SAMPLE; - event.sample.header.misc = PERF_RECORD_MISC_USER; - event.sample.header.size = sizeof(struct perf_event_header); - - sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = le64_to_cpu(branch->from); + sample.cpumode = intel_bts_cpumode(bts, sample.ip); sample.pid = btsq->pid; sample.tid = btsq->tid; sample.addr = le64_to_cpu(branch->to); @@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, sample.insn_len = btsq->intel_pt_insn.length; memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ); + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = sample.cpumode; + event.sample.header.size = sizeof(struct perf_event_header); + if (bts->synth_opts.inject) { 
event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 60732213d16a1..86cc9a64e9827 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -407,6 +407,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } +static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) +{ + return ip >= pt->kernel_start ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -429,10 +436,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) goto out_no_cache; - if (*ip >= ptq->pt->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = intel_pt_cpumode(ptq->pt, *ip); thread = ptq->thread; if (!thread) { @@ -1059,15 +1063,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); - if (!pt->timeless_decoding) sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); - sample->cpumode = PERF_RECORD_MISC_USER; sample->ip = ptq->state->from_ip; + sample->cpumode = intel_pt_cpumode(pt, sample->ip); sample->pid = ptq->pid; sample->tid = ptq->tid; sample->addr = ptq->state->to_ip; @@ -1076,6 +1076,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, sample->flags = ptq->flags; sample->insn_len = ptq->insn_len; memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = sample->cpumode; + 
event->sample.header.size = sizeof(struct perf_event_header); } static int intel_pt_inject_event(union perf_event *event, -- cgit v1.2.3