summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.preempt25
-rw-r--r--kernel/bpf/btf.c19
-rw-r--r--kernel/bpf/verifier.c13
-rw-r--r--kernel/cgroup/cgroup.c50
-rw-r--r--kernel/cgroup/cpuset.c61
-rw-r--r--kernel/dma/swiotlb.c30
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/kprobes.c3
-rw-r--r--kernel/memremap.c57
-rw-r--r--kernel/module.c60
-rw-r--r--kernel/padata.c12
-rw-r--r--kernel/pid_namespace.c3
-rw-r--r--kernel/resource.c49
-rw-r--r--kernel/sched/isolation.c6
-rw-r--r--kernel/smp.c16
-rw-r--r--kernel/sysctl.c197
-rw-r--r--kernel/trace/Kconfig12
-rw-r--r--kernel/trace/ftrace.c48
-rw-r--r--kernel/trace/ring_buffer.c17
-rw-r--r--kernel/trace/trace.c17
-rw-r--r--kernel/trace/trace_event_perf.c3
-rw-r--r--kernel/trace/trace_events.c10
-rw-r--r--kernel/trace/trace_events_filter.c3
-rw-r--r--kernel/trace/trace_kprobe.c357
-rw-r--r--kernel/trace/trace_output.c9
-rw-r--r--kernel/trace/trace_probe.c142
-rw-r--r--kernel/trace/trace_probe.h77
-rw-r--r--kernel/trace/trace_probe_tmpl.h36
-rw-r--r--kernel/trace/trace_uprobe.c180
-rw-r--r--kernel/tracepoint.c4
-rw-r--r--kernel/ucount.c6
31 files changed, 888 insertions, 638 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index dc0b682ec2d94..fc020c09b7e8f 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,10 +35,10 @@ config PREEMPT_VOLUNTARY
Select this if you are building a kernel for a desktop system.
-config PREEMPT
+config PREEMPT_LL
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
- select PREEMPT_COUNT
+ select PREEMPT
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
This option reduces the latency of the kernel by making
@@ -55,7 +55,28 @@ config PREEMPT
embedded system with latency requirements in the milliseconds
range.
+config PREEMPT_RT
+ bool "Fully Preemptible Kernel (Real-Time)"
+ depends on EXPERT && ARCH_SUPPORTS_RT
+ select PREEMPT
+ help
+ This option turns the kernel into a real-time kernel by replacing
+ various locking primitives (spinlocks, rwlocks, etc.) with
+ preemptible priority-inheritance aware variants, enforcing
+ interrupt threading and introducing mechanisms to break up long
+ non-preemptible sections. This makes the kernel, except for very
+ low level and critical code pathes (entry code, scheduler, low
+ level interrupt handling) fully preemptible and brings most
+ execution contexts under scheduler control.
+
+ Select this if you are building a kernel for systems which
+ require real-time guarantees.
+
endchoice
config PREEMPT_COUNT
bool
+
+config PREEMPT
+ bool
+ select PREEMPT_COUNT
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 546ebee39e2af..5fcc7a17eb5a4 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1073,11 +1073,18 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
!btf_type_is_var(size_type)))
return NULL;
- size = btf->resolved_sizes[size_type_id];
size_type_id = btf->resolved_ids[size_type_id];
size_type = btf_type_by_id(btf, size_type_id);
if (btf_type_nosize_or_null(size_type))
return NULL;
+ else if (btf_type_has_size(size_type))
+ size = size_type->size;
+ else if (btf_type_is_array(size_type))
+ size = btf->resolved_sizes[size_type_id];
+ else if (btf_type_is_ptr(size_type))
+ size = sizeof(void *);
+ else
+ return NULL;
}
*type_id = size_type_id;
@@ -1602,7 +1609,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
const struct btf_type *next_type;
u32 next_type_id = t->type;
struct btf *btf = env->btf;
- u32 next_type_size = 0;
next_type = btf_type_by_id(btf, next_type_id);
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
@@ -1620,7 +1626,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
* save us a few type-following when we use it later (e.g. in
* pretty print).
*/
- if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+ if (!btf_type_id_size(btf, &next_type_id, NULL)) {
if (env_type_is_resolved(env, next_type_id))
next_type = btf_type_id_resolve(btf, &next_type_id);
@@ -1633,7 +1639,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
}
}
- env_stack_pop_resolved(env, next_type_id, next_type_size);
+ env_stack_pop_resolved(env, next_type_id, 0);
return 0;
}
@@ -1645,7 +1651,6 @@ static int btf_var_resolve(struct btf_verifier_env *env,
const struct btf_type *t = v->t;
u32 next_type_id = t->type;
struct btf *btf = env->btf;
- u32 next_type_size;
next_type = btf_type_by_id(btf, next_type_id);
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
@@ -1675,12 +1680,12 @@ static int btf_var_resolve(struct btf_verifier_env *env,
* forward types or similar that would resolve to size of
* zero is allowed.
*/
- if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+ if (!btf_type_id_size(btf, &next_type_id, NULL)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
- env_stack_pop_resolved(env, next_type_id, next_type_size);
+ env_stack_pop_resolved(env, next_type_id, 0);
return 0;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a2e763703c300..5900cbb966b17 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1519,9 +1519,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
return -EFAULT;
}
*stack_mask |= 1ull << spi;
- } else if (class == BPF_STX) {
+ } else if (class == BPF_STX || class == BPF_ST) {
if (*reg_mask & dreg)
- /* stx shouldn't be using _scalar_ dst_reg
+ /* stx & st shouldn't be using _scalar_ dst_reg
* to access memory. It means backtracking
* encountered a case of pointer subtraction.
*/
@@ -1540,7 +1540,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
if (!(*stack_mask & (1ull << spi)))
return 0;
*stack_mask &= ~(1ull << spi);
- *reg_mask |= sreg;
+ if (class == BPF_STX)
+ *reg_mask |= sreg;
} else if (class == BPF_JMP || class == BPF_JMP32) {
if (opcode == BPF_CALL) {
if (insn->src_reg == BPF_PSEUDO_CALL)
@@ -1569,10 +1570,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
if (mode == BPF_IND || mode == BPF_ABS)
/* to be analyzed */
return -ENOTSUPP;
- } else if (class == BPF_ST) {
- if (*reg_mask & dreg)
- /* likely pointer subtraction */
- return -ENOTSUPP;
}
return 0;
}
@@ -6106,11 +6103,13 @@ static int check_return_code(struct bpf_verifier_env *env)
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
range = tnum_range(1, 1);
+ break;
case BPF_PROG_TYPE_CGROUP_SKB:
if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
range = tnum_range(0, 3);
enforce_attach_type_range = tnum_range(2, 3);
}
+ break;
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 300b0c416341c..753afbca549fd 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2201,8 +2201,7 @@ static int cgroup_init_fs_context(struct fs_context *fc)
fc->ops = &cgroup_fs_context_ops;
else
fc->ops = &cgroup1_fs_context_ops;
- if (fc->user_ns)
- put_user_ns(fc->user_ns);
+ put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ctx->ns->user_ns);
fc->global = true;
return 0;
@@ -2243,6 +2242,50 @@ static struct file_system_type cgroup2_fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};
+#ifdef CONFIG_CPUSETS
+static const struct fs_context_operations cpuset_fs_context_ops = {
+ .get_tree = cgroup1_get_tree,
+ .free = cgroup_fs_context_free,
+};
+
+/*
+ * This is ugly, but preserves the userspace API for existing cpuset
+ * users. If someone tries to mount the "cpuset" filesystem, we
+ * silently switch it to mount "cgroup" instead
+ */
+static int cpuset_init_fs_context(struct fs_context *fc)
+{
+ char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
+ struct cgroup_fs_context *ctx;
+ int err;
+
+ err = cgroup_init_fs_context(fc);
+ if (err) {
+ kfree(agent);
+ return err;
+ }
+
+ fc->ops = &cpuset_fs_context_ops;
+
+ ctx = cgroup_fc2context(fc);
+ ctx->subsys_mask = 1 << cpuset_cgrp_id;
+ ctx->flags |= CGRP_ROOT_NOPREFIX;
+ ctx->release_agent = agent;
+
+ get_filesystem(&cgroup_fs_type);
+ put_filesystem(fc->fs_type);
+ fc->fs_type = &cgroup_fs_type;
+
+ return 0;
+}
+
+static struct file_system_type cpuset_fs_type = {
+ .name = "cpuset",
+ .init_fs_context = cpuset_init_fs_context,
+ .fs_flags = FS_USERNS_MOUNT,
+};
+#endif
+
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
{
@@ -5761,6 +5804,9 @@ int __init cgroup_init(void)
WARN_ON(register_filesystem(&cgroup_fs_type));
WARN_ON(register_filesystem(&cgroup2_fs_type));
WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
+#ifdef CONFIG_CPUSETS
+ WARN_ON(register_filesystem(&cpuset_fs_type));
+#endif
return 0;
}
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 863e434a60204..5aa37531ce76f 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -356,59 +356,6 @@ static inline bool is_in_v2_mode(void)
}
/*
- * This is ugly, but preserves the userspace API for existing cpuset
- * users. If someone tries to mount the "cpuset" filesystem, we
- * silently switch it to mount "cgroup" instead
- */
-static int cpuset_get_tree(struct fs_context *fc)
-{
- struct file_system_type *cgroup_fs;
- struct fs_context *new_fc;
- int ret;
-
- cgroup_fs = get_fs_type("cgroup");
- if (!cgroup_fs)
- return -ENODEV;
-
- new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags);
- if (IS_ERR(new_fc)) {
- ret = PTR_ERR(new_fc);
- } else {
- static const char agent_path[] = "/sbin/cpuset_release_agent";
- ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0);
- if (!ret)
- ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0);
- if (!ret)
- ret = vfs_parse_fs_string(new_fc, "release_agent",
- agent_path, sizeof(agent_path) - 1);
- if (!ret)
- ret = vfs_get_tree(new_fc);
- if (!ret) { /* steal the result */
- fc->root = new_fc->root;
- new_fc->root = NULL;
- }
- put_fs_context(new_fc);
- }
- put_filesystem(cgroup_fs);
- return ret;
-}
-
-static const struct fs_context_operations cpuset_fs_context_ops = {
- .get_tree = cpuset_get_tree,
-};
-
-static int cpuset_init_fs_context(struct fs_context *fc)
-{
- fc->ops = &cpuset_fs_context_ops;
- return 0;
-}
-
-static struct file_system_type cpuset_fs_type = {
- .name = "cpuset",
- .init_fs_context = cpuset_init_fs_context,
-};
-
-/*
* Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
* until we find one that does have some online cpus.
@@ -2853,13 +2800,11 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
/**
* cpuset_init - initialize cpusets at system boot
*
- * Description: Initialize top_cpuset and the cpuset internal file system,
+ * Description: Initialize top_cpuset
**/
int __init cpuset_init(void)
{
- int err = 0;
-
BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
@@ -2873,10 +2818,6 @@ int __init cpuset_init(void)
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
top_cpuset.relax_domain_level = -1;
- err = register_filesystem(&cpuset_fs_type);
- if (err < 0)
- return err;
-
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
return 0;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 62fa5a82a0651..9de232229063b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -129,15 +129,17 @@ setup_io_tlb_npages(char *str)
}
early_param("swiotlb", setup_io_tlb_npages);
+static bool no_iotlb_memory;
+
unsigned long swiotlb_nr_tbl(void)
{
- return io_tlb_nslabs;
+ return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
}
EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
unsigned int swiotlb_max_segment(void)
{
- return max_segment;
+ return unlikely(no_iotlb_memory) ? 0 : max_segment;
}
EXPORT_SYMBOL_GPL(swiotlb_max_segment);
@@ -160,8 +162,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-static bool no_iotlb_memory;
-
void swiotlb_print_info(void)
{
unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
@@ -317,6 +317,14 @@ swiotlb_late_init_with_default_size(size_t default_size)
return rc;
}
+static void swiotlb_cleanup(void)
+{
+ io_tlb_end = 0;
+ io_tlb_start = 0;
+ io_tlb_nslabs = 0;
+ max_segment = 0;
+}
+
int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
@@ -367,10 +375,7 @@ cleanup4:
sizeof(int)));
io_tlb_list = NULL;
cleanup3:
- io_tlb_end = 0;
- io_tlb_start = 0;
- io_tlb_nslabs = 0;
- max_segment = 0;
+ swiotlb_cleanup();
return -ENOMEM;
}
@@ -394,10 +399,7 @@ void __init swiotlb_exit(void)
memblock_free_late(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
- io_tlb_start = 0;
- io_tlb_end = 0;
- io_tlb_nslabs = 0;
- max_segment = 0;
+ swiotlb_cleanup();
}
/*
@@ -546,7 +548,7 @@ not_found:
if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
size, io_tlb_nslabs, tmp_io_tlb_used);
- return DMA_MAPPING_ERROR;
+ return (phys_addr_t)DMA_MAPPING_ERROR;
found:
io_tlb_used += nslots;
spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -664,7 +666,7 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
/* Oh well, have to allocate and map a bounce buffer. */
*phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
*phys, size, dir, attrs);
- if (*phys == DMA_MAPPING_ERROR)
+ if (*phys == (phys_addr_t)DMA_MAPPING_ERROR)
return false;
/* Ensure that the address returned is DMA'ble */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index eea9d52b010c7..026a14541a382 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11618,9 +11618,7 @@ void perf_event_delayed_put(struct task_struct *task)
struct file *perf_event_get(unsigned int fd)
{
- struct file *file;
-
- file = fget_raw(fd);
+ struct file *file = fget(fd);
if (!file)
return ERR_PTR(-EBADF);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9f5433a52488c..9873fc627d61f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2276,6 +2276,7 @@ static int __init init_kprobes(void)
init_test_probes();
return err;
}
+subsys_initcall(init_kprobes);
#ifdef CONFIG_DEBUG_FS
static void report_probe(struct seq_file *pi, struct kprobe *p,
@@ -2588,5 +2589,3 @@ static int __init debugfs_kprobe_init(void)
late_initcall(debugfs_kprobe_init);
#endif /* CONFIG_DEBUG_FS */
-
-module_init(init_kprobes);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index bea6f887adad1..6ee03a816d67a 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -54,7 +54,7 @@ static void pgmap_array_delete(struct resource *res)
static unsigned long pfn_first(struct dev_pagemap *pgmap)
{
- return (pgmap->res.start >> PAGE_SHIFT) +
+ return PHYS_PFN(pgmap->res.start) +
vmem_altmap_offset(pgmap_altmap(pgmap));
}
@@ -98,7 +98,6 @@ static void devm_memremap_pages_release(void *data)
struct dev_pagemap *pgmap = data;
struct device *dev = pgmap->dev;
struct resource *res = &pgmap->res;
- resource_size_t align_start, align_size;
unsigned long pfn;
int nid;
@@ -108,25 +107,21 @@ static void devm_memremap_pages_release(void *data)
dev_pagemap_cleanup(pgmap);
/* pages are dead and unused, undo the arch mapping */
- align_start = res->start & ~(SECTION_SIZE - 1);
- align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
- - align_start;
-
- nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT));
+ nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
mem_hotplug_begin();
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
- pfn = align_start >> PAGE_SHIFT;
+ pfn = PHYS_PFN(res->start);
__remove_pages(page_zone(pfn_to_page(pfn)), pfn,
- align_size >> PAGE_SHIFT, NULL);
+ PHYS_PFN(resource_size(res)), NULL);
} else {
- arch_remove_memory(nid, align_start, align_size,
+ arch_remove_memory(nid, res->start, resource_size(res),
pgmap_altmap(pgmap));
- kasan_remove_zero_shadow(__va(align_start), align_size);
+ kasan_remove_zero_shadow(__va(res->start), resource_size(res));
}
mem_hotplug_done();
- untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
pgmap_array_delete(res);
dev_WARN_ONCE(dev, pgmap->altmap.alloc,
"%s: failed to free all reserved pages\n", __func__);
@@ -162,13 +157,12 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
*/
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
- resource_size_t align_start, align_size, align_end;
struct resource *res = &pgmap->res;
struct dev_pagemap *conflict_pgmap;
struct mhp_restrictions restrictions = {
/*
* We do not want any optional features only our own memmap
- */
+ */
.altmap = pgmap_altmap(pgmap),
};
pgprot_t pgprot = PAGE_KERNEL;
@@ -225,12 +219,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
return ERR_PTR(error);
}
- align_start = res->start & ~(SECTION_SIZE - 1);
- align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
- - align_start;
- align_end = align_start + align_size - 1;
-
- conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_start), NULL);
+ conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
@@ -238,7 +227,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
goto err_array;
}
- conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL);
+ conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
@@ -246,7 +235,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
goto err_array;
}
- is_ram = region_intersects(align_start, align_size,
+ is_ram = region_intersects(res->start, resource_size(res),
IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
if (is_ram != REGION_DISJOINT) {
@@ -267,8 +256,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (nid < 0)
nid = numa_mem_id();
- error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
- align_size);
+ error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0,
+ resource_size(res));
if (error)
goto err_pfn_remap;
@@ -286,16 +275,16 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
* arch_add_memory().
*/
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
- error = add_pages(nid, align_start >> PAGE_SHIFT,
- align_size >> PAGE_SHIFT, &restrictions);
+ error = add_pages(nid, PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)), &restrictions);
} else {
- error = kasan_add_zero_shadow(__va(align_start), align_size);
+ error = kasan_add_zero_shadow(__va(res->start), resource_size(res));
if (error) {
mem_hotplug_done();
goto err_kasan;
}
- error = arch_add_memory(nid, align_start, align_size,
+ error = arch_add_memory(nid, res->start, resource_size(res),
&restrictions);
}
@@ -303,8 +292,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
struct zone *zone;
zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
- move_pfn_range_to_zone(zone, align_start >> PAGE_SHIFT,
- align_size >> PAGE_SHIFT, pgmap_altmap(pgmap));
+ move_pfn_range_to_zone(zone, PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)), restrictions.altmap);
}
mem_hotplug_done();
@@ -316,8 +305,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
* to allow us to do the work while not holding the hotplug lock.
*/
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
- align_start >> PAGE_SHIFT,
- align_size >> PAGE_SHIFT, pgmap);
+ PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)), pgmap);
percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
@@ -328,9 +317,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
return __va(res->start);
err_add_memory:
- kasan_remove_zero_shadow(__va(align_start), align_size);
+ kasan_remove_zero_shadow(__va(res->start), resource_size(res));
err_kasan:
- untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
err_pfn_remap:
pgmap_array_delete(res);
err_array:
diff --git a/kernel/module.c b/kernel/module.c
index a2cee14a83f36..5933395af9a0c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1492,8 +1492,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info)
for (i = 0; i < info->hdr->e_shnum; i++)
if (!sect_empty(&info->sechdrs[i]))
nloaded++;
- size[0] = ALIGN(sizeof(*sect_attrs)
- + nloaded * sizeof(sect_attrs->attrs[0]),
+ size[0] = ALIGN(struct_size(sect_attrs, attrs, nloaded),
sizeof(sect_attrs->grp.attrs[0]));
size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]);
sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL);
@@ -1697,6 +1696,8 @@ static int add_usage_links(struct module *mod)
return ret;
}
+static void module_remove_modinfo_attrs(struct module *mod, int end);
+
static int module_add_modinfo_attrs(struct module *mod)
{
struct module_attribute *attr;
@@ -1711,24 +1712,34 @@ static int module_add_modinfo_attrs(struct module *mod)
return -ENOMEM;
temp_attr = mod->modinfo_attrs;
- for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) {
+ for (i = 0; (attr = modinfo_attrs[i]); i++) {
if (!attr->test || attr->test(mod)) {
memcpy(temp_attr, attr, sizeof(*temp_attr));
sysfs_attr_init(&temp_attr->attr);
error = sysfs_create_file(&mod->mkobj.kobj,
&temp_attr->attr);
+ if (error)
+ goto error_out;
++temp_attr;
}
}
+
+ return 0;
+
+error_out:
+ if (i > 0)
+ module_remove_modinfo_attrs(mod, --i);
return error;
}
-static void module_remove_modinfo_attrs(struct module *mod)
+static void module_remove_modinfo_attrs(struct module *mod, int end)
{
struct module_attribute *attr;
int i;
for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) {
+ if (end >= 0 && i > end)
+ break;
/* pick a field to test for end of list */
if (!attr->attr.name)
break;
@@ -1816,7 +1827,7 @@ static int mod_sysfs_setup(struct module *mod,
return 0;
out_unreg_modinfo_attrs:
- module_remove_modinfo_attrs(mod);
+ module_remove_modinfo_attrs(mod, -1);
out_unreg_param:
module_param_sysfs_remove(mod);
out_unreg_holders:
@@ -1852,7 +1863,7 @@ static void mod_sysfs_fini(struct module *mod)
{
}
-static void module_remove_modinfo_attrs(struct module *mod)
+static void module_remove_modinfo_attrs(struct module *mod, int end)
{
}
@@ -1868,14 +1879,14 @@ static void init_param_lock(struct module *mod)
static void mod_sysfs_teardown(struct module *mod)
{
del_usage_links(mod);
- module_remove_modinfo_attrs(mod);
+ module_remove_modinfo_attrs(mod, -1);
module_param_sysfs_remove(mod);
kobject_put(mod->mkobj.drivers_dir);
kobject_put(mod->holders_dir);
mod_sysfs_fini(mod);
}
-#ifdef CONFIG_STRICT_MODULE_RWX
+#ifdef CONFIG_ARCH_HAS_STRICT_MODULE_RWX
/*
* LKM RO/NX protection: protect module's text/ro-data
* from modification and any data from execution.
@@ -1898,6 +1909,7 @@ static void frob_text(const struct module_layout *layout,
layout->text_size >> PAGE_SHIFT);
}
+#ifdef CONFIG_STRICT_MODULE_RWX
static void frob_rodata(const struct module_layout *layout,
int (*set_memory)(unsigned long start, int num_pages))
{
@@ -1949,13 +1961,9 @@ void module_enable_ro(const struct module *mod, bool after_init)
set_vm_flush_reset_perms(mod->core_layout.base);
set_vm_flush_reset_perms(mod->init_layout.base);
frob_text(&mod->core_layout, set_memory_ro);
- frob_text(&mod->core_layout, set_memory_x);
frob_rodata(&mod->core_layout, set_memory_ro);
-
frob_text(&mod->init_layout, set_memory_ro);
- frob_text(&mod->init_layout, set_memory_x);
-
frob_rodata(&mod->init_layout, set_memory_ro);
if (after_init)
@@ -2014,9 +2022,19 @@ void set_all_modules_text_ro(void)
}
mutex_unlock(&module_mutex);
}
-#else
+#else /* !CONFIG_STRICT_MODULE_RWX */
static void module_enable_nx(const struct module *mod) { }
-#endif
+#endif /* CONFIG_STRICT_MODULE_RWX */
+static void module_enable_x(const struct module *mod)
+{
+ frob_text(&mod->core_layout, set_memory_x);
+ frob_text(&mod->init_layout, set_memory_x);
+}
+#else /* !CONFIG_ARCH_HAS_STRICT_MODULE_RWX */
+static void module_enable_nx(const struct module *mod) { }
+static void module_enable_x(const struct module *mod) { }
+#endif /* CONFIG_ARCH_HAS_STRICT_MODULE_RWX */
+
#ifdef CONFIG_LIVEPATCH
/*
@@ -2723,6 +2741,11 @@ void * __weak module_alloc(unsigned long size)
return vmalloc_exec(size);
}
+bool __weak module_exit_section(const char *name)
+{
+ return strstarts(name, ".exit");
+}
+
#ifdef CONFIG_DEBUG_KMEMLEAK
static void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
@@ -2912,7 +2935,7 @@ static int rewrite_section_headers(struct load_info *info, int flags)
#ifndef CONFIG_MODULE_UNLOAD
/* Don't load .exit sections */
- if (strstarts(info->secstrings+shdr->sh_name, ".exit"))
+ if (module_exit_section(info->secstrings+shdr->sh_name))
shdr->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
}
@@ -3390,8 +3413,7 @@ static bool finished_loading(const char *name)
sched_annotate_sleep();
mutex_lock(&module_mutex);
mod = find_module_all(name, strlen(name), true);
- ret = !mod || mod->state == MODULE_STATE_LIVE
- || mod->state == MODULE_STATE_GOING;
+ ret = !mod || mod->state == MODULE_STATE_LIVE;
mutex_unlock(&module_mutex);
return ret;
@@ -3581,8 +3603,7 @@ again:
mutex_lock(&module_mutex);
old = find_module_all(mod->name, strlen(mod->name), true);
if (old != NULL) {
- if (old->state == MODULE_STATE_COMING
- || old->state == MODULE_STATE_UNFORMED) {
+ if (old->state != MODULE_STATE_LIVE) {
/* Wait in case it fails to load. */
mutex_unlock(&module_mutex);
err = wait_event_interruptible(module_wq,
@@ -3621,6 +3642,7 @@ static int complete_formation(struct module *mod, struct load_info *info)
module_enable_ro(mod, false);
module_enable_nx(mod);
+ module_enable_x(mod);
/* Mark state as coming so strong_try_module_get() ignores us,
* but kallsyms etc. can see us. */
diff --git a/kernel/padata.c b/kernel/padata.c
index 2d2fddbb7a4ce..15a8ad63f4ffe 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -267,7 +267,12 @@ static void padata_reorder(struct parallel_data *pd)
* The next object that needs serialization might have arrived to
* the reorder queues in the meantime, we will be called again
* from the timer function if no one else cares for it.
+ *
+ * Ensure reorder_objects is read after pd->lock is dropped so we see
+ * an increment from another task in padata_do_serial. Pairs with
+ * smp_mb__after_atomic in padata_do_serial.
*/
+ smp_mb();
if (atomic_read(&pd->reorder_objects)
&& !(pinst->flags & PADATA_RESET))
mod_timer(&pd->timer, jiffies + HZ);
@@ -387,6 +392,13 @@ void padata_do_serial(struct padata_priv *padata)
list_add_tail(&padata->list, &pqueue->reorder.list);
spin_unlock(&pqueue->reorder.lock);
+ /*
+ * Ensure the atomic_inc of reorder_objects above is ordered correctly
+ * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
+ * in padata_reorder.
+ */
+ smp_mb__after_atomic();
+
put_cpu();
/* If we're running on the wrong CPU, call padata_reorder() via a
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 6d726cef241cc..a6a79f85c81a8 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -291,14 +291,13 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
}
extern int pid_max;
-static int zero = 0;
static struct ctl_table pid_ns_ctl_table[] = {
{
.procname = "ns_last_pid",
.maxlen = sizeof(int),
.mode = 0666, /* permissions are checked in the handler */
.proc_handler = pid_ns_ctl_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &pid_max,
},
{ }
diff --git a/kernel/resource.c b/kernel/resource.c
index d22423e85cf80..7ea4306503c57 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -326,7 +326,7 @@ EXPORT_SYMBOL(release_resource);
*
* If a resource is found, returns 0 and @*res is overwritten with the part
* of the resource that's within [@start..@end]; if none is found, returns
- * -1 or -EINVAL for other invalid parameters.
+ * -ENODEV. Returns -EINVAL for invalid parameters.
*
* This function walks the whole tree and not just first level children
* unless @first_lvl is true.
@@ -342,6 +342,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
unsigned long flags, unsigned long desc,
bool first_lvl, struct resource *res)
{
+ bool siblings_only = true;
struct resource *p;
if (!res)
@@ -352,29 +353,43 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
read_lock(&resource_lock);
- for (p = iomem_resource.child; p; p = next_resource(p, first_lvl)) {
- if ((p->flags & flags) != flags)
- continue;
- if ((desc != IORES_DESC_NONE) && (desc != p->desc))
- continue;
+ for (p = iomem_resource.child; p; p = next_resource(p, siblings_only)) {
+ /* If we passed the resource we are looking for, stop */
if (p->start > end) {
p = NULL;
break;
}
- if ((p->end >= start) && (p->start <= end))
- break;
+
+ /* Skip until we find a range that matches what we look for */
+ if (p->end < start)
+ continue;
+
+ /*
+ * Now that we found a range that matches what we look for,
+ * check the flags and the descriptor. If we were not asked to
+ * use only the first level, start looking at children as well.
+ */
+ siblings_only = first_lvl;
+
+ if ((p->flags & flags) != flags)
+ continue;
+ if ((desc != IORES_DESC_NONE) && (desc != p->desc))
+ continue;
+
+ /* Found a match, break */
+ break;
+ }
+
+ if (p) {
+ /* copy data */
+ res->start = max(start, p->start);
+ res->end = min(end, p->end);
+ res->flags = p->flags;
+ res->desc = p->desc;
}
read_unlock(&resource_lock);
- if (!p)
- return -1;
-
- /* copy data */
- res->start = max(start, p->start);
- res->end = min(end, p->end);
- res->flags = p->flags;
- res->desc = p->desc;
- return 0;
+ return p ? 0 : -ENODEV;
}
static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 123ea07a3f3b0..ccb28085b1141 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -14,6 +14,12 @@ EXPORT_SYMBOL_GPL(housekeeping_overridden);
static cpumask_var_t housekeeping_mask;
static unsigned int housekeeping_flags;
+bool housekeeping_enabled(enum hk_flags flags)
+{
+ return !!(housekeeping_flags & flags);
+}
+EXPORT_SYMBOL_GPL(housekeeping_enabled);
+
int housekeeping_any_cpu(enum hk_flags flags)
{
if (static_branch_unlikely(&housekeeping_overridden))
diff --git a/kernel/smp.c b/kernel/smp.c
index 616d4d1148475..7dbcb402c2fc0 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -291,6 +291,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
csd = &csd_stack;
if (!wait) {
csd = this_cpu_ptr(&csd_data);
@@ -416,6 +424,14 @@ void smp_call_function_many(const struct cpumask *mask,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress && !early_boot_irqs_disabled);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == this_cpu)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 43186ccfa1396..078950d9605ba 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -125,9 +125,6 @@ static int sixty = 60;
#endif
static int __maybe_unused neg_one = -1;
-
-static int zero;
-static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
static unsigned long zero_ul;
@@ -385,8 +382,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sysctl_schedstats,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif /* CONFIG_SCHEDSTATS */
#endif /* CONFIG_SMP */
@@ -418,7 +415,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "numa_balancing",
@@ -426,8 +423,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sysctl_numa_balancing,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
@@ -475,8 +472,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_CFS_BANDWIDTH
@@ -486,7 +483,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
#endif
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
@@ -496,8 +493,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_energy_aware_handler,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_PROVE_LOCKING
@@ -562,7 +559,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &neg_one,
- .extra2 = &one,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_LATENCYTOP
@@ -696,8 +693,8 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
/* only handle a transition from default "0" to "1" */
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &one,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_MODULES
@@ -715,8 +712,8 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
/* only handle a transition from default "0" to "1" */
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &one,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_UEVENT_HELPER
@@ -875,7 +872,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &ten_thousand,
},
{
@@ -891,8 +888,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax_sysadmin,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "kptr_restrict",
@@ -900,7 +897,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax_sysadmin,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
#endif
@@ -925,8 +922,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_watchdog,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "watchdog_thresh",
@@ -934,7 +931,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_watchdog_thresh,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &sixty,
},
{
@@ -943,8 +940,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = NMI_WATCHDOG_SYSCTL_PERM,
.proc_handler = proc_nmi_watchdog,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "watchdog_cpumask",
@@ -960,8 +957,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_soft_watchdog,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "softlockup_panic",
@@ -969,8 +966,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#ifdef CONFIG_SMP
{
@@ -979,8 +976,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif /* CONFIG_SMP */
#endif
@@ -991,8 +988,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#ifdef CONFIG_SMP
{
@@ -1001,8 +998,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif /* CONFIG_SMP */
#endif
@@ -1115,8 +1112,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "hung_task_check_count",
@@ -1124,7 +1121,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "hung_task_timeout_secs",
@@ -1201,7 +1198,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(sysctl_perf_event_sample_rate),
.mode = 0644,
.proc_handler = perf_proc_update_handler,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "perf_cpu_time_max_percent",
@@ -1209,7 +1206,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
.mode = 0644,
.proc_handler = perf_cpu_time_max_percent_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred,
},
{
@@ -1218,7 +1215,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(sysctl_perf_event_max_stack),
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &six_hundred_forty_kb,
},
{
@@ -1227,7 +1224,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_thousand,
},
#endif
@@ -1237,8 +1234,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
{
@@ -1247,8 +1244,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = timer_migration_handler,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_BPF_SYSCALL
@@ -1259,8 +1256,8 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
/* only handle a transition from default "0" to "1" */
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &one,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "bpf_stats_enabled",
@@ -1277,8 +1274,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(sysctl_panic_on_rcu_stall),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
@@ -1288,8 +1285,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = stack_erasing_sysctl,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{ }
@@ -1302,7 +1299,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_overcommit_memory),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -1311,7 +1308,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_panic_on_oom),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -1348,7 +1345,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "dirty_background_ratio",
@@ -1356,7 +1353,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(dirty_background_ratio),
.mode = 0644,
.proc_handler = dirty_background_ratio_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred,
},
{
@@ -1373,7 +1370,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(vm_dirty_ratio),
.mode = 0644,
.proc_handler = dirty_ratio_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred,
},
{
@@ -1397,7 +1394,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(dirty_expire_interval),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "dirtytime_expire_seconds",
@@ -1405,7 +1402,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(dirtytime_expire_interval),
.mode = 0644,
.proc_handler = dirtytime_interval_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "swappiness",
@@ -1413,7 +1410,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(vm_swappiness),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred,
},
#ifdef CONFIG_HUGETLB_PAGE
@@ -1438,8 +1435,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sysctl_vm_numa_stat_handler,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -1470,7 +1467,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = drop_caches_sysctl_handler,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &four,
},
#ifdef CONFIG_COMPACTION
@@ -1496,8 +1493,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif /* CONFIG_COMPACTION */
@@ -1507,7 +1504,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(min_free_kbytes),
.mode = 0644,
.proc_handler = min_free_kbytes_sysctl_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "watermark_boost_factor",
@@ -1515,7 +1512,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(watermark_boost_factor),
.mode = 0644,
.proc_handler = watermark_boost_factor_sysctl_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "watermark_scale_factor",
@@ -1523,7 +1520,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(watermark_scale_factor),
.mode = 0644,
.proc_handler = watermark_scale_factor_sysctl_handler,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &one_thousand,
},
{
@@ -1532,7 +1529,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(percpu_pagelist_fraction),
.mode = 0644,
.proc_handler = percpu_pagelist_fraction_sysctl_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#ifdef CONFIG_MMU
{
@@ -1541,7 +1538,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_max_map_count),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#else
{
@@ -1550,7 +1547,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_nr_trim_pages),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#endif
{
@@ -1566,7 +1563,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(block_dump),
.mode = 0644,
.proc_handler = proc_dointvec,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "vfs_cache_pressure",
@@ -1574,7 +1571,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_vfs_cache_pressure),
.mode = 0644,
.proc_handler = proc_dointvec,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
{
@@ -1583,7 +1580,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_legacy_va_layout),
.mode = 0644,
.proc_handler = proc_dointvec,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_NUMA
@@ -1593,7 +1590,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(node_reclaim_mode),
.mode = 0644,
.proc_handler = proc_dointvec,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "min_unmapped_ratio",
@@ -1601,7 +1598,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_min_unmapped_ratio),
.mode = 0644,
.proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred,
},
{
@@ -1610,7 +1607,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_min_slab_ratio),
.mode = 0644,
.proc_handler = sysctl_min_slab_ratio_sysctl_handler,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred,
},
#endif
@@ -1661,7 +1658,7 @@ static struct ctl_table vm_table[] = {
#endif
.mode = 0644,
.proc_handler = proc_dointvec,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_HIGHMEM
@@ -1671,8 +1668,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(vm_highmem_is_dirtyable),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_MEMORY_FAILURE
@@ -1682,8 +1679,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_memory_failure_early_kill),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "memory_failure_recovery",
@@ -1691,8 +1688,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_memory_failure_recovery),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -1738,8 +1735,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(sysctl_unprivileged_userfaultfd),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{ }
@@ -1875,8 +1872,8 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "protected_hardlinks",
@@ -1884,8 +1881,8 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "protected_fifos",
@@ -1893,7 +1890,7 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -1902,7 +1899,7 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -1911,7 +1908,7 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax_coredump,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
@@ -1948,7 +1945,7 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{ }
};
@@ -1970,8 +1967,8 @@ static struct ctl_table debug_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_kprobes_optimization_handler,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{ }
@@ -3395,8 +3392,8 @@ int proc_do_static_key(struct ctl_table *table, int write,
.data = &val,
.maxlen = sizeof(val),
.mode = table->mode,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
};
if (write && !capable(CAP_SYS_ADMIN))
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 564e5fdb025ff..98da8998c25ce 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -597,9 +597,19 @@ config FTRACE_STARTUP_TEST
functioning properly. It will do tests on all the configured
tracers of ftrace.
+config EVENT_TRACE_STARTUP_TEST
+ bool "Run selftest on trace events"
+ depends on FTRACE_STARTUP_TEST
+ default y
+ help
+ This option performs a test on all trace events in the system.
+ It basically just enables each event and runs some code that
+ will trigger events (not necessarily the event it enables)
+ This may take some time run as there are a lot of events.
+
config EVENT_TRACE_TEST_SYSCALLS
bool "Run selftest on syscall events"
- depends on FTRACE_STARTUP_TEST
+ depends on EVENT_TRACE_STARTUP_TEST
help
This option will also enable testing every syscall event.
It only enables the event and disables it and runs various loads
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 576c41644e77c..eca34503f178e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1622,6 +1622,11 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
return keep_regs;
}
+static struct ftrace_ops *
+ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
+static struct ftrace_ops *
+ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
+
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1750,15 +1755,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
}
/*
- * If the rec had TRAMP enabled, then it needs to
- * be cleared. As TRAMP can only be enabled iff
- * there is only a single ops attached to it.
- * In otherwords, always disable it on decrementing.
- * In the future, we may set it if rec count is
- * decremented to one, and the ops that is left
- * has a trampoline.
+ * The TRAMP needs to be set only if rec count
+ * is decremented to one, and the ops that is
+ * left has a trampoline. As TRAMP can only be
+ * enabled if there is only a single ops attached
+ * to it.
*/
- rec->flags &= ~FTRACE_FL_TRAMP;
+ if (ftrace_rec_count(rec) == 1 &&
+ ftrace_find_tramp_ops_any(rec))
+ rec->flags |= FTRACE_FL_TRAMP;
+ else
+ rec->flags &= ~FTRACE_FL_TRAMP;
/*
* flags will be cleared in ftrace_check_record()
@@ -1768,7 +1775,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
count++;
/* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
- update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE;
+ update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE;
/* Shortcut, if we handled all records, we are done. */
if (!all && count == hash->count)
@@ -1951,11 +1958,6 @@ static void print_ip_ins(const char *fmt, const unsigned char *p)
printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
}
-static struct ftrace_ops *
-ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
-static struct ftrace_ops *
-ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
-
enum ftrace_bug_type ftrace_bug_type;
const void *ftrace_expected;
@@ -2047,7 +2049,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec)
}
}
-static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
+static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
{
unsigned long flag = 0UL;
@@ -2146,28 +2148,28 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
/**
* ftrace_update_record, set a record that now is tracing or not
* @rec: the record to update
- * @enable: set to 1 if the record is tracing, zero to force disable
+ * @enable: set to true if the record is tracing, false to force disable
*
* The records that represent all functions that can be traced need
* to be updated when tracing has been enabled.
*/
-int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+int ftrace_update_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 1);
+ return ftrace_check_record(rec, enable, true);
}
/**
* ftrace_test_record, check if the record has been enabled or not
* @rec: the record to test
- * @enable: set to 1 to check if enabled, 0 if it is disabled
+ * @enable: set to true to check if enabled, false if it is disabled
*
* The arch code may need to test if a record is already set to
* tracing to determine how to modify the function code that it
* represents.
*/
-int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+int ftrace_test_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 0);
+ return ftrace_check_record(rec, enable, false);
}
static struct ftrace_ops *
@@ -2356,7 +2358,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
}
static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_old_addr;
unsigned long ftrace_addr;
@@ -2395,7 +2397,7 @@ void __weak ftrace_replace_code(int mod_flags)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
+ bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL;
int failed;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 05b0b3139ebcc..66358d66c9336 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -128,16 +128,7 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
-
-#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
-# define RB_FORCE_8BYTE_ALIGNMENT 0
-# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
-#else
-# define RB_FORCE_8BYTE_ALIGNMENT 1
-# define RB_ARCH_ALIGNMENT 8U
-#endif
-
-#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
+#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT)
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -2373,7 +2364,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ if (length > RB_MAX_SMALL_DATA) {
event->type_len = 0;
event->array[0] = length;
} else
@@ -2388,11 +2379,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length++;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+ if (length > RB_MAX_SMALL_DATA)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ARCH_ALIGNMENT);
+ length = ALIGN(length, RB_ALIGNMENT);
/*
* In case the time delta is larger than the 27 bits for it
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c90c687cf950a..525a97fbbc603 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -366,7 +366,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
}
/**
- * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
* @pid_list: The list to modify
* @self: The current task for fork or NULL for exit
* @task: The task to add or remove
@@ -743,8 +743,7 @@ trace_event_setup(struct ring_buffer_event *event,
{
struct trace_entry *ent = ring_buffer_event_data(event);
- tracing_generic_entry_update(ent, flags, pc);
- ent->type = type;
+ tracing_generic_entry_update(ent, type, flags, pc);
}
static __always_inline struct ring_buffer_event *
@@ -2312,13 +2311,14 @@ enum print_line_t trace_handle_return(struct trace_seq *s)
EXPORT_SYMBOL_GPL(trace_handle_return);
void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
- int pc)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
+ unsigned long flags, int pc)
{
struct task_struct *tsk = current;
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
+ entry->type = type;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -4842,12 +4842,13 @@ static const char readme_msg[] =
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
- "\t $stack<index>, $stack, $retval, $comm, $arg<N>\n"
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
- "\t $stack<index>, $stack, $retval, $comm\n"
+ "\t $stack<index>, $stack, $retval, $comm,\n"
#endif
+ "\t +|-[u]<offset>(<fetcharg>)\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
- "\t b<bit-width>@<bit-offset>/<container-size>,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
"\t <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
"\t field: <stype> <name>;\n"
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 4629a61044745..0892e38ed6fbe 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -416,8 +416,7 @@ void perf_trace_buf_update(void *record, u16 type)
unsigned long flags;
local_save_flags(flags);
- tracing_generic_entry_update(entry, flags, pc);
- entry->type = type;
+ tracing_generic_entry_update(entry, type, flags, pc);
}
NOKPROBE_SYMBOL(perf_trace_buf_update);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0ce3db67f5569..c7506bc81b757 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -70,14 +70,6 @@ static int system_refcount_dec(struct event_subsystem *system)
#define while_for_each_event_file() \
}
-static struct list_head *
-trace_get_fields(struct trace_event_call *event_call)
-{
- if (!event_call->class->get_fields)
- return &event_call->class->fields;
- return event_call->class->get_fields(event_call);
-}
-
static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
@@ -3190,7 +3182,7 @@ void __init trace_event_init(void)
event_trace_enable();
}
-#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 5079d1db3754a..c773b8fb270c5 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1084,6 +1084,9 @@ int filter_assign_type(const char *type)
if (strchr(type, '[') && strstr(type, "char"))
return FILTER_STATIC_STRING;
+ if (strcmp(type, "char *") == 0 || strcmp(type, "const char *") == 0)
+ return FILTER_PTR_STRING;
+
return FILTER_OTHER;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7d736248a070b..9d483ad9bb6c4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -12,6 +12,8 @@
#include <linux/rculist.h>
#include <linux/error-injection.h>
+#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
+
#include "trace_dynevent.h"
#include "trace_kprobe_selftest.h"
#include "trace_probe.h"
@@ -19,6 +21,18 @@
#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
+#define MAX_KPROBE_CMDLINE_SIZE 1024
+
+/* Kprobe early definition from command line */
+static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
+static bool kprobe_boot_events_enabled __initdata;
+
+static int __init set_kprobe_boot_events(char *str)
+{
+ strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
+ return 0;
+}
+__setup("kprobe_event=", set_kprobe_boot_events);
static int trace_kprobe_create(int argc, const char **argv);
static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
@@ -128,8 +142,8 @@ static bool trace_kprobe_match(const char *system, const char *event,
{
struct trace_kprobe *tk = to_trace_kprobe(ev);
- return strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
- (!system || strcmp(tk->tp.call.class->system, system) == 0);
+ return strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+ (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0);
}
static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
@@ -143,6 +157,12 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit;
}
+static nokprobe_inline bool trace_kprobe_is_registered(struct trace_kprobe *tk)
+{
+ return !(list_empty(&tk->rp.kp.list) &&
+ hlist_unhashed(&tk->rp.kp.hlist));
+}
+
/* Return 0 if it fails to find the symbol address */
static nokprobe_inline
unsigned long trace_kprobe_address(struct trace_kprobe *tk)
@@ -183,6 +203,16 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
+static void free_trace_kprobe(struct trace_kprobe *tk)
+{
+ if (tk) {
+ trace_probe_cleanup(&tk->tp);
+ kfree(tk->symbol);
+ free_percpu(tk->nhit);
+ kfree(tk);
+ }
+}
+
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
@@ -220,49 +250,20 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
tk->rp.kp.pre_handler = kprobe_dispatcher;
tk->rp.maxactive = maxactive;
+ INIT_HLIST_NODE(&tk->rp.kp.hlist);
+ INIT_LIST_HEAD(&tk->rp.kp.list);
- if (!event || !group) {
- ret = -EINVAL;
- goto error;
- }
-
- tk->tp.call.class = &tk->tp.class;
- tk->tp.call.name = kstrdup(event, GFP_KERNEL);
- if (!tk->tp.call.name)
- goto error;
-
- tk->tp.class.system = kstrdup(group, GFP_KERNEL);
- if (!tk->tp.class.system)
+ ret = trace_probe_init(&tk->tp, event, group);
+ if (ret < 0)
goto error;
dyn_event_init(&tk->devent, &trace_kprobe_ops);
- INIT_LIST_HEAD(&tk->tp.files);
return tk;
error:
- kfree(tk->tp.call.name);
- kfree(tk->symbol);
- free_percpu(tk->nhit);
- kfree(tk);
+ free_trace_kprobe(tk);
return ERR_PTR(ret);
}
-static void free_trace_kprobe(struct trace_kprobe *tk)
-{
- int i;
-
- if (!tk)
- return;
-
- for (i = 0; i < tk->tp.nr_args; i++)
- traceprobe_free_probe_arg(&tk->tp.args[i]);
-
- kfree(tk->tp.call.class->system);
- kfree(tk->tp.call.name);
- kfree(tk->symbol);
- free_percpu(tk->nhit);
- kfree(tk);
-}
-
static struct trace_kprobe *find_trace_kprobe(const char *event,
const char *group)
{
@@ -270,8 +271,8 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
struct trace_kprobe *tk;
for_each_trace_kprobe(tk, pos)
- if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
- strcmp(tk->tp.call.class->system, group) == 0)
+ if (strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+ strcmp(trace_probe_group_name(&tk->tp), group) == 0)
return tk;
return NULL;
}
@@ -280,7 +281,7 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
{
int ret = 0;
- if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
+ if (trace_kprobe_is_registered(tk) && !trace_kprobe_has_gone(tk)) {
if (trace_kprobe_is_return(tk))
ret = enable_kretprobe(&tk->rp);
else
@@ -297,34 +298,27 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
- struct event_file_link *link;
+ bool enabled = trace_probe_is_enabled(&tk->tp);
int ret = 0;
if (file) {
- link = kmalloc(sizeof(*link), GFP_KERNEL);
- if (!link) {
- ret = -ENOMEM;
- goto out;
- }
-
- link->file = file;
- list_add_tail_rcu(&link->list, &tk->tp.files);
+ ret = trace_probe_add_file(&tk->tp, file);
+ if (ret)
+ return ret;
+ } else
+ trace_probe_set_flag(&tk->tp, TP_FLAG_PROFILE);
- tk->tp.flags |= TP_FLAG_TRACE;
- ret = __enable_trace_kprobe(tk);
- if (ret) {
- list_del_rcu(&link->list);
- kfree(link);
- tk->tp.flags &= ~TP_FLAG_TRACE;
- }
+ if (enabled)
+ return 0;
- } else {
- tk->tp.flags |= TP_FLAG_PROFILE;
- ret = __enable_trace_kprobe(tk);
- if (ret)
- tk->tp.flags &= ~TP_FLAG_PROFILE;
+ ret = __enable_trace_kprobe(tk);
+ if (ret) {
+ if (file)
+ trace_probe_remove_file(&tk->tp, file);
+ else
+ trace_probe_clear_flag(&tk->tp, TP_FLAG_PROFILE);
}
- out:
+
return ret;
}
@@ -335,54 +329,34 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
- struct event_file_link *link = NULL;
- int wait = 0;
+ struct trace_probe *tp = &tk->tp;
int ret = 0;
if (file) {
- link = find_event_file_link(&tk->tp, file);
- if (!link) {
- ret = -EINVAL;
- goto out;
- }
-
- list_del_rcu(&link->list);
- wait = 1;
- if (!list_empty(&tk->tp.files))
+ if (!trace_probe_get_file_link(tp, file))
+ return -ENOENT;
+ if (!trace_probe_has_single_file(tp))
goto out;
-
- tk->tp.flags &= ~TP_FLAG_TRACE;
+ trace_probe_clear_flag(tp, TP_FLAG_TRACE);
} else
- tk->tp.flags &= ~TP_FLAG_PROFILE;
+ trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
- if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
+ if (!trace_probe_is_enabled(tp) && trace_kprobe_is_registered(tk)) {
if (trace_kprobe_is_return(tk))
disable_kretprobe(&tk->rp);
else
disable_kprobe(&tk->rp.kp);
- wait = 1;
}
- /*
- * if tk is not added to any list, it must be a local trace_kprobe
- * created with perf_event_open. We don't need to wait for these
- * trace_kprobes
- */
- if (list_empty(&tk->devent.list))
- wait = 0;
out:
- if (wait) {
+ if (file)
/*
- * Synchronize with kprobe_trace_func/kretprobe_trace_func
- * to ensure disabled (all running handlers are finished).
- * This is not only for kfree(), but also the caller,
- * trace_remove_event_call() supposes it for releasing
- * event_call related objects, which will be accessed in
- * the kprobe_trace_func/kretprobe_trace_func.
+ * Synchronization is done in below function. For perf event,
+ * file == NULL and perf_trace_event_unreg() calls
+ * tracepoint_synchronize_unregister() to ensure synchronize
+ * event. We don't need to care about it.
*/
- synchronize_rcu();
- kfree(link); /* Ignored if link == NULL */
- }
+ trace_probe_remove_file(tp, file);
return ret;
}
@@ -415,7 +389,7 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
{
int i, ret;
- if (trace_probe_is_registered(&tk->tp))
+ if (trace_kprobe_is_registered(tk))
return -EINVAL;
if (within_notrace_func(tk)) {
@@ -441,21 +415,20 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
else
ret = register_kprobe(&tk->rp.kp);
- if (ret == 0)
- tk->tp.flags |= TP_FLAG_REGISTERED;
return ret;
}
/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
- if (trace_probe_is_registered(&tk->tp)) {
+ if (trace_kprobe_is_registered(tk)) {
if (trace_kprobe_is_return(tk))
unregister_kretprobe(&tk->rp);
else
unregister_kprobe(&tk->rp.kp);
- tk->tp.flags &= ~TP_FLAG_REGISTERED;
- /* Cleanup kprobe for reuse */
+ /* Cleanup kprobe for reuse and mark it unregistered */
+ INIT_HLIST_NODE(&tk->rp.kp.hlist);
+ INIT_LIST_HEAD(&tk->rp.kp.list);
if (tk->rp.kp.symbol_name)
tk->rp.kp.addr = NULL;
}
@@ -487,8 +460,8 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
mutex_lock(&event_mutex);
/* Delete old (same name) event if exist */
- old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
- tk->tp.call.class->system);
+ old_tk = find_trace_kprobe(trace_probe_name(&tk->tp),
+ trace_probe_group_name(&tk->tp));
if (old_tk) {
ret = unregister_trace_kprobe(old_tk);
if (ret < 0)
@@ -541,7 +514,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
ret = __register_trace_kprobe(tk);
if (ret)
pr_warn("Failed to re-register probe %s on %s: %d\n",
- trace_event_name(&tk->tp.call),
+ trace_probe_name(&tk->tp),
mod->name, ret);
}
}
@@ -716,6 +689,10 @@ static int trace_kprobe_create(int argc, const char *argv[])
goto error; /* This can be -ENOMEM */
}
+ ret = traceprobe_set_print_fmt(&tk->tp, is_return);
+ if (ret < 0)
+ goto error;
+
ret = register_trace_kprobe(tk);
if (ret) {
trace_probe_log_set_index(1);
@@ -767,8 +744,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
int i;
seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
- seq_printf(m, ":%s/%s", tk->tp.call.class->system,
- trace_event_name(&tk->tp.call));
+ seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp),
+ trace_probe_name(&tk->tp));
if (!tk->symbol)
seq_printf(m, " 0x%p", tk->rp.kp.addr);
@@ -842,7 +819,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
tk = to_trace_kprobe(ev);
seq_printf(m, " %-44s %15lu %15lu\n",
- trace_event_name(&tk->tp.call),
+ trace_probe_name(&tk->tp),
trace_kprobe_nhit(tk),
tk->rp.kp.nmissed);
@@ -886,6 +863,15 @@ fetch_store_strlen(unsigned long addr)
return (ret < 0) ? ret : len;
}
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+ const void __user *uaddr = (__force const void __user *)addr;
+
+ return strnlen_unsafe_user(uaddr, MAX_STRING_SIZE);
+}
+
/*
* Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
* length and relative data location.
@@ -894,19 +880,46 @@ static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base)
{
int maxlen = get_loc_len(*(u32 *)dest);
- u8 *dst = get_loc_data(dest, base);
+ void *__dest;
long ret;
if (unlikely(!maxlen))
return -ENOMEM;
+
+ __dest = get_loc_data(dest, base);
+
/*
* Try to get string again, since the string can be changed while
* probing.
*/
- ret = strncpy_from_unsafe(dst, (void *)addr, maxlen);
+ ret = strncpy_from_unsafe(__dest, (void *)addr, maxlen);
+ if (ret >= 0)
+ *(u32 *)dest = make_data_loc(ret, __dest - base);
+
+ return ret;
+}
+/*
+ * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
+ * with max length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+ const void __user *uaddr = (__force const void __user *)addr;
+ int maxlen = get_loc_len(*(u32 *)dest);
+ void *__dest;
+ long ret;
+
+ if (unlikely(!maxlen))
+ return -ENOMEM;
+
+ __dest = get_loc_data(dest, base);
+
+ ret = strncpy_from_unsafe_user(__dest, uaddr, maxlen);
if (ret >= 0)
- *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
+ *(u32 *)dest = make_data_loc(ret, __dest - base);
+
return ret;
}
@@ -916,6 +929,14 @@ probe_mem_read(void *dest, void *src, size_t size)
return probe_kernel_read(dest, src, size);
}
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+ const void __user *uaddr = (__force const void __user *)src;
+
+ return probe_user_read(dest, uaddr, size);
+}
+
/* Note that we don't verify it, since the code does not come from user space */
static int
process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
@@ -971,7 +992,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
struct ring_buffer *buffer;
int size, dsize, pc;
unsigned long irq_flags;
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
WARN_ON(call != trace_file->event_call);
@@ -1003,7 +1024,7 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
struct event_file_link *link;
- list_for_each_entry_rcu(link, &tk->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tk->tp)
__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);
@@ -1019,7 +1040,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct ring_buffer *buffer;
int size, pc, dsize;
unsigned long irq_flags;
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
WARN_ON(call != trace_file->event_call);
@@ -1053,7 +1074,7 @@ kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
{
struct event_file_link *link;
- list_for_each_entry_rcu(link, &tk->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tk->tp)
__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);
@@ -1070,7 +1091,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
field = (struct kprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
- trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
+ trace_seq_printf(s, "%s: (", trace_probe_name(tp));
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
@@ -1097,7 +1118,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
field = (struct kretprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
- trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
+ trace_seq_printf(s, "%s: (", trace_probe_name(tp));
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
@@ -1149,7 +1170,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
struct kprobe_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
@@ -1199,7 +1220,7 @@ static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct pt_regs *regs)
{
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
struct kretprobe_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
@@ -1299,10 +1320,10 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
raw_cpu_inc(*tk->nhit);
- if (tk->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tk->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
ret = kprobe_perf_func(tk, regs);
#endif
return ret;
@@ -1316,10 +1337,10 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
raw_cpu_inc(*tk->nhit);
- if (tk->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tk->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
kretprobe_perf_func(tk, ri, regs);
#endif
return 0; /* We don't tweek kernel, so just return 0 */
@@ -1334,10 +1355,10 @@ static struct trace_event_functions kprobe_funcs = {
.trace = print_kprobe_event
};
-static inline void init_trace_event_call(struct trace_kprobe *tk,
- struct trace_event_call *call)
+static inline void init_trace_event_call(struct trace_kprobe *tk)
{
- INIT_LIST_HEAD(&call->class->fields);
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
+
if (trace_kprobe_is_return(tk)) {
call->event.funcs = &kretprobe_funcs;
call->class->define_fields = kretprobe_event_define_fields;
@@ -1353,37 +1374,14 @@ static inline void init_trace_event_call(struct trace_kprobe *tk,
static int register_kprobe_event(struct trace_kprobe *tk)
{
- struct trace_event_call *call = &tk->tp.call;
- int ret = 0;
-
- init_trace_event_call(tk, call);
+ init_trace_event_call(tk);
- if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
- return -ENOMEM;
- ret = register_trace_event(&call->event);
- if (!ret) {
- kfree(call->print_fmt);
- return -ENODEV;
- }
- ret = trace_add_event_call(call);
- if (ret) {
- pr_info("Failed to register kprobe event: %s\n",
- trace_event_name(call));
- kfree(call->print_fmt);
- unregister_trace_event(&call->event);
- }
- return ret;
+ return trace_probe_register_event_call(&tk->tp);
}
static int unregister_kprobe_event(struct trace_kprobe *tk)
{
- int ret;
-
- /* tp->event is unregistered in trace_remove_event_call() */
- ret = trace_remove_event_call(&tk->tp.call);
- if (!ret)
- kfree(tk->tp.call.print_fmt);
- return ret;
+ return trace_probe_unregister_event_call(&tk->tp);
}
#ifdef CONFIG_PERF_EVENTS
@@ -1413,7 +1411,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
return ERR_CAST(tk);
}
- init_trace_event_call(tk, &tk->tp.call);
+ init_trace_event_call(tk);
if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
ret = -ENOMEM;
@@ -1421,12 +1419,10 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
}
ret = __register_trace_kprobe(tk);
- if (ret < 0) {
- kfree(tk->tp.call.print_fmt);
+ if (ret < 0)
goto error;
- }
- return &tk->tp.call;
+ return trace_probe_event_call(&tk->tp);
error:
free_trace_kprobe(tk);
return ERR_PTR(ret);
@@ -1445,11 +1441,50 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call)
__unregister_trace_kprobe(tk);
- kfree(tk->tp.call.print_fmt);
free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */
+static __init void enable_boot_kprobe_events(void)
+{
+ struct trace_array *tr = top_trace_array();
+ struct trace_event_file *file;
+ struct trace_kprobe *tk;
+ struct dyn_event *pos;
+
+ mutex_lock(&event_mutex);
+ for_each_trace_kprobe(tk, pos) {
+ list_for_each_entry(file, &tr->events, list)
+ if (file->event_call == trace_probe_event_call(&tk->tp))
+ trace_event_enable_disable(file, 1, 0);
+ }
+ mutex_unlock(&event_mutex);
+}
+
+static __init void setup_boot_kprobe_events(void)
+{
+ char *p, *cmd = kprobe_boot_events_buf;
+ int ret;
+
+ strreplace(kprobe_boot_events_buf, ',', ' ');
+
+ while (cmd && *cmd != '\0') {
+ p = strchr(cmd, ';');
+ if (p)
+ *p++ = '\0';
+
+ ret = trace_run_command(cmd, create_or_delete_trace_kprobe);
+ if (ret)
+ pr_warn("Failed to add event(%d): %s\n", ret, cmd);
+ else
+ kprobe_boot_events_enabled = true;
+
+ cmd = p;
+ }
+
+ enable_boot_kprobe_events();
+}
+
/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
@@ -1481,6 +1516,9 @@ static __init int init_kprobe_trace(void)
if (!entry)
pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
+
+ setup_boot_kprobe_events();
+
return 0;
}
fs_initcall(init_kprobe_trace);
@@ -1493,7 +1531,7 @@ find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list)
- if (file->event_call == &tk->tp.call)
+ if (file->event_call == trace_probe_event_call(&tk->tp))
return file;
return NULL;
@@ -1513,6 +1551,11 @@ static __init int kprobe_trace_self_tests_init(void)
if (tracing_is_disabled())
return -ENODEV;
+ if (kprobe_boot_events_enabled) {
+ pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n");
+ return 0;
+ }
+
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index ba751f993c3b6..cab4a5398f1d4 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1109,17 +1109,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
unsigned long ip = field->caller[i];
- if (ip == ULONG_MAX || trace_seq_has_overflowed(s))
+ if (!ip || trace_seq_has_overflowed(s))
break;
trace_seq_puts(s, " => ");
-
- if (!ip) {
- trace_seq_puts(s, "??");
- trace_seq_putc(s, '\n');
- continue;
- }
-
seq_print_user_ip(s, mm, ip, flags);
trace_seq_putc(s, '\n');
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index a347faced9595..dbef0d1350754 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -78,6 +78,8 @@ static const struct fetch_type probe_fetch_types[] = {
/* Special types */
__ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1,
"__data_loc char[]"),
+ __ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1,
+ "__data_loc char[]"),
/* Basic types */
ASSIGN_FETCH_TYPE(u8, u8, 0),
ASSIGN_FETCH_TYPE(u16, u16, 0),
@@ -322,6 +324,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
{
struct fetch_insn *code = *pcode;
unsigned long param;
+ int deref = FETCH_OP_DEREF;
long offset = 0;
char *tmp;
int ret = 0;
@@ -394,9 +397,14 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
break;
case '+': /* deref memory */
- arg++; /* Skip '+', because kstrtol() rejects it. */
- /* fall through */
case '-':
+ if (arg[1] == 'u') {
+ deref = FETCH_OP_UDEREF;
+ arg[1] = arg[0];
+ arg++;
+ }
+ if (arg[0] == '+')
+ arg++; /* Skip '+', because kstrtol() rejects it. */
tmp = strchr(arg, '(');
if (!tmp) {
trace_probe_log_err(offs, DEREF_NEED_BRACE);
@@ -432,7 +440,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
}
*pcode = code;
- code->op = FETCH_OP_DEREF;
+ code->op = deref;
code->offset = offset;
}
break;
@@ -569,15 +577,17 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
goto fail;
/* Store operation */
- if (!strcmp(parg->type->name, "string")) {
- if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM &&
- code->op != FETCH_OP_COMM) {
+ if (!strcmp(parg->type->name, "string") ||
+ !strcmp(parg->type->name, "ustring")) {
+ if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
+ code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) {
trace_probe_log_err(offset + (t ? (t - arg) : 0),
BAD_STRING);
ret = -EINVAL;
goto fail;
}
- if (code->op != FETCH_OP_DEREF || parg->count) {
+ if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
+ parg->count) {
/*
* IMM and COMM is pointing actual address, those must
* be kept, and if parg->count != 0, this is an array
@@ -590,12 +600,20 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
goto fail;
}
}
- code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */
+ /* If op == DEREF, replace it with STRING */
+ if (!strcmp(parg->type->name, "ustring") ||
+ code->op == FETCH_OP_UDEREF)
+ code->op = FETCH_OP_ST_USTRING;
+ else
+ code->op = FETCH_OP_ST_STRING;
code->size = parg->type->size;
parg->dynamic = true;
} else if (code->op == FETCH_OP_DEREF) {
code->op = FETCH_OP_ST_MEM;
code->size = parg->type->size;
+ } else if (code->op == FETCH_OP_UDEREF) {
+ code->op = FETCH_OP_ST_UMEM;
+ code->size = parg->type->size;
} else {
code++;
if (code->op != FETCH_OP_NOP) {
@@ -618,7 +636,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
/* Loop(Array) operation */
if (parg->count) {
if (scode->op != FETCH_OP_ST_MEM &&
- scode->op != FETCH_OP_ST_STRING) {
+ scode->op != FETCH_OP_ST_STRING &&
+ scode->op != FETCH_OP_ST_USTRING) {
trace_probe_log_err(offset + (t ? (t - arg) : 0),
BAD_STRING);
ret = -EINVAL;
@@ -825,6 +844,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
{
+ struct trace_event_call *call = trace_probe_event_call(tp);
int len;
char *print_fmt;
@@ -836,7 +856,7 @@ int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
/* Second: actually write the @print_fmt */
__set_print_fmt(tp, print_fmt, len + 1, is_return);
- tp->call.print_fmt = print_fmt;
+ call->print_fmt = print_fmt;
return 0;
}
@@ -865,3 +885,105 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call,
}
return 0;
}
+
+
+void trace_probe_cleanup(struct trace_probe *tp)
+{
+ struct trace_event_call *call = trace_probe_event_call(tp);
+ int i;
+
+ for (i = 0; i < tp->nr_args; i++)
+ traceprobe_free_probe_arg(&tp->args[i]);
+
+ kfree(call->class->system);
+ kfree(call->name);
+ kfree(call->print_fmt);
+}
+
+int trace_probe_init(struct trace_probe *tp, const char *event,
+ const char *group)
+{
+ struct trace_event_call *call = trace_probe_event_call(tp);
+
+ if (!event || !group)
+ return -EINVAL;
+
+ call->class = &tp->class;
+ call->name = kstrdup(event, GFP_KERNEL);
+ if (!call->name)
+ return -ENOMEM;
+
+ tp->class.system = kstrdup(group, GFP_KERNEL);
+ if (!tp->class.system) {
+ kfree(call->name);
+ call->name = NULL;
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&tp->files);
+ INIT_LIST_HEAD(&tp->class.fields);
+
+ return 0;
+}
+
+int trace_probe_register_event_call(struct trace_probe *tp)
+{
+ struct trace_event_call *call = trace_probe_event_call(tp);
+ int ret;
+
+ ret = register_trace_event(&call->event);
+ if (!ret)
+ return -ENODEV;
+
+ ret = trace_add_event_call(call);
+ if (ret)
+ unregister_trace_event(&call->event);
+
+ return ret;
+}
+
+int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file)
+{
+ struct event_file_link *link;
+
+ link = kmalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ return -ENOMEM;
+
+ link->file = file;
+ INIT_LIST_HEAD(&link->list);
+ list_add_tail_rcu(&link->list, &tp->files);
+ trace_probe_set_flag(tp, TP_FLAG_TRACE);
+ return 0;
+}
+
+struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp,
+ struct trace_event_file *file)
+{
+ struct event_file_link *link;
+
+ trace_probe_for_each_link(link, tp) {
+ if (link->file == file)
+ return link;
+ }
+
+ return NULL;
+}
+
+int trace_probe_remove_file(struct trace_probe *tp,
+ struct trace_event_file *file)
+{
+ struct event_file_link *link;
+
+ link = trace_probe_get_file_link(tp, file);
+ if (!link)
+ return -ENOENT;
+
+ list_del_rcu(&link->list);
+ synchronize_rcu();
+ kfree(link);
+
+ if (list_empty(&tp->files))
+ trace_probe_clear_flag(tp, TP_FLAG_TRACE);
+
+ return 0;
+}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f9a8c632188bc..d1714820efe19 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -55,7 +55,6 @@
/* Flags for trace_probe */
#define TP_FLAG_TRACE 1
#define TP_FLAG_PROFILE 2
-#define TP_FLAG_REGISTERED 4
/* data_loc: data location, compatible with u32 */
#define make_data_loc(len, offs) \
@@ -92,10 +91,13 @@ enum fetch_op {
FETCH_OP_FOFFS, /* File offset: .immediate */
// Stage 2 (dereference) op
FETCH_OP_DEREF, /* Dereference: .offset */
+ FETCH_OP_UDEREF, /* User-space Dereference: .offset */
// Stage 3 (store) ops
FETCH_OP_ST_RAW, /* Raw: .size */
FETCH_OP_ST_MEM, /* Mem: .offset, .size */
+ FETCH_OP_ST_UMEM, /* Mem: .offset, .size */
FETCH_OP_ST_STRING, /* String: .offset, .size */
+ FETCH_OP_ST_USTRING, /* User String: .offset, .size */
// Stage 4 (modify) op
FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */
// Stage 5 (loop) op
@@ -235,16 +237,71 @@ struct event_file_link {
struct list_head list;
};
+static inline bool trace_probe_test_flag(struct trace_probe *tp,
+ unsigned int flag)
+{
+ return !!(tp->flags & flag);
+}
+
+static inline void trace_probe_set_flag(struct trace_probe *tp,
+ unsigned int flag)
+{
+ tp->flags |= flag;
+}
+
+static inline void trace_probe_clear_flag(struct trace_probe *tp,
+ unsigned int flag)
+{
+ tp->flags &= ~flag;
+}
+
static inline bool trace_probe_is_enabled(struct trace_probe *tp)
{
- return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
+ return trace_probe_test_flag(tp, TP_FLAG_TRACE | TP_FLAG_PROFILE);
}
-static inline bool trace_probe_is_registered(struct trace_probe *tp)
+static inline const char *trace_probe_name(struct trace_probe *tp)
{
- return !!(tp->flags & TP_FLAG_REGISTERED);
+ return trace_event_name(&tp->call);
}
+static inline const char *trace_probe_group_name(struct trace_probe *tp)
+{
+ return tp->call.class->system;
+}
+
+static inline struct trace_event_call *
+ trace_probe_event_call(struct trace_probe *tp)
+{
+ return &tp->call;
+}
+
+static inline int trace_probe_unregister_event_call(struct trace_probe *tp)
+{
+ /* tp->event is unregistered in trace_remove_event_call() */
+ return trace_remove_event_call(&tp->call);
+}
+
+static inline bool trace_probe_has_single_file(struct trace_probe *tp)
+{
+ return !!list_is_singular(&tp->files);
+}
+
+int trace_probe_init(struct trace_probe *tp, const char *event,
+ const char *group);
+void trace_probe_cleanup(struct trace_probe *tp);
+int trace_probe_register_event_call(struct trace_probe *tp);
+int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file);
+int trace_probe_remove_file(struct trace_probe *tp,
+ struct trace_event_file *file);
+struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp,
+ struct trace_event_file *file);
+
+#define trace_probe_for_each_link(pos, tp) \
+ list_for_each_entry(pos, &(tp)->files, list)
+#define trace_probe_for_each_link_rcu(pos, tp) \
+ list_for_each_entry_rcu(pos, &(tp)->files, list)
+
/* Check the name is good for event/group/fields */
static inline bool is_good_name(const char *name)
{
@@ -257,18 +314,6 @@ static inline bool is_good_name(const char *name)
return true;
}
-static inline struct event_file_link *
-find_event_file_link(struct trace_probe *tp, struct trace_event_file *file)
-{
- struct event_file_link *link;
-
- list_for_each_entry(link, &tp->files, list)
- if (link->file == file)
- return link;
-
- return NULL;
-}
-
#define TPARG_FL_RETURN BIT(0)
#define TPARG_FL_KERNEL BIT(1)
#define TPARG_FL_FENTRY BIT(2)
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index c30c61f12dddf..e5282828f4a60 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -59,8 +59,13 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs,
static nokprobe_inline int fetch_store_strlen(unsigned long addr);
static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base);
+static nokprobe_inline int fetch_store_strlen_user(unsigned long addr);
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base);
static nokprobe_inline int
probe_mem_read(void *dest, void *src, size_t size);
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size);
/* From the 2nd stage, routine is same */
static nokprobe_inline int
@@ -74,14 +79,21 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
stage2:
/* 2nd stage: dereference memory if needed */
- while (code->op == FETCH_OP_DEREF) {
- lval = val;
- ret = probe_mem_read(&val, (void *)val + code->offset,
- sizeof(val));
+ do {
+ if (code->op == FETCH_OP_DEREF) {
+ lval = val;
+ ret = probe_mem_read(&val, (void *)val + code->offset,
+ sizeof(val));
+ } else if (code->op == FETCH_OP_UDEREF) {
+ lval = val;
+ ret = probe_mem_read_user(&val,
+ (void *)val + code->offset, sizeof(val));
+ } else
+ break;
if (ret)
return ret;
code++;
- }
+ } while (1);
s3 = code;
stage3:
@@ -91,6 +103,10 @@ stage3:
ret = fetch_store_strlen(val + code->offset);
code++;
goto array;
+ } else if (code->op == FETCH_OP_ST_USTRING) {
+ ret += fetch_store_strlen_user(val + code->offset);
+ code++;
+ goto array;
} else
return -EILSEQ;
}
@@ -102,10 +118,17 @@ stage3:
case FETCH_OP_ST_MEM:
probe_mem_read(dest, (void *)val + code->offset, code->size);
break;
+ case FETCH_OP_ST_UMEM:
+ probe_mem_read_user(dest, (void *)val + code->offset, code->size);
+ break;
case FETCH_OP_ST_STRING:
loc = *(u32 *)dest;
ret = fetch_store_string(val + code->offset, dest, base);
break;
+ case FETCH_OP_ST_USTRING:
+ loc = *(u32 *)dest;
+ ret = fetch_store_string_user(val + code->offset, dest, base);
+ break;
default:
return -EILSEQ;
}
@@ -123,7 +146,8 @@ array:
total += ret;
if (++i < code->param) {
code = s3;
- if (s3->op != FETCH_OP_ST_STRING) {
+ if (s3->op != FETCH_OP_ST_STRING &&
+ s3->op != FETCH_OP_ST_USTRING) {
dest += s3->size;
val += s3->size;
goto stage3;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 7860e3f59fad6..1ceedb9146b11 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -140,6 +140,13 @@ probe_mem_read(void *dest, void *src, size_t size)
return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
}
+
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+ return probe_mem_read(dest, src, size);
+}
+
/*
* Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
* length and relative data location.
@@ -176,6 +183,12 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
return ret;
}
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+ return fetch_store_string(addr, dest, base);
+}
+
/* Return the length of string -- including null terminal byte */
static nokprobe_inline int
fetch_store_strlen(unsigned long addr)
@@ -191,6 +204,12 @@ fetch_store_strlen(unsigned long addr)
return (len > MAX_STRING_SIZE) ? 0 : len;
}
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+ return fetch_store_strlen(addr);
+}
+
static unsigned long translate_user_vaddr(unsigned long file_offset)
{
unsigned long base_addr;
@@ -270,8 +289,8 @@ static bool trace_uprobe_match(const char *system, const char *event,
{
struct trace_uprobe *tu = to_trace_uprobe(ev);
- return strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
- (!system || strcmp(tu->tp.call.class->system, system) == 0);
+ return strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+ (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0);
}
/*
@@ -281,25 +300,17 @@ static struct trace_uprobe *
alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
{
struct trace_uprobe *tu;
-
- if (!event || !group)
- return ERR_PTR(-EINVAL);
+ int ret;
tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
if (!tu)
return ERR_PTR(-ENOMEM);
- tu->tp.call.class = &tu->tp.class;
- tu->tp.call.name = kstrdup(event, GFP_KERNEL);
- if (!tu->tp.call.name)
- goto error;
-
- tu->tp.class.system = kstrdup(group, GFP_KERNEL);
- if (!tu->tp.class.system)
+ ret = trace_probe_init(&tu->tp, event, group);
+ if (ret < 0)
goto error;
dyn_event_init(&tu->devent, &trace_uprobe_ops);
- INIT_LIST_HEAD(&tu->tp.files);
tu->consumer.handler = uprobe_dispatcher;
if (is_ret)
tu->consumer.ret_handler = uretprobe_dispatcher;
@@ -307,25 +318,18 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
return tu;
error:
- kfree(tu->tp.call.name);
kfree(tu);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
}
static void free_trace_uprobe(struct trace_uprobe *tu)
{
- int i;
-
if (!tu)
return;
- for (i = 0; i < tu->tp.nr_args; i++)
- traceprobe_free_probe_arg(&tu->tp.args[i]);
-
path_put(&tu->path);
- kfree(tu->tp.call.class->system);
- kfree(tu->tp.call.name);
+ trace_probe_cleanup(&tu->tp);
kfree(tu->filename);
kfree(tu);
}
@@ -336,8 +340,8 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
struct trace_uprobe *tu;
for_each_trace_uprobe(tu, pos)
- if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
- strcmp(tu->tp.call.class->system, group) == 0)
+ if (strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+ strcmp(trace_probe_group_name(&tu->tp), group) == 0)
return tu;
return NULL;
@@ -372,8 +376,8 @@ static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new)
struct trace_uprobe *tmp, *old = NULL;
struct inode *new_inode = d_real_inode(new->path.dentry);
- old = find_probe_event(trace_event_name(&new->tp.call),
- new->tp.call.class->system);
+ old = find_probe_event(trace_probe_name(&new->tp),
+ trace_probe_group_name(&new->tp));
for_each_trace_uprobe(tmp, pos) {
if ((old ? old != tmp : true) &&
@@ -578,6 +582,10 @@ static int trace_uprobe_create(int argc, const char **argv)
goto error;
}
+ ret = traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu));
+ if (ret < 0)
+ goto error;
+
ret = register_trace_uprobe(tu);
if (!ret)
goto out;
@@ -621,8 +629,8 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
char c = is_ret_probe(tu) ? 'r' : 'p';
int i;
- seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, tu->tp.call.class->system,
- trace_event_name(&tu->tp.call), tu->filename,
+ seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, trace_probe_group_name(&tu->tp),
+ trace_probe_name(&tu->tp), tu->filename,
(int)(sizeof(void *) * 2), tu->offset);
if (tu->ref_ctr_offset)
@@ -692,7 +700,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
tu = to_trace_uprobe(ev);
seq_printf(m, " %s %-44s %15lu\n", tu->filename,
- trace_event_name(&tu->tp.call), tu->nhit);
+ trace_probe_name(&tu->tp), tu->nhit);
return 0;
}
@@ -818,7 +826,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
struct ring_buffer *buffer;
void *data;
int size, esize;
- struct trace_event_call *call = &tu->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tu->tp);
WARN_ON(call != trace_file->event_call);
@@ -860,7 +868,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
return 0;
rcu_read_lock();
- list_for_each_entry_rcu(link, &tu->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tu->tp)
__uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
rcu_read_unlock();
@@ -874,7 +882,7 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
struct event_file_link *link;
rcu_read_lock();
- list_for_each_entry_rcu(link, &tu->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tu->tp)
__uprobe_trace_func(tu, func, regs, ucb, dsize, link->file);
rcu_read_unlock();
}
@@ -893,12 +901,12 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
if (is_ret_probe(tu)) {
trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
- trace_event_name(&tu->tp.call),
+ trace_probe_name(&tu->tp),
entry->vaddr[1], entry->vaddr[0]);
data = DATAOF_TRACE_ENTRY(entry, true);
} else {
trace_seq_printf(s, "%s: (0x%lx)",
- trace_event_name(&tu->tp.call),
+ trace_probe_name(&tu->tp),
entry->vaddr[0]);
data = DATAOF_TRACE_ENTRY(entry, false);
}
@@ -921,26 +929,20 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
filter_func_t filter)
{
bool enabled = trace_probe_is_enabled(&tu->tp);
- struct event_file_link *link = NULL;
int ret;
if (file) {
- if (tu->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
return -EINTR;
- link = kmalloc(sizeof(*link), GFP_KERNEL);
- if (!link)
- return -ENOMEM;
-
- link->file = file;
- list_add_tail_rcu(&link->list, &tu->tp.files);
-
- tu->tp.flags |= TP_FLAG_TRACE;
+ ret = trace_probe_add_file(&tu->tp, file);
+ if (ret < 0)
+ return ret;
} else {
- if (tu->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
return -EINTR;
- tu->tp.flags |= TP_FLAG_PROFILE;
+ trace_probe_set_flag(&tu->tp, TP_FLAG_PROFILE);
}
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
@@ -970,13 +972,11 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
uprobe_buffer_disable();
err_flags:
- if (file) {
- list_del(&link->list);
- kfree(link);
- tu->tp.flags &= ~TP_FLAG_TRACE;
- } else {
- tu->tp.flags &= ~TP_FLAG_PROFILE;
- }
+ if (file)
+ trace_probe_remove_file(&tu->tp, file);
+ else
+ trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE);
+
return ret;
}
@@ -987,26 +987,18 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
return;
if (file) {
- struct event_file_link *link;
-
- link = find_event_file_link(&tu->tp, file);
- if (!link)
+ if (trace_probe_remove_file(&tu->tp, file) < 0)
return;
- list_del_rcu(&link->list);
- /* synchronize with u{,ret}probe_trace_func */
- synchronize_rcu();
- kfree(link);
-
- if (!list_empty(&tu->tp.files))
+ if (trace_probe_is_enabled(&tu->tp))
return;
- }
+ } else
+ trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE);
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
tu->inode = NULL;
- tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
uprobe_buffer_disable();
}
@@ -1126,7 +1118,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
unsigned long func, struct pt_regs *regs,
struct uprobe_cpu_buffer *ucb, int dsize)
{
- struct trace_event_call *call = &tu->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tu->tp);
struct uprobe_trace_entry_head *entry;
struct hlist_head *head;
void *data;
@@ -1279,11 +1271,11 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
ucb = uprobe_buffer_get();
store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
- if (tu->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
ret |= uprobe_trace_func(tu, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (tu->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
ret |= uprobe_perf_func(tu, regs, ucb, dsize);
#endif
uprobe_buffer_put(ucb);
@@ -1314,11 +1306,11 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
ucb = uprobe_buffer_get();
store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
- if (tu->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
uretprobe_trace_func(tu, func, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (tu->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
uretprobe_perf_func(tu, func, regs, ucb, dsize);
#endif
uprobe_buffer_put(ucb);
@@ -1329,10 +1321,10 @@ static struct trace_event_functions uprobe_funcs = {
.trace = print_uprobe_event
};
-static inline void init_trace_event_call(struct trace_uprobe *tu,
- struct trace_event_call *call)
+static inline void init_trace_event_call(struct trace_uprobe *tu)
{
- INIT_LIST_HEAD(&call->class->fields);
+ struct trace_event_call *call = trace_probe_event_call(&tu->tp);
+
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
@@ -1343,43 +1335,14 @@ static inline void init_trace_event_call(struct trace_uprobe *tu,
static int register_uprobe_event(struct trace_uprobe *tu)
{
- struct trace_event_call *call = &tu->tp.call;
- int ret = 0;
-
- init_trace_event_call(tu, call);
-
- if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
- return -ENOMEM;
+ init_trace_event_call(tu);
- ret = register_trace_event(&call->event);
- if (!ret) {
- kfree(call->print_fmt);
- return -ENODEV;
- }
-
- ret = trace_add_event_call(call);
-
- if (ret) {
- pr_info("Failed to register uprobe event: %s\n",
- trace_event_name(call));
- kfree(call->print_fmt);
- unregister_trace_event(&call->event);
- }
-
- return ret;
+ return trace_probe_register_event_call(&tu->tp);
}
static int unregister_uprobe_event(struct trace_uprobe *tu)
{
- int ret;
-
- /* tu->event is unregistered in trace_remove_event_call() */
- ret = trace_remove_event_call(&tu->tp.call);
- if (ret)
- return ret;
- kfree(tu->tp.call.print_fmt);
- tu->tp.call.print_fmt = NULL;
- return 0;
+ return trace_probe_unregister_event_call(&tu->tp);
}
#ifdef CONFIG_PERF_EVENTS
@@ -1419,14 +1382,14 @@ create_local_trace_uprobe(char *name, unsigned long offs,
tu->path = path;
tu->ref_ctr_offset = ref_ctr_offset;
tu->filename = kstrdup(name, GFP_KERNEL);
- init_trace_event_call(tu, &tu->tp.call);
+ init_trace_event_call(tu);
if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
ret = -ENOMEM;
goto error;
}
- return &tu->tp.call;
+ return trace_probe_event_call(&tu->tp);
error:
free_trace_uprobe(tu);
return ERR_PTR(ret);
@@ -1438,9 +1401,6 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call)
tu = container_of(event_call, struct trace_uprobe, tp.call);
- kfree(tu->tp.call.print_fmt);
- tu->tp.call.print_fmt = NULL;
-
free_trace_uprobe(tu);
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index df3ade14ccbde..73956eaff8a9c 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -55,8 +55,8 @@ struct tp_probes {
static inline void *allocate_probes(int count)
{
- struct tp_probes *p = kmalloc(count * sizeof(struct tracepoint_func)
- + sizeof(struct tp_probes), GFP_KERNEL);
+ struct tp_probes *p = kmalloc(struct_size(p, probes, count),
+ GFP_KERNEL);
return p == NULL ? NULL : p->probes;
}
diff --git a/kernel/ucount.c b/kernel/ucount.c
index feb128c7b5d96..a53cc2b4179c0 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -52,16 +52,14 @@ static struct ctl_table_root set_root = {
.permissions = set_permissions,
};
-static int zero = 0;
-static int int_max = INT_MAX;
#define UCOUNT_ENTRY(name) \
{ \
.procname = name, \
.maxlen = sizeof(int), \
.mode = 0644, \
.proc_handler = proc_dointvec_minmax, \
- .extra1 = &zero, \
- .extra2 = &int_max, \
+ .extra1 = SYSCTL_ZERO, \
+ .extra2 = SYSCTL_INT_MAX, \
}
static struct ctl_table user_table[] = {
UCOUNT_ENTRY("max_user_namespaces"),