summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2018-06-03 18:22:42 -0700
committerAlexei Starovoitov <ast@kernel.org>2018-06-03 18:22:42 -0700
commit432bdb581e410ad3cea8f04e9323397f17501e3e (patch)
tree74151563ca1a780f930b7537846123d165af5829
parentea9916ea3ed98d0a1f67f5cbe8ed8ae28e37f8c8 (diff)
parentf269099a7e7a0c6732c4a817d0e99e92216414d9 (diff)
downloadlinux-0-day-432bdb581e410ad3cea8f04e9323397f17501e3e.tar.gz
linux-0-day-432bdb581e410ad3cea8f04e9323397f17501e3e.tar.xz
Merge branch 'bpf_get_current_cgroup_id'
Yonghong Song says: ==================== bpf has been used extensively for tracing. For example, bcc contains an almost full set of bpf-based tools to trace kernel and user functions/events. Most tracing tools are currently either filtered based on pid or system-wide. Containers have been used quite extensively in industry and cgroup is often used together to provide resource isolation and protection. Several processes may run inside the same container. It is often desirable to get container-level tracing results as well, e.g. syscall count, function count, I/O activity, etc. This patch implements a new helper, bpf_get_current_cgroup_id(), which will return cgroup id based on the cgroup within which the current task is running. Patch #1 implements the new helper in the kernel. Patch #2 syncs the uapi bpf.h header and helper between tools and kernel. Patch #3 shows how to get the same cgroup id in user space, so a filter or policy could be configured in the bpf program based on current task cgroup. Changelog: v1 -> v2: . rebase to resolve merge conflict with latest bpf-next. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/uapi/linux/bpf.h8
-rw-r--r--kernel/bpf/core.c1
-rw-r--r--kernel/bpf/helpers.c15
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--tools/include/uapi/linux/bpf.h8
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c57
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_kern.c28
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c141
13 files changed, 267 insertions, 4 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bbe297436e5d6..995c3b1e59bfa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -746,6 +746,7 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
+extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f0b6608b1f1ca..18712b0dbfe7c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2070,6 +2070,11 @@ union bpf_attr {
* **CONFIG_SOCK_CGROUP_DATA** configuration option.
* Return
* The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2151,7 +2156,8 @@ union bpf_attr {
FN(lwt_seg6_action), \
FN(rc_repeat), \
FN(rc_keydown), \
- FN(skb_cgroup_id),
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 527587de8a67a..9f1493705f404 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1765,6 +1765,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
+const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3d24e238221ec..73065e2d23c28 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -179,3 +179,18 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE,
};
+
+#ifdef CONFIG_CGROUPS
+BPF_CALL_0(bpf_get_current_cgroup_id)
+{
+ struct cgroup *cgrp = task_dfl_cgroup(current);
+
+ return cgrp->kn->id.id;
+}
+
+const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
+ .func = bpf_get_current_cgroup_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+#endif
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 752992ce35131..e2ab5b7f29d25 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -564,6 +564,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_probe_read_str:
return &bpf_probe_read_str_proto;
+ case BPF_FUNC_get_current_cgroup_id:
+ return &bpf_get_current_cgroup_id_proto;
default:
return NULL;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f0b6608b1f1ca..18712b0dbfe7c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2070,6 +2070,11 @@ union bpf_attr {
* **CONFIG_SOCK_CGROUP_DATA** configuration option.
* Return
* The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2151,7 +2156,8 @@ union bpf_attr {
FN(lwt_seg6_action), \
FN(rc_repeat), \
FN(rc_keydown), \
- FN(skb_cgroup_id),
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 6ea8359824644..49938d72cf639 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -18,3 +18,4 @@ urandom_read
test_btf
test_sockmap
test_lirc_mode2_user
+get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 553d1816b77a5..607ed8729c06d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -24,7 +24,7 @@ urandom_read: urandom_read.c
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_btf test_sockmap test_lirc_mode2_user
+ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@@ -34,7 +34,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
- test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o
+ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
+ get_cgroup_id_kern.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -63,6 +64,7 @@ $(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
$(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_progs: trace_helpers.c
+$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
.PHONY: force
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index a66a9d91acf4c..f2f28b6c89151 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -131,6 +131,8 @@ static int (*bpf_rc_repeat)(void *ctx) =
static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
unsigned long long scancode, unsigned int toggle) =
(void *) BPF_FUNC_rc_keydown;
+static unsigned long long (*bpf_get_current_cgroup_id)(void) =
+ (void *) BPF_FUNC_get_current_cgroup_id;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index f3bca3ade0f3d..c87b4e052ce96 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -6,6 +6,7 @@
#include <sys/types.h>
#include <linux/limits.h>
#include <stdio.h>
+#include <stdlib.h>
#include <linux/sched.h>
#include <fcntl.h>
#include <unistd.h>
@@ -176,3 +177,59 @@ int create_and_get_cgroup(char *path)
return fd;
}
+
+/**
+ * get_cgroup_id() - Get cgroup id for a particular cgroup path
+ * @path: The cgroup path, relative to the workdir, to look up
+ *
+ * On success, it returns the cgroup id. On failure it returns 0,
+ * which is an invalid cgroup id.
+ * If there is a failure, it prints the error to stderr.
+ */
+unsigned long long get_cgroup_id(char *path)
+{
+ int dirfd, err, flags, mount_id, fhsize;
+ union {
+ unsigned long long cgid;
+ unsigned char raw_bytes[8];
+ } id;
+ char cgroup_workdir[PATH_MAX + 1];
+ struct file_handle *fhp, *fhp2;
+ unsigned long long ret = 0;
+
+ format_cgroup_path(cgroup_workdir, path);
+
+ dirfd = AT_FDCWD;
+ flags = 0;
+ fhsize = sizeof(*fhp);
+ fhp = calloc(1, fhsize);
+ if (!fhp) {
+ log_err("calloc");
+ return 0;
+ }
+ err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
+ if (err >= 0 || fhp->handle_bytes != 8) {
+ log_err("name_to_handle_at");
+ goto free_mem;
+ }
+
+ fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
+ fhp2 = realloc(fhp, fhsize);
+ if (!fhp2) {
+ log_err("realloc");
+ goto free_mem;
+ }
+ err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
+ fhp = fhp2;
+ if (err < 0) {
+ log_err("name_to_handle_at");
+ goto free_mem;
+ }
+
+ memcpy(id.raw_bytes, fhp->f_handle, 8);
+ ret = id.cgid;
+
+free_mem:
+ free(fhp);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 06485e0002b3b..20a4a5dcd4690 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -13,5 +13,6 @@ int create_and_get_cgroup(char *path);
int join_cgroup(char *path);
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
+unsigned long long get_cgroup_id(char *path);
#endif
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
new file mode 100644
index 0000000000000..2cf8cb23f2095
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") cg_ids = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+ __u32 key = 0;
+ __u64 *val;
+
+ val = bpf_map_lookup_elem(&cg_ids, &key);
+ if (val)
+ *val = bpf_get_current_cgroup_id();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
new file mode 100644
index 0000000000000..ea19a42e58940
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ printf("%s:PASS:%s\n", __func__, tag); \
+ } \
+ __ret; \
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map)
+ return -1;
+ return bpf_map__fd(map);
+}
+
+#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
+
+int main(int argc, char **argv)
+{
+ const char *probe_name = "syscalls/sys_enter_nanosleep";
+ const char *file = "get_cgroup_id_kern.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ struct perf_event_attr attr = {};
+ int cgroup_fd, cgidmap_fd;
+ struct bpf_object *obj;
+ __u64 kcgid = 0, ucgid;
+ int exit_code = 1;
+ char buf[256];
+ __u32 key = 0;
+
+ err = setup_cgroup_environment();
+ if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
+ errno))
+ return 1;
+
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
+ cgroup_fd, errno))
+ goto cleanup_cgroup_env;
+
+ err = join_cgroup(TEST_CGROUP);
+ if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
+ if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ cgidmap_fd, errno))
+ goto close_prog;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ /* attach to this pid so that all bpf invocations will be in the
+ * cgroup associated with this pid.
+ */
+ pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+ errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* trigger some syscalls */
+ sleep(1);
+
+ err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
+ if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ ucgid = get_cgroup_id(TEST_CGROUP);
+ if (CHECK(kcgid != ucgid, "compare_cgroup_id",
+ "kern cgid %llx user cgid %llx\n", kcgid, ucgid))
+ goto close_pmu;
+
+ exit_code = 0;
+ printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ bpf_object__close(obj);
+cleanup_cgroup_env:
+ cleanup_cgroup_environment();
+ return exit_code;
+}