summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2021-08-18 08:35:23 +0200
committerIngo Molnar <mingo@kernel.org>2021-08-18 08:35:23 +0200
commit7f3b457977d26e2b202d1d043bfef024aa0a9799 (patch)
treecd4722b078fdbc2127d362a295782fa086c62280
parent9ae6ab27f44ee0da47520011afc04218f90e8b12 (diff)
parente04938042d77addc7f41d983aebea125cddbed33 (diff)
downloadlinux-7f3b457977d26e2b202d1d043bfef024aa0a9799.tar.gz
linux-7f3b457977d26e2b202d1d043bfef024aa0a9799.tar.xz
Merge branch 'kcsan' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into locking/debug
Pull KCSAN updates from Paul E. McKenney: - improve comments - introduce CONFIG_KCSAN_STRICT (which RCU uses) - optimize use of get_ctx() by kcsan_found_watchpoint() - rework atomic.h into permissive.h - add the ability to ignore writes that change only one bit of a given data-racy variable. Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--Documentation/dev-tools/kcsan.rst12
-rw-r--r--kernel/kcsan/atomic.h23
-rw-r--r--kernel/kcsan/core.c77
-rw-r--r--kernel/kcsan/kcsan_test.c32
-rw-r--r--kernel/kcsan/permissive.h94
-rw-r--r--lib/Kconfig.kcsan42
6 files changed, 217 insertions, 63 deletions
diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst
index 6a600cf8430b..7db43c7c09b8 100644
--- a/Documentation/dev-tools/kcsan.rst
+++ b/Documentation/dev-tools/kcsan.rst
@@ -127,6 +127,18 @@ Kconfig options:
causes KCSAN to not report data races due to conflicts where the only plain
accesses are aligned writes up to word size.
+* ``CONFIG_KCSAN_PERMISSIVE``: Enable additional permissive rules to ignore
+ certain classes of common data races. Unlike the above, the rules are more
+ complex involving value-change patterns, access type, and address. This
+ option depends on ``CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=y``. For details
+ please see the ``kernel/kcsan/permissive.h``. Testers and maintainers that
+ only focus on reports from specific subsystems and not the whole kernel are
+ recommended to disable this option.
+
+To use the strictest possible rules, select ``CONFIG_KCSAN_STRICT=y``, which
+configures KCSAN to follow the Linux-kernel memory consistency model (LKMM) as
+closely as possible.
+
DebugFS interface
~~~~~~~~~~~~~~~~~
diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
deleted file mode 100644
index 530ae1bda8e7..000000000000
--- a/kernel/kcsan/atomic.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Rules for implicitly atomic memory accesses.
- *
- * Copyright (C) 2019, Google LLC.
- */
-
-#ifndef _KERNEL_KCSAN_ATOMIC_H
-#define _KERNEL_KCSAN_ATOMIC_H
-
-#include <linux/types.h>
-
-/*
- * Special rules for certain memory where concurrent conflicting accesses are
- * common, however, the current convention is to not mark them; returns true if
- * access to @ptr should be considered atomic. Called from slow-path.
- */
-static bool kcsan_is_atomic_special(const volatile void *ptr)
-{
- return false;
-}
-
-#endif /* _KERNEL_KCSAN_ATOMIC_H */
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 26709ea65c71..76e67d1e02d4 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -20,9 +20,9 @@
#include <linux/sched.h>
#include <linux/uaccess.h>
-#include "atomic.h"
#include "encoding.h"
#include "kcsan.h"
+#include "permissive.h"
static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
@@ -301,9 +301,9 @@ static inline void reset_kcsan_skip(void)
this_cpu_write(kcsan_skip, skip_count);
}
-static __always_inline bool kcsan_is_enabled(void)
+static __always_inline bool kcsan_is_enabled(struct kcsan_ctx *ctx)
{
- return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
+ return READ_ONCE(kcsan_enabled) && !ctx->disable_count;
}
/* Introduce delay depending on context and configuration. */
@@ -353,10 +353,18 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
atomic_long_t *watchpoint,
long encoded_watchpoint)
{
+ const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
+ struct kcsan_ctx *ctx = get_ctx();
unsigned long flags;
bool consumed;
- if (!kcsan_is_enabled())
+ /*
+ * We know a watchpoint exists. Let's try to keep the race-window
+ * between here and finally consuming the watchpoint below as small as
+ * possible -- avoid unneccessarily complex code until consumed.
+ */
+
+ if (!kcsan_is_enabled(ctx))
return;
/*
@@ -364,14 +372,22 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
* reporting a race where e.g. the writer set up the watchpoint, but the
* reader has access_mask!=0, we have to ignore the found watchpoint.
*/
- if (get_ctx()->access_mask != 0)
+ if (ctx->access_mask)
return;
/*
- * Consume the watchpoint as soon as possible, to minimize the chances
- * of !consumed. Consuming the watchpoint must always be guarded by
- * kcsan_is_enabled() check, as otherwise we might erroneously
- * triggering reports when disabled.
+ * If the other thread does not want to ignore the access, and there was
+ * a value change as a result of this thread's operation, we will still
+ * generate a report of unknown origin.
+ *
+ * Use CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=n to filter.
+ */
+ if (!is_assert && kcsan_ignore_address(ptr))
+ return;
+
+ /*
+ * Consuming the watchpoint must be guarded by kcsan_is_enabled() to
+ * avoid erroneously triggering reports if the context is disabled.
*/
consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
@@ -391,7 +407,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]);
}
- if ((type & KCSAN_ACCESS_ASSERT) != 0)
+ if (is_assert)
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
else
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]);
@@ -409,6 +425,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
unsigned long access_mask;
enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
unsigned long ua_flags = user_access_save();
+ struct kcsan_ctx *ctx = get_ctx();
unsigned long irq_flags = 0;
/*
@@ -417,16 +434,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
*/
reset_kcsan_skip();
- if (!kcsan_is_enabled())
+ if (!kcsan_is_enabled(ctx))
goto out;
/*
- * Special atomic rules: unlikely to be true, so we check them here in
- * the slow-path, and not in the fast-path in is_atomic(). Call after
- * kcsan_is_enabled(), as we may access memory that is not yet
- * initialized during early boot.
+ * Check to-ignore addresses after kcsan_is_enabled(), as we may access
+ * memory that is not yet initialized during early boot.
*/
- if (!is_assert && kcsan_is_atomic_special(ptr))
+ if (!is_assert && kcsan_ignore_address(ptr))
goto out;
if (!check_encodable((unsigned long)ptr, size)) {
@@ -479,15 +494,6 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
break; /* ignore; we do not diff the values */
}
- if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) {
- kcsan_disable_current();
- pr_err("watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n",
- is_write ? "write" : "read", size, ptr,
- watchpoint_slot((unsigned long)ptr),
- encode_watchpoint((unsigned long)ptr, size, is_write));
- kcsan_enable_current();
- }
-
/*
* Delay this thread, to increase probability of observing a racy
* conflicting access.
@@ -498,7 +504,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
* Re-read value, and check if it is as expected; if not, we infer a
* racy access.
*/
- access_mask = get_ctx()->access_mask;
+ access_mask = ctx->access_mask;
new = 0;
switch (size) {
case 1:
@@ -521,8 +527,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
if (access_mask)
diff &= access_mask;
- /* Were we able to observe a value-change? */
- if (diff != 0)
+ /*
+ * Check if we observed a value change.
+ *
+ * Also check if the data race should be ignored (the rules depend on
+ * non-zero diff); if it is to be ignored, the below rules for
+ * KCSAN_VALUE_CHANGE_MAYBE apply.
+ */
+ if (diff && !kcsan_ignore_data_race(size, type, old, new, diff))
value_change = KCSAN_VALUE_CHANGE_TRUE;
/* Check if this access raced with another. */
@@ -644,6 +656,15 @@ void __init kcsan_init(void)
pr_info("enabled early\n");
WRITE_ONCE(kcsan_enabled, true);
}
+
+ if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) ||
+ IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) ||
+ IS_ENABLED(CONFIG_KCSAN_PERMISSIVE) ||
+ IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) {
+ pr_warn("non-strict mode configured - use CONFIG_KCSAN_STRICT=y to see all data races\n");
+ } else {
+ pr_info("strict mode configured\n");
+ }
}
/* === Exported interface =================================================== */
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 8bcffbdef3d3..dc55fd5a36fc 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -414,6 +414,14 @@ static noinline void test_kernel_atomic_builtins(void)
__atomic_load_n(&test_var, __ATOMIC_RELAXED);
}
+static noinline void test_kernel_xor_1bit(void)
+{
+ /* Do not report data races between the read-writes. */
+ kcsan_nestable_atomic_begin();
+ test_var ^= 0x10000;
+ kcsan_nestable_atomic_end();
+}
+
/* ===== Test cases ===== */
/* Simple test with normal data race. */
@@ -952,6 +960,29 @@ static void test_atomic_builtins(struct kunit *test)
KUNIT_EXPECT_FALSE(test, match_never);
}
+__no_kcsan
+static void test_1bit_value_change(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ { test_kernel_xor_1bit, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) },
+ },
+ };
+ bool match = false;
+
+ begin_test_checks(test_kernel_read, test_kernel_xor_1bit);
+ do {
+ match = IS_ENABLED(CONFIG_KCSAN_PERMISSIVE)
+ ? report_available()
+ : report_matches(&expect);
+ } while (!end_test_checks(match));
+ if (IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
+ KUNIT_EXPECT_FALSE(test, match);
+ else
+ KUNIT_EXPECT_TRUE(test, match);
+}
+
/*
* Generate thread counts for all test cases. Values generated are in interval
* [2, 5] followed by exponentially increasing thread counts from 8 to 32.
@@ -1024,6 +1055,7 @@ static struct kunit_case kcsan_test_cases[] = {
KCSAN_KUNIT_CASE(test_jiffies_noreport),
KCSAN_KUNIT_CASE(test_seqlock_noreport),
KCSAN_KUNIT_CASE(test_atomic_builtins),
+ KCSAN_KUNIT_CASE(test_1bit_value_change),
{},
};
diff --git a/kernel/kcsan/permissive.h b/kernel/kcsan/permissive.h
new file mode 100644
index 000000000000..2c01fe4a59ee
--- /dev/null
+++ b/kernel/kcsan/permissive.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Special rules for ignoring entire classes of data-racy memory accesses. None
+ * of the rules here imply that such data races are generally safe!
+ *
+ * All rules in this file can be configured via CONFIG_KCSAN_PERMISSIVE. Keep
+ * them separate from core code to make it easier to audit.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
+
+#ifndef _KERNEL_KCSAN_PERMISSIVE_H
+#define _KERNEL_KCSAN_PERMISSIVE_H
+
+#include <linux/bitops.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+/*
+ * Access ignore rules based on address.
+ */
+static __always_inline bool kcsan_ignore_address(const volatile void *ptr)
+{
+ if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
+ return false;
+
+ /*
+ * Data-racy bitops on current->flags are too common, ignore completely
+ * for now.
+ */
+ return ptr == &current->flags;
+}
+
+/*
+ * Data race ignore rules based on access type and value change patterns.
+ */
+static bool
+kcsan_ignore_data_race(size_t size, int type, u64 old, u64 new, u64 diff)
+{
+ if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
+ return false;
+
+ /*
+ * Rules here are only for plain read accesses, so that we still report
+ * data races between plain read-write accesses.
+ */
+ if (type || size > sizeof(long))
+ return false;
+
+ /*
+ * A common pattern is checking/setting just 1 bit in a variable; for
+ * example:
+ *
+ * if (flags & SOME_FLAG) { ... }
+ *
+ * and elsewhere flags is updated concurrently:
+ *
+ * flags |= SOME_OTHER_FLAG; // just 1 bit
+ *
+ * While it is still recommended that such accesses be marked
+ * appropriately, in many cases these types of data races are so common
+ * that marking them all is often unrealistic and left to maintainer
+ * preference.
+ *
+ * The assumption in all cases is that with all known compiler
+ * optimizations (including those that tear accesses), because no more
+ * than 1 bit changed, the plain accesses are safe despite the presence
+ * of data races.
+ *
+ * The rules here will ignore the data races if we observe no more than
+ * 1 bit changed.
+ *
+ * Of course many operations can effecively change just 1 bit, but the
+ * general assuption that data races involving 1-bit changes can be
+ * tolerated still applies.
+ *
+ * And in case a true bug is missed, the bug likely manifests as a
+ * reportable data race elsewhere.
+ */
+ if (hweight64(diff) == 1) {
+ /*
+ * Exception: Report data races where the values look like
+ * ordinary booleans (one of them was 0 and the 0th bit was
+ * changed) More often than not, they come with interesting
+ * memory ordering requirements, so let's report them.
+ */
+ if (!((!old || !new) && diff == 1))
+ return true;
+ }
+
+ return false;
+}
+
+#endif /* _KERNEL_KCSAN_PERMISSIVE_H */
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 0440f373248e..e0a93ffdef30 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -40,10 +40,14 @@ menuconfig KCSAN
if KCSAN
-# Compiler capabilities that should not fail the test if they are unavailable.
config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \
(CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1))
+ help
+ The compiler instruments plain compound read-write operations
+ differently (++, --, +=, -=, |=, &=, etc.), which allows KCSAN to
+ distinguish them from other plain accesses. This is currently
+ supported by Clang 12 or later.
config KCSAN_VERBOSE
bool "Show verbose reports with more information about system state"
@@ -58,9 +62,6 @@ config KCSAN_VERBOSE
generated from any one of them, system stability may suffer due to
deadlocks or recursion. If in doubt, say N.
-config KCSAN_DEBUG
- bool "Debugging of KCSAN internals"
-
config KCSAN_SELFTEST
bool "Perform short selftests on boot"
default y
@@ -149,7 +150,8 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
KCSAN_WATCH_SKIP.
config KCSAN_INTERRUPT_WATCHER
- bool "Interruptible watchers"
+ bool "Interruptible watchers" if !KCSAN_STRICT
+ default KCSAN_STRICT
help
If enabled, a task that set up a watchpoint may be interrupted while
delayed. This option will allow KCSAN to detect races between
@@ -169,13 +171,9 @@ config KCSAN_REPORT_ONCE_IN_MS
reporting to avoid flooding the console with reports. Setting this
to 0 disables rate limiting.
-# The main purpose of the below options is to control reported data races (e.g.
-# in fuzzer configs), and are not expected to be switched frequently by other
-# users. We could turn some of them into boot parameters, but given they should
-# not be switched normally, let's keep them here to simplify configuration.
-#
-# The defaults below are chosen to be very conservative, and may miss certain
-# bugs.
+# The main purpose of the below options is to control reported data races, and
+# are not expected to be switched frequently by non-testers or at runtime.
+# The defaults are chosen to be conservative, and can miss certain bugs.
config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
bool "Report races of unknown origin"
@@ -186,9 +184,17 @@ config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
reported if it was only possible to infer a race due to a data value
change while an access is being delayed on a watchpoint.
+config KCSAN_STRICT
+ bool "Strict data-race checking"
+ help
+ KCSAN will report data races with the strictest possible rules, which
+ closely aligns with the rules defined by the Linux-kernel memory
+ consistency model (LKMM).
+
config KCSAN_REPORT_VALUE_CHANGE_ONLY
bool "Only report races where watcher observed a data value change"
default y
+ depends on !KCSAN_STRICT
help
If enabled and a conflicting write is observed via a watchpoint, but
the data value of the memory location was observed to remain
@@ -197,6 +203,7 @@ config KCSAN_REPORT_VALUE_CHANGE_ONLY
config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
bool "Assume that plain aligned writes up to word size are atomic"
default y
+ depends on !KCSAN_STRICT
help
Assume that plain aligned writes up to word size are atomic by
default, and also not subject to other unsafe compiler optimizations
@@ -209,6 +216,7 @@ config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
config KCSAN_IGNORE_ATOMICS
bool "Do not instrument marked atomic accesses"
+ depends on !KCSAN_STRICT
help
Never instrument marked atomic accesses. This option can be used for
additional filtering. Conflicting marked atomic reads and plain
@@ -224,4 +232,14 @@ config KCSAN_IGNORE_ATOMICS
due to two conflicting plain writes will be reported (aligned and
unaligned, if CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n).
+config KCSAN_PERMISSIVE
+ bool "Enable all additional permissive rules"
+ depends on KCSAN_REPORT_VALUE_CHANGE_ONLY
+ help
+ Enable additional permissive rules to ignore certain classes of data
+ races (also see kernel/kcsan/permissive.h). None of the permissive
+ rules imply that such data races are generally safe, but can be used
+ to further reduce reported data races due to data-racy patterns
+ common across the kernel.
+
endif # KCSAN