summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Price <steven.price@arm.com>2019-11-05 09:57:56 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2019-11-05 20:48:36 +1100
commitf8d64f481ec91c4226b6c6ae4c60ad6640bce76d (patch)
tree079f9a78bbd8d303960758becdb0c341b83c9ff2
parent448b6de33073d2c8aec0bb944b3d71099077a224 (diff)
downloadlinux-f8d64f481ec91c4226b6c6ae4c60ad6640bce76d.tar.gz
linux-f8d64f481ec91c4226b6c6ae4c60ad6640bce76d.tar.xz
mm: add generic ptdump
Add a generic version of page table dumping that architectures can opt-in to Link: http://lkml.kernel.org/r/20191028135910.33253-20-steven.price@arm.com Signed-off-by: Steven Price <steven.price@arm.com> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Alexander Potapenko <glider@google.com> Cc: Alexandre Ghiti <alex@ghiti.fr> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: David S. Miller <davem@davemloft.net> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Hogan <jhogan@kernel.org> Cc: James Morse <james.morse@arm.com> Cc: "Liang, Kan" <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Matthew Wilcox <mawilcox@microsoft.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Paul Burton <paul.burton@mips.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Shiraz Hashim <shashim@codeaurora.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Will Deacon <will@kernel.org> Cc: Zong Li <zong.li@sifive.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
-rw-r--r--include/linux/ptdump.h21
-rw-r--r--mm/Kconfig.debug21
-rw-r--r--mm/Makefile1
-rw-r--r--mm/ptdump.c150
4 files changed, 193 insertions, 0 deletions
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
new file mode 100644
index 000000000000..a0fb8dd2be97
--- /dev/null
+++ b/include/linux/ptdump.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_PTDUMP_H
+#define _LINUX_PTDUMP_H
+
+#include <linux/mm_types.h>
+
+struct ptdump_range {
+ unsigned long start;
+ unsigned long end;
+};
+
+struct ptdump_state {
+ void (*note_page)(struct ptdump_state *st, unsigned long addr,
+ int level, unsigned long val);
+ const struct ptdump_range *range;
+};
+
+void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm);
+
+#endif /* _LINUX_PTDUMP_H */
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 327b3ebf23bf..0271b22e063f 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -117,3 +117,24 @@ config DEBUG_RODATA_TEST
depends on STRICT_KERNEL_RWX
---help---
This option enables a testcase for the setting rodata read-only.
+
+config GENERIC_PTDUMP
+ bool
+
+config PTDUMP_CORE
+ bool
+
+config PTDUMP_DEBUGFS
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ depends on DEBUG_FS
+ depends on GENERIC_PTDUMP
+ select PTDUMP_CORE
+ help
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
diff --git a/mm/Makefile b/mm/Makefile
index d996846697ef..6e962007f186 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -107,3 +107,4 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_HMM_MIRROR) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
diff --git a/mm/ptdump.c b/mm/ptdump.c
new file mode 100644
index 000000000000..5d349311e77e
--- /dev/null
+++ b/mm/ptdump.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pagewalk.h>
+#include <linux/ptdump.h>
+#include <linux/kasan.h>
+
+static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pgd_t val = READ_ONCE(*pgd);
+
+ if (pgd_leaf(val))
+ st->note_page(st, addr, 1, pgd_val(val));
+
+ return 0;
+}
+
+static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ p4d_t val = READ_ONCE(*p4d);
+
+ if (p4d_leaf(val))
+ st->note_page(st, addr, 2, p4d_val(val));
+
+ return 0;
+}
+
+static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pud_t val = READ_ONCE(*pud);
+
+ if (pud_leaf(val))
+ st->note_page(st, addr, 3, pud_val(val));
+
+ return 0;
+}
+
+static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pmd_t val = READ_ONCE(*pmd);
+
+ if (pmd_leaf(val))
+ st->note_page(st, addr, 4, pmd_val(val));
+
+ return 0;
+}
+
+static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, 5, pte_val(READ_ONCE(*pte)));
+
+ return 0;
+}
+
+#ifdef CONFIG_KASAN
+/*
+ * This is an optimization for KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_early_shadow_page we could call note_page()
+ * right away without walking through lower level page tables. This saves
+ * us dozens of seconds (minutes for 5-level config) while checking for
+ * W+X mapping or reading kernel_page_tables debugfs file.
+ */
+static inline int note_kasan_page_table(struct mm_walk *walk,
+ unsigned long addr)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, 5, pte_val(kasan_early_shadow_pte[0]));
+ return 1;
+}
+
+static int ptdump_test_p4d(unsigned long addr, unsigned long next,
+ p4d_t *p4d, struct mm_walk *walk)
+{
+#if CONFIG_PGTABLE_LEVELS > 4
+ if (p4d == lm_alias(kasan_early_shadow_p4d))
+ return note_kasan_page_table(walk, addr);
+#endif
+ return 0;
+}
+
+static int ptdump_test_pud(unsigned long addr, unsigned long next,
+ pud_t *pud, struct mm_walk *walk)
+{
+#if CONFIG_PGTABLE_LEVELS > 3
+ if (pud == lm_alias(kasan_early_shadow_pud))
+ return note_kasan_page_table(walk, addr);
+#endif
+ return 0;
+}
+
+static int ptdump_test_pmd(unsigned long addr, unsigned long next,
+ pmd_t *pmd, struct mm_walk *walk)
+{
+#if CONFIG_PGTABLE_LEVELS > 2
+ if (pmd == lm_alias(kasan_early_shadow_pmd))
+ return note_kasan_page_table(walk, addr);
+#endif
+ return 0;
+}
+#endif /* CONFIG_KASAN */
+
+static int ptdump_hole(unsigned long addr, unsigned long next,
+ int depth, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, depth + 1, 0);
+
+ return 0;
+}
+
+const static struct mm_walk_ops ptdump_ops = {
+ .pgd_entry = ptdump_pgd_entry,
+ .p4d_entry = ptdump_p4d_entry,
+ .pud_entry = ptdump_pud_entry,
+ .pmd_entry = ptdump_pmd_entry,
+ .pte_entry = ptdump_pte_entry,
+#ifdef CONFIG_KASAN
+ .test_p4d = ptdump_test_p4d,
+ .test_pud = ptdump_test_pud,
+ .test_pmd = ptdump_test_pmd,
+#endif
+ .pte_hole = ptdump_hole,
+};
+
+void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm)
+{
+ const struct ptdump_range *range = st->range;
+
+ down_read(&mm->mmap_sem);
+ while (range->start != range->end) {
+ walk_page_range(mm, range->start, range->end, &ptdump_ops, st);
+ range++;
+ }
+ up_read(&mm->mmap_sem);
+
+ /* Flush out the last page */
+ st->note_page(st, 0, 0, 0);
+}