Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 25
-rw-r--r-- | arch/x86/mm/extable.c         |  2
-rw-r--r-- | arch/x86/mm/fault.c           |  1
-rw-r--r-- | arch/x86/mm/gup.c             | 28
-rw-r--r-- | arch/x86/mm/hugetlbpage.c     |  1
-rw-r--r-- | arch/x86/mm/init_32.c         |  4
-rw-r--r-- | arch/x86/mm/init_64.c         |  7
-rw-r--r-- | arch/x86/mm/kasan_init_64.c   |  1
-rw-r--r-- | arch/x86/mm/mmap.c            |  3
-rw-r--r-- | arch/x86/mm/mpx.c             |  7
-rw-r--r-- | arch/x86/mm/pageattr.c        | 13
-rw-r--r-- | arch/x86/mm/pat_rbtree.c      | 12
-rw-r--r-- | arch/x86/mm/pgtable.c         | 31
13 files changed, 110 insertions, 25 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 8aa6bea1cd6cc..58b5bee7ea270 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 
+#include <asm/kasan.h>
 #include <asm/pgtable.h>
 
 /*
@@ -51,6 +52,10 @@ enum address_markers_idx {
 	LOW_KERNEL_NR,
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
+#ifdef CONFIG_KASAN
+	KASAN_SHADOW_START_NR,
+	KASAN_SHADOW_END_NR,
+#endif
 # ifdef CONFIG_X86_ESPFIX64
 	ESPFIX_START_NR,
 # endif
@@ -76,6 +81,10 @@ static struct addr_marker address_markers[] = {
 	{ 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
 	{ 0/* VMALLOC_START */, "vmalloc() Area" },
 	{ 0/* VMEMMAP_START */, "Vmemmap" },
+#ifdef CONFIG_KASAN
+	{ KASAN_SHADOW_START,	"KASAN shadow" },
+	{ KASAN_SHADOW_END,	"KASAN shadow end" },
+#endif
 # ifdef CONFIG_X86_ESPFIX64
 	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
 # endif
@@ -327,18 +336,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 
 #if PTRS_PER_PUD > 1
 
+/*
+ * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y.
+ * KASAN fills page tables with the same values. Since there is no
+ * point in checking a page table more than once, we just skip
+ * repeated entries. This saves us dozens of seconds during boot.
+ */
+static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
+{
+	return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
+}
+
 static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 							unsigned long P)
 {
 	int i;
 	pud_t *start;
 	pgprotval_t prot;
+	pud_t *prev_pud = NULL;
 
 	start = (pud_t *) pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
-		if (!pud_none(*start)) {
+		if (!pud_none(*start) &&
+		    !pud_already_checked(prev_pud, start, st->check_wx)) {
 			if (pud_large(*start) || !pud_present(*start)) {
 				prot = pud_flags(*start);
 				note_page(m, st, __pgprot(prot), 2);
@@ -349,6 +371,7 @@
 		} else
 			note_page(m, st, __pgprot(0), 2);
 
+		prev_pud = start;
 		start++;
 	}
 }
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 61a7e9ea9aa16..35ea061010a1a 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,5 +1,7 @@
 #include <linux/extable.h>
 #include <linux/uaccess.h>
+#include <linux/sched/debug.h>
+
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e3254ca0eec4e..428e31763cb93 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
 #include <linux/sched.h>		/* test_thread_flag(), ...	*/
+#include <linux/sched/task_stack.h>	/* task_stack_*(), ...		*/
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
 #include <linux/extable.h>		/* search_exception_tables	*/
 #include <linux/bootmem.h>		/* max_low_pfn			*/
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 0d4fb3ebbbac9..99c7805a96937 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -154,14 +154,12 @@ static inline void get_head_page_multiple(struct page *page, int nr)
 	SetPageReferenced(page);
 }
 
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 		unsigned long end, struct page **pages, int *nr)
 {
 	int nr_start = *nr;
-	unsigned long pfn = pmd_pfn(pmd);
 	struct dev_pagemap *pgmap = NULL;
 
-	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
 	do {
 		struct page *page = pfn_to_page(pfn);
 
@@ -180,6 +178,24 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
 	return 1;
 }
 
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+		unsigned long end, struct page **pages, int *nr)
+{
+	unsigned long fault_pfn;
+
+	fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+		unsigned long end, struct page **pages, int *nr)
+{
+	unsigned long fault_pfn;
+
+	fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
@@ -251,9 +267,13 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
 
 	if (!pte_allows_gup(pud_val(pud), write))
 		return 0;
+
+	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
+	if (pud_devmap(pud))
+		return __gup_device_huge_pud(pud, addr, end, pages, nr);
+
 	/* hugepages are never "special" */
 	VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
-	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
 
 	refs = 0;
 	head = pud_page(pud);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 2ae8584b44c73..c5066a260803d 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/sched/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/err.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 928d657de8295..2b4b53e6793f1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -864,9 +864,6 @@ static noinline int do_test_wp_bit(void)
 	return flag;
 }
 
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
 int kernel_set_to_readonly __read_mostly;
 
 void set_kernel_text_rw(void)
@@ -939,7 +936,6 @@ void mark_rodata_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 		size >> 10);
-	rodata_test();
 
 #ifdef CONFIG_CPA_DEBUG
 	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index af85b686a7b0a..15173d37f3996 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -679,7 +679,7 @@ static void __meminit free_pagetable(struct page *page, int order)
 
 	if (PageReserved(page)) {
 		__ClearPageReserved(page);
-		magic = (unsigned long)page->lru.next;
+		magic = (unsigned long)page->freelist;
 		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 			while (nr_pages--)
 				put_page_bootmem(page++);
@@ -1000,9 +1000,6 @@ void __init mem_init(void)
 	mem_init_print_info(NULL);
 }
 
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
 int kernel_set_to_readonly;
 
 void set_kernel_text_rw(void)
@@ -1071,8 +1068,6 @@ void mark_rodata_ro(void)
 	all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
 	set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
 
-	rodata_test();
-
 #ifdef CONFIG_CPA_DEBUG
 	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
 	set_memory_rw(start, (end-start) >> PAGE_SHIFT);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0493c17b8a516..8d63d7a104c3c 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -4,6 +4,7 @@
 #include <linux/kdebug.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/sched/task.h>
 #include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index d2dc0438d654a..7940166c799b7 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -28,7 +28,8 @@
 #include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/limits.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
 #include <asm/elf.h>
 
 struct va_alignment __read_mostly va_align = {
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index af59f808742f9..5126dfd52b182 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -7,6 +7,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/mm_types.h>
 #include <linux/syscalls.h>
 #include <linux/sched/sysctl.h>
 
@@ -51,7 +52,7 @@ static unsigned long mpx_mmap(unsigned long len)
 
 	down_write(&mm->mmap_sem);
 	addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
-		       MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
+		       MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
 	up_write(&mm->mmap_sem);
 	if (populate)
 		mm_populate(addr, populate);
@@ -796,7 +797,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
 			return -EINVAL;
 
 		len = min(vma->vm_end, end) - addr;
-		zap_page_range(vma, addr, len, NULL);
+		zap_page_range(vma, addr, len);
 		trace_mpx_unmap_zap(addr, addr+len);
 
 		vma = vma->vm_next;
@@ -893,7 +894,7 @@ static int unmap_entire_bt(struct mm_struct *mm,
 	 * avoid recursion, do_munmap() will check whether it comes
 	 * from one bounds table through VM_MPX flag.
 	 */
-	return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
+	return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
 }
 
 static int try_unmap_single_bt(struct mm_struct *mm,
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 5a287e523eab0..28d42130243c0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -214,7 +214,20 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 			    int in_flags, struct page **pages)
 {
 	unsigned int i, level;
+#ifdef CONFIG_PREEMPT
+	/*
+	 * Avoid wbinvd() because it causes latencies on all CPUs,
+	 * regardless of any CPU isolation that may be in effect.
+	 *
+	 * This should be extended for CAT enabled systems independent of
+	 * PREEMPT because wbinvd() does not respect the CAT partitions and
+	 * this is exposed to unprivileged users through the graphics
+	 * subsystem.
+	 */
+	unsigned long do_wbinvd = 0;
+#else
 	unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
+#endif
 
 	BUG_ON(irqs_disabled());
 
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 159b52ccd600e..d76485b228243 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -47,7 +47,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
 {
 	u64 ret = 0;
 	if (node) {
-		struct memtype *data = container_of(node, struct memtype, rb);
+		struct memtype *data = rb_entry(node, struct memtype, rb);
 		ret = data->subtree_max_end;
 	}
 	return ret;
@@ -79,7 +79,7 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
 	struct memtype *last_lower = NULL;
 
 	while (node) {
-		struct memtype *data = container_of(node, struct memtype, rb);
+		struct memtype *data = rb_entry(node, struct memtype, rb);
 
 		if (get_subtree_max_end(node->rb_left) > start) {
 			/* Lowest overlap if any must be on left side */
@@ -121,7 +121,7 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
 
 		node = rb_next(&match->rb);
 		if (node)
-			match = container_of(node, struct memtype, rb);
+			match = rb_entry(node, struct memtype, rb);
 		else
 			match = NULL;
 	}
@@ -150,7 +150,7 @@ static int memtype_rb_check_conflict(struct rb_root *root,
 
 	node = rb_next(&match->rb);
 	while (node) {
-		match = container_of(node, struct memtype, rb);
+		match = rb_entry(node, struct memtype, rb);
 
 		if (match->start >= end) /* Checked all possible matches */
 			goto success;
@@ -181,7 +181,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 	struct rb_node *parent = NULL;
 
 	while (*node) {
-		struct memtype *data = container_of(*node, struct memtype, rb);
+		struct memtype *data = rb_entry(*node, struct memtype, rb);
 
 		parent = *node;
 		if (data->subtree_max_end < newdata->end)
@@ -270,7 +270,7 @@ int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
 	}
 
 	if (node) { /* pos == i */
-		struct memtype *this = container_of(node, struct memtype, rb);
+		struct memtype *this = rb_entry(node, struct memtype, rb);
 		*out = *this;
 		return 0;
 	} else {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3feec5af4e67c..6cbdff26bb96a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 
 	return changed;
 }
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pud_t *pudp, pud_t entry, int dirty)
+{
+	int changed = !pud_same(*pudp, entry);
+
+	VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+
+	if (changed && dirty) {
+		*pudp = entry;
+		/*
+		 * We had a write-protection fault here and changed the pud
+		 * to be more permissive. No need to flush the TLB for that,
+		 * #PF is architecturally guaranteed to do that and in the
+		 * worst-case we'll generate a spurious fault.
+		 */
+	}
+
+	return changed;
+}
 #endif
 
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pud_t *pudp)
+{
+	int ret = 0;
+
+	if (pud_young(*pudp))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 (unsigned long *)pudp);
+
+	return ret;
+}
 #endif
 
 int ptep_clear_flush_young(struct vm_area_struct *vma,
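
For context on the pat_rbtree.c hunks: in the kernel, rb_entry() is defined in include/linux/rbtree.h as a straight alias for container_of(), so the conversion above changes no behavior; it only names the intent of recovering the structure that embeds an rb_node. A minimal userspace sketch of the same pattern, with simplified stand-in types (the field values here are illustrative, not from the kernel):

	#include <stddef.h>
	#include <stdio.h>

	/* Simplified stand-ins for the kernel's container_of()/rb_entry(). */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))
	#define rb_entry(ptr, type, member) container_of(ptr, type, member)

	struct rb_node {			/* minimal stand-in node */
		struct rb_node *rb_left, *rb_right;
	};

	struct memtype {			/* illustrative payload */
		unsigned long start, end;
		struct rb_node rb;		/* embedded tree linkage */
	};

	int main(void)
	{
		struct memtype mt = { .start = 0x1000, .end = 0x2000 };
		struct rb_node *node = &mt.rb;

		/* Recover the enclosing struct memtype from the embedded node. */
		struct memtype *data = rb_entry(node, struct memtype, rb);

		printf("start=%#lx end=%#lx\n", data->start, data->end);
		return 0;
	}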
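
The gup.c refactor hinges on one arithmetic step shared by the PMD and PUD paths: the faulting pfn is the huge mapping's base pfn plus the 4K-page offset of the address within that mapping. A standalone sketch of the PMD-sized case (the 2M size matches x86-64; the sample base_pfn and addr values are assumptions for illustration):

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PMD_SHIFT	21			/* 2M huge page on x86-64 */
	#define PMD_MASK	(~((1UL << PMD_SHIFT) - 1))

	int main(void)
	{
		unsigned long base_pfn = 0x100000;	/* pfn of the 2M page's first 4K frame (assumed) */
		unsigned long addr = 0x7f0000123000;	/* faulting virtual address (assumed) */

		/*
		 * addr & ~PMD_MASK isolates the byte offset inside the 2M
		 * region; shifting by PAGE_SHIFT converts it to 4K pages.
		 */
		unsigned long fault_pfn = base_pfn + ((addr & ~PMD_MASK) >> PAGE_SHIFT);

		printf("fault_pfn = %#lx\n", fault_pfn);	/* 0x100000 + 0x123 */
		return 0;
	}

The PUD path is identical with PUD_SHIFT (30 on x86-64, a 1G mapping) in place of PMD_SHIFT, which is exactly why the diff folds both into one __gup_device_huge() helper taking a precomputed pfn.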