Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 18
-rw-r--r--  arch/x86/Kconfig.debug | 10
-rw-r--r--  arch/x86/Makefile | 6
-rw-r--r--  arch/x86/boot/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/acpi.c | 338
-rw-r--r--  arch/x86/boot/compressed/cmdline.c | 4
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 21
-rw-r--r--  arch/x86/boot/compressed/kaslr.c | 75
-rw-r--r--  arch/x86/boot/compressed/misc.c | 3
-rw-r--r--  arch/x86/boot/compressed/misc.h | 23
-rw-r--r--  arch/x86/boot/compressed/pgtable.h | 2
-rw-r--r--  arch/x86/boot/compressed/pgtable_64.c | 19
-rw-r--r--  arch/x86/boot/compressed/vmlinux.lds.S | 2
-rw-r--r--  arch/x86/boot/setup.ld | 2
-rw-r--r--  arch/x86/boot/string.c | 141
-rw-r--r--  arch/x86/boot/string.h | 1
-rw-r--r--  arch/x86/configs/i386_defconfig | 3
-rw-r--r--  arch/x86/configs/x86_64_defconfig | 4
-rw-r--r--  arch/x86/crypto/aegis128-aesni-glue.c | 38
-rw-r--r--  arch/x86/crypto/aegis128l-aesni-glue.c | 38
-rw-r--r--  arch/x86/crypto/aegis256-aesni-glue.c | 38
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 47
-rw-r--r--  arch/x86/crypto/crct10dif-pcl-asm_64.S | 782
-rw-r--r--  arch/x86/crypto/crct10dif-pclmul_glue.c | 12
-rw-r--r--  arch/x86/crypto/morus1280_glue.c | 40
-rw-r--r--  arch/x86/crypto/morus640_glue.c | 39
-rw-r--r--  arch/x86/crypto/poly1305-sse2-x86_64.S | 4
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 6
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl | 85
-rw-r--r--  arch/x86/entry/syscalls/syscall_64.tbl | 6
-rw-r--r--  arch/x86/events/amd/ibs.c | 13
-rw-r--r--  arch/x86/events/amd/iommu.c | 6
-rw-r--r--  arch/x86/events/amd/power.c | 10
-rw-r--r--  arch/x86/events/amd/uncore.c | 7
-rw-r--r--  arch/x86/events/core.c | 14
-rw-r--r--  arch/x86/events/intel/bts.c | 4
-rw-r--r--  arch/x86/events/intel/core.c | 162
-rw-r--r--  arch/x86/events/intel/cstate.c | 12
-rw-r--r--  arch/x86/events/intel/ds.c | 2
-rw-r--r--  arch/x86/events/intel/lbr.c | 1
-rw-r--r--  arch/x86/events/intel/pt.c | 14
-rw-r--r--  arch/x86/events/intel/rapl.c | 9
-rw-r--r--  arch/x86/events/intel/uncore.c | 9
-rw-r--r--  arch/x86/events/intel/uncore_snb.c | 9
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 4
-rw-r--r--  arch/x86/events/msr.c | 10
-rw-r--r--  arch/x86/events/perf_event.h | 31
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 157
-rw-r--r--  arch/x86/include/asm/a.out-core.h | 67
-rw-r--r--  arch/x86/include/asm/alternative.h | 39
-rw-r--r--  arch/x86/include/asm/asm-prototypes.h | 1
-rw-r--r--  arch/x86/include/asm/cpu_device_id.h | 28
-rw-r--r--  arch/x86/include/asm/efi.h | 1
-rw-r--r--  arch/x86/include/asm/fpu/internal.h | 57
-rw-r--r--  arch/x86/include/asm/fpu/types.h | 7
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h | 2
-rw-r--r--  arch/x86/include/asm/intel-family.h | 5
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 2
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 18
-rw-r--r--  arch/x86/include/asm/msr.h | 16
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/paravirt.h | 13
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 5
-rw-r--r--  arch/x86/include/asm/pci.h | 3
-rw-r--r--  arch/x86/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 3
-rw-r--r--  arch/x86/include/asm/processor.h | 1
-rw-r--r--  arch/x86/include/asm/refcount.h | 22
-rw-r--r--  arch/x86/include/asm/resctrl_sched.h | 4
-rw-r--r--  arch/x86/include/asm/uaccess.h | 36
-rw-r--r--  arch/x86/include/asm/unistd.h | 8
-rw-r--r--  arch/x86/include/asm/uv/bios.h | 13
-rw-r--r--  arch/x86/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/x86/include/uapi/asm/socket.h | 1
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S | 2
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S | 12
-rw-r--r--  arch/x86/kernel/alternative.c | 7
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 7
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 8
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 14
-rw-r--r--  arch/x86/kernel/cpu/cacheinfo.c | 1
-rw-r--r--  arch/x86/kernel/cpu/match.c | 31
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c | 1
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c | 3
-rw-r--r--  arch/x86/kernel/cpu/resctrl/Makefile | 4
-rw-r--r--  arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 7
-rw-r--r--  arch/x86/kernel/crash.c | 1
-rw-r--r--  arch/x86/kernel/e820.c | 9
-rw-r--r--  arch/x86/kernel/early_printk.c | 4
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 2
-rw-r--r--  arch/x86/kernel/ftrace.c | 3
-rw-r--r--  arch/x86/kernel/hpet.c | 4
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 5
-rw-r--r--  arch/x86/kernel/kexec-bzimage64.c | 9
-rw-r--r--  arch/x86/kernel/kgdb.c | 1
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 7
-rw-r--r--  arch/x86/kernel/kprobes/opt.c | 4
-rw-r--r--  arch/x86/kernel/kvm.c | 7
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 3
-rw-r--r--  arch/x86/kernel/process.c | 12
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 10
-rw-r--r--  arch/x86/kernel/traps.c | 6
-rw-r--r--  arch/x86/kernel/tsc.c | 30
-rw-r--r--  arch/x86/kernel/uprobes.c | 1
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 4
-rw-r--r--  arch/x86/kvm/Makefile | 4
-rw-r--r--  arch/x86/kvm/cpuid.c | 4
-rw-r--r--  arch/x86/kvm/hyperv.c | 7
-rw-r--r--  arch/x86/kvm/lapic.c | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 19
-rw-r--r--  arch/x86/kvm/svm.c | 31
-rw-r--r--  arch/x86/kvm/trace.h | 2
-rw-r--r--  arch/x86/kvm/vmx/evmcs.c | 7
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 27
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 179
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 10
-rw-r--r--  arch/x86/kvm/x86.c | 17
-rw-r--r--  arch/x86/lib/insn-eval.c | 2
-rw-r--r--  arch/x86/lib/iomem.c | 33
-rw-r--r--  arch/x86/lib/kaslr.c | 4
-rw-r--r--  arch/x86/lib/usercopy_32.c | 8
-rw-r--r--  arch/x86/mm/cpu_entry_area.c | 2
-rw-r--r--  arch/x86/mm/dump_pagetables.c | 2
-rw-r--r--  arch/x86/mm/extable.c | 59
-rw-r--r--  arch/x86/mm/fault.c | 4
-rw-r--r--  arch/x86/mm/ioremap.c | 4
-rw-r--r--  arch/x86/mm/mem_encrypt_identity.c | 4
-rw-r--r--  arch/x86/mm/mpx.c | 2
-rw-r--r--  arch/x86/mm/numa.c | 4
-rw-r--r--  arch/x86/mm/pageattr.c | 50
-rw-r--r--  arch/x86/mm/tlb.c | 3
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 46
-rw-r--r--  arch/x86/net/bpf_jit_comp32.c | 121
-rw-r--r--  arch/x86/platform/efi/Makefile | 1
-rw-r--r--  arch/x86/platform/efi/early_printk.c | 240
-rw-r--r--  arch/x86/platform/efi/quirks.c | 6
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c | 1
-rw-r--r--  arch/x86/platform/uv/bios_uv.c | 35
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 8
-rw-r--r--  arch/x86/realmode/rm/Makefile | 2
-rw-r--r--  arch/x86/realmode/rm/realmode.lds.S | 2
-rw-r--r--  arch/x86/um/Kconfig | 2
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 5
-rw-r--r--  arch/x86/xen/mmu.h | 4
-rw-r--r--  arch/x86/xen/mmu_pv.c | 8
-rw-r--r--  arch/x86/xen/time.c | 12
151 files changed, 1946 insertions, 1908 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 15af091611e29..90b562a34d654 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,7 +14,6 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select CLKSRC_I8253
select CLONE_BACKWARDS
- select HAVE_AOUT
select HAVE_GENERIC_DMA_COHERENT
select MODULES_USE_ELF_REL
select OLD_SIGACTION
@@ -47,6 +46,7 @@ config X86
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ANON_INODES
+ select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_DATA
select ARCH_CLOCKSOURCE_INIT
select ARCH_DISCARD_MEMBLOCK
@@ -198,7 +198,7 @@ config X86
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
select PCI_DOMAINS if PCI
- select PCI_LOCKLESS_CONFIG
+ select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
select RTC_LIB
select RTC_MC146818_LIB
@@ -446,12 +446,12 @@ config RETPOLINE
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.
-config X86_RESCTRL
- bool "Resource Control support"
+config X86_CPU_RESCTRL
+ bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
select KERNFS
help
- Enable Resource Control support.
+ Enable x86 CPU resource control support.
Provide support for the allocation and monitoring of system resources
usage by the CPU.
@@ -617,7 +617,7 @@ config X86_INTEL_QUARK
config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
- depends on X86 && ACPI
+ depends on X86 && ACPI && PCI
select COMMON_CLK
select PINCTRL
select IOSF_MBI
@@ -1510,6 +1510,7 @@ config AMD_MEM_ENCRYPT
bool "AMD Secure Memory Encryption (SME) support"
depends on X86_64 && CPU_SUP_AMD
select DYNAMIC_PHYSICAL_MASK
+ select ARCH_USE_MEMREMAP_PROT
---help---
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
@@ -1529,10 +1530,6 @@ config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
If set to N, then the encryption of system memory can be
activated with the mem_encrypt=on command line option.
-config ARCH_USE_MEMREMAP_PROT
- def_bool y
- depends on AMD_MEM_ENCRYPT
-
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
@@ -2843,6 +2840,7 @@ config IA32_EMULATION
config IA32_AOUT
tristate "IA32 a.out support"
depends on IA32_EMULATION
+ depends on BROKEN
---help---
Support old a.out binaries in the 32bit emulation.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0723dff17e6cb..15d0fbe278726 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -40,16 +40,6 @@ config EARLY_PRINTK_DBGP
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash. You need usb debug device.
-config EARLY_PRINTK_EFI
- bool "Early printk via the EFI framebuffer"
- depends on EFI && EARLY_PRINTK
- select FONT_SUPPORT
- ---help---
- Write kernel log output directly into the EFI framebuffer.
-
- This is useful for kernel debugging when your machine crashes very
- early before the console code is initialized.
-
config EARLY_PRINTK_USB_XDBC
bool "Early printk via the xHCI debug port"
depends on EARLY_PRINTK && PCI
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9c5a67d1b9c1b..2d8b9d8ca4f87 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -187,7 +187,6 @@ cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
# does binutils support specific instructions?
-asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
@@ -217,6 +216,11 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
+ # Additionally, avoid generating expensive indirect jumps which
+ # are subject to retpolines for small number of switch cases.
+ # clang turns off jump table generation by default when under
+ # retpoline builds, however, gcc does not for x86.
+ KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
endif
archscripts: scripts_basic
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 9b5adae9cc40c..e2839b5c246c2 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -100,7 +100,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
AFLAGS_header.o += -I$(objtree)/$(obj)
$(obj)/header.o: $(obj)/zoffset.h
-LDFLAGS_setup.elf := -T
+LDFLAGS_setup.elf := -m elf_i386 -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f0515ac895a43..6b84afdd75382 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -84,6 +84,8 @@ ifdef CONFIG_X86_64
vmlinux-objs-y += $(obj)/pgtable_64.o
endif
+vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
+
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
new file mode 100644
index 0000000000000..0ef4ad55b29b2
--- /dev/null
+++ b/arch/x86/boot/compressed/acpi.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BOOT_CTYPE_H
+#include "misc.h"
+#include "error.h"
+#include "../string.h"
+
+#include <linux/numa.h>
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+/*
+ * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
+ * for termination.
+ */
+#define MAX_ACPI_ARG_LENGTH 10
+
+/*
+ * Immovable memory regions representation. Max amount of memory regions is
+ * MAX_NUMNODES*2.
+ */
+struct mem_vector immovable_mem[MAX_NUMNODES*2];
+
+/*
+ * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
+ * digits, and '\0' for termination.
+ */
+#define MAX_ADDR_LEN 19
+
+static acpi_physical_address get_acpi_rsdp(void)
+{
+ acpi_physical_address addr = 0;
+
+#ifdef CONFIG_KEXEC
+ char val[MAX_ADDR_LEN] = { };
+ int ret;
+
+ ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
+ if (ret < 0)
+ return 0;
+
+ if (kstrtoull(val, 16, &addr))
+ return 0;
+#endif
+ return addr;
+}
+
+/* Search EFI system tables for RSDP. */
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+ acpi_physical_address rsdp_addr = 0;
+
+#ifdef CONFIG_EFI
+ unsigned long systab, systab_tables, config_tables;
+ unsigned int nr_tables;
+ struct efi_info *ei;
+ bool efi_64;
+ int size, i;
+ char *sig;
+
+ ei = &boot_params->efi_info;
+ sig = (char *)&ei->efi_loader_signature;
+
+ if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+ efi_64 = true;
+ } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
+ efi_64 = false;
+ } else {
+ debug_putstr("Wrong EFI loader signature.\n");
+ return 0;
+ }
+
+ /* Get systab from boot params. */
+#ifdef CONFIG_X86_64
+ systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
+#else
+ if (ei->efi_systab_hi || ei->efi_memmap_hi) {
+ debug_putstr("Error getting RSDP address: EFI system table located above 4GB.\n");
+ return 0;
+ }
+ systab = ei->efi_systab;
+#endif
+ if (!systab)
+ error("EFI system table not found.");
+
+ /* Handle EFI bitness properly */
+ if (efi_64) {
+ efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab;
+
+ config_tables = stbl->tables;
+ nr_tables = stbl->nr_tables;
+ size = sizeof(efi_config_table_64_t);
+ } else {
+ efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
+
+ config_tables = stbl->tables;
+ nr_tables = stbl->nr_tables;
+ size = sizeof(efi_config_table_32_t);
+ }
+
+ if (!config_tables)
+ error("EFI config tables not found.");
+
+ /* Get EFI tables from systab. */
+ for (i = 0; i < nr_tables; i++) {
+ acpi_physical_address table;
+ efi_guid_t guid;
+
+ config_tables += size;
+
+ if (efi_64) {
+ efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
+
+ guid = tbl->guid;
+ table = tbl->table;
+
+ if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+ debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+ return 0;
+ }
+ } else {
+ efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
+
+ guid = tbl->guid;
+ table = tbl->table;
+ }
+
+ if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+ rsdp_addr = table;
+ else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+ return table;
+ }
+#endif
+ return rsdp_addr;
+}
+
+static u8 compute_checksum(u8 *buffer, u32 length)
+{
+ u8 *end = buffer + length;
+ u8 sum = 0;
+
+ while (buffer < end)
+ sum += *(buffer++);
+
+ return sum;
+}
+
+/* Search a block of memory for the RSDP signature. */
+static u8 *scan_mem_for_rsdp(u8 *start, u32 length)
+{
+ struct acpi_table_rsdp *rsdp;
+ u8 *address, *end;
+
+ end = start + length;
+
+ /* Search from given start address for the requested length */
+ for (address = start; address < end; address += ACPI_RSDP_SCAN_STEP) {
+ /*
+ * Both RSDP signature and checksum must be correct.
+ * Note: Sometimes there exists more than one RSDP in memory;
+ * the valid RSDP has a valid checksum, all others have an
+ * invalid checksum.
+ */
+ rsdp = (struct acpi_table_rsdp *)address;
+
+ /* BAD Signature */
+ if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature))
+ continue;
+
+ /* Check the standard checksum */
+ if (compute_checksum((u8 *)rsdp, ACPI_RSDP_CHECKSUM_LENGTH))
+ continue;
+
+ /* Check extended checksum if table version >= 2 */
+ if ((rsdp->revision >= 2) &&
+ (compute_checksum((u8 *)rsdp, ACPI_RSDP_XCHECKSUM_LENGTH)))
+ continue;
+
+ /* Signature and checksum valid, we have found a real RSDP */
+ return address;
+ }
+ return NULL;
+}
+
+/* Search RSDP address in EBDA. */
+static acpi_physical_address bios_get_rsdp_addr(void)
+{
+ unsigned long address;
+ u8 *rsdp;
+
+ /* Get the location of the Extended BIOS Data Area (EBDA) */
+ address = *(u16 *)ACPI_EBDA_PTR_LOCATION;
+ address <<= 4;
+
+ /*
+ * Search EBDA paragraphs (EBDA is required to be a minimum of
+ * 1K length)
+ */
+ if (address > 0x400) {
+ rsdp = scan_mem_for_rsdp((u8 *)address, ACPI_EBDA_WINDOW_SIZE);
+ if (rsdp)
+ return (acpi_physical_address)(unsigned long)rsdp;
+ }
+
+ /* Search upper memory: 16-byte boundaries in E0000h-FFFFFh */
+ rsdp = scan_mem_for_rsdp((u8 *) ACPI_HI_RSDP_WINDOW_BASE,
+ ACPI_HI_RSDP_WINDOW_SIZE);
+ if (rsdp)
+ return (acpi_physical_address)(unsigned long)rsdp;
+
+ return 0;
+}
+
+/* Return RSDP address on success, otherwise 0. */
+acpi_physical_address get_rsdp_addr(void)
+{
+ acpi_physical_address pa;
+
+ pa = get_acpi_rsdp();
+
+ if (!pa)
+ pa = boot_params->acpi_rsdp_addr;
+
+ if (!pa)
+ pa = efi_get_rsdp_addr();
+
+ if (!pa)
+ pa = bios_get_rsdp_addr();
+
+ return pa;
+}
+
+#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE)
+/* Compute SRAT address from RSDP. */
+static unsigned long get_acpi_srat_table(void)
+{
+ unsigned long root_table, acpi_table;
+ struct acpi_table_header *header;
+ struct acpi_table_rsdp *rsdp;
+ u32 num_entries, size, len;
+ char arg[10];
+ u8 *entry;
+
+ rsdp = (struct acpi_table_rsdp *)(long)boot_params->acpi_rsdp_addr;
+ if (!rsdp)
+ return 0;
+
+ /* Get ACPI root table from RSDP.*/
+ if (!(cmdline_find_option("acpi", arg, sizeof(arg)) == 4 &&
+ !strncmp(arg, "rsdt", 4)) &&
+ rsdp->xsdt_physical_address &&
+ rsdp->revision > 1) {
+ root_table = rsdp->xsdt_physical_address;
+ size = ACPI_XSDT_ENTRY_SIZE;
+ } else {
+ root_table = rsdp->rsdt_physical_address;
+ size = ACPI_RSDT_ENTRY_SIZE;
+ }
+
+ if (!root_table)
+ return 0;
+
+ header = (struct acpi_table_header *)root_table;
+ len = header->length;
+ if (len < sizeof(struct acpi_table_header) + size)
+ return 0;
+
+ num_entries = (len - sizeof(struct acpi_table_header)) / size;
+ entry = (u8 *)(root_table + sizeof(struct acpi_table_header));
+
+ while (num_entries--) {
+ if (size == ACPI_RSDT_ENTRY_SIZE)
+ acpi_table = *(u32 *)entry;
+ else
+ acpi_table = *(u64 *)entry;
+
+ if (acpi_table) {
+ header = (struct acpi_table_header *)acpi_table;
+
+ if (ACPI_COMPARE_NAME(header->signature, ACPI_SIG_SRAT))
+ return acpi_table;
+ }
+ entry += size;
+ }
+ return 0;
+}
+
+/**
+ * count_immovable_mem_regions - Parse SRAT and cache the immovable
+ * memory regions into the immovable_mem array.
+ *
+ * Return the number of immovable memory regions on success, 0 on failure:
+ *
+ * - Too many immovable memory regions
+ * - ACPI off or no SRAT found
+ * - No immovable memory region found.
+ */
+int count_immovable_mem_regions(void)
+{
+ unsigned long table_addr, table_end, table;
+ struct acpi_subtable_header *sub_table;
+ struct acpi_table_header *table_header;
+ char arg[MAX_ACPI_ARG_LENGTH];
+ int num = 0;
+
+ if (cmdline_find_option("acpi", arg, sizeof(arg)) == 3 &&
+ !strncmp(arg, "off", 3))
+ return 0;
+
+ table_addr = get_acpi_srat_table();
+ if (!table_addr)
+ return 0;
+
+ table_header = (struct acpi_table_header *)table_addr;
+ table_end = table_addr + table_header->length;
+ table = table_addr + sizeof(struct acpi_table_srat);
+
+ while (table + sizeof(struct acpi_subtable_header) < table_end) {
+ sub_table = (struct acpi_subtable_header *)table;
+ if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
+ struct acpi_srat_mem_affinity *ma;
+
+ ma = (struct acpi_srat_mem_affinity *)sub_table;
+ if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && ma->length) {
+ immovable_mem[num].start = ma->base_address;
+ immovable_mem[num].size = ma->length;
+ num++;
+ }
+
+ if (num >= MAX_NUMNODES*2) {
+ debug_putstr("Too many immovable memory regions, aborting.\n");
+ return 0;
+ }
+ }
+ table += sub_table->length;
+ }
+ return num;
+}
+#endif /* CONFIG_RANDOMIZE_BASE && CONFIG_MEMORY_HOTREMOVE */
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index af6cda0b7900f..f1add5d85da9d 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
-
static unsigned long fs;
static inline void set_fs(unsigned long seg)
{
@@ -30,5 +28,3 @@ int cmdline_find_option_bool(const char *option)
{
return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
}
-
-#endif
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 64037895b0859..fafb75c6c5925 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -358,8 +358,11 @@ ENTRY(startup_64)
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
*
- * Address of the trampoline is returned in RAX.
- * Non zero RDX on return means we need to enable 5-level paging.
+ * paging_prepare() returns a two-quadword structure which lands
+ * into RDX:RAX:
+ * - Address of the trampoline is returned in RAX.
+ * - Non zero RDX means trampoline needs to enable 5-level
+ * paging.
*
* RSI holds real mode data and needs to be preserved across
* this function call.
@@ -565,7 +568,7 @@ adjust_got:
*
* RDI contains the return address (might be above 4G).
* ECX contains the base address of the trampoline memory.
- * Non zero RDX on return means we need to enable 5-level paging.
+ * Non zero RDX means trampoline needs to enable 5-level paging.
*/
ENTRY(trampoline_32bit_src)
/* Set up data and stack segments */
@@ -600,6 +603,16 @@ ENTRY(trampoline_32bit_src)
leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
movl %eax, %cr3
3:
+ /* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
+ pushl %ecx
+ pushl %edx
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+ popl %edx
+ popl %ecx
+
/* Enable PAE and LA57 (if required) paging modes */
movl $X86_CR4_PAE, %eax
cmpl $0, %edx
@@ -645,8 +658,6 @@ no_longmode:
.data
gdt64:
.word gdt_end - gdt
- .long 0
- .word 0
.quad 0
gdt:
.word gdt_end - gdt
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 9ed9709d9947a..fa0332dda9f2c 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -87,10 +87,6 @@ static unsigned long get_boot_seed(void)
#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"
-struct mem_vector {
- unsigned long long start;
- unsigned long long size;
-};
/* Only supporting at most 4 unusable memmap regions with kaslr */
#define MAX_MEMMAP_REGIONS 4
@@ -101,6 +97,8 @@ static bool memmap_too_large;
/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
static unsigned long long mem_limit = ULLONG_MAX;
+/* Number of immovable memory regions */
+static int num_immovable_mem;
enum mem_avoid_index {
MEM_AVOID_ZO_RANGE = 0,
@@ -417,6 +415,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* Mark the memmap regions we need to avoid */
handle_mem_options();
+ /* Enumerate the immovable memory regions */
+ num_immovable_mem = count_immovable_mem_regions();
+
#ifdef CONFIG_X86_VERBOSE_BOOTUP
/* Make sure video RAM can be used. */
add_identity_map(0, PMD_SIZE);
@@ -572,9 +573,9 @@ static unsigned long slots_fetch_random(void)
return 0;
}
-static void process_mem_region(struct mem_vector *entry,
- unsigned long minimum,
- unsigned long image_size)
+static void __process_mem_region(struct mem_vector *entry,
+ unsigned long minimum,
+ unsigned long image_size)
{
struct mem_vector region, overlap;
unsigned long start_orig, end;
@@ -650,6 +651,56 @@ static void process_mem_region(struct mem_vector *entry,
}
}
+static bool process_mem_region(struct mem_vector *region,
+ unsigned long long minimum,
+ unsigned long long image_size)
+{
+ int i;
+ /*
+ * If no immovable memory found, or MEMORY_HOTREMOVE disabled,
+ * use @region directly.
+ */
+ if (!num_immovable_mem) {
+ __process_mem_region(region, minimum, image_size);
+
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
+ return 1;
+ }
+ return 0;
+ }
+
+#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
+ /*
+ * If immovable memory found, filter the intersection between
+ * immovable memory and @region.
+ */
+ for (i = 0; i < num_immovable_mem; i++) {
+ unsigned long long start, end, entry_end, region_end;
+ struct mem_vector entry;
+
+ if (!mem_overlaps(region, &immovable_mem[i]))
+ continue;
+
+ start = immovable_mem[i].start;
+ end = start + immovable_mem[i].size;
+ region_end = region->start + region->size;
+
+ entry.start = clamp(region->start, start, end);
+ entry_end = clamp(region_end, start, end);
+ entry.size = entry_end - entry.start;
+
+ __process_mem_region(&entry, minimum, image_size);
+
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
+ return 1;
+ }
+ }
+ return 0;
+#endif
+}
+
#ifdef CONFIG_EFI
/*
* Returns true if mirror region found (and must have been processed
@@ -715,11 +766,8 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
region.start = md->phys_addr;
region.size = md->num_pages << EFI_PAGE_SHIFT;
- process_mem_region(&region, minimum, image_size);
- if (slot_area_index == MAX_SLOT_AREA) {
- debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+ if (process_mem_region(&region, minimum, image_size))
break;
- }
}
return true;
}
@@ -746,11 +794,8 @@ static void process_e820_entries(unsigned long minimum,
continue;
region.start = entry->addr;
region.size = entry->size;
- process_mem_region(&region, minimum, image_size);
- if (slot_area_index == MAX_SLOT_AREA) {
- debug_putstr("Aborted e820 scan (slot_areas full)!\n");
+ if (process_mem_region(&region, minimum, image_size))
break;
- }
}
}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 8dd1d5ccae580..c0d6c560df69e 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -351,6 +351,9 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
/* Clear flags intended for solely in-kernel use. */
boot_params->hdr.loadflags &= ~KASLR_FLAG;
+ /* Save RSDP address for later use. */
+ boot_params->acpi_rsdp_addr = get_rsdp_addr();
+
sanitize_boot_params(boot_params);
if (boot_params->screen_info.orig_video_mode == 7) {
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index a1d5918765f36..fd13655e0f9b0 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -25,6 +25,9 @@
#include <asm/bootparam.h>
#include <asm/bootparam_utils.h>
+#define BOOT_CTYPE_H
+#include <linux/acpi.h>
+
#define BOOT_BOOT_H
#include "../ctype.h"
@@ -63,12 +66,14 @@ static inline void debug_puthex(const char *s)
#endif
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
-#endif
+struct mem_vector {
+ unsigned long long start;
+ unsigned long long size;
+};
#if CONFIG_RANDOMIZE_BASE
/* kaslr.c */
@@ -116,3 +121,17 @@ static inline void console_init(void)
void set_sev_encryption_mask(void);
#endif
+
+/* acpi.c */
+#ifdef CONFIG_ACPI
+acpi_physical_address get_rsdp_addr(void);
+#else
+static inline acpi_physical_address get_rsdp_addr(void) { return 0; }
+#endif
+
+#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
+extern struct mem_vector immovable_mem[MAX_NUMNODES*2];
+int count_immovable_mem_regions(void);
+#else
+static inline int count_immovable_mem_regions(void) { return 0; }
+#endif
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 91f75638f6e68..6ff7e81b56284 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -6,7 +6,7 @@
#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE 0x60
+#define TRAMPOLINE_32BIT_CODE_SIZE 0x70
#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 9e21573714910..f8debf7aeb4c1 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,5 +1,7 @@
+#include <linux/efi.h>
#include <asm/e820/types.h>
#include <asm/processor.h>
+#include <asm/efi.h>
#include "pgtable.h"
#include "../string.h"
@@ -37,9 +39,10 @@ int cmdline_find_option_bool(const char *option);
static unsigned long find_trampoline_placement(void)
{
- unsigned long bios_start, ebda_start;
+ unsigned long bios_start = 0, ebda_start = 0;
unsigned long trampoline_start;
struct boot_e820_entry *entry;
+ char *signature;
int i;
/*
@@ -47,8 +50,18 @@ static unsigned long find_trampoline_placement(void)
* This code is based on reserve_bios_regions().
*/
- ebda_start = *(unsigned short *)0x40e << 4;
- bios_start = *(unsigned short *)0x413 << 10;
+ /*
+ * EFI systems may not provide legacy ROM. The memory may not be mapped
+ * at all.
+ *
+ * Only look for values in the legacy ROM for non-EFI systems.
+ */
+ signature = (char *)&boot_params->efi_info.efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) {
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
+ }
if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
bios_start = BIOS_START_MAX;
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index f491bbde84931..508cfa6828c5d 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm-generic/vmlinux.lds.h>
-OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
#undef i386
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 96a6c75635383..0149e41d42c27 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -3,7 +3,7 @@
*
* Linker script for the i386 setup code
*/
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(_start)
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index c4428a1769733..315a67b8896b9 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -13,10 +13,14 @@
*/
#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
#include <asm/asm.h>
#include "ctype.h"
#include "string.h"
+#define KSTRTOX_OVERFLOW (1U << 31)
+
/*
* Undef these macros so that the functions that we provide
* here will have the correct names regardless of how string.h
@@ -187,3 +191,140 @@ char *strchr(const char *s, int c)
return NULL;
return (char *)s;
}
+
+static inline u64 __div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ union {
+ u64 v64;
+ u32 v32[2];
+ } d = { dividend };
+ u32 upper;
+
+ upper = d.v32[1];
+ d.v32[1] = 0;
+ if (upper >= divisor) {
+ d.v32[1] = upper / divisor;
+ upper %= divisor;
+ }
+ asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) :
+ "rm" (divisor), "0" (d.v32[0]), "1" (upper));
+ return d.v64;
+}
+
+static inline u64 __div_u64(u64 dividend, u32 divisor)
+{
+ u32 remainder;
+
+ return __div_u64_rem(dividend, divisor, &remainder);
+}
+
+static inline char _tolower(const char c)
+{
+ return c | 0x20;
+}
+
+static const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+ if (*base == 0) {
+ if (s[0] == '0') {
+ if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+ *base = 16;
+ else
+ *base = 8;
+ } else
+ *base = 10;
+ }
+ if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+ s += 2;
+ return s;
+}
+
+/*
+ * Convert non-negative integer string representation in explicitly given radix
+ * to an integer.
+ * Return number of characters consumed maybe or-ed with overflow bit.
+ * If overflow occurs, result integer (incorrect) is still returned.
+ *
+ * Don't you dare use this function.
+ */
+static unsigned int _parse_integer(const char *s,
+ unsigned int base,
+ unsigned long long *p)
+{
+ unsigned long long res;
+ unsigned int rv;
+
+ res = 0;
+ rv = 0;
+ while (1) {
+ unsigned int c = *s;
+ unsigned int lc = c | 0x20; /* don't tolower() this line */
+ unsigned int val;
+
+ if ('0' <= c && c <= '9')
+ val = c - '0';
+ else if ('a' <= lc && lc <= 'f')
+ val = lc - 'a' + 10;
+ else
+ break;
+
+ if (val >= base)
+ break;
+ /*
+ * Check for overflow only if we are within range of
+ * it in the max base we support (16)
+ */
+ if (unlikely(res & (~0ull << 60))) {
+ if (res > __div_u64(ULLONG_MAX - val, base))
+ rv |= KSTRTOX_OVERFLOW;
+ }
+ res = res * base + val;
+ rv++;
+ s++;
+ }
+ *p = res;
+ return rv;
+}
+
+static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ unsigned long long _res;
+ unsigned int rv;
+
+ s = _parse_integer_fixup_radix(s, &base);
+ rv = _parse_integer(s, base, &_res);
+ if (rv & KSTRTOX_OVERFLOW)
+ return -ERANGE;
+ if (rv == 0)
+ return -EINVAL;
+ s += rv;
+ if (*s == '\n')
+ s++;
+ if (*s)
+ return -EINVAL;
+ *res = _res;
+ return 0;
+}
+
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ if (s[0] == '+')
+ s++;
+ return _kstrtoull(s, base, res);
+}
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 3d78e27077f41..38d8f2f5e47e2 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -29,4 +29,5 @@ extern unsigned int atou(const char *s);
extern unsigned long long simple_strtoull(const char *cp, char **endp,
unsigned int base);
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res);
#endif /* BOOT_STRING_H */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 4bb95d7ad9475..9f908112bbb97 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
@@ -310,3 +309,5 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_EFI_STUB=y
+CONFIG_ACPI_BGRT=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 0fed049422a8f..1d3badfda09ee 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -286,7 +286,6 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
@@ -308,3 +307,6 @@ CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_EFI_STUB=y
+CONFIG_EFI_MIXED=y
+CONFIG_ACPI_BGRT=y
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 2a356b948720e..3ea71b8718135 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis128_aesni_process_ad(
}
static void crypto_aegis128_aesni_process_crypt(
- struct aegis_state *state, struct aead_request *req,
+ struct aegis_state *state, struct skcipher_walk *walk,
const struct aegis_crypt_ops *ops)
{
- struct skcipher_walk walk;
- u8 *src, *dst;
- unsigned int chunksize, base;
-
- ops->skcipher_walk_init(&walk, req, false);
-
- while (walk.nbytes) {
- src = walk.src.virt.addr;
- dst = walk.dst.virt.addr;
- chunksize = walk.nbytes;
-
- ops->crypt_blocks(state, chunksize, src, dst);
-
- base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
- src += base;
- dst += base;
- chunksize &= AEGIS128_BLOCK_SIZE - 1;
-
- if (chunksize > 0)
- ops->crypt_tail(state, chunksize, src, dst);
+ while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
+ ops->crypt_blocks(state,
+ round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
+ walk->src.virt.addr, walk->dst.virt.addr);
+ skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+ }
- skcipher_walk_done(&walk, 0);
+ if (walk->nbytes) {
+ ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+ walk->dst.virt.addr);
+ skcipher_walk_done(walk, 0);
}
}
@@ -186,13 +175,16 @@ static void crypto_aegis128_aesni_crypt(struct aead_request *req,
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
+ struct skcipher_walk walk;
struct aegis_state state;
+ ops->skcipher_walk_init(&walk, req, true);
+
kernel_fpu_begin();
crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
- crypto_aegis128_aesni_process_crypt(&state, req, ops);
+ crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index dbe8bb980da15..1b1b39c66c5e2 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis128l_aesni_process_ad(
}
static void crypto_aegis128l_aesni_process_crypt(
- struct aegis_state *state, struct aead_request *req,
+ struct aegis_state *state, struct skcipher_walk *walk,
const struct aegis_crypt_ops *ops)
{
- struct skcipher_walk walk;
- u8 *src, *dst;
- unsigned int chunksize, base;
-
- ops->skcipher_walk_init(&walk, req, false);
-
- while (walk.nbytes) {
- src = walk.src.virt.addr;
- dst = walk.dst.virt.addr;
- chunksize = walk.nbytes;
-
- ops->crypt_blocks(state, chunksize, src, dst);
-
- base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
- src += base;
- dst += base;
- chunksize &= AEGIS128L_BLOCK_SIZE - 1;
-
- if (chunksize > 0)
- ops->crypt_tail(state, chunksize, src, dst);
+ while (walk->nbytes >= AEGIS128L_BLOCK_SIZE) {
+ ops->crypt_blocks(state, round_down(walk->nbytes,
+ AEGIS128L_BLOCK_SIZE),
+ walk->src.virt.addr, walk->dst.virt.addr);
+ skcipher_walk_done(walk, walk->nbytes % AEGIS128L_BLOCK_SIZE);
+ }
- skcipher_walk_done(&walk, 0);
+ if (walk->nbytes) {
+ ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+ walk->dst.virt.addr);
+ skcipher_walk_done(walk, 0);
}
}
@@ -186,13 +175,16 @@ static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
+ struct skcipher_walk walk;
struct aegis_state state;
+ ops->skcipher_walk_init(&walk, req, true);
+
kernel_fpu_begin();
crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
- crypto_aegis128l_aesni_process_crypt(&state, req, ops);
+ crypto_aegis128l_aesni_process_crypt(&state, &walk, ops);
crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 8bebda2de92fe..6227ca3220a05 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis256_aesni_process_ad(
}
static void crypto_aegis256_aesni_process_crypt(
- struct aegis_state *state, struct aead_request *req,
+ struct aegis_state *state, struct skcipher_walk *walk,
const struct aegis_crypt_ops *ops)
{
- struct skcipher_walk walk;
- u8 *src, *dst;
- unsigned int chunksize, base;
-
- ops->skcipher_walk_init(&walk, req, false);
-
- while (walk.nbytes) {
- src = walk.src.virt.addr;
- dst = walk.dst.virt.addr;
- chunksize = walk.nbytes;
-
- ops->crypt_blocks(state, chunksize, src, dst);
-
- base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
- src += base;
- dst += base;
- chunksize &= AEGIS256_BLOCK_SIZE - 1;
-
- if (chunksize > 0)
- ops->crypt_tail(state, chunksize, src, dst);
+ while (walk->nbytes >= AEGIS256_BLOCK_SIZE) {
+ ops->crypt_blocks(state,
+ round_down(walk->nbytes, AEGIS256_BLOCK_SIZE),
+ walk->src.virt.addr, walk->dst.virt.addr);
+ skcipher_walk_done(walk, walk->nbytes % AEGIS256_BLOCK_SIZE);
+ }
- skcipher_walk_done(&walk, 0);
+ if (walk->nbytes) {
+ ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+ walk->dst.virt.addr);
+ skcipher_walk_done(walk, 0);
}
}
@@ -186,13 +175,16 @@ static void crypto_aegis256_aesni_crypt(struct aead_request *req,
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
+ struct skcipher_walk walk;
struct aegis_state state;
+ ops->skcipher_walk_init(&walk, req, true);
+
kernel_fpu_begin();
crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
- crypto_aegis256_aesni_process_crypt(&state, req, ops);
+ crypto_aegis256_aesni_process_crypt(&state, &walk, ops);
crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 1321700d6647f..1e3d2102033a0 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -175,26 +175,18 @@ asmlinkage void aesni_gcm_finalize(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
-static struct aesni_gcm_tfm_s {
-void (*init)(void *ctx,
- struct gcm_context_data *gdata,
- u8 *iv,
- u8 *hash_subkey, const u8 *aad,
- unsigned long aad_len);
-void (*enc_update)(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in,
- unsigned long plaintext_len);
-void (*dec_update)(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in,
- unsigned long ciphertext_len);
-void (*finalize)(void *ctx,
- struct gcm_context_data *gdata,
- u8 *auth_tag, unsigned long auth_tag_len);
+static const struct aesni_gcm_tfm_s {
+ void (*init)(void *ctx, struct gcm_context_data *gdata, u8 *iv,
+ u8 *hash_subkey, const u8 *aad, unsigned long aad_len);
+ void (*enc_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
+ const u8 *in, unsigned long plaintext_len);
+ void (*dec_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
+ const u8 *in, unsigned long ciphertext_len);
+ void (*finalize)(void *ctx, struct gcm_context_data *gdata,
+ u8 *auth_tag, unsigned long auth_tag_len);
} *aesni_gcm_tfm;
-struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
+static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
.init = &aesni_gcm_init,
.enc_update = &aesni_gcm_enc_update,
.dec_update = &aesni_gcm_dec_update,
@@ -243,7 +235,7 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
+static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
.init = &aesni_gcm_init_avx_gen2,
.enc_update = &aesni_gcm_enc_update_avx_gen2,
.dec_update = &aesni_gcm_dec_update_avx_gen2,
@@ -288,7 +280,7 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
+static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
.init = &aesni_gcm_init_avx_gen4,
.enc_update = &aesni_gcm_enc_update_avx_gen4,
.dec_update = &aesni_gcm_dec_update_avx_gen4,
@@ -778,7 +770,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
+ const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
struct gcm_context_data data AESNI_ALIGN_ATTR;
struct scatter_walk dst_sg_walk = {};
unsigned long left = req->cryptlen;
@@ -821,11 +813,14 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
}
- src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
- scatterwalk_start(&src_sg_walk, src_sg);
- if (req->src != req->dst) {
- dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen);
- scatterwalk_start(&dst_sg_walk, dst_sg);
+ if (left) {
+ src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
+ scatterwalk_start(&src_sg_walk, src_sg);
+ if (req->src != req->dst) {
+ dst_sg = scatterwalk_ffwd(dst_start, req->dst,
+ req->assoclen);
+ scatterwalk_start(&dst_sg_walk, dst_sg);
+ }
}
kernel_fpu_begin();
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index de04d3e98d8d3..3d873e67749d7 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -43,609 +43,291 @@
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-########################################################################
-# Function API:
-# UINT16 crc_t10dif_pcl(
-# UINT16 init_crc, //initial CRC value, 16 bits
-# const unsigned char *buf, //buffer pointer to calculate CRC on
-# UINT64 len //buffer length in bytes (64-bit data)
-# );
#
# Reference paper titled "Fast CRC Computation for Generic
# Polynomials Using PCLMULQDQ Instruction"
# URL: http://www.intel.com/content/dam/www/public/us/en/documents
# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
#
-#
#include <linux/linkage.h>
.text
-#define arg1 %rdi
-#define arg2 %rsi
-#define arg3 %rdx
-
-#define arg1_low32 %edi
+#define init_crc %edi
+#define buf %rsi
+#define len %rdx
+
+#define FOLD_CONSTS %xmm10
+#define BSWAP_MASK %xmm11
+
+# Fold reg1, reg2 into the next 32 data bytes, storing the result back into
+# reg1, reg2.
+.macro fold_32_bytes offset, reg1, reg2
+ movdqu \offset(buf), %xmm9
+ movdqu \offset+16(buf), %xmm12
+ pshufb BSWAP_MASK, %xmm9
+ pshufb BSWAP_MASK, %xmm12
+ movdqa \reg1, %xmm8
+ movdqa \reg2, %xmm13
+ pclmulqdq $0x00, FOLD_CONSTS, \reg1
+ pclmulqdq $0x11, FOLD_CONSTS, %xmm8
+ pclmulqdq $0x00, FOLD_CONSTS, \reg2
+ pclmulqdq $0x11, FOLD_CONSTS, %xmm13
+ pxor %xmm9 , \reg1
+ xorps %xmm8 , \reg1
+ pxor %xmm12, \reg2
+ xorps %xmm13, \reg2
+.endm
+
+# Fold src_reg into dst_reg.
+.macro fold_16_bytes src_reg, dst_reg
+ movdqa \src_reg, %xmm8
+ pclmulqdq $0x11, FOLD_CONSTS, \src_reg
+ pclmulqdq $0x00, FOLD_CONSTS, %xmm8
+ pxor %xmm8, \dst_reg
+ xorps \src_reg, \dst_reg
+.endm
-ENTRY(crc_t10dif_pcl)
+#
+# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len);
+#
+# Assumes len >= 16.
+#
.align 16
+ENTRY(crc_t10dif_pcl)
- # adjust the 16-bit initial_crc value, scale it to 32 bits
- shl $16, arg1_low32
-
- # Allocate Stack Space
- mov %rsp, %rcx
- sub $16*2, %rsp
- # align stack to 16 byte boundary
- and $~(0x10 - 1), %rsp
-
- # check if smaller than 256
- cmp $256, arg3
-
- # for sizes less than 128, we can't fold 64B at a time...
- jl _less_than_128
-
-
- # load the initial crc value
- movd arg1_low32, %xmm10 # initial crc
-
- # crc value does not need to be byte-reflected, but it needs
- # to be moved to the high part of the register.
- # because data will be byte-reflected and will align with
- # initial crc at correct place.
- pslldq $12, %xmm10
-
- movdqa SHUF_MASK(%rip), %xmm11
- # receive the initial 64B data, xor the initial crc value
- movdqu 16*0(arg2), %xmm0
- movdqu 16*1(arg2), %xmm1
- movdqu 16*2(arg2), %xmm2
- movdqu 16*3(arg2), %xmm3
- movdqu 16*4(arg2), %xmm4
- movdqu 16*5(arg2), %xmm5
- movdqu 16*6(arg2), %xmm6
- movdqu 16*7(arg2), %xmm7
-
- pshufb %xmm11, %xmm0
- # XOR the initial_crc value
- pxor %xmm10, %xmm0
- pshufb %xmm11, %xmm1
- pshufb %xmm11, %xmm2
- pshufb %xmm11, %xmm3
- pshufb %xmm11, %xmm4
- pshufb %xmm11, %xmm5
- pshufb %xmm11, %xmm6
- pshufb %xmm11, %xmm7
-
- movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
- #imm value of pclmulqdq instruction
- #will determine which constant to use
-
- #################################################################
- # we subtract 256 instead of 128 to save one instruction from the loop
- sub $256, arg3
-
- # at this section of the code, there is 64*x+y (0<=y<64) bytes of
- # buffer. The _fold_64_B_loop will fold 64B at a time
- # until we have 64+y Bytes of buffer
-
-
- # fold 64B at a time. This section of the code folds 4 xmm
- # registers in parallel
-_fold_64_B_loop:
-
- # update the buffer pointer
- add $128, arg2 # buf += 64#
-
- movdqu 16*0(arg2), %xmm9
- movdqu 16*1(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm0, %xmm8
- movdqa %xmm1, %xmm13
- pclmulqdq $0x0 , %xmm10, %xmm0
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0 , %xmm10, %xmm1
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm0
- xorps %xmm8 , %xmm0
- pxor %xmm12, %xmm1
- xorps %xmm13, %xmm1
-
- movdqu 16*2(arg2), %xmm9
- movdqu 16*3(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm2, %xmm8
- movdqa %xmm3, %xmm13
- pclmulqdq $0x0, %xmm10, %xmm2
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0, %xmm10, %xmm3
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm2
- xorps %xmm8 , %xmm2
- pxor %xmm12, %xmm3
- xorps %xmm13, %xmm3
-
- movdqu 16*4(arg2), %xmm9
- movdqu 16*5(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm4, %xmm8
- movdqa %xmm5, %xmm13
- pclmulqdq $0x0, %xmm10, %xmm4
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0, %xmm10, %xmm5
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm4
- xorps %xmm8 , %xmm4
- pxor %xmm12, %xmm5
- xorps %xmm13, %xmm5
-
- movdqu 16*6(arg2), %xmm9
- movdqu 16*7(arg2), %xmm12
- pshufb %xmm11, %xmm9
- pshufb %xmm11, %xmm12
- movdqa %xmm6 , %xmm8
- movdqa %xmm7 , %xmm13
- pclmulqdq $0x0 , %xmm10, %xmm6
- pclmulqdq $0x11, %xmm10, %xmm8
- pclmulqdq $0x0 , %xmm10, %xmm7
- pclmulqdq $0x11, %xmm10, %xmm13
- pxor %xmm9 , %xmm6
- xorps %xmm8 , %xmm6
- pxor %xmm12, %xmm7
- xorps %xmm13, %xmm7
-
- sub $128, arg3
-
- # check if there is another 64B in the buffer to be able to fold
- jge _fold_64_B_loop
- ##################################################################
-
-
- add $128, arg2
- # at this point, the buffer pointer is pointing at the last y Bytes
- # of the buffer the 64B of folded data is in 4 of the xmm
- # registers: xmm0, xmm1, xmm2, xmm3
-
-
- # fold the 8 xmm registers to 1 xmm register with different constants
-
- movdqa rk9(%rip), %xmm10
- movdqa %xmm0, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm0
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm0, %xmm7
-
- movdqa rk11(%rip), %xmm10
- movdqa %xmm1, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm1
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm1, %xmm7
-
- movdqa rk13(%rip), %xmm10
- movdqa %xmm2, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm2
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- pxor %xmm2, %xmm7
-
- movdqa rk15(%rip), %xmm10
- movdqa %xmm3, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm3
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm3, %xmm7
-
- movdqa rk17(%rip), %xmm10
- movdqa %xmm4, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm4
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- pxor %xmm4, %xmm7
-
- movdqa rk19(%rip), %xmm10
- movdqa %xmm5, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm5
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- xorps %xmm5, %xmm7
-
- movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
- #imm value of pclmulqdq instruction
- #will determine which constant to use
- movdqa %xmm6, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm6
- pclmulqdq $0x0 , %xmm10, %xmm8
- pxor %xmm8, %xmm7
- pxor %xmm6, %xmm7
-
-
- # instead of 64, we add 48 to the loop counter to save 1 instruction
- # from the loop instead of a cmp instruction, we use the negative
- # flag with the jl instruction
- add $128-16, arg3
- jl _final_reduction_for_128
-
- # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
- # and the rest is in memory. We can fold 16 bytes at a time if y>=16
- # continue folding 16B at a time
-
-_16B_reduction_loop:
+ movdqa .Lbswap_mask(%rip), BSWAP_MASK
+
+ # For sizes less than 256 bytes, we can't fold 128 bytes at a time.
+ cmp $256, len
+ jl .Lless_than_256_bytes
+
+ # Load the first 128 data bytes. Byte swapping is necessary to make the
+ # bit order match the polynomial coefficient order.
+ movdqu 16*0(buf), %xmm0
+ movdqu 16*1(buf), %xmm1
+ movdqu 16*2(buf), %xmm2
+ movdqu 16*3(buf), %xmm3
+ movdqu 16*4(buf), %xmm4
+ movdqu 16*5(buf), %xmm5
+ movdqu 16*6(buf), %xmm6
+ movdqu 16*7(buf), %xmm7
+ add $128, buf
+ pshufb BSWAP_MASK, %xmm0
+ pshufb BSWAP_MASK, %xmm1
+ pshufb BSWAP_MASK, %xmm2
+ pshufb BSWAP_MASK, %xmm3
+ pshufb BSWAP_MASK, %xmm4
+ pshufb BSWAP_MASK, %xmm5
+ pshufb BSWAP_MASK, %xmm6
+ pshufb BSWAP_MASK, %xmm7
+
+ # XOR the first 16 data *bits* with the initial CRC value.
+ pxor %xmm8, %xmm8
+ pinsrw $7, init_crc, %xmm8
+ pxor %xmm8, %xmm0
+
+ movdqa .Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS
+
+ # Subtract 128 for the 128 data bytes just consumed. Subtract another
+ # 128 to simplify the termination condition of the following loop.
+ sub $256, len
+
+ # While >= 128 data bytes remain (not counting xmm0-7), fold the 128
+ # bytes xmm0-7 into them, storing the result back into xmm0-7.
+.Lfold_128_bytes_loop:
+ fold_32_bytes 0, %xmm0, %xmm1
+ fold_32_bytes 32, %xmm2, %xmm3
+ fold_32_bytes 64, %xmm4, %xmm5
+ fold_32_bytes 96, %xmm6, %xmm7
+ add $128, buf
+ sub $128, len
+ jge .Lfold_128_bytes_loop
+
+ # Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
+
+ # Fold across 64 bytes.
+ movdqa .Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS
+ fold_16_bytes %xmm0, %xmm4
+ fold_16_bytes %xmm1, %xmm5
+ fold_16_bytes %xmm2, %xmm6
+ fold_16_bytes %xmm3, %xmm7
+ # Fold across 32 bytes.
+ movdqa .Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS
+ fold_16_bytes %xmm4, %xmm6
+ fold_16_bytes %xmm5, %xmm7
+ # Fold across 16 bytes.
+ movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
+ fold_16_bytes %xmm6, %xmm7
+
+ # Add 128 to get the correct number of data bytes remaining in 0...127
+ # (not counting xmm7), following the previous extra subtraction by 128.
+ # Then subtract 16 to simplify the termination condition of the
+ # following loop.
+ add $128-16, len
+
+ # While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
+ # xmm7 into them, storing the result back into xmm7.
+ jl .Lfold_16_bytes_loop_done
+.Lfold_16_bytes_loop:
movdqa %xmm7, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm7
- pclmulqdq $0x0 , %xmm10, %xmm8
+ pclmulqdq $0x11, FOLD_CONSTS, %xmm7
+ pclmulqdq $0x00, FOLD_CONSTS, %xmm8
pxor %xmm8, %xmm7
- movdqu (arg2), %xmm0
- pshufb %xmm11, %xmm0
+ movdqu (buf), %xmm0
+ pshufb BSWAP_MASK, %xmm0
pxor %xmm0 , %xmm7
- add $16, arg2
- sub $16, arg3
- # instead of a cmp instruction, we utilize the flags with the
- # jge instruction equivalent of: cmp arg3, 16-16
- # check if there is any more 16B in the buffer to be able to fold
- jge _16B_reduction_loop
-
- #now we have 16+z bytes left to reduce, where 0<= z < 16.
- #first, we reduce the data in the xmm7 register
-
-
-_final_reduction_for_128:
- # check if any more data to fold. If not, compute the CRC of
- # the final 128 bits
- add $16, arg3
- je _128_done
-
- # here we are getting data that is less than 16 bytes.
- # since we know that there was data before the pointer, we can
- # offset the input pointer before the actual point, to receive
- # exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_xmms:
+ add $16, buf
+ sub $16, len
+ jge .Lfold_16_bytes_loop
+
+.Lfold_16_bytes_loop_done:
+ # Add 16 to get the correct number of data bytes remaining in 0...15
+ # (not counting xmm7), following the previous extra subtraction by 16.
+ add $16, len
+ je .Lreduce_final_16_bytes
+
+.Lhandle_partial_segment:
+	# Reduce the last '16 + len' bytes, where 1 <= len <= 15: the first 16
+	# bytes are in xmm7 and the remaining 'len' bytes are in 'buf'.  To do
+ # this without needing a fold constant for each possible 'len', redivide
+ # the bytes into a first chunk of 'len' bytes and a second chunk of 16
+ # bytes, then fold the first chunk into the second.
+
movdqa %xmm7, %xmm2
- movdqu -16(arg2, arg3), %xmm1
- pshufb %xmm11, %xmm1
+ # xmm1 = last 16 original data bytes
+ movdqu -16(buf, len), %xmm1
+ pshufb BSWAP_MASK, %xmm1
- # get rid of the extra data that was loaded before
- # load the shift constant
- lea pshufb_shf_table+16(%rip), %rax
- sub arg3, %rax
+ # xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes.
+ lea .Lbyteshift_table+16(%rip), %rax
+ sub len, %rax
movdqu (%rax), %xmm0
-
- # shift xmm2 to the left by arg3 bytes
pshufb %xmm0, %xmm2
- # shift xmm7 to the right by 16-arg3 bytes
- pxor mask1(%rip), %xmm0
+ # xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
+ pxor .Lmask1(%rip), %xmm0
pshufb %xmm0, %xmm7
+
+ # xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
+ # then '16-len' bytes from xmm2 (high-order bytes).
pblendvb %xmm2, %xmm1 #xmm0 is implicit
- # fold 16 Bytes
- movdqa %xmm1, %xmm2
+ # Fold the first chunk into the second chunk, storing the result in xmm7.
movdqa %xmm7, %xmm8
- pclmulqdq $0x11, %xmm10, %xmm7
- pclmulqdq $0x0 , %xmm10, %xmm8
+ pclmulqdq $0x11, FOLD_CONSTS, %xmm7
+ pclmulqdq $0x00, FOLD_CONSTS, %xmm8
pxor %xmm8, %xmm7
- pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm7
-_128_done:
- # compute crc of a 128-bit value
- movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
- movdqa %xmm7, %xmm0
+.Lreduce_final_16_bytes:
+ # Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
- #64b fold
- pclmulqdq $0x1, %xmm10, %xmm7
- pslldq $8 , %xmm0
- pxor %xmm0, %xmm7
+ # Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
+ movdqa .Lfinal_fold_consts(%rip), FOLD_CONSTS
- #32b fold
+ # Fold the high 64 bits into the low 64 bits, while also multiplying by
+ # x^64. This produces a 128-bit value congruent to x^64 * M(x) and
+ # whose low 48 bits are 0.
movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x))
+ pslldq $8, %xmm0
+ pxor %xmm0, %xmm7 # + low bits * x^64
- pand mask2(%rip), %xmm0
-
- psrldq $12, %xmm7
- pclmulqdq $0x10, %xmm10, %xmm7
- pxor %xmm0, %xmm7
-
- #barrett reduction
-_barrett:
- movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
+ # Fold the high 32 bits into the low 96 bits. This produces a 96-bit
+ # value congruent to x^64 * M(x) and whose low 48 bits are 0.
movdqa %xmm7, %xmm0
- pclmulqdq $0x01, %xmm10, %xmm7
- pslldq $4, %xmm7
- pclmulqdq $0x11, %xmm10, %xmm7
+ pand .Lmask2(%rip), %xmm0 # zero high 32 bits
+ psrldq $12, %xmm7 # extract high 32 bits
+ pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x))
+ pxor %xmm0, %xmm7 # + low bits
- pslldq $4, %xmm7
- pxor %xmm0, %xmm7
- pextrd $1, %xmm7, %eax
+ # Load G(x) and floor(x^48 / G(x)).
+ movdqa .Lbarrett_reduction_consts(%rip), FOLD_CONSTS
-_cleanup:
- # scale the result back to 16 bits
- shr $16, %eax
- mov %rcx, %rsp
+ # Use Barrett reduction to compute the final CRC value.
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x))
+ psrlq $32, %xmm7 # /= x^32
+ pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # *= G(x)
+ psrlq $48, %xmm0
+ pxor %xmm7, %xmm0 # + low 16 nonzero bits
+ # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
+
+ pextrw $0, %xmm0, %eax
ret
-########################################################################
-
.align 16
-_less_than_128:
-
- # check if there is enough buffer to be able to fold 16B at a time
- cmp $32, arg3
- jl _less_than_32
- movdqa SHUF_MASK(%rip), %xmm11
+.Lless_than_256_bytes:
+ # Checksumming a buffer of length 16...255 bytes
- # now if there is, load the constants
- movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
+ # Load the first 16 data bytes.
+ movdqu (buf), %xmm7
+ pshufb BSWAP_MASK, %xmm7
+ add $16, buf
- movd arg1_low32, %xmm0 # get the initial crc value
- pslldq $12, %xmm0 # align it to its correct place
- movdqu (arg2), %xmm7 # load the plaintext
- pshufb %xmm11, %xmm7 # byte-reflect the plaintext
+ # XOR the first 16 data *bits* with the initial CRC value.
+ pxor %xmm0, %xmm0
+ pinsrw $7, init_crc, %xmm0
pxor %xmm0, %xmm7
-
- # update the buffer pointer
- add $16, arg2
-
- # update the counter. subtract 32 instead of 16 to save one
- # instruction from the loop
- sub $32, arg3
-
- jmp _16B_reduction_loop
-
-
-.align 16
-_less_than_32:
- # mov initial crc to the return value. this is necessary for
- # zero-length buffers.
- mov arg1_low32, %eax
- test arg3, arg3
- je _cleanup
-
- movdqa SHUF_MASK(%rip), %xmm11
-
- movd arg1_low32, %xmm0 # get the initial crc value
- pslldq $12, %xmm0 # align it to its correct place
-
- cmp $16, arg3
- je _exact_16_left
- jl _less_than_16_left
-
- movdqu (arg2), %xmm7 # load the plaintext
- pshufb %xmm11, %xmm7 # byte-reflect the plaintext
- pxor %xmm0 , %xmm7 # xor the initial crc value
- add $16, arg2
- sub $16, arg3
- movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
- jmp _get_last_two_xmms
-
-
-.align 16
-_less_than_16_left:
- # use stack space to load data less than 16 bytes, zero-out
- # the 16B in memory first.
-
- pxor %xmm1, %xmm1
- mov %rsp, %r11
- movdqa %xmm1, (%r11)
-
- cmp $4, arg3
- jl _only_less_than_4
-
- # backup the counter value
- mov arg3, %r9
- cmp $8, arg3
- jl _less_than_8_left
-
- # load 8 Bytes
- mov (arg2), %rax
- mov %rax, (%r11)
- add $8, %r11
- sub $8, arg3
- add $8, arg2
-_less_than_8_left:
-
- cmp $4, arg3
- jl _less_than_4_left
-
- # load 4 Bytes
- mov (arg2), %eax
- mov %eax, (%r11)
- add $4, %r11
- sub $4, arg3
- add $4, arg2
-_less_than_4_left:
-
- cmp $2, arg3
- jl _less_than_2_left
-
- # load 2 Bytes
- mov (arg2), %ax
- mov %ax, (%r11)
- add $2, %r11
- sub $2, arg3
- add $2, arg2
-_less_than_2_left:
- cmp $1, arg3
- jl _zero_left
-
- # load 1 Byte
- mov (arg2), %al
- mov %al, (%r11)
-_zero_left:
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- # shl r9, 4
- lea pshufb_shf_table+16(%rip), %rax
- sub %r9, %rax
- movdqu (%rax), %xmm0
- pxor mask1(%rip), %xmm0
-
- pshufb %xmm0, %xmm7
- jmp _128_done
-
-.align 16
-_exact_16_left:
- movdqu (arg2), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- jmp _128_done
-
-_only_less_than_4:
- cmp $3, arg3
- jl _only_less_than_3
-
- # load 3 Bytes
- mov (arg2), %al
- mov %al, (%r11)
-
- mov 1(arg2), %al
- mov %al, 1(%r11)
-
- mov 2(arg2), %al
- mov %al, 2(%r11)
-
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- psrldq $5, %xmm7
-
- jmp _barrett
-_only_less_than_3:
- cmp $2, arg3
- jl _only_less_than_2
-
- # load 2 Bytes
- mov (arg2), %al
- mov %al, (%r11)
-
- mov 1(arg2), %al
- mov %al, 1(%r11)
-
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- psrldq $6, %xmm7
-
- jmp _barrett
-_only_less_than_2:
-
- # load 1 Byte
- mov (arg2), %al
- mov %al, (%r11)
-
- movdqa (%rsp), %xmm7
- pshufb %xmm11, %xmm7
- pxor %xmm0 , %xmm7 # xor the initial crc value
-
- psrldq $7, %xmm7
-
- jmp _barrett
-
+ movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
+ cmp $16, len
+ je .Lreduce_final_16_bytes # len == 16
+ sub $32, len
+ jge .Lfold_16_bytes_loop # 32 <= len <= 255
+ add $16, len
+ jmp .Lhandle_partial_segment # 17 <= len <= 31
ENDPROC(crc_t10dif_pcl)
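[Editorial note] For reference, the whole routine above computes the plain CRC16/T10-DIF of the buffer, seeded with init_crc; the 128-byte folding and the final Barrett reduction are only a faster way to reach the same residue. A minimal bit-at-a-time sketch, illustrative only and not code from the kernel tree (the function name is made up), which the vectorized path should agree with on any buffer of at least 16 bytes:

    #include <stddef.h>
    #include <stdint.h>

    /* CRC16/T10-DIF: MSB-first division by G(x) = 0x18BB7, no bit
     * reflection, no final XOR.  'crc' is the running remainder
     * (init_crc on entry). */
    static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *buf, size_t len)
    {
            for (size_t i = 0; i < len; i++) {
                    crc ^= (uint16_t)buf[i] << 8;           /* bring in the next byte */
                    for (int bit = 0; bit < 8; bit++)       /* one division step per bit */
                            crc = (crc & 0x8000) ? (uint16_t)((crc << 1) ^ 0x8BB7)
                                                 : (uint16_t)(crc << 1);
            }
            return crc;
    }

The constants in the .rodata block that follows are what let the SIMD code advance by whole 128-byte strides instead of one bit at a time.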
.section .rodata, "a", @progbits
.align 16
-# precomputed constants
-# these constants are precomputed from the poly:
-# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-# Q = 0x18BB70000
-# rk1 = 2^(32*3) mod Q << 32
-# rk2 = 2^(32*5) mod Q << 32
-# rk3 = 2^(32*15) mod Q << 32
-# rk4 = 2^(32*17) mod Q << 32
-# rk5 = 2^(32*3) mod Q << 32
-# rk6 = 2^(32*2) mod Q << 32
-# rk7 = floor(2^64/Q)
-# rk8 = Q
-rk1:
-.quad 0x2d56000000000000
-rk2:
-.quad 0x06df000000000000
-rk3:
-.quad 0x9d9d000000000000
-rk4:
-.quad 0x7cf5000000000000
-rk5:
-.quad 0x2d56000000000000
-rk6:
-.quad 0x1368000000000000
-rk7:
-.quad 0x00000001f65a57f8
-rk8:
-.quad 0x000000018bb70000
-
-rk9:
-.quad 0xceae000000000000
-rk10:
-.quad 0xbfd6000000000000
-rk11:
-.quad 0x1e16000000000000
-rk12:
-.quad 0x713c000000000000
-rk13:
-.quad 0xf7f9000000000000
-rk14:
-.quad 0x80a6000000000000
-rk15:
-.quad 0x044c000000000000
-rk16:
-.quad 0xe658000000000000
-rk17:
-.quad 0xad18000000000000
-rk18:
-.quad 0xa497000000000000
-rk19:
-.quad 0x6ee3000000000000
-rk20:
-.quad 0xe7b5000000000000
-
+# Fold constants precomputed from the polynomial 0x18bb7
+# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
+.Lfold_across_128_bytes_consts:
+ .quad 0x0000000000006123 # x^(8*128) mod G(x)
+ .quad 0x0000000000002295 # x^(8*128+64) mod G(x)
+.Lfold_across_64_bytes_consts:
+ .quad 0x0000000000001069 # x^(4*128) mod G(x)
+ .quad 0x000000000000dd31 # x^(4*128+64) mod G(x)
+.Lfold_across_32_bytes_consts:
+ .quad 0x000000000000857d # x^(2*128) mod G(x)
+ .quad 0x0000000000007acc # x^(2*128+64) mod G(x)
+.Lfold_across_16_bytes_consts:
+ .quad 0x000000000000a010 # x^(1*128) mod G(x)
+ .quad 0x0000000000001faa # x^(1*128+64) mod G(x)
+.Lfinal_fold_consts:
+ .quad 0x1368000000000000 # x^48 * (x^48 mod G(x))
+ .quad 0x2d56000000000000 # x^48 * (x^80 mod G(x))
+.Lbarrett_reduction_consts:
+ .quad 0x0000000000018bb7 # G(x)
+ .quad 0x00000001f65a57f8 # floor(x^48 / G(x))
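[Editorial note] The fold constants above are residues x^N mod G(x) for the exponents named in the per-line comments. A small helper that regenerates such a residue, assuming those exponent annotations are accurate (illustrative only; how each 16-bit value is positioned within its 64-bit .quad follows what the matching pclmulqdq step expects and is not reproduced here):

    #include <stdint.h>

    /* x^n mod G(x) over GF(2), with G(x) encoded as 0x18BB7.
     * xpow_mod_g(8*128) should match the first fold constant's comment,
     * and so on down the table. */
    static uint32_t xpow_mod_g(unsigned int n)
    {
            uint32_t rem = 1;                       /* start from x^0 */

            while (n--) {
                    rem <<= 1;                      /* multiply by x */
                    if (rem & 0x10000)              /* degree 16 reached: reduce */
                            rem ^= 0x18BB7;
            }
            return rem;                             /* degree < 16 */
    }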
.section .rodata.cst16.mask1, "aM", @progbits, 16
.align 16
-mask1:
-.octa 0x80808080808080808080808080808080
+.Lmask1:
+ .octa 0x80808080808080808080808080808080
.section .rodata.cst16.mask2, "aM", @progbits, 16
.align 16
-mask2:
-.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
+.Lmask2:
+ .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
+
+.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
+.align 16
+.Lbswap_mask:
+ .octa 0x000102030405060708090A0B0C0D0E0F
-.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
+.section .rodata.cst32.byteshift_table, "aM", @progbits, 32
.align 16
-SHUF_MASK:
-.octa 0x000102030405060708090A0B0C0D0E0F
-
-.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32
-.align 32
-pshufb_shf_table:
-# use these values for shift constants for the pshufb instruction
-# different alignments result in values as shown:
-# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
-# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
-# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
-# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
-# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
-# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
-# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
-# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
-# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
-.octa 0x8f8e8d8c8b8a89888786858483828100
-.octa 0x000e0d0c0b0a09080706050403020100
+# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
+# is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
+# 0x80} XOR the index vector to shift right by '16 - len' bytes.
+.Lbyteshift_table:
+ .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+ .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0x0
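[Editorial note] A note on the table above: pshufb zeroes any destination byte whose index has the top bit set, which is the whole trick behind .Lhandle_partial_segment. A plain-C model of that behaviour, illustrative only:

    #include <stdint.h>

    /* Model of 128-bit pshufb: an index byte below 0x10 selects src[index],
     * an index byte with bit 7 set yields zero.  A 16-byte window starting
     * at &byteshift_table[16 - len] (1 <= len <= 15) therefore shifts a
     * vector left by 'len' bytes, and the same window XORed with 0x80 (the
     * .Lmask1 step) shifts right by '16 - len' bytes. */
    static void pshufb_model(uint8_t dst[16], const uint8_t src[16],
                             const uint8_t idx[16])
    {
            for (int i = 0; i < 16; i++)
                    dst[i] = (idx[i] & 0x80) ? 0 : src[idx[i] & 0x0f];
    }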
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index cd4df93225014..0e785c0b23542 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -33,18 +33,12 @@
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
- size_t len);
+asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
struct chksum_desc_ctx {
__u16 crc;
};
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
@@ -59,7 +53,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
- if (irq_fpu_usable()) {
+ if (length >= 16 && irq_fpu_usable()) {
kernel_fpu_begin();
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
kernel_fpu_end();
@@ -79,7 +73,7 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
- if (irq_fpu_usable()) {
+ if (len >= 16 && irq_fpu_usable()) {
kernel_fpu_begin();
*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
kernel_fpu_end();
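[Editorial note] The new 'length >= 16' guards exist because the rewritten assembly above unconditionally loads at least 16 bytes up front, so it can no longer be entered for shorter updates. A sketch of the resulting dispatch in chksum_update(); the else branch is unchanged context not shown in the hunk, and crc_t10dif_generic() is the table-driven fallback from lib/crc-t10dif.c, to the best of my recollection:

    /* Not compilable outside the kernel tree; shown only to make the
     * fast-path/fallback split explicit. */
    static int chksum_update_sketch(struct shash_desc *desc, const u8 *data,
                                    unsigned int length)
    {
            struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

            if (length >= 16 && irq_fpu_usable()) {
                    kernel_fpu_begin();             /* PCLMULQDQ fast path */
                    ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
                    kernel_fpu_end();
            } else {
                    ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
            }
            return 0;
    }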
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
index 0dccdda1eb3a1..7e600f8bcdad8 100644
--- a/arch/x86/crypto/morus1280_glue.c
+++ b/arch/x86/crypto/morus1280_glue.c
@@ -85,31 +85,20 @@ static void crypto_morus1280_glue_process_ad(
static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
struct morus1280_ops ops,
- struct aead_request *req)
+ struct skcipher_walk *walk)
{
- struct skcipher_walk walk;
- u8 *cursor_src, *cursor_dst;
- unsigned int chunksize, base;
-
- ops.skcipher_walk_init(&walk, req, false);
-
- while (walk.nbytes) {
- cursor_src = walk.src.virt.addr;
- cursor_dst = walk.dst.virt.addr;
- chunksize = walk.nbytes;
-
- ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
-
- base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
- cursor_src += base;
- cursor_dst += base;
- chunksize &= MORUS1280_BLOCK_SIZE - 1;
-
- if (chunksize > 0)
- ops.crypt_tail(state, cursor_src, cursor_dst,
- chunksize);
+ while (walk->nbytes >= MORUS1280_BLOCK_SIZE) {
+ ops.crypt_blocks(state, walk->src.virt.addr,
+ walk->dst.virt.addr,
+ round_down(walk->nbytes,
+ MORUS1280_BLOCK_SIZE));
+ skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE);
+ }
- skcipher_walk_done(&walk, 0);
+ if (walk->nbytes) {
+ ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
+ walk->nbytes);
+ skcipher_walk_done(walk, 0);
}
}
@@ -147,12 +136,15 @@ static void crypto_morus1280_glue_crypt(struct aead_request *req,
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_state state;
+ struct skcipher_walk walk;
+
+ ops.skcipher_walk_init(&walk, req, true);
kernel_fpu_begin();
ctx->ops->init(&state, &ctx->key, req->iv);
crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
- crypto_morus1280_glue_process_crypt(&state, ops, req);
+ crypto_morus1280_glue_process_crypt(&state, ops, &walk);
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
index 7b58fe4d9bd1a..cb3a817320160 100644
--- a/arch/x86/crypto/morus640_glue.c
+++ b/arch/x86/crypto/morus640_glue.c
@@ -85,31 +85,19 @@ static void crypto_morus640_glue_process_ad(
static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
struct morus640_ops ops,
- struct aead_request *req)
+ struct skcipher_walk *walk)
{
- struct skcipher_walk walk;
- u8 *cursor_src, *cursor_dst;
- unsigned int chunksize, base;
-
- ops.skcipher_walk_init(&walk, req, false);
-
- while (walk.nbytes) {
- cursor_src = walk.src.virt.addr;
- cursor_dst = walk.dst.virt.addr;
- chunksize = walk.nbytes;
-
- ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
-
- base = chunksize & ~(MORUS640_BLOCK_SIZE - 1);
- cursor_src += base;
- cursor_dst += base;
- chunksize &= MORUS640_BLOCK_SIZE - 1;
-
- if (chunksize > 0)
- ops.crypt_tail(state, cursor_src, cursor_dst,
- chunksize);
+ while (walk->nbytes >= MORUS640_BLOCK_SIZE) {
+ ops.crypt_blocks(state, walk->src.virt.addr,
+ walk->dst.virt.addr,
+ round_down(walk->nbytes, MORUS640_BLOCK_SIZE));
+ skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE);
+ }
- skcipher_walk_done(&walk, 0);
+ if (walk->nbytes) {
+ ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
+ walk->nbytes);
+ skcipher_walk_done(walk, 0);
}
}
@@ -143,12 +131,15 @@ static void crypto_morus640_glue_crypt(struct aead_request *req,
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_state state;
+ struct skcipher_walk walk;
+
+ ops.skcipher_walk_init(&walk, req, true);
kernel_fpu_begin();
ctx->ops->init(&state, &ctx->key, req->iv);
crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
- crypto_morus640_glue_process_crypt(&state, ops, req);
+ crypto_morus640_glue_process_crypt(&state, ops, &walk);
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index c88c670cb5fc6..e6add74d78a59 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -272,6 +272,10 @@ ENTRY(poly1305_block_sse2)
dec %rcx
jnz .Ldoblock
+ # Zeroing of key material
+ mov %rcx,0x00(%rsp)
+ mov %rcx,0x08(%rsp)
+
add $0x10,%rsp
pop %r12
pop %rbx
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 8eaf8952c408c..39913770a44d5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -361,7 +361,8 @@ ENTRY(entry_INT80_compat)
/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
+ /* In the Xen PV case we already run on the thread stack. */
+ ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq 6*8(%rdi) /* regs->ss */
@@ -370,8 +371,9 @@ ENTRY(entry_INT80_compat)
pushq 3*8(%rdi) /* regs->cs */
pushq 2*8(%rdi) /* regs->ip */
pushq 1*8(%rdi) /* regs->orig_ax */
-
pushq (%rdi) /* pt_regs->di */
+.Lint80_keep_stack:
+
pushq %rsi /* pt_regs->si */
xorl %esi, %esi /* nospec si */
pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3cf7b533b3d13..955ab6a3b61f5 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -24,7 +24,7 @@
10 i386 unlink sys_unlink __ia32_sys_unlink
11 i386 execve sys_execve __ia32_compat_sys_execve
12 i386 chdir sys_chdir __ia32_sys_chdir
-13 i386 time sys_time __ia32_compat_sys_time
+13 i386 time sys_time32 __ia32_sys_time32
14 i386 mknod sys_mknod __ia32_sys_mknod
15 i386 chmod sys_chmod __ia32_sys_chmod
16 i386 lchown sys_lchown16 __ia32_sys_lchown16
@@ -36,12 +36,12 @@
22 i386 umount sys_oldumount __ia32_sys_oldumount
23 i386 setuid sys_setuid16 __ia32_sys_setuid16
24 i386 getuid sys_getuid16 __ia32_sys_getuid16
-25 i386 stime sys_stime __ia32_compat_sys_stime
+25 i386 stime sys_stime32 __ia32_sys_stime32
26 i386 ptrace sys_ptrace __ia32_compat_sys_ptrace
27 i386 alarm sys_alarm __ia32_sys_alarm
28 i386 oldfstat sys_fstat __ia32_sys_fstat
29 i386 pause sys_pause __ia32_sys_pause
-30 i386 utime sys_utime __ia32_compat_sys_utime
+30 i386 utime sys_utime32 __ia32_sys_utime32
31 i386 stty
32 i386 gtty
33 i386 access sys_access __ia32_sys_access
@@ -135,7 +135,7 @@
121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname
122 i386 uname sys_newuname __ia32_sys_newuname
123 i386 modify_ldt sys_modify_ldt __ia32_sys_modify_ldt
-124 i386 adjtimex sys_adjtimex __ia32_compat_sys_adjtimex
+124 i386 adjtimex sys_adjtimex_time32 __ia32_sys_adjtimex_time32
125 i386 mprotect sys_mprotect __ia32_sys_mprotect
126 i386 sigprocmask sys_sigprocmask __ia32_compat_sys_sigprocmask
127 i386 create_module
@@ -172,8 +172,8 @@
158 i386 sched_yield sys_sched_yield __ia32_sys_sched_yield
159 i386 sched_get_priority_max sys_sched_get_priority_max __ia32_sys_sched_get_priority_max
160 i386 sched_get_priority_min sys_sched_get_priority_min __ia32_sys_sched_get_priority_min
-161 i386 sched_rr_get_interval sys_sched_rr_get_interval __ia32_compat_sys_sched_rr_get_interval
-162 i386 nanosleep sys_nanosleep __ia32_compat_sys_nanosleep
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval_time32 __ia32_sys_sched_rr_get_interval_time32
+162 i386 nanosleep sys_nanosleep_time32 __ia32_sys_nanosleep_time32
163 i386 mremap sys_mremap __ia32_sys_mremap
164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16
165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16
@@ -188,7 +188,7 @@
174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask
176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
-177 i386 rt_sigtimedwait sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait
+177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend
180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
@@ -251,14 +251,14 @@
237 i386 fremovexattr sys_fremovexattr __ia32_sys_fremovexattr
238 i386 tkill sys_tkill __ia32_sys_tkill
239 i386 sendfile64 sys_sendfile64 __ia32_sys_sendfile64
-240 i386 futex sys_futex __ia32_compat_sys_futex
+240 i386 futex sys_futex_time32 __ia32_sys_futex_time32
241 i386 sched_setaffinity sys_sched_setaffinity __ia32_compat_sys_sched_setaffinity
242 i386 sched_getaffinity sys_sched_getaffinity __ia32_compat_sys_sched_getaffinity
243 i386 set_thread_area sys_set_thread_area __ia32_sys_set_thread_area
244 i386 get_thread_area sys_get_thread_area __ia32_sys_get_thread_area
245 i386 io_setup sys_io_setup __ia32_compat_sys_io_setup
246 i386 io_destroy sys_io_destroy __ia32_sys_io_destroy
-247 i386 io_getevents sys_io_getevents __ia32_compat_sys_io_getevents
+247 i386 io_getevents sys_io_getevents_time32 __ia32_sys_io_getevents_time32
248 i386 io_submit sys_io_submit __ia32_compat_sys_io_submit
249 i386 io_cancel sys_io_cancel __ia32_sys_io_cancel
250 i386 fadvise64 sys_fadvise64 __ia32_compat_sys_x86_fadvise64
@@ -271,18 +271,18 @@
257 i386 remap_file_pages sys_remap_file_pages __ia32_sys_remap_file_pages
258 i386 set_tid_address sys_set_tid_address __ia32_sys_set_tid_address
259 i386 timer_create sys_timer_create __ia32_compat_sys_timer_create
-260 i386 timer_settime sys_timer_settime __ia32_compat_sys_timer_settime
-261 i386 timer_gettime sys_timer_gettime __ia32_compat_sys_timer_gettime
+260 i386 timer_settime sys_timer_settime32 __ia32_sys_timer_settime32
+261 i386 timer_gettime sys_timer_gettime32 __ia32_sys_timer_gettime32
262 i386 timer_getoverrun sys_timer_getoverrun __ia32_sys_timer_getoverrun
263 i386 timer_delete sys_timer_delete __ia32_sys_timer_delete
-264 i386 clock_settime sys_clock_settime __ia32_compat_sys_clock_settime
-265 i386 clock_gettime sys_clock_gettime __ia32_compat_sys_clock_gettime
-266 i386 clock_getres sys_clock_getres __ia32_compat_sys_clock_getres
-267 i386 clock_nanosleep sys_clock_nanosleep __ia32_compat_sys_clock_nanosleep
+264 i386 clock_settime sys_clock_settime32 __ia32_sys_clock_settime32
+265 i386 clock_gettime sys_clock_gettime32 __ia32_sys_clock_gettime32
+266 i386 clock_getres sys_clock_getres_time32 __ia32_sys_clock_getres_time32
+267 i386 clock_nanosleep sys_clock_nanosleep_time32 __ia32_sys_clock_nanosleep_time32
268 i386 statfs64 sys_statfs64 __ia32_compat_sys_statfs64
269 i386 fstatfs64 sys_fstatfs64 __ia32_compat_sys_fstatfs64
270 i386 tgkill sys_tgkill __ia32_sys_tgkill
-271 i386 utimes sys_utimes __ia32_compat_sys_utimes
+271 i386 utimes sys_utimes_time32 __ia32_sys_utimes_time32
272 i386 fadvise64_64 sys_fadvise64_64 __ia32_compat_sys_x86_fadvise64_64
273 i386 vserver
274 i386 mbind sys_mbind __ia32_sys_mbind
@@ -290,8 +290,8 @@
276 i386 set_mempolicy sys_set_mempolicy __ia32_sys_set_mempolicy
277 i386 mq_open sys_mq_open __ia32_compat_sys_mq_open
278 i386 mq_unlink sys_mq_unlink __ia32_sys_mq_unlink
-279 i386 mq_timedsend sys_mq_timedsend __ia32_compat_sys_mq_timedsend
-280 i386 mq_timedreceive sys_mq_timedreceive __ia32_compat_sys_mq_timedreceive
+279 i386 mq_timedsend sys_mq_timedsend_time32 __ia32_sys_mq_timedsend_time32
+280 i386 mq_timedreceive sys_mq_timedreceive_time32 __ia32_sys_mq_timedreceive_time32
281 i386 mq_notify sys_mq_notify __ia32_compat_sys_mq_notify
282 i386 mq_getsetattr sys_mq_getsetattr __ia32_compat_sys_mq_getsetattr
283 i386 kexec_load sys_kexec_load __ia32_compat_sys_kexec_load
@@ -310,7 +310,7 @@
296 i386 mkdirat sys_mkdirat __ia32_sys_mkdirat
297 i386 mknodat sys_mknodat __ia32_sys_mknodat
298 i386 fchownat sys_fchownat __ia32_sys_fchownat
-299 i386 futimesat sys_futimesat __ia32_compat_sys_futimesat
+299 i386 futimesat sys_futimesat_time32 __ia32_sys_futimesat_time32
300 i386 fstatat64 sys_fstatat64 __ia32_compat_sys_x86_fstatat
301 i386 unlinkat sys_unlinkat __ia32_sys_unlinkat
302 i386 renameat sys_renameat __ia32_sys_renameat
@@ -319,8 +319,8 @@
305 i386 readlinkat sys_readlinkat __ia32_sys_readlinkat
306 i386 fchmodat sys_fchmodat __ia32_sys_fchmodat
307 i386 faccessat sys_faccessat __ia32_sys_faccessat
-308 i386 pselect6 sys_pselect6 __ia32_compat_sys_pselect6
-309 i386 ppoll sys_ppoll __ia32_compat_sys_ppoll
+308 i386 pselect6 sys_pselect6_time32 __ia32_compat_sys_pselect6_time32
+309 i386 ppoll sys_ppoll_time32 __ia32_compat_sys_ppoll_time32
310 i386 unshare sys_unshare __ia32_sys_unshare
311 i386 set_robust_list sys_set_robust_list __ia32_compat_sys_set_robust_list
312 i386 get_robust_list sys_get_robust_list __ia32_compat_sys_get_robust_list
@@ -331,13 +331,13 @@
317 i386 move_pages sys_move_pages __ia32_compat_sys_move_pages
318 i386 getcpu sys_getcpu __ia32_sys_getcpu
319 i386 epoll_pwait sys_epoll_pwait __ia32_sys_epoll_pwait
-320 i386 utimensat sys_utimensat __ia32_compat_sys_utimensat
+320 i386 utimensat sys_utimensat_time32 __ia32_sys_utimensat_time32
321 i386 signalfd sys_signalfd __ia32_compat_sys_signalfd
322 i386 timerfd_create sys_timerfd_create __ia32_sys_timerfd_create
323 i386 eventfd sys_eventfd __ia32_sys_eventfd
324 i386 fallocate sys_fallocate __ia32_compat_sys_x86_fallocate
-325 i386 timerfd_settime sys_timerfd_settime __ia32_compat_sys_timerfd_settime
-326 i386 timerfd_gettime sys_timerfd_gettime __ia32_compat_sys_timerfd_gettime
+325 i386 timerfd_settime sys_timerfd_settime32 __ia32_sys_timerfd_settime32
+326 i386 timerfd_gettime sys_timerfd_gettime32 __ia32_sys_timerfd_gettime32
327 i386 signalfd4 sys_signalfd4 __ia32_compat_sys_signalfd4
328 i386 eventfd2 sys_eventfd2 __ia32_sys_eventfd2
329 i386 epoll_create1 sys_epoll_create1 __ia32_sys_epoll_create1
@@ -348,13 +348,13 @@
334 i386 pwritev sys_pwritev __ia32_compat_sys_pwritev
335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo __ia32_compat_sys_rt_tgsigqueueinfo
336 i386 perf_event_open sys_perf_event_open __ia32_sys_perf_event_open
-337 i386 recvmmsg sys_recvmmsg __ia32_compat_sys_recvmmsg
+337 i386 recvmmsg sys_recvmmsg_time32 __ia32_compat_sys_recvmmsg_time32
338 i386 fanotify_init sys_fanotify_init __ia32_sys_fanotify_init
339 i386 fanotify_mark sys_fanotify_mark __ia32_compat_sys_fanotify_mark
340 i386 prlimit64 sys_prlimit64 __ia32_sys_prlimit64
341 i386 name_to_handle_at sys_name_to_handle_at __ia32_sys_name_to_handle_at
342 i386 open_by_handle_at sys_open_by_handle_at __ia32_compat_sys_open_by_handle_at
-343 i386 clock_adjtime sys_clock_adjtime __ia32_compat_sys_clock_adjtime
+343 i386 clock_adjtime sys_clock_adjtime32 __ia32_sys_clock_adjtime32
344 i386 syncfs sys_syncfs __ia32_sys_syncfs
345 i386 sendmmsg sys_sendmmsg __ia32_compat_sys_sendmmsg
346 i386 setns sys_setns __ia32_sys_setns
@@ -396,5 +396,36 @@
382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
383 i386 statx sys_statx __ia32_sys_statx
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
-385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
+385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
386 i386 rseq sys_rseq __ia32_sys_rseq
+# don't use numbers 387 through 392, add new calls at the end
+393 i386 semget sys_semget __ia32_sys_semget
+394 i386 semctl sys_semctl __ia32_compat_sys_semctl
+395 i386 shmget sys_shmget __ia32_sys_shmget
+396 i386 shmctl sys_shmctl __ia32_compat_sys_shmctl
+397 i386 shmat sys_shmat __ia32_compat_sys_shmat
+398 i386 shmdt sys_shmdt __ia32_sys_shmdt
+399 i386 msgget sys_msgget __ia32_sys_msgget
+400 i386 msgsnd sys_msgsnd __ia32_compat_sys_msgsnd
+401 i386 msgrcv sys_msgrcv __ia32_compat_sys_msgrcv
+402 i386 msgctl sys_msgctl __ia32_compat_sys_msgctl
+403 i386 clock_gettime64 sys_clock_gettime __ia32_sys_clock_gettime
+404 i386 clock_settime64 sys_clock_settime __ia32_sys_clock_settime
+405 i386 clock_adjtime64 sys_clock_adjtime __ia32_sys_clock_adjtime
+406 i386 clock_getres_time64 sys_clock_getres __ia32_sys_clock_getres
+407 i386 clock_nanosleep_time64 sys_clock_nanosleep __ia32_sys_clock_nanosleep
+408 i386 timer_gettime64 sys_timer_gettime __ia32_sys_timer_gettime
+409 i386 timer_settime64 sys_timer_settime __ia32_sys_timer_settime
+410 i386 timerfd_gettime64 sys_timerfd_gettime __ia32_sys_timerfd_gettime
+411 i386 timerfd_settime64 sys_timerfd_settime __ia32_sys_timerfd_settime
+412 i386 utimensat_time64 sys_utimensat __ia32_sys_utimensat
+413 i386 pselect6_time64 sys_pselect6 __ia32_compat_sys_pselect6_time64
+414 i386 ppoll_time64 sys_ppoll __ia32_compat_sys_ppoll_time64
+416 i386 io_pgetevents_time64 sys_io_pgetevents __ia32_sys_io_pgetevents
+417 i386 recvmmsg_time64 sys_recvmmsg __ia32_compat_sys_recvmmsg_time64
+418 i386 mq_timedsend_time64 sys_mq_timedsend __ia32_sys_mq_timedsend
+419 i386 mq_timedreceive_time64 sys_mq_timedreceive __ia32_sys_mq_timedreceive
+420 i386 semtimedop_time64 sys_semtimedop __ia32_sys_semtimedop
+421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time64
+422 i386 futex_time64 sys_futex __ia32_sys_futex
+423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb2..2ae92fddb6d5f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,8 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
+# don't use numbers 387 through 423, add new calls after the last
+# 'common' entry
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -361,7 +363,7 @@
520 x32 execve __x32_compat_sys_execve/ptregs
521 x32 ptrace __x32_compat_sys_ptrace
522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait
+523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64
524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
525 x32 sigaltstack __x32_compat_sys_sigaltstack
526 x32 timer_create __x32_compat_sys_timer_create
@@ -375,7 +377,7 @@
534 x32 preadv __x32_compat_sys_preadv64
535 x32 pwritev __x32_compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg __x32_compat_sys_recvmmsg
+537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64
538 x32 sendmmsg __x32_compat_sys_sendmmsg
539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index d50bb4dc06503..62f317c9113af 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -253,15 +253,6 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
return -EOPNOTSUPP;
}
-static const struct perf_event_attr ibs_notsupp = {
- .exclude_user = 1,
- .exclude_kernel = 1,
- .exclude_hv = 1,
- .exclude_idle = 1,
- .exclude_host = 1,
- .exclude_guest = 1,
-};
-
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -282,9 +273,6 @@ static int perf_ibs_init(struct perf_event *event)
if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
- if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
- return -EINVAL;
-
if (config & ~perf_ibs->config_mask)
return -EINVAL;
@@ -537,6 +525,7 @@ static struct perf_ibs perf_ibs_fetch = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.msr = MSR_AMD64_IBSFETCHCTL,
.config_mask = IBS_FETCH_CONFIG_MASK,
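[Editorial note] This hunk, and the similar ones in the iommu, power, uncore, cstate, rapl and msr PMUs further down, drop the driver-local rejection of the attr.exclude_* bits and instead advertise PERF_PMU_CAP_NO_EXCLUDE, letting the perf core refuse such events in one place. Roughly, the test the capability enables looks like this (a sketch, not the exact helper from kernel/events/core.c; the attribute field names are from include/uapi/linux/perf_event.h):

    static bool attr_has_exclude_bits(const struct perf_event_attr *attr)
    {
            return attr->exclude_user || attr->exclude_kernel ||
                   attr->exclude_hv   || attr->exclude_idle   ||
                   attr->exclude_host || attr->exclude_guest;
    }

    /* In the core, conceptually:
     *      if ((pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE) &&
     *          attr_has_exclude_bits(&event->attr))
     *              return -EINVAL;
     */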
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 3210fee27e7f9..7635c23f7d82e 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -223,11 +223,6 @@ static int perf_iommu_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
- /* IOMMU counters do not have usr/os/guest/host bits */
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_host || event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -414,6 +409,7 @@ static const struct pmu iommu_pmu __initconst = {
.read = perf_iommu_read,
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_iommu_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static __init int init_one_iommu(unsigned int idx)
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 2aefacf5c5b2a..c5ff084551c6f 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -136,14 +136,7 @@ static int pmu_event_init(struct perf_event *event)
return -ENOENT;
/* Unsupported modes and filters. */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- /* no sampling */
- event->attr.sample_period)
+ if (event->attr.sample_period)
return -EINVAL;
if (cfg != AMD_POWER_EVENTSEL_PKG)
@@ -226,6 +219,7 @@ static struct pmu pmu_class = {
.start = pmu_event_start,
.stop = pmu_event_stop,
.read = pmu_event_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int power_cpu_exit(unsigned int cpu)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 398df6eaa1094..79cfd3b30ceb4 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -201,11 +201,6 @@ static int amd_uncore_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
- /* NB and Last level cache counters do not have usr/os/guest/host bits */
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_host || event->attr.exclude_guest)
- return -EINVAL;
-
/* and we do not enable counter overflow interrupts */
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
@@ -307,6 +302,7 @@ static struct pmu amd_nb_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static struct pmu amd_llc_pmu = {
@@ -317,6 +313,7 @@ static struct pmu amd_llc_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 374a19712e200..b684f0294f35d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2278,6 +2278,19 @@ void perf_check_microcode(void)
x86_pmu.check_microcode();
}
+static int x86_pmu_check_period(struct perf_event *event, u64 value)
+{
+ if (x86_pmu.check_period && x86_pmu.check_period(event, value))
+ return -EINVAL;
+
+ if (value && x86_pmu.limit_period) {
+ if (x86_pmu.limit_period(event, value) > value)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
@@ -2302,6 +2315,7 @@ static struct pmu pmu = {
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
+ .check_period = x86_pmu_check_period,
};
void arch_perf_update_userpage(struct perf_event *event,
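[Editorial note] The new x86_pmu_check_period() is wired into the pmu as .check_period so a period change requested through the PERF_EVENT_IOC_PERIOD ioctl is validated before it takes effect: the vendor hook can veto it outright, and if a ->limit_period hook would have to round the value up, the request is refused rather than silently sampling less often than asked. A hypothetical limit_period callback, just to make that contract concrete (the 128 minimum is invented for illustration):

    /* Hypothetical: a PMU that cannot sample more often than every 128
     * events.  With the check above, asking for a smaller period via the
     * ioctl now fails with -EINVAL instead of being quietly bumped up. */
    static u64 example_limit_period(struct perf_event *event, u64 period)
    {
            return period < 128 ? 128 : period;
    }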
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index a01ef1b0f8833..7cdd7b13bbda6 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -77,10 +77,12 @@ static size_t buf_size(struct page *page)
}
static void *
-bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+bts_buffer_setup_aux(struct perf_event *event, void **pages,
+ int nr_pages, bool overwrite)
{
struct bts_buffer *buf;
struct page *page;
+ int cpu = event->cpu;
int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
unsigned long offset;
size_t size = nr_pages << PAGE_SHIFT;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f62..7ec265bacb6a2 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
#include <asm/hardirq.h>
#include <asm/intel-family.h>
#include <asm/apic.h>
+#include <asm/cpu_device_id.h>
#include "../perf_event.h"
@@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
- /*
- * If PMU counter has PEBS enabled it is not enough to disable counter
- * on a guest entry since PEBS memory write can overshoot guest entry
- * and corrupt guest memory. Disabling PEBS solves the problem.
- */
- arr[1].msr = MSR_IA32_PEBS_ENABLE;
- arr[1].host = cpuc->pebs_enabled;
- arr[1].guest = 0;
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ arr[0].guest &= ~cpuc->pebs_enabled;
+ else
+ arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+ *nr = 1;
+
+ if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
+ /*
+ * If PMU counter has PEBS enabled it is not enough to
+ * disable counter on a guest entry since PEBS memory
+ * write can overshoot guest entry and corrupt guest
+ * memory. Disabling PEBS solves the problem.
+ *
+ * Don't do this if the CPU already enforces it.
+ */
+ arr[1].msr = MSR_IA32_PEBS_ENABLE;
+ arr[1].host = cpuc->pebs_enabled;
+ arr[1].guest = 0;
+ *nr = 2;
+ }
- *nr = 2;
return arr;
}
@@ -3559,6 +3571,14 @@ static void free_excl_cntrs(int cpu)
static void intel_pmu_cpu_dying(int cpu)
{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
+{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
@@ -3570,11 +3590,6 @@ static void intel_pmu_cpu_dying(int cpu)
}
free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3584,6 +3599,11 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
intel_pmu_lbr_sched_task(ctx, sched_in);
}
+static int intel_pmu_check_period(struct perf_event *event, u64 value)
+{
+ return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
+}
+
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -3663,6 +3683,9 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
+ .check_period = intel_pmu_check_period,
};
static struct attribute *intel_pmu_attrs[];
@@ -3703,8 +3726,12 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
+
+ .check_period = intel_pmu_check_period,
};
static __init void intel_clovertown_quirk(void)
@@ -3733,36 +3760,62 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL;
}
-static int intel_snb_pebs_broken(int cpu)
+static const struct x86_cpu_desc isolation_ucodes[] = {
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f),
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e),
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015),
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e),
+ {}
+};
+
+static void intel_check_pebs_isolation(void)
{
- u32 rev = UINT_MAX; /* default to broken for unknown models */
+ x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
+}
- switch (cpu_data(cpu).x86_model) {
- case INTEL_FAM6_SANDYBRIDGE:
- rev = 0x28;
- break;
+static __init void intel_pebs_isolation_quirk(void)
+{
+ WARN_ON_ONCE(x86_pmu.check_microcode);
+ x86_pmu.check_microcode = intel_check_pebs_isolation;
+ intel_check_pebs_isolation();
+}
- case INTEL_FAM6_SANDYBRIDGE_X:
- switch (cpu_data(cpu).x86_stepping) {
- case 6: rev = 0x618; break;
- case 7: rev = 0x70c; break;
- }
- }
+static const struct x86_cpu_desc pebs_ucodes[] = {
+ INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028),
+ INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618),
+ INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c),
+ {}
+};
- return (cpu_data(cpu).microcode < rev);
+static bool intel_snb_pebs_broken(void)
+{
+ return !x86_cpu_has_min_microcode_rev(pebs_ucodes);
}
static void intel_snb_check_microcode(void)
{
- int pebs_broken = 0;
- int cpu;
-
- for_each_online_cpu(cpu) {
- if ((pebs_broken = intel_snb_pebs_broken(cpu)))
- break;
- }
-
- if (pebs_broken == x86_pmu.pebs_broken)
+ if (intel_snb_pebs_broken() == x86_pmu.pebs_broken)
return;
/*
@@ -3879,23 +3932,22 @@ static __init void intel_nehalem_quirk(void)
}
}
-static bool intel_glp_counter_freezing_broken(int cpu)
-{
- u32 rev = UINT_MAX; /* default to broken for unknown stepping */
-
- switch (cpu_data(cpu).x86_stepping) {
- case 1:
- rev = 0x28;
- break;
- case 8:
- rev = 0x6;
- break;
- }
+static const struct x86_cpu_desc counter_freezing_ucodes[] = {
+ INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e),
+ INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e),
+ INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008),
+ INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_X, 1, 0x00000028),
+ INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028),
+ INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006),
+ {}
+};
- return (cpu_data(cpu).microcode < rev);
+static bool intel_counter_freezing_broken(void)
+{
+ return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
}
-static __init void intel_glp_counter_freezing_quirk(void)
+static __init void intel_counter_freezing_quirk(void)
{
/* Check if it's already disabled */
if (disable_counter_freezing)
@@ -3905,7 +3957,7 @@ static __init void intel_glp_counter_freezing_quirk(void)
* If the system starts with the wrong ucode, leave the
* counter-freezing feature permanently disabled.
*/
- if (intel_glp_counter_freezing_broken(raw_smp_processor_id())) {
+ if (intel_counter_freezing_broken()) {
pr_info("PMU counter freezing disabled due to CPU errata,"
"please upgrade microcode\n");
x86_pmu.counter_freezing = false;
@@ -4168,6 +4220,8 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
+ /* fall through */
+
case INTEL_FAM6_CORE2_MEROM_L:
case INTEL_FAM6_CORE2_PENRYN:
case INTEL_FAM6_CORE2_DUNNINGTON:
@@ -4256,6 +4310,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_X:
+ x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -4282,7 +4337,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- x86_add_quirk(intel_glp_counter_freezing_quirk);
+ x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@@ -4425,6 +4480,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_HASWELL_ULT:
case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
+ x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4456,6 +4512,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_BROADWELL_XEON_D:
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
+ x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4518,6 +4575,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
+ x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index d2e780705c5a2..94a4b7fc75d0e 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -280,13 +280,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
return -ENOENT;
/* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
+ if (event->attr.sample_period) /* no sampling */
return -EINVAL;
if (event->cpu < 0)
@@ -437,7 +431,7 @@ static struct pmu cstate_core_pmu = {
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
- .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
.module = THIS_MODULE,
};
@@ -451,7 +445,7 @@ static struct pmu cstate_pkg_pmu = {
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
- .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
.module = THIS_MODULE,
};
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e9acf1d2e7b28..10c99ce1feadd 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1628,6 +1628,8 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+ if (x86_pmu.version <= 4)
+ x86_pmu.pebs_no_isolation = 1;
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
int format = x86_pmu.intel_cap.pebs_format;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c88ed39582a10..580c1b91c4540 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -931,6 +931,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
ret = X86_BR_ZERO_CALL;
break;
}
+ /* fall through */
case 0x9a: /* call far absolute */
ret = X86_BR_CALL;
break;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 9494ca68fd9df..fb3a2f13fc709 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
* Return: Our private PT buffer structure.
*/
static void *
-pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+pt_buffer_setup_aux(struct perf_event *event, void **pages,
+ int nr_pages, bool snapshot)
{
struct pt_buffer *buf;
- int node, ret;
+ int node, ret, cpu = event->cpu;
if (!nr_pages)
return NULL;
@@ -1222,7 +1223,8 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
static void pt_event_addr_filters_sync(struct perf_event *event)
{
struct perf_addr_filters_head *head = perf_event_addr_filters(event);
- unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+ unsigned long msr_a, msr_b;
+ struct perf_addr_filter_range *fr = event->addr_filter_ranges;
struct pt_filters *filters = event->hw.addr_filters;
struct perf_addr_filter *filter;
int range = 0;
@@ -1231,12 +1233,12 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
return;
list_for_each_entry(filter, &head->list, entry) {
- if (filter->path.dentry && !offs[range]) {
+ if (filter->path.dentry && !fr[range].start) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
- msr_a = filter->offset + offs[range];
- msr_b = filter->size + msr_a - 1;
+ msr_a = fr[range].start;
+ msr_b = msr_a + fr[range].size - 1;
}
filters->filter[range].msr_a = msr_a;
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 91039ffed6333..94dc564146ca8 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -397,13 +397,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
/* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
+ if (event->attr.sample_period) /* no sampling */
return -EINVAL;
/* must be done before validate_group */
@@ -699,6 +693,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.module = THIS_MODULE;
+ rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
return 0;
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 27a461414b306..d516161c00c47 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -695,14 +695,6 @@ static int uncore_pmu_event_init(struct perf_event *event)
if (pmu->func_id < 0)
return -ENOENT;
- /*
- * Uncore PMU does measure at all privilege level all the time.
- * So it doesn't make sense to specify any exclude bits.
- */
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_hv || event->attr.exclude_idle)
- return -EINVAL;
-
/* Sampling not supported yet */
if (hwc->sample_period)
return -EINVAL;
@@ -800,6 +792,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
.module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
} else {
pmu->pmu = *pmu->type->pmu;
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 2593b0d7aeee6..b12517fae77a5 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -397,13 +397,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
return -EINVAL;
/* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
+ if (event->attr.sample_period) /* no sampling */
return -EINVAL;
/*
@@ -497,6 +491,7 @@ static struct pmu snb_uncore_imc_pmu = {
.start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static struct intel_uncore_ops snb_uncore_imc_ops = {
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index c07bee31abe85..b10e04387f380 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
.id_table = snbep_uncore_pci_ids,
};
+#define NODE_ID_MASK 0x7
+
/*
* build pci bus to socket mapping
*/
@@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
if (err)
break;
- nodeid = config;
+ nodeid = config & NODE_ID_MASK;
/* get the Node ID mapping */
err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
if (err)
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 1b9f85abf9bc1..a878e6286e4af 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -160,13 +160,7 @@ static int msr_event_init(struct perf_event *event)
return -ENOENT;
/* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
+ if (event->attr.sample_period) /* no sampling */
return -EINVAL;
if (cfg >= PERF_MSR_EVENT_MAX)
@@ -256,7 +250,7 @@ static struct pmu pmu_msr = {
.start = msr_event_start,
.stop = msr_event_stop,
.read = msr_event_update,
- .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
};
static int __init msr_init(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78d7b7031bfcc..7e75f474b0765 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -601,13 +601,14 @@ struct x86_pmu {
/*
* Intel DebugStore bits
*/
- unsigned int bts :1,
- bts_active :1,
- pebs :1,
- pebs_active :1,
- pebs_broken :1,
- pebs_prec_dist :1,
- pebs_no_tlb :1;
+ unsigned int bts :1,
+ bts_active :1,
+ pebs :1,
+ pebs_active :1,
+ pebs_broken :1,
+ pebs_prec_dist :1,
+ pebs_no_tlb :1,
+ pebs_no_isolation :1;
int pebs_record_size;
int pebs_buffer_size;
void (*drain_pebs)(struct pt_regs *regs);
@@ -646,6 +647,11 @@ struct x86_pmu {
* Intel host/guest support (KVM)
*/
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+
+ /*
+ * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
+ */
+ int (*check_period) (struct perf_event *event, u64 period);
};
struct x86_perf_task_context {
@@ -857,7 +863,7 @@ static inline int amd_pmu_init(void)
#ifdef CONFIG_CPU_SUP_INTEL
-static inline bool intel_pmu_has_bts(struct perf_event *event)
+static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period)
{
struct hw_perf_event *hwc = &event->hw;
unsigned int hw_event, bts_event;
@@ -868,7 +874,14 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
- return hw_event == bts_event && hwc->sample_period == 1;
+ return hw_event == bts_event && period == 1;
+}
+
+static inline bool intel_pmu_has_bts(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ return intel_pmu_has_bts_period(event, hwc->sample_period);
}
int intel_pmu_save_and_restart(struct perf_event *event);
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f65b78d32f5eb..3c135084e1eb9 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -39,82 +39,10 @@
static int load_aout_binary(struct linux_binprm *);
static int load_aout_library(struct file *);
-#ifdef CONFIG_COREDUMP
-static int aout_core_dump(struct coredump_params *);
-
-static unsigned long get_dr(int n)
-{
- struct perf_event *bp = current->thread.ptrace_bps[n];
- return bp ? bp->hw.info.address : 0;
-}
-
-/*
- * fill in the user structure for a core dump..
- */
-static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
-{
- u32 fs, gs;
- memset(dump, 0, sizeof(*dump));
-
-/* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long)
- (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_debugreg[0] = get_dr(0);
- dump->u_debugreg[1] = get_dr(1);
- dump->u_debugreg[2] = get_dr(2);
- dump->u_debugreg[3] = get_dr(3);
- dump->u_debugreg[6] = current->thread.debugreg6;
- dump->u_debugreg[7] = current->thread.ptrace_dr7;
-
- if (dump->start_stack < 0xc0000000) {
- unsigned long tmp;
-
- tmp = (unsigned long) (0xc0000000 - dump->start_stack);
- dump->u_ssize = tmp >> PAGE_SHIFT;
- }
-
- dump->regs.ebx = regs->bx;
- dump->regs.ecx = regs->cx;
- dump->regs.edx = regs->dx;
- dump->regs.esi = regs->si;
- dump->regs.edi = regs->di;
- dump->regs.ebp = regs->bp;
- dump->regs.eax = regs->ax;
- dump->regs.ds = current->thread.ds;
- dump->regs.es = current->thread.es;
- savesegment(fs, fs);
- dump->regs.fs = fs;
- savesegment(gs, gs);
- dump->regs.gs = gs;
- dump->regs.orig_eax = regs->orig_ax;
- dump->regs.eip = regs->ip;
- dump->regs.cs = regs->cs;
- dump->regs.eflags = regs->flags;
- dump->regs.esp = regs->sp;
- dump->regs.ss = regs->ss;
-
-#if 1 /* FIXME */
- dump->u_fpvalid = 0;
-#else
- dump->u_fpvalid = dump_fpu(regs, &dump->i387);
-#endif
-}
-
-#endif
-
static struct linux_binfmt aout_format = {
.module = THIS_MODULE,
.load_binary = load_aout_binary,
.load_shlib = load_aout_library,
-#ifdef CONFIG_COREDUMP
- .core_dump = aout_core_dump,
-#endif
- .min_coredump = PAGE_SIZE
};
static int set_brk(unsigned long start, unsigned long end)
@@ -126,91 +54,6 @@ static int set_brk(unsigned long start, unsigned long end)
return vm_brk(start, end - start);
}
-#ifdef CONFIG_COREDUMP
-/*
- * These are the only things you should do on a core-file: use only these
- * macros to write out all the necessary info.
- */
-
-#include <linux/coredump.h>
-
-#define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
-#define START_STACK(u) (u.start_stack)
-
-/*
- * Routine writes a core dump image in the current directory.
- * Currently only a stub-function.
- *
- * Note that setuid/setgid files won't make a core-dump if the uid/gid
- * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
- * field, which also makes sure the core-dumps won't be recursive if the
- * dumping of the process results in another error..
- */
-
-static int aout_core_dump(struct coredump_params *cprm)
-{
- mm_segment_t fs;
- int has_dumped = 0;
- unsigned long dump_start, dump_size;
- struct user32 dump;
-
- fs = get_fs();
- set_fs(KERNEL_DS);
- has_dumped = 1;
- strncpy(dump.u_comm, current->comm, sizeof(current->comm));
- dump.u_ar0 = offsetof(struct user32, regs);
- dump.signal = cprm->siginfo->si_signo;
- dump_thread32(cprm->regs, &dump);
-
- /*
- * If the size of the dump file exceeds the rlimit, then see
- * what would happen if we wrote the stack, but not the data
- * area.
- */
- if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
- dump.u_dsize = 0;
-
- /* Make sure we have enough room to write the stack and data areas. */
- if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
- dump.u_ssize = 0;
-
- /* make sure we actually have a data and stack area to dump */
- set_fs(USER_DS);
- if (!access_ok((void *) (unsigned long)START_DATA(dump),
- dump.u_dsize << PAGE_SHIFT))
- dump.u_dsize = 0;
- if (!access_ok((void *) (unsigned long)START_STACK(dump),
- dump.u_ssize << PAGE_SHIFT))
- dump.u_ssize = 0;
-
- set_fs(KERNEL_DS);
- /* struct user */
- if (!dump_emit(cprm, &dump, sizeof(dump)))
- goto end_coredump;
- /* Now dump all of the user data. Include malloced stuff as well */
- if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
- goto end_coredump;
- /* now we start writing out the user space info */
- set_fs(USER_DS);
- /* Dump the data area */
- if (dump.u_dsize != 0) {
- dump_start = START_DATA(dump);
- dump_size = dump.u_dsize << PAGE_SHIFT;
- if (!dump_emit(cprm, (void *)dump_start, dump_size))
- goto end_coredump;
- }
- /* Now prepare to dump the stack area */
- if (dump.u_ssize != 0) {
- dump_start = START_STACK(dump);
- dump_size = dump.u_ssize << PAGE_SHIFT;
- if (!dump_emit(cprm, (void *)dump_start, dump_size))
- goto end_coredump;
- }
-end_coredump:
- set_fs(fs);
- return has_dumped;
-}
-#endif
/*
* create_aout_tables() parses the env- and arg-strings in new user
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
deleted file mode 100644
index 7d3ece8bfb616..0000000000000
--- a/arch/x86/include/asm/a.out-core.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* a.out coredump register dumper
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef _ASM_X86_A_OUT_CORE_H
-#define _ASM_X86_A_OUT_CORE_H
-
-#ifdef __KERNEL__
-#ifdef CONFIG_X86_32
-
-#include <linux/user.h>
-#include <linux/elfcore.h>
-#include <linux/mm_types.h>
-
-#include <asm/debugreg.h>
-
-/*
- * fill in the user structure for an a.out core dump
- */
-static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
-{
-/* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1)))
- >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
- aout_dump_debugregs(dump);
-
- if (dump->start_stack < TASK_SIZE)
- dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
- >> PAGE_SHIFT;
-
- dump->regs.bx = regs->bx;
- dump->regs.cx = regs->cx;
- dump->regs.dx = regs->dx;
- dump->regs.si = regs->si;
- dump->regs.di = regs->di;
- dump->regs.bp = regs->bp;
- dump->regs.ax = regs->ax;
- dump->regs.ds = (u16)regs->ds;
- dump->regs.es = (u16)regs->es;
- dump->regs.fs = (u16)regs->fs;
- dump->regs.gs = get_user_gs(regs);
- dump->regs.orig_ax = regs->orig_ax;
- dump->regs.ip = regs->ip;
- dump->regs.cs = (u16)regs->cs;
- dump->regs.flags = regs->flags;
- dump->regs.sp = regs->sp;
- dump->regs.ss = (u16)regs->ss;
-
- dump->u_fpvalid = dump_fpu(regs, &dump->i387);
-}
-
-#endif /* CONFIG_X86_32 */
-#endif /* __KERNEL__ */
-#endif /* _ASM_X86_A_OUT_CORE_H */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 0660e14690c8b..4c74073a19ccd 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -94,13 +94,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alt_total_slen alt_end_marker"b-661b"
#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
-#define __OLDINSTR(oldinstr, num) \
+#define OLDINSTR(oldinstr, num) \
+ "# ALT: oldnstr\n" \
"661:\n\t" oldinstr "\n662:\n" \
+ "# ALT: padding\n" \
".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
-
-#define OLDINSTR(oldinstr, num) \
- __OLDINSTR(oldinstr, num) \
+ "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \
alt_end_marker ":\n"
/*
@@ -116,11 +115,23 @@ static inline int alternatives_text_reserved(void *start, void *end)
* additionally longer than the first replacement alternative.
*/
#define OLDINSTR_2(oldinstr, num1, num2) \
+ "# ALT: oldinstr2\n" \
"661:\n\t" oldinstr "\n662:\n" \
+ "# ALT: padding2\n" \
".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
+#define OLDINSTR_3(oldinsn, n1, n2, n3) \
+ "# ALT: oldinstr3\n" \
+ "661:\n\t" oldinsn "\n662:\n" \
+ "# ALT: padding3\n" \
+ ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
+ " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
+ " - (" alt_slen ")), 0x90\n" \
+ alt_end_marker ":\n"
+
#define ALTINSTR_ENTRY(feature, num) \
" .long 661b - .\n" /* label */ \
" .long " b_replacement(num)"f - .\n" /* new instruction */ \
@@ -129,8 +140,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
" .byte " alt_rlen(num) "\n" /* replacement len */ \
" .byte " alt_pad_len "\n" /* pad len */
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
- b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+ "# ALT: replacement " #num "\n" \
+ b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature) \
@@ -153,6 +165,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
".popsection\n"
+#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
+ OLDINSTR_3(oldinsn, 1, 2, 3) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feat1, 1) \
+ ALTINSTR_ENTRY(feat2, 2) \
+ ALTINSTR_ENTRY(feat3, 3) \
+ ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \
+ ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \
+ ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \
+ ".popsection\n"
+
/*
* Alternative instructions for different CPU types or capabilities.
*
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 1908214b91257..ce92c4acc9133 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -7,7 +7,6 @@
#include <asm-generic/asm-prototypes.h>
-#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index baeba05671268..3417110574c12 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -11,4 +11,32 @@
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
+/*
+ * Match specific microcode revisions.
+ *
+ * vendor/family/model/stepping must be all set.
+ *
+ * Only checks against the boot CPU. When mixed-stepping configs are
+ * valid for a CPU model, add a quirk for every valid stepping and
+ * do the fine-tuning in the quirk handler.
+ */
+
+struct x86_cpu_desc {
+ __u8 x86_family;
+ __u8 x86_vendor;
+ __u8 x86_model;
+ __u8 x86_stepping;
+ __u32 x86_microcode_rev;
+};
+
+#define INTEL_CPU_DESC(mod, step, rev) { \
+ .x86_family = 6, \
+ .x86_vendor = X86_VENDOR_INTEL, \
+ .x86_model = mod, \
+ .x86_stepping = step, \
+ .x86_microcode_rev = rev, \
+}
+
+extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
+
#endif
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 107283b1eb1e4..606a4b6a9812c 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -170,7 +170,6 @@ static inline bool efi_runtime_supported(void)
return false;
}
-extern struct console early_efi_console;
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index fa2c93cb42a27..fb04a3ded7ddb 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -137,37 +137,25 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+ else
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
- /* See comment in copy_fxregs_to_kernel() below. */
- return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
- if (IS_ENABLED(CONFIG_X86_32)) {
+ if (IS_ENABLED(CONFIG_X86_32))
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- } else {
- if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
- kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
- } else {
- /* See comment in copy_fxregs_to_kernel() below. */
- kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
- }
- }
+ else
+ kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+ else
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- /* See comment in copy_fxregs_to_kernel() below. */
- return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
- "m" (*fx));
}
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
@@ -184,34 +172,8 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+ else
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else {
- /* Using "rex64; fxsave %0" is broken because, if the memory
- * operand uses any extended registers for addressing, a second
- * REX prefix will be generated (to the assembler, rex64
- * followed by semicolon is a separate instruction), and hence
- * the 64-bitness is lost.
- *
- * Using "fxsaveq %0" would be the ideal choice, but is only
- * supported starting with gas 2.16.
- *
- * Using, as a workaround, the properly prefixed form below
- * isn't accepted by any binutils version so far released,
- * complaining that the same type of prefix is used twice if
- * an extended register is needed for addressing (fix submitted
- * to mainline 2005-11-21).
- *
- * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
- *
- * This, however, we can work around by forcing the compiler to
- * select an addressing mode that doesn't require extended
- * registers.
- */
- asm volatile( "rex64/fxsave (%[fx])"
- : "=m" (fpu->state.fxsave)
- : [fx] "R" (&fpu->state.fxsave));
- }
}
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
@@ -414,6 +376,13 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
if (likely(use_xsave())) {
copy_xregs_to_kernel(&fpu->state.xsave);
+
+ /*
+ * AVX512 state is tracked here because its use is
+ * known to slow the max clock speed of the core.
+ */
+ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+ fpu->avx512_timestamp = jiffies;
return 1;
}
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 202c53918ecfa..2e32e178e0645 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -303,6 +303,13 @@ struct fpu {
unsigned char initialized;
/*
+ * @avx512_timestamp:
+ *
+ * Records the timestamp of AVX512 use during last context switch.
+ */
+ unsigned long avx512_timestamp;
+
+ /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 705dafc2d11ab..2bdbbbcfa393f 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -841,7 +841,7 @@ union hv_gpa_page_range {
* count is equal with how many entries of union hv_gpa_page_range can
* be populated into the input parameter page.
*/
-#define HV_MAX_FLUSH_REP_COUNT (PAGE_SIZE - 2 * sizeof(u64) / \
+#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \
sizeof(union hv_gpa_page_range))
struct hv_guest_mapping_flush_list {
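The operator-precedence bug fixed above is easy to reproduce in a standalone program; the 16-byte stand-in struct is an assumption used only to make the arithmetic concrete.

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SIZE 4096UL

	/* Stand-in for union hv_gpa_page_range (assumed 16 bytes for illustration). */
	struct gpa_page_range { uint64_t lo, hi; };

	#define REP_COUNT_BROKEN (PAGE_SIZE - 2 * sizeof(uint64_t) / \
				  sizeof(struct gpa_page_range))
	#define REP_COUNT_FIXED  ((PAGE_SIZE - 2 * sizeof(uint64_t)) / \
				  sizeof(struct gpa_page_range))

	int main(void)
	{
		/* Division binds tighter than subtraction: 4096 - (16 / 16) = 4095. */
		printf("broken: %zu\n", (size_t)REP_COUNT_BROKEN);
		/* With the parentheses: (4096 - 16) / 16 = 255 entries fit in the page. */
		printf("fixed:  %zu\n", (size_t)REP_COUNT_FIXED);
		return 0;
	}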
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 0dd6b0f4000e8..9f15384c504a4 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -6,7 +6,7 @@
* "Big Core" Processors (Branded as Core, Xeon, etc...)
*
* The "_X" parts are generally the EP and EX Xeons, or the
- * "Extreme" ones, like Broadwell-E.
+ * "Extreme" ones, like Broadwell-E, or Atom microserver.
*
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
@@ -52,6 +52,8 @@
#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
+#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
@@ -71,6 +73,7 @@
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4660ce90de7ff..180373360e342 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -299,6 +299,7 @@ union kvm_mmu_extended_role {
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
+ unsigned int maxphyaddr:6;
};
};
@@ -397,6 +398,7 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
+ gpa_t root_cr3;
union kvm_mmu_role mmu_role;
u8 root_level;
u8 shadow_root_level;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 0ca50611e8cec..19d18fae6ec66 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -178,6 +178,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+/*
+ * Init a new mm. Used on mm copies, like at fork()
+ * and on mm's that are brand-new, like at execve().
+ */
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
@@ -228,8 +232,22 @@ do { \
} while (0)
#endif
+static inline void arch_dup_pkeys(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return;
+
+ /* Duplicate the oldmm pkey state in mm: */
+ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
+ mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
+#endif
+}
+
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
+ arch_dup_pkeys(oldmm, mm);
paravirt_arch_dup_mmap(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 91e4cf189914a..5cc3930cb465e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -217,6 +217,8 @@ static __always_inline unsigned long long rdtsc(void)
*/
static __always_inline unsigned long long rdtsc_ordered(void)
{
+ DECLARE_ARGS(val, low, high);
+
/*
* The RDTSC instruction is not ordered relative to memory
* access. The Intel SDM and the AMD APM are both vague on this
@@ -227,9 +229,19 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* ordering guarantees as reading from a global memory location
* that some other imaginary CPU is updating continuously with a
* time stamp.
+ *
+ * Thus, use the preferred barrier on the respective CPU, aiming for
+ * RDTSCP as the default.
*/
- barrier_nospec();
- return rdtsc();
+ asm volatile(ALTERNATIVE_3("rdtsc",
+ "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC,
+ "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
+ "rdtscp", X86_FEATURE_RDTSCP)
+ : EAX_EDX_RET(val, low, high)
+ /* RDTSCP clobbers ECX with MSR_TSC_AUX. */
+ :: "ecx");
+
+ return EAX_EDX_VAL(val, low, high);
}
static inline unsigned long long native_read_pmc(int counter)
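As a rough userspace illustration of what the patched rdtsc_ordered() emits on an LFENCE-serialized CPU (the kernel itself selects MFENCE, LFENCE or RDTSCP at patch time via ALTERNATIVE_3), the LFENCE variant can be hand-coded; this sketch assumes an x86-64 compiler with GNU inline asm and is not the kernel helper.

	#include <stdint.h>
	#include <stdio.h>

	/* LFENCE keeps the TSC read from being hoisted above earlier loads. */
	static inline uint64_t rdtsc_lfence(void)
	{
		uint32_t lo, hi;

		asm volatile("lfence; rdtsc" : "=a" (lo), "=d" (hi));
		return ((uint64_t)hi << 32) | lo;
	}

	int main(void)
	{
		uint64_t t1 = rdtsc_lfence();
		uint64_t t2 = rdtsc_lfence();

		printf("delta: %llu cycles\n", (unsigned long long)(t2 - t1));
		return 0;
	}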
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8f657286d599a..0ce558a8150d3 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -7,7 +7,11 @@
#endif
#ifdef CONFIG_KASAN
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_STACK_ORDER 2
+#else
#define KASAN_STACK_ORDER 1
+#endif
#else
#define KASAN_STACK_ORDER 0
#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a97f28d914d56..c25c38a05c1c9 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -422,25 +422,26 @@ static inline pgdval_t pgd_val(pgd_t pgd)
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep)
{
pteval_t ret;
- ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep);
+ ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep);
return (pte_t) { .pte = ret };
}
-static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
{
+
if (sizeof(pteval_t) > sizeof(long))
/* 5 arg words */
- pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte);
+ pv_ops.mmu.ptep_modify_prot_commit(vma, addr, ptep, pte);
else
PVOP_VCALL4(mmu.ptep_modify_prot_commit,
- mm, addr, ptep, pte.pte);
+ vma, addr, ptep, pte.pte);
}
static inline void set_pte(pte_t *ptep, pte_t pte)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 488c59686a733..2474e434a6f72 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -55,6 +55,7 @@ struct task_struct;
struct cpumask;
struct flush_tlb_info;
struct mmu_gather;
+struct vm_area_struct;
/*
* Wrapper type for pointers to code which uses the non-standard
@@ -254,9 +255,9 @@ struct pv_mmu_ops {
pte_t *ptep, pte_t pteval);
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
- pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
+ pte_t (*ptep_modify_prot_start)(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep);
- void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
+ void (*ptep_modify_prot_commit)(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte);
struct paravirt_callee_save pte_val;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 662963681ea6c..e662f987dfa2c 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <linux/numa.h>
#include <asm/io.h>
#include <asm/pat.h>
#include <asm/x86_init.h>
@@ -141,7 +142,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
int node;
node = __pcibus_to_node(bus);
- return (node == -1) ? cpu_online_mask :
+ return (node == NUMA_NO_NODE) ? cpu_online_mask :
cpumask_of_node(node);
}
#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 40616e8052924..2779ace16d23f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- native_set_pmd(pmdp, pmd);
+ set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 9c85b54bf03ca..0bb5663156218 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -259,8 +259,7 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
#define gup_fast_permitted gup_fast_permitted
-static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
- int write)
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
{
unsigned long len, end;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 33051436c8645..2bb3a648fc12c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -742,7 +742,6 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
-void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index dbaed55c1c244..232f856e0db06 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -67,16 +67,30 @@ static __always_inline void refcount_dec(refcount_t *r)
static __always_inline __must_check
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
- return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
+ bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
REFCOUNT_CHECK_LT_ZERO,
r->refs.counter, e, "er", i, "cx");
+
+ if (ret) {
+ smp_acquire__after_ctrl_dep();
+ return true;
+ }
+
+ return false;
}
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
- return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
- REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, e, "cx");
+ bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
+ REFCOUNT_CHECK_LT_ZERO,
+ r->refs.counter, e, "cx");
+
+ if (ret) {
+ smp_acquire__after_ctrl_dep();
+ return true;
+ }
+
+ return false;
}
static __always_inline __must_check
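A minimal C11 analogue of the ordering the two hunks above add: the final decrement must act as an acquire so the freeing path observes all stores made while references were held. The names below are illustrative, not kernel API.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct obj {
		atomic_int refs;
		int payload;
	};

	/* Drop a reference; return true when the caller must free the object. */
	static bool obj_put(struct obj *o)
	{
		/* Release so this thread's writes are visible to the freer. */
		if (atomic_fetch_sub_explicit(&o->refs, 1, memory_order_release) == 1) {
			/* Pairs with the release above; plays the role of
			 * smp_acquire__after_ctrl_dep() in the patched helpers. */
			atomic_thread_fence(memory_order_acquire);
			return true;
		}
		return false;
	}

	int main(void)
	{
		struct obj *o = malloc(sizeof(*o));

		atomic_init(&o->refs, 1);
		o->payload = 42;
		if (obj_put(o)) {
			printf("last ref dropped, payload %d\n", o->payload);
			free(o);
		}
		return 0;
	}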
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h
index 40ebddde6ac21..f6b7fe2833cc7 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl_sched.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_RESCTRL_SCHED_H
#define _ASM_X86_RESCTRL_SCHED_H
-#ifdef CONFIG_X86_RESCTRL
+#ifdef CONFIG_X86_CPU_RESCTRL
#include <linux/sched.h>
#include <linux/jump_label.h>
@@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void)
static inline void resctrl_sched_in(void) {}
-#endif /* CONFIG_X86_RESCTRL */
+#endif /* CONFIG_X86_CPU_RESCTRL */
#endif /* _ASM_X86_RESCTRL_SCHED_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a77445d1b0348..1954dd5552a2e 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,6 @@
#define KERNEL_DS MAKE_MM_SEG(-1UL)
#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
-#define get_ds() (KERNEL_DS)
#define get_fs() (current->thread.addr_limit)
static inline void set_fs(mm_segment_t fs)
{
@@ -35,10 +34,7 @@ static inline void set_fs(mm_segment_t fs)
}
#define segment_eq(a, b) ((a).seg == (b).seg)
-
#define user_addr_max() (current->thread.addr_limit.seg)
-#define __addr_ok(addr) \
- ((unsigned long __force)(addr) < user_addr_max())
/*
* Test whether a block of memory is a valid user space address.
@@ -76,7 +72,7 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
#endif
/**
- * access_ok: - Checks if a user space pointer is valid
+ * access_ok - Checks if a user space pointer is valid
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
@@ -85,12 +81,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
*
* Checks if a pointer to a block of memory in user space is valid.
*
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
* Note that, depending on architecture, this function probably just
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
+ *
+ * Return: true (nonzero) if the memory block may be valid, false (zero)
+ * if it is definitely invalid.
*/
#define access_ok(addr, size) \
({ \
@@ -135,7 +131,7 @@ extern int __get_user_bad(void);
__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
/**
- * get_user: - Get a simple variable from user space.
+ * get_user - Get a simple variable from user space.
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
@@ -149,7 +145,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
* @ptr must have pointer-to-simple-variable type, and the result of
* dereferencing @ptr must be assignable to @x without a cast.
*
- * Returns zero on success, or -EFAULT on error.
+ * Return: zero on success, or -EFAULT on error.
* On error, the variable @x is set to zero.
*/
/*
@@ -227,7 +223,7 @@ extern void __put_user_4(void);
extern void __put_user_8(void);
/**
- * put_user: - Write a simple value into user space.
+ * put_user - Write a simple value into user space.
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
@@ -241,7 +237,7 @@ extern void __put_user_8(void);
* @ptr must have pointer-to-simple-variable type, and @x must be assignable
* to the result of dereferencing @ptr.
*
- * Returns zero on success, or -EFAULT on error.
+ * Return: zero on success, or -EFAULT on error.
*/
#define put_user(x, ptr) \
({ \
@@ -284,7 +280,7 @@ do { \
__put_user_goto(x, ptr, "l", "k", "ir", label); \
break; \
case 8: \
- __put_user_goto_u64((__typeof__(*ptr))(x), ptr, label); \
+ __put_user_goto_u64(x, ptr, label); \
break; \
default: \
__put_user_bad(); \
@@ -431,8 +427,10 @@ do { \
({ \
__label__ __pu_label; \
int __pu_err = -EFAULT; \
+ __typeof__(*(ptr)) __pu_val; \
+ __pu_val = x; \
__uaccess_begin(); \
- __put_user_size((x), (ptr), (size), __pu_label); \
+ __put_user_size(__pu_val, (ptr), (size), __pu_label); \
__pu_err = 0; \
__pu_label: \
__uaccess_end(); \
@@ -501,7 +499,7 @@ struct __large_struct { unsigned long buf[100]; };
} while (0)
/**
- * __get_user: - Get a simple variable from user space, with less checking.
+ * __get_user - Get a simple variable from user space, with less checking.
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
@@ -518,7 +516,7 @@ struct __large_struct { unsigned long buf[100]; };
* Caller must check the pointer with access_ok() before calling this
* function.
*
- * Returns zero on success, or -EFAULT on error.
+ * Return: zero on success, or -EFAULT on error.
* On error, the variable @x is set to zero.
*/
@@ -526,7 +524,7 @@ struct __large_struct { unsigned long buf[100]; };
__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
/**
- * __put_user: - Write a simple value into user space, with less checking.
+ * __put_user - Write a simple value into user space, with less checking.
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
@@ -543,7 +541,7 @@ struct __large_struct { unsigned long buf[100]; };
* Caller must check the pointer with access_ok() before calling this
* function.
*
- * Returns zero on success, or -EFAULT on error.
+ * Return: zero on success, or -EFAULT on error.
*/
#define __put_user(x, ptr) \
@@ -711,7 +709,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
{
if (unlikely(!access_ok(ptr,len)))
return 0;
- __uaccess_begin();
+ __uaccess_begin_nospec();
return 1;
}
#define user_access_begin(a,b) user_access_begin(a,b)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index dc4ed8bc2382c..146859efd83c6 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -23,8 +23,8 @@
# include <asm/unistd_64.h>
# include <asm/unistd_64_x32.h>
-# define __ARCH_WANT_COMPAT_SYS_TIME
-# define __ARCH_WANT_SYS_UTIME32
+# define __ARCH_WANT_SYS_TIME
+# define __ARCH_WANT_SYS_UTIME
# define __ARCH_WANT_COMPAT_SYS_PREADV64
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
@@ -48,8 +48,8 @@
# define __ARCH_WANT_SYS_SIGPENDING
# define __ARCH_WANT_SYS_SIGPROCMASK
# define __ARCH_WANT_SYS_SOCKETCALL
-# define __ARCH_WANT_SYS_TIME
-# define __ARCH_WANT_SYS_UTIME
+# define __ARCH_WANT_SYS_TIME32
+# define __ARCH_WANT_SYS_UTIME32
# define __ARCH_WANT_SYS_WAITPID
# define __ARCH_WANT_SYS_FORK
# define __ARCH_WANT_SYS_VFORK
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index e652a7cc61863..8cfccc3cbbf42 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -48,7 +48,8 @@ enum {
BIOS_STATUS_SUCCESS = 0,
BIOS_STATUS_UNIMPLEMENTED = -ENOSYS,
BIOS_STATUS_EINVAL = -EINVAL,
- BIOS_STATUS_UNAVAIL = -EBUSY
+ BIOS_STATUS_UNAVAIL = -EBUSY,
+ BIOS_STATUS_ABORT = -EINTR,
};
/* Address map parameters */
@@ -140,7 +141,6 @@ enum uv_memprotect {
*/
extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
@@ -151,11 +151,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
-#ifdef CONFIG_EFI
extern void uv_bios_init(void);
-#else
-void uv_bios_init(void) { }
-#endif
extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
@@ -167,4 +163,9 @@ extern long system_serial_number;
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
+/*
+ * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details
+ */
+extern struct semaphore __efi_uv_runtime_lock;
+
#endif /* _ASM_X86_UV_BIOS_H */
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index f6648e9928b31..efe701b7c6ceb 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -3,3 +3,4 @@ include include/uapi/asm-generic/Kbuild.asm
generated-y += unistd_32.h
generated-y += unistd_64.h
generated-y += unistd_x32.h
+generic-y += socket.h
diff --git a/arch/x86/include/uapi/asm/socket.h b/arch/x86/include/uapi/asm/socket.h
deleted file mode 100644
index 6b71384b9d8b4..0000000000000
--- a/arch/x86/include/uapi/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/socket.h>
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 0c26b1b44e51a..4203d4f0c68d0 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -90,7 +90,7 @@ ret_point:
.data
ALIGN
ENTRY(saved_magic) .long 0
-ENTRY(saved_eip) .long 0
+saved_eip: .long 0
# saved registers
saved_idt: .long 0,0
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 50b8ed0317a34..510fa12aab73a 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -125,12 +125,12 @@ ENTRY(do_suspend_lowlevel)
ENDPROC(do_suspend_lowlevel)
.data
-ENTRY(saved_rbp) .quad 0
-ENTRY(saved_rsi) .quad 0
-ENTRY(saved_rdi) .quad 0
-ENTRY(saved_rbx) .quad 0
+saved_rbp: .quad 0
+saved_rsi: .quad 0
+saved_rdi: .quad 0
+saved_rbx: .quad 0
-ENTRY(saved_rip) .quad 0
-ENTRY(saved_rsp) .quad 0
+saved_rip: .quad 0
+saved_rsp: .quad 0
ENTRY(saved_magic) .quad 0
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ebeac487a20c7..9a79c7808f9cc 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
+#include <linux/kprobes.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -393,10 +394,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d",
+ DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
- instr, a->instrlen,
+ instr, instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
@@ -764,8 +765,8 @@ int poke_int3_handler(struct pt_regs *regs)
regs->ip = (unsigned long) bp_int3_handler;
return 1;
-
}
+NOKPROBE_SYMBOL(poke_int3_handler);
/**
* text_poke_bp() -- update instructions on live kernel on SMP
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2953bbf05c085..264e3221d9233 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -812,6 +812,7 @@ static int irq_polarity(int idx)
return IOAPIC_POL_HIGH;
case MP_IRQPOL_RESERVED:
pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
+ /* fall through */
case MP_IRQPOL_ACTIVE_LOW:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_POL_LOW;
@@ -859,6 +860,7 @@ static int irq_trigger(int idx)
return IOAPIC_EDGE;
case MP_IRQTRIG_RESERVED:
pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
+ /* fall through */
case MP_IRQTRIG_LEVEL:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_LEVEL;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index a555da0941570..1e225528f0d7f 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -27,6 +27,7 @@
#include <linux/crash_dump.h>
#include <linux/reboot.h>
#include <linux/memory.h>
+#include <linux/numa.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -1390,7 +1391,7 @@ static void __init build_socket_tables(void)
}
/* Set socket -> node values: */
- lnid = -1;
+ lnid = NUMA_NO_NODE;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
int apicid, sockid;
@@ -1521,7 +1522,7 @@ static void __init uv_system_init_hub(void)
new_hub->pnode = 0xffff;
new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
- new_hub->memory_nid = -1;
+ new_hub->memory_nid = NUMA_NO_NODE;
new_hub->nr_possible_cpus = 0;
new_hub->nr_online_cpus = 0;
}
@@ -1538,7 +1539,7 @@ static void __init uv_system_init_hub(void)
uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;
- if (uv_cpu_hub_info(cpu)->memory_nid == -1)
+ if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE)
uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
/* Init memoryless node: */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b6fa0869f7aa1..cfd24f9f76144 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_X86_MCE) += mce/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_MICROCODE) += microcode/
-obj-$(CONFIG_X86_RESCTRL) += resctrl/
+obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 69f6bbb41be0b..01004bfb1a1bc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -819,11 +819,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
- /*
- * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
- * all up to and including B1.
- */
- if (c->x86_model <= 1 && c->x86_stepping <= 1)
+
+ /* Fix erratum 1076: CPB feature bit not being set in CPUID. */
+ if (!cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1de0f41701783..2da82eff0eb4f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -71,7 +71,7 @@ void __init check_bugs(void)
* identify_boot_cpu() initialized SMT support information, let the
* core code know.
*/
- cpu_smt_check_topology_early();
+ cpu_smt_check_topology();
if (!IS_ENABLED(CONFIG_SMP)) {
pr_info("CPU: ");
@@ -798,15 +798,25 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
if (task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
+ task_clear_spec_ssb_noexec(task);
task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
task_set_spec_ssb_disable(task);
+ task_clear_spec_ssb_noexec(task);
task_update_spec_tif(task);
break;
case PR_SPEC_FORCE_DISABLE:
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
+ task_clear_spec_ssb_noexec(task);
+ task_update_spec_tif(task);
+ break;
+ case PR_SPEC_DISABLE_NOEXEC:
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_noexec(task);
task_update_spec_tif(task);
break;
default:
@@ -885,6 +895,8 @@ static int ssb_prctl_get(struct task_struct *task)
case SPEC_STORE_BYPASS_PRCTL:
if (task_spec_ssb_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_noexec(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC;
if (task_spec_ssb_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
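From userspace the new mode is reached through the speculation-control prctl; the sketch below assumes a libc and kernel new enough to define PR_SPEC_DISABLE_NOEXEC and falls back to defining the constant itself.

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	#ifndef PR_SPEC_DISABLE_NOEXEC
	#define PR_SPEC_DISABLE_NOEXEC (1UL << 4)
	#endif

	int main(void)
	{
		/* Disable speculative store bypass for this task, but let the
		 * mitigation be dropped again across the next execve(). */
		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			  PR_SPEC_DISABLE_NOEXEC, 0, 0))
			perror("PR_SET_SPECULATION_CTRL");

		printf("current state: %#lx\n",
		       (unsigned long)prctl(PR_GET_SPECULATION_CTRL,
					    PR_SPEC_STORE_BYPASS, 0, 0, 0));
		return 0;
	}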
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index c4d1023fb0abc..395d46f78582b 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -248,6 +248,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
switch (leaf) {
case 1:
l1 = &l1i;
+ /* fall through */
case 0:
if (!l1->val)
return;
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 3fed38812eea3..6dd78d8235e44 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -48,3 +48,34 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
return NULL;
}
EXPORT_SYMBOL(x86_match_cpu);
+
+static const struct x86_cpu_desc *
+x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ const struct x86_cpu_desc *m;
+
+ for (m = match; m->x86_family | m->x86_model; m++) {
+ if (c->x86_vendor != m->x86_vendor)
+ continue;
+ if (c->x86 != m->x86_family)
+ continue;
+ if (c->x86_model != m->x86_model)
+ continue;
+ if (c->x86_stepping != m->x86_stepping)
+ continue;
+ return m;
+ }
+ return NULL;
+}
+
+bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table)
+{
+ const struct x86_cpu_desc *res = x86_match_cpu_with_stepping(table);
+
+ if (!res || res->x86_microcode_rev > boot_cpu_data.microcode)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev);
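The matching logic added above can be mimicked in a freestanding program to show how a stepping/microcode quirk table is meant to be consulted; the structure layout and the sample table entry are illustrative only.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct cpu_desc {
		uint8_t  family, vendor, model, stepping;
		uint32_t microcode_rev;	/* table: minimum required; boot CPU: current */
	};

	/* Hypothetical quirk table: one entry per affected model/stepping. */
	static const struct cpu_desc quirks[] = {
		{ .family = 6, .vendor = 0, .model = 0x5E, .stepping = 3,
		  .microcode_rev = 0x000000c2 },
		{ }	/* terminator: family and model both zero */
	};

	static bool has_min_microcode(const struct cpu_desc *boot,
				      const struct cpu_desc *table)
	{
		const struct cpu_desc *m;

		for (m = table; m->family | m->model; m++) {
			if (boot->vendor == m->vendor && boot->family == m->family &&
			    boot->model == m->model && boot->stepping == m->stepping)
				return boot->microcode_rev >= m->microcode_rev;
		}
		return false;	/* unlisted CPU: treat as not meeting the requirement */
	}

	int main(void)
	{
		struct cpu_desc boot = { 6, 0, 0x5E, 3, 0x000000cc };

		printf("microcode new enough: %s\n",
		       has_min_microcode(&boot, quirks) ? "yes" : "no");
		return 0;
	}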
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 6063ae2376b2d..b7fb541a4873f 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ m->bank = i;
mce_read_aux(m, i);
*msg = tmp;
return 1;
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 51adde0a0f1a0..e1f3ba19ba54f 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -855,7 +855,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
if (!p) {
return ret;
} else {
- if (boot_cpu_data.microcode == p->patch_id)
+ if (boot_cpu_data.microcode >= p->patch_id)
return ret;
ret = UCODE_NEW;
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 3668c5df90c69..5bd011737272d 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -296,7 +296,7 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
unsigned long sizek)
{
unsigned long hole_basek, hole_sizek;
- unsigned long second_basek, second_sizek;
+ unsigned long second_sizek;
unsigned long range0_basek, range0_sizek;
unsigned long range_basek, range_sizek;
unsigned long chunk_sizek;
@@ -304,7 +304,6 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
hole_basek = 0;
hole_sizek = 0;
- second_basek = 0;
second_sizek = 0;
chunk_sizek = state->chunk_sizek;
gran_sizek = state->gran_sizek;
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 1cabe6fd8e115..4a06c37b9cf11 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_X86_RESCTRL) += core.o rdtgroup.o monitor.o
-obj-$(CONFIG_X86_RESCTRL) += ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o
CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 14bed6af83773..604c0e3bcc830 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -34,13 +34,6 @@
#include "pseudo_lock_event.h"
/*
- * MSR_MISC_FEATURE_CONTROL register enables the modification of hardware
- * prefetcher state. Details about this register can be found in the MSR
- * tables for specific platforms found in Intel's SDM.
- */
-#define MSR_MISC_FEATURE_CONTROL 0x000001a4
-
-/*
* The bits needed to disable hardware prefetching varies based on the
* platform. During initialization we will discover which bits to use.
*/
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c8b07d8ea5a2b..17ffc869cab82 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -470,6 +470,7 @@ int crash_load_segments(struct kimage *image)
kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 50895c2f937d1..a687d10da4178 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -671,21 +671,18 @@ __init void e820__reallocate_tables(void)
int size;
size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries;
- n = kmalloc(size, GFP_KERNEL);
+ n = kmemdup(e820_table, size, GFP_KERNEL);
BUG_ON(!n);
- memcpy(n, e820_table, size);
e820_table = n;
size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries;
- n = kmalloc(size, GFP_KERNEL);
+ n = kmemdup(e820_table_kexec, size, GFP_KERNEL);
BUG_ON(!n);
- memcpy(n, e820_table_kexec, size);
e820_table_kexec = n;
size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries;
- n = kmalloc(size, GFP_KERNEL);
+ n = kmemdup(e820_table_firmware, size, GFP_KERNEL);
BUG_ON(!n);
- memcpy(n, e820_table_firmware, size);
e820_table_firmware = n;
}
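The three hunks above replace the open-coded allocate-and-copy with kmemdup(); outside the kernel the same simplification looks like the sketch below, where memdup() is a local helper, not a libc function.

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Userspace stand-in for kmemdup(): allocate and copy in one step. */
	static void *memdup(const void *src, size_t len)
	{
		void *p = malloc(len);

		if (p)
			memcpy(p, src, len);
		return p;
	}

	int main(void)
	{
		const char table[] = "e820 entries";
		char *copy = memdup(table, sizeof(table));

		if (!copy)
			return 1;
		printf("%s\n", copy);
		free(copy);
		return 0;
	}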
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 374a52fa52969..9b33904251a9f 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -388,10 +388,6 @@ static int __init setup_early_printk(char *buf)
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
-#ifdef CONFIG_EARLY_PRINTK_EFI
- if (!strncmp(buf, "efi", 3))
- early_console_register(&early_efi_console, keep);
-#endif
#ifdef CONFIG_EARLY_PRINTK_USB_XDBC
if (!strncmp(buf, "xdbc", 4))
early_xdbc_parse_parameter(buf + 4);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9cc108456d0be..d7432c2b10514 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -669,7 +669,7 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
return false;
}
-static int init_xstate_size(void)
+static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
unsigned int possible_xstate_size;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8257a59704ae9..3e3789c8f8e1a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -269,7 +269,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
-static int is_ftrace_caller(unsigned long ip)
+static nokprobe_inline int is_ftrace_caller(unsigned long ip)
{
if (ip == ftrace_update_func)
return 1;
@@ -299,6 +299,7 @@ int ftrace_int3_handler(struct pt_regs *regs)
return 1;
}
+NOKPROBE_SYMBOL(ftrace_int3_handler);
static int ftrace_write(unsigned long ip, const char *val, int size)
{
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b0acb22e5a465..dfd3aca82c61c 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -21,10 +21,6 @@
#define HPET_MASK CLOCKSOURCE_MASK(32)
-/* FSEC = 10^-15
- NSEC = 10^-9 */
-#define FSEC_PER_NSEC 1000000L
-
#define HPET_DEV_USED_BIT 2
#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
#define HPET_DEV_VALID 0x8
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 34a5c17151487..ff9bfd40429ef 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -261,12 +261,8 @@ static int arch_build_bp_info(struct perf_event *bp,
* allow kernel breakpoints at all.
*/
if (attr->bp_addr >= TASK_SIZE_MAX) {
-#ifdef CONFIG_KPROBES
if (within_kprobe_blacklist(attr->bp_addr))
return -EINVAL;
-#else
- return -EINVAL;
-#endif
}
hw->type = X86_BREAKPOINT_EXECUTE;
@@ -279,6 +275,7 @@ static int arch_build_bp_info(struct perf_event *bp,
hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
+ /* fall through */
default:
return -EINVAL;
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 278cd07228dd8..1f3b77367948d 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
struct efi_info *current_ei = &boot_params.efi_info;
struct efi_info *ei = &params->efi_info;
+ if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ return 0;
+
if (!current_ei->efi_memmap_size)
return 0;
@@ -215,6 +218,9 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
params->screen_info.ext_mem_k = 0;
params->alt_mem_k = 0;
+ /* Always fill in RSDP: it is either 0 or a valid value */
+ params->acpi_rsdp_addr = boot_params.acpi_rsdp_addr;
+
/* Default APM info */
memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
@@ -253,7 +259,6 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
efi_setup_data_offset);
#endif
-
/* Setup EDD info */
memcpy(params->eddbuf, boot_params.eddbuf,
EDDMAXNR * sizeof(struct edd_info));
@@ -434,6 +439,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
@@ -448,6 +454,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5db08425063ed..4ff6b4cdb9419 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -467,6 +467,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
ptr = &remcomInBuffer[1];
if (kgdb_hex2long(&ptr, &addr))
linux_regs->ip = addr;
+ /* fall through */
case 'D':
case 'k':
/* clear the trace bit */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4ba75afba5271..a034cb808e7eb 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1028,6 +1028,13 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);
int __init arch_populate_kprobe_blacklist(void)
{
+ int ret;
+
+ ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+ (unsigned long)__irqentry_text_end);
+ if (ret)
+ return ret;
+
return kprobe_add_area_blacklist((unsigned long)__entry_text_start,
(unsigned long)__entry_text_end);
}
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 6adf6e6c29339..f142629520158 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -97,6 +97,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
}
asm (
+ ".pushsection .rodata\n"
"optprobe_template_func:\n"
".global optprobe_template_entry\n"
"optprobe_template_entry:\n"
@@ -136,8 +137,7 @@ asm (
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n"
- ".type optprobe_template_func, @function\n"
- ".size optprobe_template_func, .-optprobe_template_func\n");
+ ".popsection\n");
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index ba4bfb7f6a369..5c93a65ee1e5c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -457,6 +457,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
#else
u64 ipi_bitmap = 0;
#endif
+ long ret;
if (cpumask_empty(mask))
return;
@@ -482,8 +483,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
max = apic_id < max ? max : apic_id;
} else {
- kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+ ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+ WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
min = max = apic_id;
ipi_bitmap = 0;
}
@@ -491,8 +493,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
}
if (ipi_bitmap) {
- kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+ ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+ WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
}
local_irq_restore(flags);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4c8acdfdc5a74..ceba408ea9824 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -352,6 +352,8 @@ void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
+ u64 sme_mask = sme_me_mask;
+
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
@@ -364,6 +366,7 @@ void arch_crash_save_vmcoreinfo(void)
vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
kaslr_offset());
VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
+ VMCOREINFO_NUMBER(sme_mask);
}
/* arch-dependent functionality related to kexec file-based syscall */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 90ae0ca510837..58ac7be52c7a6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -255,6 +255,18 @@ void arch_setup_new_exec(void)
/* If cpuid was previously disabled for this task, re-enable it. */
if (test_thread_flag(TIF_NOCPUID))
enable_cpuid();
+
+ /*
+ * Don't inherit TIF_SSBD across exec boundary when
+ * PR_SPEC_DISABLE_NOEXEC is used.
+ */
+ if (test_thread_flag(TIF_SSBD) &&
+ task_spec_ssb_noexec(current)) {
+ clear_thread_flag(TIF_SSBD);
+ task_clear_spec_ssb_disable(current);
+ task_clear_spec_ssb_noexec(current);
+ speculation_ctrl_update(task_thread_info(current)->flags);
+ }
}
static inline void switch_to_bitmap(struct thread_struct *prev,
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e8796fcd7e5a5..13af08827eefc 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -171,7 +171,7 @@ void __init setup_per_cpu_areas(void)
unsigned long delta;
int rc;
- pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%d\n",
+ pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%u\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ccd1f2a8e5577..ce1a67b70168e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -56,6 +56,7 @@
#include <linux/stackprotector.h>
#include <linux/gfp.h>
#include <linux/cpuidle.h>
+#include <linux/numa.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -149,7 +150,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
static void smp_callin(void)
{
- int cpuid, phys_id;
+ int cpuid;
/*
* If waken up by an INIT in an 82489DX configuration
@@ -160,11 +161,6 @@ static void smp_callin(void)
cpuid = smp_processor_id();
/*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = read_apic_id();
-
- /*
* the boot CPU has finished the init stage and is spinning
* on callin_map until we finish. We are free to set up this
* CPU, first the APIC. (this is probably redundant on most
@@ -841,7 +837,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/* reduce the number of lines printed when booting a large cpu count system */
static void announce_cpu(int cpu, int apicid)
{
- static int current_node = -1;
+ static int current_node = NUMA_NO_NODE;
int node = early_cpu_to_node(cpu);
static int width, node_width;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9b7c4ca8f0a73..d26f9e9c3d830 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -111,6 +111,7 @@ void ist_enter(struct pt_regs *regs)
/* This code is a bit fragile. Test it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
}
+NOKPROBE_SYMBOL(ist_enter);
void ist_exit(struct pt_regs *regs)
{
@@ -880,12 +881,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
- unsigned long cr0;
+ unsigned long cr0 = read_cr0();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_MATH_EMULATION
- if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
+ if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
struct math_emu_info info = { };
cond_local_irq_enable(regs);
@@ -897,7 +898,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif
/* This should not happen. */
- cr0 = read_cr0();
if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
/* Try to fix it up and carry on. */
write_cr0(cr0 & ~X86_CR0_TS);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e9f777bfed404..3fae238340699 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -297,15 +297,16 @@ static int __init tsc_setup(char *str)
__setup("tsc=", tsc_setup);
-#define MAX_RETRIES 5
-#define SMI_TRESHOLD 50000
+#define MAX_RETRIES 5
+#define TSC_DEFAULT_THRESHOLD 0x20000
/*
- * Read TSC and the reference counters. Take care of SMI disturbance
+ * Read TSC and the reference counters. Take care of any disturbances
*/
static u64 tsc_read_refs(u64 *p, int hpet)
{
u64 t1, t2;
+ u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
int i;
for (i = 0; i < MAX_RETRIES; i++) {
@@ -315,7 +316,7 @@ static u64 tsc_read_refs(u64 *p, int hpet)
else
*p = acpi_pm_read_early();
t2 = get_cycles();
- if ((t2 - t1) < SMI_TRESHOLD)
+ if ((t2 - t1) < thresh)
return t2;
}
return ULLONG_MAX;
@@ -703,15 +704,15 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
* zero. In each wait loop iteration we read the TSC and check
* the delta to the previous read. We keep track of the min
* and max values of that delta. The delta is mostly defined
- * by the IO time of the PIT access, so we can detect when a
- * SMI/SMM disturbance happened between the two reads. If the
+ * by the IO time of the PIT access, so we can detect when
+ * any disturbance happened between the two reads. If the
* maximum time is significantly larger than the minimum time,
* then we discard the result and have another try.
*
* 2) Reference counter. If available we use the HPET or the
* PMTIMER as a reference to check the sanity of that value.
* We use separate TSC readouts and check inside of the
- * reference read for a SMI/SMM disturbance. We dicard
+ * reference read for any possible disturbance. We discard
* disturbed values here as well. We do that around the PIT
* calibration delay loop as we have to wait for a certain
* amount of time anyway.
@@ -744,7 +745,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
if (ref1 == ref2)
continue;
- /* Check, whether the sampling was disturbed by an SMI */
+ /* Check whether the sampling was disturbed */
if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
continue;
@@ -1268,7 +1269,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
*/
static void tsc_refine_calibration_work(struct work_struct *work)
{
- static u64 tsc_start = -1, ref_start;
+ static u64 tsc_start = ULLONG_MAX, ref_start;
static int hpet;
u64 tsc_stop, ref_stop, delta;
unsigned long freq;
@@ -1283,14 +1284,15 @@ static void tsc_refine_calibration_work(struct work_struct *work)
* delayed the first time we expire. So set the workqueue
* again once we know timers are working.
*/
- if (tsc_start == -1) {
+ if (tsc_start == ULLONG_MAX) {
+restart:
/*
* Only set hpet once, to avoid mixing hardware
* if the hpet becomes enabled later.
*/
hpet = is_hpet_enabled();
- schedule_delayed_work(&tsc_irqwork, HZ);
tsc_start = tsc_read_refs(&ref_start, hpet);
+ schedule_delayed_work(&tsc_irqwork, HZ);
return;
}
@@ -1300,9 +1302,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
if (ref_start == ref_stop)
goto out;
- /* Check, whether the sampling was disturbed by an SMI */
- if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
- goto out;
+ /* Check whether the sampling was disturbed */
+ if (tsc_stop == ULLONG_MAX)
+ goto restart;
delta = tsc_stop - tsc_start;
delta *= 1000000LL;
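For scale: the old SMI_TRESHOLD was a fixed 50000 TSC cycles regardless of clock speed. The new threshold is tsc_khz >> 5, i.e. tsc_khz/32 cycles; since tsc_khz cycles elapse per millisecond, that is roughly 1 ms / 32, about 31 us of wall time, or some 93,750 cycles on a 3 GHz part. TSC_DEFAULT_THRESHOLD = 0x20000 = 131072 cycles is used as a fallback while tsc_khz is still zero.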
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 843feb94a9501..ccf03416e4342 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -745,6 +745,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
* OPCODE1() of the "short" jmp which checks the same condition.
*/
opc1 = OPCODE2(insn) - 0x10;
+ /* fall through */
default:
if (!is_cond_jmp_opcode(opc1))
return -ENOSYS;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0d618ee634ac4..bad8c51fee6ee 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -31,7 +31,7 @@
#undef i386 /* in case the preprocessor is a 32bit one */
-OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
#ifdef CONFIG_X86_32
OUTPUT_ARCH(i386)
@@ -401,7 +401,7 @@ SECTIONS
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
-#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 69b3a7c300139..31ecf7a76d5a4 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -2,10 +2,6 @@
ccflags-y += -Iarch/x86/kvm
-CFLAGS_x86.o := -I.
-CFLAGS_svm.o := -I.
-CFLAGS_vmx.o := -I.
-
KVM := ../../../virt/kvm
kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbffa6c54697a..c07958b59f505 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -335,6 +335,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
+ unsigned f_la57 = 0;
/* cpuid 1.edx */
const u32 kvm_cpuid_1_edx_x86_features =
@@ -489,7 +490,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
// TSC_ADJUST is emulated
entry->ebx |= F(TSC_ADJUST);
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
+ f_la57 = entry->ecx & F(LA57);
cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ /* Set LA57 based on hardware capability. */
+ entry->ecx |= f_la57;
entry->ecx |= f_umip;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index c90a5352d158f..89d20ed1d2e8b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1636,7 +1636,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
- /* maybe userspace knows this conn_id: fall through */
+ /* fall through - maybe userspace knows this conn_id. */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
@@ -1832,7 +1832,6 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
- ent->eax |= HV_X64_MSR_GUEST_IDLE_AVAILABLE;
ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;
@@ -1848,11 +1847,11 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
case HYPERV_CPUID_ENLIGHTMENT_INFO:
ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
- ent->eax |= HV_X64_SYSTEM_RESET_RECOMMENDED;
ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
- ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
+ if (evmcs_ver)
+ ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
/*
* Default number of spinlock retry attempts, matches
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9f089e2e09d02..4b6c2da7265c8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1035,6 +1035,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
+ /* fall through */
case APIC_DM_FIXED:
if (unlikely(trig_mode && !level))
break;
@@ -1874,6 +1875,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
+ /* fall through */
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ce770b4462385..f2d1d230d5b84 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3555,6 +3555,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
&invalid_list);
mmu->root_hpa = INVALID_PAGE;
}
+ mmu->root_cr3 = 0;
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -3610,6 +3611,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
} else
BUG();
+ vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
return 0;
}
@@ -3618,10 +3620,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
{
struct kvm_mmu_page *sp;
u64 pdptr, pm_mask;
- gfn_t root_gfn;
+ gfn_t root_gfn, root_cr3;
int i;
- root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
+ root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
+ root_gfn = root_cr3 >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
return 1;
@@ -3646,7 +3649,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu->root_hpa = root;
- return 0;
+ goto set_root_cr3;
}
/*
@@ -3712,6 +3715,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
}
+set_root_cr3:
+ vcpu->arch.mmu->root_cr3 = root_cr3;
+
return 0;
}
@@ -4163,7 +4169,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
struct kvm_mmu_root_info root;
struct kvm_mmu *mmu = vcpu->arch.mmu;
- root.cr3 = mmu->get_cr3(vcpu);
+ root.cr3 = mmu->root_cr3;
root.hpa = mmu->root_hpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
@@ -4176,6 +4182,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
}
mmu->root_hpa = root.hpa;
+ mmu->root_cr3 = root.cr3;
return i < KVM_MMU_NUM_PREV_ROOTS;
}
@@ -4371,6 +4378,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[1][4] =
rsvd_check->rsvd_bits_mask[0][4];
+ /* fall through */
case PT64_ROOT_4LEVEL:
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
@@ -4769,6 +4777,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
ext.cr4_pse = !!is_pse(vcpu);
ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
+ ext.maxphyaddr = cpuid_maxphyaddr(vcpu);
ext.valid = 1;
@@ -5515,11 +5524,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.root_mmu.root_cr3 = 0;
vcpu->arch.root_mmu.translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.guest_mmu.root_cr3 = 0;
vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a157ca5b68695..f13a3a24d3609 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3414,6 +3414,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
+ /*
+ * Drop what we picked up for L2 via svm_complete_interrupts() so it
+ * doesn't end up in L1.
+ */
+ svm->vcpu.arch.nmi_injected = false;
+ kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(&svm->vcpu);
+
return 0;
}
@@ -4395,7 +4403,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
- /* Follow through */
+ /* Fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -4504,28 +4512,19 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
- int i;
- struct kvm_vcpu *vcpu;
- struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
- * At this point, we expect that the AVIC HW has already
- * set the appropriate IRR bits on the valid target
- * vcpus. So, we just need to kick the appropriate vcpu.
+ * Update ICR high and low, then emulate sending IPI,
+ * which is handled when writing APIC_ICR.
*/
- kvm_for_each_vcpu(i, vcpu, kvm) {
- bool m = kvm_apic_match_dest(vcpu, apic,
- icrl & KVM_APIC_SHORT_MASK,
- GET_APIC_DEST_FIELD(icrh),
- icrl & KVM_APIC_DEST_MASK);
-
- if (m && !avic_vcpu_is_running(vcpu))
- kvm_vcpu_wake_up(vcpu);
- }
+ kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+ kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
+ WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
+ index, svm->vcpu.vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 705f40ae25329..6432d08c7de79 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1465,7 +1465,7 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH arch/x86/kvm
+#define TRACE_INCLUDE_PATH ../../arch/x86/kvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 95bc2247478d9..5466c6d85cf3e 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -332,16 +332,17 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled;
+
+ vmx->nested.enlightened_vmcs_enabled = true;
if (vmcs_version)
*vmcs_version = nested_get_evmcs_version(vcpu);
/* We don't support disabling the feature for simplicity. */
- if (vmx->nested.enlightened_vmcs_enabled)
+ if (evmcs_already_enabled)
return 0;
- vmx->nested.enlightened_vmcs_enabled = true;
-
vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2616bd2c7f2c7..d737a51a53ca3 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -55,7 +55,7 @@ static u16 shadow_read_write_fields[] = {
static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
-void init_vmcs_shadow_fields(void)
+static void init_vmcs_shadow_fields(void)
{
int i, j;
@@ -211,6 +211,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
+ hrtimer_cancel(&vmx->nested.preemption_timer);
vmx->nested.vmxon = false;
vmx->nested.smm.vmxon = false;
free_vpid(vmx->nested.vpid02);
@@ -2472,6 +2473,10 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
return -EINVAL;
+ if (!nested_cpu_has_preemption_timer(vmcs12) &&
+ nested_cpu_has_save_preemption_timer(vmcs12))
+ return -EINVAL;
+
if (nested_cpu_has_ept(vmcs12) &&
!valid_ept_address(vcpu, vmcs12->ept_pointer))
return -EINVAL;
@@ -4140,11 +4145,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
if (r < 0)
goto out_vmcs02;
- vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+ vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
goto out_cached_vmcs12;
- vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+ vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_shadow_vmcs12)
goto out_cached_shadow_vmcs12;
@@ -5263,13 +5268,17 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
copy_shadow_to_vmcs12(vmx);
}
- if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
+ /*
+ * Copy over the full allocated size of vmcs12 rather than just the size
+ * of the struct.
+ */
+ if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE))
return -EFAULT;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
- get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
+ get_shadow_vmcs12(vcpu), VMCS12_SIZE))
return -EFAULT;
}
@@ -5552,9 +5561,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
* secondary cpu-based controls. Do not include those that
* depend on CPUID bits, they are added later by vmx_cpuid_update.
*/
- rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
- msrs->secondary_ctls_low,
- msrs->secondary_ctls_high);
+ if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ msrs->secondary_ctls_low,
+ msrs->secondary_ctls_high);
+
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high &=
SECONDARY_EXEC_DESC |
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f6915f10e584a..30a6bcd735ec3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -26,6 +26,7 @@
#include <linux/mod_devicetable.h>
#include <linux/mm.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/trace_events.h>
@@ -423,7 +424,7 @@ static void check_ept_pointer_match(struct kvm *kvm)
to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
}
-int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
+static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
void *data)
{
struct kvm_tlb_range *range = data;
@@ -862,7 +863,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
if (!entry_only)
j = find_msr(&m->host, msr);
- if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) {
+ if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
+ (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) {
printk_once(KERN_WARNING "Not enough msr switch entries. "
"Can't add msr %x\n", msr);
return;
@@ -1192,21 +1194,6 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
return;
- /*
- * First handle the simple case where no cmpxchg is necessary; just
- * allow posting non-urgent interrupts.
- *
- * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
- * PI.NDST: pi_post_block will do it for us and the wakeup_handler
- * expects the VCPU to be on the blocked_vcpu_list that matches
- * PI.NDST.
- */
- if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
- vcpu->cpu == cpu) {
- pi_clear_sn(pi_desc);
- return;
- }
-
/* The full case. */
do {
old.control = new.control = pi_desc->control;
@@ -1221,6 +1208,17 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
new.sn = 0;
} while (cmpxchg64(&pi_desc->control, old.control,
new.control) != old.control);
+
+ /*
+ * Clear SN before reading the bitmap. The VT-d firmware
+ * writes the bitmap and reads SN atomically (5.2.3 in the
+ * spec), so it doesn't really have a memory barrier that
+ * pairs with this, but we cannot do that and we need one.
+ */
+ smp_mb__after_atomic();
+
+ if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
+ pi_set_on(pi_desc);
}
/*
@@ -1773,7 +1771,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
return 1;
- /* Otherwise falls through */
+ /* Else, falls through */
default:
msr = find_msr_entry(vmx, msr_info->index);
if (msr) {
@@ -2014,7 +2012,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
return 1;
- /* Otherwise falls through */
+ /* Else, falls through */
default:
msr = find_msr_entry(vmx, msr_index);
if (msr) {
@@ -2344,7 +2342,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
case 37: /* AAT100 */
case 44: /* BC86,AAY89,BD102 */
case 46: /* BA97 */
- _vmexit_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
_vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
"does not work properly. Using workaround\n");
@@ -6362,72 +6360,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->hv_timer_armed = false;
}
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4, evmcs_rsp;
-
- /* Record the guest's net vcpu time for enforced NMI injections. */
- if (unlikely(!enable_vnmi &&
- vmx->loaded_vmcs->soft_vnmi_blocked))
- vmx->loaded_vmcs->entry_time = ktime_get();
-
- /* Don't enter VMX if guest state is invalid, let the exit handler
- start emulation until we arrive back to a valid state */
- if (vmx->emulation_required)
- return;
-
- if (vmx->ple_window_dirty) {
- vmx->ple_window_dirty = false;
- vmcs_write32(PLE_WINDOW, vmx->ple_window);
- }
-
- if (vmx->nested.need_vmcs12_sync)
- nested_sync_from_vmcs12(vcpu);
-
- if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
- vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
- if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
- vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
-
- cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- vmx->loaded_vmcs->host_state.cr3 = cr3;
- }
-
- cr4 = cr4_read_shadow();
- if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
- vmcs_writel(HOST_CR4, cr4);
- vmx->loaded_vmcs->host_state.cr4 = cr4;
- }
-
- /* When single-stepping over STI and MOV SS, we must clear the
- * corresponding interruptibility bits in the guest state. Otherwise
- * vmentry fails as it then expects bit 14 (BS) in pending debug
- * exceptions being set, but that's not correct for the guest debugging
- * case. */
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vmx_set_interrupt_shadow(vcpu, 0);
-
- if (static_cpu_has(X86_FEATURE_PKU) &&
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
- vcpu->arch.pkru != vmx->host_pkru)
- __write_pkru(vcpu->arch.pkru);
-
- pt_guest_enter(vmx);
-
- atomic_switch_perf_msrs(vmx);
-
- vmx_update_hv_timer(vcpu);
-
- /*
- * If this vCPU has touched SPEC_CTRL, restore the guest's value if
- * it's non-zero. Since vmentry is serialising on affected CPUs, there
- * is no need to worry about the conditional branch over the wrmsr
- * being speculatively taken.
- */
- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+ unsigned long evmcs_rsp;
vmx->__launched = vmx->loaded_vmcs->launched;
@@ -6567,6 +6502,77 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
, "eax", "ebx", "edi"
#endif
);
+}
+STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);
+
+static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long cr3, cr4;
+
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->entry_time = ktime_get();
+
+ /* Don't enter VMX if guest state is invalid, let the exit handler
+ start emulation until we arrive back to a valid state */
+ if (vmx->emulation_required)
+ return;
+
+ if (vmx->ple_window_dirty) {
+ vmx->ple_window_dirty = false;
+ vmcs_write32(PLE_WINDOW, vmx->ple_window);
+ }
+
+ if (vmx->nested.need_vmcs12_sync)
+ nested_sync_from_vmcs12(vcpu);
+
+ if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+ vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+ if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+ vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
+ cr4 = cr4_read_shadow();
+ if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
+ vmcs_writel(HOST_CR4, cr4);
+ vmx->loaded_vmcs->host_state.cr4 = cr4;
+ }
+
+ /* When single-stepping over STI and MOV SS, we must clear the
+ * corresponding interruptibility bits in the guest state. Otherwise
+ * vmentry fails as it then expects bit 14 (BS) in pending debug
+ * exceptions being set, but that's not correct for the guest debugging
+ * case. */
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vmx_set_interrupt_shadow(vcpu, 0);
+
+ if (static_cpu_has(X86_FEATURE_PKU) &&
+ kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
+ vcpu->arch.pkru != vmx->host_pkru)
+ __write_pkru(vcpu->arch.pkru);
+
+ pt_guest_enter(vmx);
+
+ atomic_switch_perf_msrs(vmx);
+
+ vmx_update_hv_timer(vcpu);
+
+ /*
+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+ * it's non-zero. Since vmentry is serialising on affected CPUs, there
+ * is no need to worry about the conditional branch over the wrmsr
+ * being speculatively taken.
+ */
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+
+ __vmx_vcpu_run(vcpu, vmx);
/*
* We do not use IBRS in the kernel. If this vCPU has used the
@@ -6648,7 +6654,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
-STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
static struct kvm *vmx_vm_alloc(void)
{
@@ -6816,7 +6821,7 @@ static int vmx_vm_init(struct kvm *kvm)
* Warn upon starting the first VM in a potentially
* insecure environment.
*/
- if (cpu_smt_control == CPU_SMT_ENABLED)
+ if (sched_smt_active())
pr_warn_once(L1TF_MSG_SMT);
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
pr_warn_once(L1TF_MSG_L1D);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 99328954c2fc1..0ac0a64c7790f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -337,16 +337,16 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}
-static inline void pi_clear_sn(struct pi_desc *pi_desc)
+static inline void pi_set_sn(struct pi_desc *pi_desc)
{
- return clear_bit(POSTED_INTR_SN,
+ return set_bit(POSTED_INTR_SN,
(unsigned long *)&pi_desc->control);
}
-static inline void pi_set_sn(struct pi_desc *pi_desc)
+static inline void pi_set_on(struct pi_desc *pi_desc)
{
- return set_bit(POSTED_INTR_SN,
- (unsigned long *)&pi_desc->control);
+ set_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
}
static inline void pi_clear_on(struct pi_desc *pi_desc)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02c8e095a2390..941f932373d03 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3834,6 +3834,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_HYPERV_SYNIC2:
if (cap->args[0])
return -EINVAL;
+ /* fall through */
+
case KVM_CAP_HYPERV_SYNIC:
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
@@ -5114,6 +5116,13 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ /*
+ * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
+ * is returned, but our callers are not ready for that and they blindly
+ * call kvm_inject_page_fault. Ensure that they at least do not leak
+ * uninitialized kernel stack memory into cr2 and error code.
+ */
+ memset(exception, 0, sizeof(*exception));
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
exception);
}
@@ -6480,8 +6489,7 @@ restart:
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE &&
- (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ if (r == EMULATE_DONE && ctxt->tf)
kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
@@ -7093,10 +7101,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_CLOCK_PAIRING:
ret = kvm_pv_clock_pairing(vcpu, a0, a1);
break;
+#endif
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
-#endif
default:
ret = -KVM_ENOSYS;
break;
@@ -7793,7 +7801,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* 1) We should set ->mode before checking ->requests. Please see
* the comment in kvm_vcpu_exiting_guest_mode().
*
- * 2) For APICv, we should set ->mode before checking PIR.ON. This
+ * 2) For APICv, we should set ->mode before checking PID.ON. This
* pairs with the memory barrier implicit in pi_test_and_set_on
* (see vmx_deliver_posted_interrupt).
*
@@ -7937,6 +7945,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
+ /* fall through */
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 9119d8e41f1ff..cf00ab6c66210 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -179,6 +179,8 @@ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
if (insn->addr_bytes == 2)
return -EINVAL;
+ /* fall through */
+
case -EDOM:
case offsetof(struct pt_regs, bx):
case offsetof(struct pt_regs, si):
diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c
index 66894675f3c8d..df50451d94ef7 100644
--- a/arch/x86/lib/iomem.c
+++ b/arch/x86/lib/iomem.c
@@ -2,8 +2,11 @@
#include <linux/module.h>
#include <linux/io.h>
+#define movs(type,to,from) \
+ asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory")
+
/* Originally from i386/string.h */
-static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n)
+static __always_inline void rep_movs(void *to, const void *from, size_t n)
{
unsigned long d0, d1, d2;
asm volatile("rep ; movsl\n\t"
@@ -21,13 +24,37 @@ static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n)
void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
{
- __iomem_memcpy(to, (const void *)from, n);
+ if (unlikely(!n))
+ return;
+
+ /* Align any unaligned source IO */
+ if (unlikely(1 & (unsigned long)from)) {
+ movs("b", to, from);
+ n--;
+ }
+ if (n > 1 && unlikely(2 & (unsigned long)from)) {
+ movs("w", to, from);
+ n-=2;
+ }
+ rep_movs(to, (const void *)from, n);
}
EXPORT_SYMBOL(memcpy_fromio);
void memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
{
- __iomem_memcpy((void *)to, (const void *) from, n);
+ if (unlikely(!n))
+ return;
+
+ /* Align any unaligned destination IO */
+ if (unlikely(1 & (unsigned long)to)) {
+ movs("b", to, from);
+ n--;
+ }
+ if (n > 1 && unlikely(2 & (unsigned long)to)) {
+ movs("w", to, from);
+ n-=2;
+ }
+ rep_movs((void *)to, (const void *) from, n);
}
EXPORT_SYMBOL(memcpy_toio);
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 79778ab200e49..a536651164584 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -36,8 +36,8 @@ static inline u16 i8254(void)
u16 status, timer;
do {
- outb(I8254_PORT_CONTROL,
- I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+ outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0,
+ I8254_PORT_CONTROL);
status = inb(I8254_PORT_COUNTER0);
timer = inb(I8254_PORT_COUNTER0);
timer |= inb(I8254_PORT_COUNTER0) << 8;
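The bug fixed here is swapped arguments: the x86 port-I/O helpers take the value first and the port second, so the old call wrote the port number as data to the wrong port. A simplified sketch of the helper shapes (the real declarations are generated in asm/io.h):

    /* x86 port I/O convention: value first, port second */
    void outb(u8 value, u16 port);  /* write one byte to an I/O port */
    u8 inb(u16 port);               /* read one byte from an I/O port */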
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index bfd94e7812fcb..7d290777246d2 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -54,13 +54,13 @@ do { \
} while (0)
/**
- * clear_user: - Zero a block of memory in user space.
+ * clear_user - Zero a block of memory in user space.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
* Zero a block of memory in user space.
*
- * Returns number of bytes that could not be cleared.
+ * Return: number of bytes that could not be cleared.
* On success, this will be zero.
*/
unsigned long
@@ -74,14 +74,14 @@ clear_user(void __user *to, unsigned long n)
EXPORT_SYMBOL(clear_user);
/**
- * __clear_user: - Zero a block of memory in user space, with less checking.
+ * __clear_user - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
* Zero a block of memory in user space. Caller must check
* the specified block with access_ok() before calling this function.
*
- * Returns number of bytes that could not be cleared.
+ * Return: number of bytes that could not be cleared.
* On success, this will be zero.
*/
unsigned long
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 12d7e7fb4efdf..19c6abf9ea317 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -52,7 +52,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
-static void percpu_setup_debug_store(int cpu)
+static void __init percpu_setup_debug_store(int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
int npages;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e3cdc85ce5b6e..ee8f8ab469417 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -444,7 +444,6 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
int i;
pud_t *start, *pud_start;
pgprotval_t prot, eff;
- pud_t *prev_pud = NULL;
pud_start = start = (pud_t *)p4d_page_vaddr(addr);
@@ -462,7 +461,6 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
} else
note_page(m, st, __pgprot(0), 0, 3);
- prev_pud = start;
start++;
}
}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 6521134057e8f..3c4568f8fb28e 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -117,67 +117,12 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
-/* Helper to check whether a uaccess fault indicates a kernel bug. */
-static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
- unsigned long fault_addr)
-{
- /* This is the normal case: #PF with a fault address in userspace. */
- if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
- return false;
-
- /*
- * This code can be reached for machine checks, but only if the #MC
- * handler has already decided that it looks like a candidate for fixup.
- * This e.g. happens when attempting to access userspace memory which
- * the CPU can't access because of uncorrectable bad memory.
- */
- if (trapnr == X86_TRAP_MC)
- return false;
-
- /*
- * There are two remaining exception types we might encounter here:
- * - #PF for faulting accesses to kernel addresses
- * - #GP for faulting accesses to noncanonical addresses
- * Complain about anything else.
- */
- if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
- WARN(1, "unexpected trap %d in uaccess\n", trapnr);
- return false;
- }
-
- /*
- * This is a faulting memory access in kernel space, on a kernel
- * address, in a usercopy function. This can e.g. be caused by improper
- * use of helpers like __put_user and by improper attempts to access
- * userspace addresses in KERNEL_DS regions.
- * The one (semi-)legitimate exception are probe_kernel_{read,write}(),
- * which can be invoked from places like kgdb, /dev/mem (for reading)
- * and privileged BPF code (for reading).
- * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
- * to tell us that faulting on kernel addresses, and even noncanonical
- * addresses, in a userspace accessor does not necessarily imply a
- * kernel bug, root might just be doing weird stuff.
- */
- if (current->kernel_uaccess_faults_ok)
- return false;
-
- /* This is bad. Refuse the fixup so that we go into die(). */
- if (trapnr == X86_TRAP_PF) {
- pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
- fault_addr);
- } else {
- pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
- }
- return true;
-}
-
__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
- if (bogus_uaccess(regs, trapnr, fault_addr))
- return false;
+ WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
regs->ip = ex_fixup_addr(fixup);
return true;
}
@@ -188,8 +133,6 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
unsigned long error_code,
unsigned long fault_addr)
{
- if (bogus_uaccess(regs, trapnr, fault_addr))
- return false;
/* Special hack for uaccess_err */
current->thread.uaccess_err = 1;
regs->ip = ex_fixup_addr(fixup);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2ff25ad332338..667f1da36208e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -595,7 +595,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
return;
}
- addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24);
+ addr = desc.base0 | (desc.base1 << 16) | ((unsigned long)desc.base2 << 24);
#ifdef CONFIG_X86_64
addr |= ((u64)desc.base3 << 32);
#endif
@@ -1031,7 +1031,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
- unsigned int fault)
+ vm_fault_t fault)
{
struct task_struct *tsk = current;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5378d10f1d31d..0029604af8a41 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -705,7 +705,7 @@ bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
return arch_memremap_can_ram_remap(phys_addr, size, 0);
}
-#ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
+#ifdef CONFIG_AMD_MEM_ENCRYPT
/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
unsigned long size)
@@ -747,7 +747,7 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
-#endif /* CONFIG_ARCH_USE_MEMREMAP_PROT */
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index a19ef1a416ff6..4aa9b1480866b 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -158,8 +158,8 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
pmd = pmd_offset(pud, ppd->vaddr);
if (pmd_none(*pmd)) {
pte = ppd->pgtable_area;
- memset(pte, 0, sizeof(pte) * PTRS_PER_PTE);
- ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE;
+ memset(pte, 0, sizeof(*pte) * PTRS_PER_PTE);
+ ppd->pgtable_area += sizeof(*pte) * PTRS_PER_PTE;
set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte)));
}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index de1851d156997..c805db6236b47 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -9,12 +9,12 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm_types.h>
+#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/sched/sysctl.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
-#include <asm/mman.h>
#include <asm/mmu_context.h>
#include <asm/mpx.h>
#include <asm/processor.h>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1308f5408bf74..12c1b7a83ed7b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -123,7 +123,7 @@ void __init setup_node_to_cpumask_map(void)
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
/* cpumask_of_node() will now work */
- pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
+ pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}
static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
@@ -866,7 +866,7 @@ const struct cpumask *cpumask_of_node(int node)
{
if (node >= nr_node_ids) {
printk(KERN_WARNING
- "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+ "cpumask_of_node(%d): node > nr_node_ids(%u)\n",
node, nr_node_ids);
dump_stack();
return cpu_none_mask;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4f8972311a77e..14e6119838a64 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn)
#endif
+/*
+ * See set_mce_nospec().
+ *
+ * Machine check recovery code needs to change cache mode of poisoned pages to
+ * UC to avoid speculative access logging another error. But passing the
+ * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
+ * speculative access. So we cheat and flip the top bit of the address. This
+ * works fine for the code that updates the page tables. But at the end of the
+ * process we need to flush the TLB and cache and the non-canonical address
+ * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long fix_addr(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+ return (long)(addr << 1) >> 1;
+#else
+ return addr;
+#endif
+}
+
static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
{
if (cpa->flags & CPA_PAGES_ARRAY) {
@@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data)
unsigned int i;
for (i = 0; i < cpa->numpages; i++)
- __flush_tlb_one_kernel(__cpa_addr(cpa, i));
+ __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
}
static void cpa_flush(struct cpa_data *data, int cache)
@@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
* Only flush present addresses:
*/
if (pte && (pte_val(*pte) & _PAGE_PRESENT))
- clflush_cache_range_opt((void *)addr, PAGE_SIZE);
+ clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
}
mb();
}
@@ -1627,29 +1650,6 @@ out:
return ret;
}
-/*
- * Machine check recovery code needs to change cache mode of poisoned
- * pages to UC to avoid speculative access logging another error. But
- * passing the address of the 1:1 mapping to set_memory_uc() is a fine
- * way to encourage a speculative access. So we cheat and flip the top
- * bit of the address. This works fine for the code that updates the
- * page tables. But at the end of the process we need to flush the cache
- * and the non-canonical address causes a #GP fault when used by the
- * CLFLUSH instruction.
- *
- * But in the common case we already have a canonical address. This code
- * will fix the top bit if needed and is a no-op otherwise.
- */
-static inline unsigned long make_addr_canonical_again(unsigned long addr)
-{
-#ifdef CONFIG_X86_64
- return (long)(addr << 1) >> 1;
-#else
- return addr;
-#endif
-}
-
-
static int change_page_attr_set_clr(unsigned long *addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr,
int force_split, int in_flag,
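The fix_addr() helper added above works by sign-extension: shifting left by one discards bit 63, and the arithmetic shift right copies bit 62 back into bit 63. For an already-canonical address that is a no-op; for an address whose top bit was flipped by set_mce_nospec() it restores the canonical form. A minimal standalone sketch of the same operation, assuming an LP64 host (not part of the patch):

    #include <stdio.h>

    /* Same bit trick as fix_addr(): force bit 63 to equal bit 62. */
    static unsigned long fix_top_bit(unsigned long addr)
    {
            return (long)(addr << 1) >> 1;
    }

    int main(void)
    {
            unsigned long canonical = 0xffff888012345678UL;   /* e.g. a direct-map address */
            unsigned long flipped = canonical ^ (1UL << 63);  /* top bit flipped, non-canonical */

            printf("%016lx -> %016lx\n", canonical, fix_top_bit(canonical)); /* unchanged */
            printf("%016lx -> %016lx\n", flipped, fix_top_bit(flipped));     /* canonical again */
            return 0;
    }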
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 999d6d8f0beff..bc4bc7b2f075d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -685,9 +685,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
* that UV should be updated so that smp_call_function_many(),
* etc, are optimal on UV.
*/
- unsigned int cpu;
-
- cpu = smp_processor_id();
cpumask = uv_flush_tlb_others(cpumask, info);
if (cpumask)
smp_call_function_many(cpumask, flush_tlb_func_remote,
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5542303c43d9c..afabf597c8557 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -881,20 +881,41 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
/* cmp dst_reg, src_reg */
- EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
- add_2reg(0xC0, dst_reg, src_reg));
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_2mod(0x48, dst_reg, src_reg));
+ else if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
/* test dst_reg, src_reg */
- EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
- add_2reg(0xC0, dst_reg, src_reg));
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_2mod(0x48, dst_reg, src_reg));
+ else if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
/* test dst_reg, imm32 */
- EMIT1(add_1mod(0x48, dst_reg));
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_1mod(0x48, dst_reg));
+ else if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
goto emit_cond_jmp;
@@ -908,8 +929,21 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
/* cmp dst_reg, imm8/32 */
- EMIT1(add_1mod(0x48, dst_reg));
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_1mod(0x48, dst_reg));
+ else if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
if (is_imm8(imm32))
EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 8f6cc71e08482..0d9cdffce6ac0 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2072,7 +2072,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_X:
- case BPF_JMP | BPF_JSGE | BPF_X: {
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X: {
+ bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
u8 sreg_lo = sstk ? IA32_ECX : src_lo;
@@ -2081,25 +2092,35 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
if (dstk) {
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
STACK_VAR(dst_lo));
- EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
- STACK_VAR(dst_hi));
+ if (is_jmp64)
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
}
if (sstk) {
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
STACK_VAR(src_lo));
- EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
- STACK_VAR(src_hi));
+ if (is_jmp64)
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EBX),
+ STACK_VAR(src_hi));
}
- /* cmp dreg_hi,sreg_hi */
- EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
- EMIT2(IA32_JNE, 2);
+ if (is_jmp64) {
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 2);
+ }
/* cmp dreg_lo,sreg_lo */
EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
goto emit_cond_jmp;
}
- case BPF_JMP | BPF_JSET | BPF_X: {
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_X: {
+ bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
u8 sreg_lo = sstk ? IA32_ECX : src_lo;
@@ -2108,15 +2129,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
if (dstk) {
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
STACK_VAR(dst_lo));
- EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
- STACK_VAR(dst_hi));
+ if (is_jmp64)
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
}
if (sstk) {
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
STACK_VAR(src_lo));
- EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
- STACK_VAR(src_hi));
+ if (is_jmp64)
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EBX),
+ STACK_VAR(src_hi));
}
/* and dreg_lo,sreg_lo */
EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
@@ -2126,32 +2153,39 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
goto emit_cond_jmp;
}
- case BPF_JMP | BPF_JSET | BPF_K: {
- u32 hi;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_K: {
+ bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
u8 sreg_lo = IA32_ECX;
u8 sreg_hi = IA32_EBX;
+ u32 hi;
if (dstk) {
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
STACK_VAR(dst_lo));
- EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
- STACK_VAR(dst_hi));
+ if (is_jmp64)
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
}
- hi = imm32 & (1<<31) ? (u32)~0 : 0;
/* mov ecx,imm32 */
- EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
- /* mov ebx,imm32 */
- EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+ EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
/* and dreg_lo,sreg_lo */
EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
- /* and dreg_hi,sreg_hi */
- EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
- /* or dreg_lo,dreg_hi */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ if (is_jmp64) {
+ hi = imm32 & (1 << 31) ? (u32)~0 : 0;
+ /* mov ebx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ }
goto emit_cond_jmp;
}
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -2163,29 +2197,44 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_K:
- case BPF_JMP | BPF_JSGE | BPF_K: {
- u32 hi;
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K: {
+ bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
u8 sreg_lo = IA32_ECX;
u8 sreg_hi = IA32_EBX;
+ u32 hi;
if (dstk) {
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
STACK_VAR(dst_lo));
- EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
- STACK_VAR(dst_hi));
+ if (is_jmp64)
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
}
- hi = imm32 & (1<<31) ? (u32)~0 : 0;
/* mov ecx,imm32 */
EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
- /* mov ebx,imm32 */
- EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
-
- /* cmp dreg_hi,sreg_hi */
- EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
- EMIT2(IA32_JNE, 2);
+ if (is_jmp64) {
+ hi = imm32 & (1 << 31) ? (u32)~0 : 0;
+ /* mov ebx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 2);
+ }
/* cmp dreg_lo,sreg_lo */
EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index e4dc3862d423c..fe29f3f5d384f 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -3,5 +3,4 @@ OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
-obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o
obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o
diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
deleted file mode 100644
index 7138bc7a265c0..0000000000000
--- a/arch/x86/platform/efi/early_printk.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (C) 2013 Intel Corporation; author Matt Fleming
- *
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- */
-
-#include <linux/console.h>
-#include <linux/efi.h>
-#include <linux/font.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <asm/setup.h>
-
-static const struct font_desc *font;
-static u32 efi_x, efi_y;
-static void *efi_fb;
-static bool early_efi_keep;
-
-/*
- * efi earlyprintk need use early_ioremap to map the framebuffer.
- * But early_ioremap is not usable for earlyprintk=efi,keep, ioremap should
- * be used instead. ioremap will be available after paging_init() which is
- * earlier than initcall callbacks. Thus adding this early initcall function
- * early_efi_map_fb to map the whole efi framebuffer.
- */
-static __init int early_efi_map_fb(void)
-{
- u64 base, size;
-
- if (!early_efi_keep)
- return 0;
-
- base = boot_params.screen_info.lfb_base;
- if (boot_params.screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
- base |= (u64)boot_params.screen_info.ext_lfb_base << 32;
- size = boot_params.screen_info.lfb_size;
- efi_fb = ioremap(base, size);
-
- return efi_fb ? 0 : -ENOMEM;
-}
-early_initcall(early_efi_map_fb);
-
-/*
- * early_efi_map maps efi framebuffer region [start, start + len -1]
- * In case earlyprintk=efi,keep we have the whole framebuffer mapped already
- * so just return the offset efi_fb + start.
- */
-static __ref void *early_efi_map(unsigned long start, unsigned long len)
-{
- u64 base;
-
- base = boot_params.screen_info.lfb_base;
- if (boot_params.screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
- base |= (u64)boot_params.screen_info.ext_lfb_base << 32;
-
- if (efi_fb)
- return (efi_fb + start);
- else
- return early_ioremap(base + start, len);
-}
-
-static __ref void early_efi_unmap(void *addr, unsigned long len)
-{
- if (!efi_fb)
- early_iounmap(addr, len);
-}
-
-static void early_efi_clear_scanline(unsigned int y)
-{
- unsigned long *dst;
- u16 len;
-
- len = boot_params.screen_info.lfb_linelength;
- dst = early_efi_map(y*len, len);
- if (!dst)
- return;
-
- memset(dst, 0, len);
- early_efi_unmap(dst, len);
-}
-
-static void early_efi_scroll_up(void)
-{
- unsigned long *dst, *src;
- u16 len;
- u32 i, height;
-
- len = boot_params.screen_info.lfb_linelength;
- height = boot_params.screen_info.lfb_height;
-
- for (i = 0; i < height - font->height; i++) {
- dst = early_efi_map(i*len, len);
- if (!dst)
- return;
-
- src = early_efi_map((i + font->height) * len, len);
- if (!src) {
- early_efi_unmap(dst, len);
- return;
- }
-
- memmove(dst, src, len);
-
- early_efi_unmap(src, len);
- early_efi_unmap(dst, len);
- }
-}
-
-static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h)
-{
- const u32 color_black = 0x00000000;
- const u32 color_white = 0x00ffffff;
- const u8 *src;
- u8 s8;
- int m;
-
- src = font->data + c * font->height;
- s8 = *(src + h);
-
- for (m = 0; m < 8; m++) {
- if ((s8 >> (7 - m)) & 1)
- *dst = color_white;
- else
- *dst = color_black;
- dst++;
- }
-}
-
-static void
-early_efi_write(struct console *con, const char *str, unsigned int num)
-{
- struct screen_info *si;
- unsigned int len;
- const char *s;
- void *dst;
-
- si = &boot_params.screen_info;
- len = si->lfb_linelength;
-
- while (num) {
- unsigned int linemax;
- unsigned int h, count = 0;
-
- for (s = str; *s && *s != '\n'; s++) {
- if (count == num)
- break;
- count++;
- }
-
- linemax = (si->lfb_width - efi_x) / font->width;
- if (count > linemax)
- count = linemax;
-
- for (h = 0; h < font->height; h++) {
- unsigned int n, x;
-
- dst = early_efi_map((efi_y + h) * len, len);
- if (!dst)
- return;
-
- s = str;
- n = count;
- x = efi_x;
-
- while (n-- > 0) {
- early_efi_write_char(dst + x*4, *s, h);
- x += font->width;
- s++;
- }
-
- early_efi_unmap(dst, len);
- }
-
- num -= count;
- efi_x += count * font->width;
- str += count;
-
- if (num > 0 && *s == '\n') {
- efi_x = 0;
- efi_y += font->height;
- str++;
- num--;
- }
-
- if (efi_x + font->width > si->lfb_width) {
- efi_x = 0;
- efi_y += font->height;
- }
-
- if (efi_y + font->height > si->lfb_height) {
- u32 i;
-
- efi_y -= font->height;
- early_efi_scroll_up();
-
- for (i = 0; i < font->height; i++)
- early_efi_clear_scanline(efi_y + i);
- }
- }
-}
-
-static __init int early_efi_setup(struct console *con, char *options)
-{
- struct screen_info *si;
- u16 xres, yres;
- u32 i;
-
- si = &boot_params.screen_info;
- xres = si->lfb_width;
- yres = si->lfb_height;
-
- /*
- * early_efi_write_char() implicitly assumes a framebuffer with
- * 32-bits per pixel.
- */
- if (si->lfb_depth != 32)
- return -ENODEV;
-
- font = get_default_font(xres, yres, -1, -1);
- if (!font)
- return -ENODEV;
-
- efi_y = rounddown(yres, font->height) - font->height;
- for (i = 0; i < (yres - efi_y) / font->height; i++)
- early_efi_scroll_up();
-
- /* early_console_register will unset CON_BOOT in case ,keep */
- if (!(con->flags & CON_BOOT))
- early_efi_keep = true;
- return 0;
-}
-
-struct console early_efi_console = {
- .name = "earlyefi",
- .write = early_efi_write,
- .setup = early_efi_setup,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 17456a1d3f049..458a0e2bcc57c 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -304,7 +304,7 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
* - Not within any part of the kernel
* - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc)
*/
-static bool can_free_region(u64 start, u64 size)
+static __init bool can_free_region(u64 start, u64 size)
{
if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
return false;
@@ -717,7 +717,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
* "efi_mm" cannot be used to check if the page fault had occurred
* in the firmware context because efi=old_map doesn't use efi_pgd.
*/
- if (efi_rts_work.efi_rts_id == NONE)
+ if (efi_rts_work.efi_rts_id == EFI_NONE)
return;
/*
@@ -742,7 +742,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
* because this case occurs *very* rarely and hence could be improved
* on a need by basis.
*/
- if (efi_rts_work.efi_rts_id == RESET_SYSTEM) {
+ if (efi_rts_work.efi_rts_id == EFI_RESET_SYSTEM) {
pr_info("efi_reset_system() buggy! Reboot through BIOS\n");
machine_real_restart(MRR_BIOS);
return;
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
index 96f438d4b026f..1421d5330b2ce 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -44,7 +44,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = {
*/
.microvolts = 2000000, /* 1.8V */
.startup_delay = 250 * 1000, /* 250ms */
- .enable_high = 1, /* active high */
.enabled_at_boot = 0, /* disabled at boot */
.init_data = &bcm43xx_vmmc_data,
};
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 4a6a5a26c5829..ef60d789c76ed 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -29,7 +29,8 @@
struct uv_systab *uv_systab;
-s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
{
struct uv_systab *tab = uv_systab;
s64 ret;
@@ -44,36 +45,42 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
* If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
* callback method, which uses efi_call() directly, with the kernel page tables:
*/
- if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags)))
+ if (unlikely(efi_enabled(EFI_OLD_MEMMAP)))
ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
else
ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
return ret;
}
-EXPORT_SYMBOL_GPL(uv_bios_call);
-s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
- u64 a4, u64 a5)
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
{
- unsigned long bios_flags;
s64 ret;
- local_irq_save(bios_flags);
- ret = uv_bios_call(which, a1, a2, a3, a4, a5);
- local_irq_restore(bios_flags);
+ if (down_interruptible(&__efi_uv_runtime_lock))
+ return BIOS_STATUS_ABORT;
+
+ ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
+ up(&__efi_uv_runtime_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(uv_bios_call);
-s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
u64 a4, u64 a5)
{
+ unsigned long bios_flags;
s64 ret;
- preempt_disable();
- ret = uv_bios_call(which, a1, a2, a3, a4, a5);
- preempt_enable();
+ if (down_interruptible(&__efi_uv_runtime_lock))
+ return BIOS_STATUS_ABORT;
+
+ local_irq_save(bios_flags);
+ ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
+ local_irq_restore(bios_flags);
+
+ up(&__efi_uv_runtime_lock);
return ret;
}
@@ -188,7 +195,6 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
}
EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
-#ifdef CONFIG_EFI
void uv_bios_init(void)
{
uv_systab = NULL;
@@ -218,4 +224,3 @@ void uv_bios_init(void)
}
pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
}
-#endif
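The ordering in the uv_bios_call_irqsave() hunk above is deliberate: __efi_uv_runtime_lock is a sleeping semaphore, so it must be taken before local_irq_save() and released only after local_irq_restore(), and a caller interrupted while waiting gets BIOS_STATUS_ABORT instead of a BIOS result. A condensed sketch of that pattern (illustrative restatement of the patch, not new code):

	s64 ret;
	unsigned long flags;

	if (down_interruptible(&__efi_uv_runtime_lock))	/* may sleep: take before IRQs go off */
		return BIOS_STATUS_ABORT;
	local_irq_save(flags);
	ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
	local_irq_restore(flags);
	up(&__efi_uv_runtime_lock);			/* release only after IRQs are restored */
	return ret;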
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index a4130b84d1ff5..2c53b0f19329a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2010,8 +2010,7 @@ static void make_per_cpu_thp(struct bau_control *smaster)
int cpu;
size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
- smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
- memset(smaster->thp, 0, hpsz);
+ smaster->thp = kzalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
for_each_present_cpu(cpu) {
smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
@@ -2135,15 +2134,12 @@ static int __init summarize_uvhub_sockets(int nuvhubs,
static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
{
unsigned char *uvhub_mask;
- void *vp;
struct uvhub_desc *uvhub_descs;
if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
timeout_us = calculate_destination_timeout();
- vp = kmalloc_array(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
- uvhub_descs = (struct uvhub_desc *)vp;
- memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
+ uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
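As a sketch of why the tlb_uv.c conversion above is safe (illustrative, assuming the allocation succeeds): kzalloc_node() and kcalloc() return already-zeroed memory, so the separate memset() and the intermediate void pointer are redundant, kcalloc() additionally checks the n * size multiplication for overflow, and the unchecked memset() on a potentially NULL pointer disappears.

	/* before: allocate node-local memory, then zero it by hand */
	smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
	memset(smaster->thp, 0, hpsz);

	/* after: one call returns zeroed memory; kcalloc() also guards n * size overflow */
	smaster->thp = kzalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
	uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);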
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 4463fa72db945..96cb20de08af8 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -47,7 +47,7 @@ $(obj)/pasyms.h: $(REALMODE_OBJS) FORCE
targets += realmode.lds
$(obj)/realmode.lds: $(obj)/pasyms.h
-LDFLAGS_realmode.elf := --emit-relocs -T
+LDFLAGS_realmode.elf := -m elf_i386 --emit-relocs -T
CPPFLAGS_realmode.lds += -P -C -I$(objtree)/$(obj)
targets += realmode.elf
diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S
index df8e11e26bc39..3bb980800c581 100644
--- a/arch/x86/realmode/rm/realmode.lds.S
+++ b/arch/x86/realmode/rm/realmode.lds.S
@@ -9,7 +9,7 @@
#undef i386
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH(i386)
SECTIONS
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index f518b4744ff89..a9e80e44178c7 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -16,7 +16,7 @@ config 64BIT
config X86_32
def_bool !64BIT
- select HAVE_AOUT
+ select ARCH_32BIT_OFF_T
select ARCH_WANT_IPC_PARSE_VERSION
select MODULES_USE_ELF_REL
select CLONE_BACKWARDS
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 2f6787fc71066..c54a493e139a7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -898,10 +898,7 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
val = native_read_msr_safe(msr, err);
switch (msr) {
case MSR_IA32_APICBASE:
-#ifdef CONFIG_X86_X2APIC
- if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31))))
-#endif
- val &= ~X2APIC_ENABLE;
+ val &= ~X2APIC_ENABLE;
break;
}
return val;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index a7e47cf7ec6cd..6e4c6bd622033 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -17,8 +17,8 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte);
unsigned long xen_read_cr2_direct(void);
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 0f4fe206dcc20..856a85814f005 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -306,20 +306,20 @@ static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
__xen_set_pte(ptep, pteval);
}
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
+pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
/* Just return the pte as-is. We preserve the bits on commit */
- trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
+ trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep);
return *ptep;
}
-void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte)
{
struct mmu_update u;
- trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
+ trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
xen_mc_batch();
u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 72bf446c3fee3..6e29794573b72 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -361,8 +361,6 @@ void xen_timer_resume(void)
{
int cpu;
- pvclock_resume();
-
if (xen_clockevent != &xen_vcpuop_clockevent)
return;
@@ -379,12 +377,15 @@ static const struct pv_time_ops xen_time_ops __initconst = {
};
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
+static u64 xen_clock_value_saved;
void xen_save_time_memory_area(void)
{
struct vcpu_register_time_memory_area t;
int ret;
+ xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
+
if (!xen_clock)
return;
@@ -404,7 +405,7 @@ void xen_restore_time_memory_area(void)
int ret;
if (!xen_clock)
- return;
+ goto out;
t.addr.v = &xen_clock->pvti;
@@ -421,6 +422,11 @@ void xen_restore_time_memory_area(void)
if (ret != 0)
pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
ret);
+
+out:
+ /* Need pvclock_resume() before using xen_clocksource_read(). */
+ pvclock_resume();
+ xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
}
static void xen_setup_vsyscall_time_info(void)