From ee35b9b9f6d52ba134b4e75442531935f295be7a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 17 Jan 2019 13:02:05 +0100 Subject: x86/traps: Have read_cr0() only once in the #NM handler ... instead of twice in the code. In any case, CR0 ends up being read once anyway: 1. The CONFIG_MATH_EMULATION case does so and exits. 2. The normal case does it once too. However, read it on function entry instead to make the code even simpler to follow. No functional changes. Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Acked-by: Sebastian Andrzej Siewior Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190117120728.3811-1-bp@alien8.de --- arch/x86/kernel/traps.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9b7c4ca8f0a73..2684a9d11e661 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -880,12 +880,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { - unsigned long cr0; + unsigned long cr0 = read_cr0(); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_MATH_EMULATION - if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { + if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) { struct math_emu_info info = { }; cond_local_irq_enable(regs); @@ -897,7 +897,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif /* This should not happen. */ - cr0 = read_cr0(); if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) { /* Try to fix it up and carry on. */ write_cr0(cr0 & ~X86_CR0_TS); -- cgit v1.2.3 From bae54dc4f353653bbd3e3081754adad05da1d4dd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 18 Jan 2019 00:05:40 +0100 Subject: x86/fpu: Get rid of CONFIG_AS_FXSAVEQ This was a "workaround" to probe for binutils which could generate FXSAVEQ, apparently gas with min version 2.16. In the meantime, minimal required gas version is 2.20 so all those workarounds for older binutils can be dropped. Signed-off-by: Borislav Petkov Acked-by: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20190117232408.GH5023@zn.tnic --- arch/x86/Makefile | 1 - arch/x86/include/asm/fpu/internal.h | 50 +++++-------------------------------- 2 files changed, 6 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9c5a67d1b9c1b..76bc4dc03d5ee 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -187,7 +187,6 @@ cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc, cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) # does binutils support specific instructions? -asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index fa2c93cb42a27..5d536e3dcc6db 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -137,37 +137,25 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) + else return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) { - if (IS_ENABLED(CONFIG_X86_32)) { + if (IS_ENABLED(CONFIG_X86_32)) kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { - kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - /* See comment in copy_fxregs_to_kernel() below. */ - kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); - } - } + else + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) + else return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); } static inline void copy_kernel_to_fregs(struct fregs_state *fx) @@ -184,34 +172,8 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) { if (IS_ENABLED(CONFIG_X86_32)) asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) + else asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else { - /* Using "rex64; fxsave %0" is broken because, if the memory - * operand uses any extended registers for addressing, a second - * REX prefix will be generated (to the assembler, rex64 - * followed by semicolon is a separate instruction), and hence - * the 64-bitness is lost. - * - * Using "fxsaveq %0" would be the ideal choice, but is only - * supported starting with gas 2.16. - * - * Using, as a workaround, the properly prefixed form below - * isn't accepted by any binutils version so far released, - * complaining that the same type of prefix is used twice if - * an extended register is needed for addressing (fix submitted - * to mainline 2005-11-21). - * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); - * - * This, however, we can work around by forcing the compiler to - * select an addressing mode that doesn't require extended - * registers. - */ - asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state.fxsave) - : [fx] "R" (&fpu->state.fxsave)); - } } /* These macros all use (%edi)/(%rdi) as the single memory argument. */ -- cgit v1.2.3 From 2f7726f955572e587d5f50fbe9b2deed5334bd90 Mon Sep 17 00:00:00 2001 From: Aubrey Li Date: Fri, 18 Jan 2019 02:38:20 +0800 Subject: x86/fpu: Track AVX-512 usage of tasks User space tools which do automated task placement need information about AVX-512 usage of tasks, because AVX-512 usage could cause core turbo frequency drop and impact the running task on the sibling CPU. The XSAVE hardware structure has bits that indicate when valid state is present in registers unique to AVX-512 use. Use these bits to indicate when AVX-512 has been in use and add per-task AVX-512 state timestamp tracking to context switch. Well-written AVX-512 applications are expected to clear the AVX-512 state when not actively using AVX-512 registers, so the tracking mechanism is imprecise and can theoretically miss AVX-512 usage during context switch. But it has been measured to be precise enough to be useful under real-world workloads like tensorflow and linpack. If higher precision is required, suggest user space tools to use the PMU-based mechanisms in combination. Signed-off-by: Aubrey Li Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Tim Chen Cc: aubrey.li@intel.com Link: http://lkml.kernel.org/r/20190117183822.31333-1-aubrey.li@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 7 +++++++ arch/x86/include/asm/fpu/types.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5d536e3dcc6db..fb04a3ded7ddb 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -376,6 +376,13 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { copy_xregs_to_kernel(&fpu->state.xsave); + + /* + * AVX512 state is tracked here because its use is + * known to slow the max clock speed of the core. + */ + if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) + fpu->avx512_timestamp = jiffies; return 1; } diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 202c53918ecfa..2e32e178e0645 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -302,6 +302,13 @@ struct fpu { */ unsigned char initialized; + /* + * @avx512_timestamp: + * + * Records the timestamp of AVX512 use during last context switch. + */ + unsigned long avx512_timestamp; + /* * @state: * -- cgit v1.2.3