summaryrefslogtreecommitdiffstats
path: root/arch/x86/include/asm/fpu/types.h
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-05-01 09:59:04 +0200
committerIngo Molnar <mingo@kernel.org>2015-05-19 15:48:09 +0200
commitaeb997b9f2a2199c72b89b7a304cafc394e4202b (patch)
treea9b963ffe79fee076fe09196714ba99f5326d254 /arch/x86/include/asm/fpu/types.h
parentc47ada305de3803517ae64aa50686f644c5456fa (diff)
downloadlinux-0-day-aeb997b9f2a2199c72b89b7a304cafc394e4202b.tar.gz
linux-0-day-aeb997b9f2a2199c72b89b7a304cafc394e4202b.tar.xz
x86/fpu: Change fpu->fpregs_active from 'int' to 'char', add lazy switching comments
Improve the memory layout of 'struct fpu': - change ->fpregs_active from 'int' to 'char' - it's just a single flag and modern x86 CPUs can do efficient byte accesses. - pack related fields closer to each other: often 'fpu->state' will not be touched, while the other fields will - so pack them into a group. Also add comments to each field, describing their purpose, and add some background information about lazy restores. Also fix an obsolete, lazy switching related comment in fpu_copy()'s description. Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/include/asm/fpu/types.h')
-rw-r--r--arch/x86/include/asm/fpu/types.h82
1 files changed, 72 insertions, 10 deletions
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index fe2ce3276a38e..261cfb76065f6 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -159,8 +159,44 @@ union fpregs_state {
struct fpu {
/*
+ * @state:
+ *
+ * In-memory copy of all FPU registers that we save/restore
+ * over context switches. If the task is using the FPU then
+ * the registers in the FPU are more recent than this state
+ * copy. If the task context-switches away then they get
+ * saved here and represent the FPU state.
+ *
+ * After context switches there may be a (short) time period
+ * during which the in-FPU hardware registers are unchanged
+ * and still perfectly match this state, if the tasks
+ * scheduled afterwards are not using the FPU.
+ *
+ * This is the 'lazy restore' window of optimization, which
+ * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+ *
+ * We detect whether a subsequent task uses the FPU via setting
+ * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+ *
+ * During this window, if the task gets scheduled again, we
+ * might be able to skip having to do a restore from this
+ * memory buffer to the hardware registers - at the cost of
+ * incurring the overhead of #NM fault traps.
+ *
+ * Note that on modern CPUs that support the XSAVEOPT (or other
+ * optimized XSAVE instructions), we don't use #NM traps anymore,
+ * as the hardware can track whether FPU registers need saving
+ * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+ * logic, which unconditionally saves/restores all FPU state
+ * across context switches. (if FPU state exists.)
+ */
+ union fpregs_state state;
+
+ /*
+ * @last_cpu:
+ *
* Records the last CPU on which this context was loaded into
- * FPU registers. (In the lazy-switching case we might be
+ * FPU registers. (In the lazy-restore case we might be
* able to reuse FPU registers across multiple context switches
* this way, if no intermediate task used the FPU.)
*
@@ -170,23 +206,49 @@ struct fpu {
*/
unsigned int last_cpu;
- unsigned int fpregs_active;
- union fpregs_state state;
/*
+ * @fpstate_active:
+ *
+ * This flag indicates whether this context is active: if the task
+ * is not running then we can restore from this context, if the task
+ * is running then we should save into this context.
+ */
+ unsigned char fpstate_active;
+
+ /*
+ * @fpregs_active:
+ *
+ * This flag determines whether a given context is actively
+ * loaded into the FPU's registers and that those registers
+ * represent the task's current FPU state.
+ *
+ * Note the interaction with fpstate_active:
+ *
+ * # task does not use the FPU:
+ * fpstate_active == 0
+ *
+ * # task uses the FPU and regs are active:
+ * fpstate_active == 1 && fpregs_active == 1
+ *
+ * # the regs are inactive but still match fpstate:
+ * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
+ *
+ * The third state is what we use for the lazy restore optimization
+ * on lazy-switching CPUs.
+ */
+ unsigned char fpregs_active;
+
+ /*
+ * @counter:
+ *
* This counter contains the number of consecutive context switches
* during which the FPU stays used. If this is over a threshold, the
- * lazy fpu saving logic becomes unlazy, to save the trap overhead.
+ * lazy FPU restore logic becomes eager, to save the trap overhead.
* This is an unsigned char so that after 256 iterations the counter
* wraps and the context switch behavior turns lazy again; this is to
* deal with bursty apps that only use the FPU for a short time:
*/
unsigned char counter;
- /*
- * This flag indicates whether this context is fpstate_active: if the task is
- * not running then we can restore from this context, if the task
- * is running then we should save into this context.
- */
- unsigned char fpstate_active;
};
#endif /* _ASM_X86_FPU_H */