summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/extable.c
blob: 6521134057e8f9ef34dbfaf8a0a4e46672a32d3c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
#include <xen/xen.h>

#include <asm/fpu/internal.h>
#include <asm/traps.h>
#include <asm/kdebug.h>

typedef bool (*ex_handler_t)(const struct exception_table_entry *,
			    struct pt_regs *, int, unsigned long,
			    unsigned long);

static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}
static inline ex_handler_t
ex_fixup_handler(const struct exception_table_entry *x)
{
	return (ex_handler_t)((unsigned long)&x->handler + x->handler);
}

__visible bool ex_handler_default(const struct exception_table_entry *fixup,
				  struct pt_regs *regs, int trapnr,
				  unsigned long error_code,
				  unsigned long fault_addr)
{
	regs->ip = ex_fixup_addr(fixup);
	return true;
}
EXPORT_SYMBOL(ex_handler_default);

__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
				struct pt_regs *regs, int trapnr,
				unsigned long error_code,
				unsigned long fault_addr)
{
	regs->ip = ex_fixup_addr(fixup);
	regs->ax = trapnr;
	return true;
}
EXPORT_SYMBOL_GPL(ex_handler_fault);

/*
 * Handler for UD0 exception following a failed test against the
 * result of a refcount inc/dec/add/sub.
 */
__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
				   struct pt_regs *regs, int trapnr,
				   unsigned long error_code,
				   unsigned long fault_addr)
{
	/* First unconditionally saturate the refcount. */
	*(int *)regs->cx = INT_MIN / 2;

	/*
	 * Strictly speaking, this reports the fixup destination, not
	 * the fault location, and not the actually overflowing
	 * instruction, which is the instruction before the "js", but
	 * since that instruction could be a variety of lengths, just
	 * report the location after the overflow, which should be close
	 * enough for finding the overflow, as it's at least back in
	 * the function, having returned from .text.unlikely.
	 */
	regs->ip = ex_fixup_addr(fixup);

	/*
	 * This function has been called because either a negative refcount
	 * value was seen by any of the refcount functions, or a zero
	 * refcount value was seen by refcount_dec().
	 *
	 * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
	 * wrapped around) will be set. Additionally, seeing the refcount
	 * reach 0 will set ZF (Zero Flag: result was zero). In each of
	 * these cases we want a report, since it's a boundary condition.
	 * The SF case is not reported since it indicates post-boundary
	 * manipulations below zero or above INT_MAX. And if none of the
	 * flags are set, something has gone very wrong, so report it.
	 */
	if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
		bool zero = regs->flags & X86_EFLAGS_ZF;

		refcount_error_report(regs, zero ? "hit zero" : "overflow");
	} else if ((regs->flags & X86_EFLAGS_SF) == 0) {
		/* Report if none of OF, ZF, nor SF are set. */
		refcount_error_report(regs, "unexpected saturation");
	}

	return true;
}
EXPORT_SYMBOL(ex_handler_refcount);

/*
 * Handler for when we fail to restore a task's FPU state.  We should never get
 * here because the FPU state of a task using the FPU (task->thread.fpu.state)
 * should always be valid.  However, past bugs have allowed userspace to set
 * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
 * These caused XRSTOR to fail when switching to the task, leaking the FPU
 * registers of the task previously executing on the CPU.  Mitigate this class
 * of vulnerability by restoring from the initial state (essentially, zeroing
 * out all the FPU registers) if we can't restore from the task's FPU state.
 */
__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
				    struct pt_regs *regs, int trapnr,
				    unsigned long error_code,
				    unsigned long fault_addr)
{
	regs->ip = ex_fixup_addr(fixup);

	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
		  (void *)instruction_pointer(regs));

	__copy_kernel_to_fpregs(&init_fpstate, -1);
	return true;
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);

/* Helper to check whether a uaccess fault indicates a kernel bug. */
static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
			  unsigned long fault_addr)
{
	/* This is the normal case: #PF with a fault address in userspace. */
	if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
		return false;

	/*
	 * This code can be reached for machine checks, but only if the #MC
	 * handler has already decided that it looks like a candidate for fixup.
	 * This e.g. happens when attempting to access userspace memory which
	 * the CPU can't access because of uncorrectable bad memory.
	 */
	if (trapnr == X86_TRAP_MC)
		return false;

	/*
	 * There are two remaining exception types we might encounter here:
	 *  - #PF for faulting accesses to kernel addresses
	 *  - #GP for faulting accesses to noncanonical addresses
	 * Complain about anything else.
	 */
	if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
		WARN(1, "unexpected trap %d in uaccess\n", trapnr);
		return false;
	}

	/*
	 * This is a faulting memory access in kernel space, on a kernel
	 * address, in a usercopy function. This can e.g. be caused by improper
	 * use of helpers like __put_user and by improper attempts to access
	 * userspace addresses in KERNEL_DS regions.
	 * The one (semi-)legitimate exception are probe_kernel_{read,write}(),
	 * which can be invoked from places like kgdb, /dev/mem (for reading)
	 * and privileged BPF code (for reading).
	 * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
	 * to tell us that faulting on kernel addresses, and even noncanonical
	 * addresses, in a userspace accessor does not necessarily imply a
	 * kernel bug, root might just be doing weird stuff.
	 */
	if (current->kernel_uaccess_faults_ok)
		return false;

	/* This is bad. Refuse the fixup so that we go into die(). */
	if (trapnr == X86_TRAP_PF) {
		pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
			 fault_addr);
	} else {
		pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
	}
	return true;
}

__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
				  struct pt_regs *regs, int trapnr,
				  unsigned long error_code,
				  unsigned long fault_addr)
{
	if (bogus_uaccess(regs, trapnr, fault_addr))
		return false;
	regs->ip = ex_fixup_addr(fixup);
	return true;
}
EXPORT_SYMBOL(ex_handler_uaccess);

__visible bool ex_handler_ext(const struct exception_table_entry *fixup,
			      struct pt_regs *regs, int trapnr,
			      unsigned long error_code,
			      unsigned long fault_addr)
{
	if (bogus_uaccess(regs, trapnr, fault_addr))
		return false;
	/* Special hack for uaccess_err */
	current->thread.uaccess_err = 1;
	regs->ip = ex_fixup_addr(fixup);
	return true;
}
EXPORT_SYMBOL(ex_handler_ext);

__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
				       struct pt_regs *regs, int trapnr,
				       unsigned long error_code,
				       unsigned long fault_addr)
{
	if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
			 (unsigned int)regs->cx, regs->ip, (void *)regs->ip))
		show_stack_regs(regs);

	/* Pretend that the read succeeded and returned 0. */
	regs->ip = ex_fixup_addr(fixup);
	regs->ax = 0;
	regs->dx = 0;
	return true;
}
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);

__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
				       struct pt_regs *regs, int trapnr,
				       unsigned long error_code,
				       unsigned long fault_addr)
{
	if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
			 (unsigned int)regs->cx, (unsigned int)regs->dx,
			 (unsigned int)regs->ax,  regs->ip, (void *)regs->ip))
		show_stack_regs(regs);

	/* Pretend that the write succeeded. */
	regs->ip = ex_fixup_addr(fixup);
	return true;
}
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);

__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
				   struct pt_regs *regs, int trapnr,
				   unsigned long error_code,
				   unsigned long fault_addr)
{
	if (static_cpu_has(X86_BUG_NULL_SEG))
		asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
	asm volatile ("mov %0, %%fs" : : "rm" (0));
	return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr);
}
EXPORT_SYMBOL(ex_handler_clear_fs);

__visible bool ex_has_fault_handler(unsigned long ip)
{
	const struct exception_table_entry *e;
	ex_handler_t handler;

	e = search_exception_tables(ip);
	if (!e)
		return false;
	handler = ex_fixup_handler(e);

	return handler == ex_handler_fault;
}

int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
		    unsigned long fault_addr)
{
	const struct exception_table_entry *e;
	ex_handler_t handler;

#ifdef CONFIG_PNPBIOS
	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
		extern u32 pnp_bios_is_utter_crap;
		pnp_bios_is_utter_crap = 1;
		printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
		__asm__ volatile(
			"movl %0, %%esp\n\t"
			"jmp *%1\n\t"
			: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
		panic("do_trap: can't hit this");
	}
#endif

	e = search_exception_tables(regs->ip);
	if (!e)
		return 0;

	handler = ex_fixup_handler(e);
	return handler(e, regs, trapnr, error_code, fault_addr);
}

extern unsigned int early_recursion_flag;

/* Restricted version used during very early boot */
void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
{
	/* Ignore early NMIs. */
	if (trapnr == X86_TRAP_NMI)
		return;

	if (early_recursion_flag > 2)
		goto halt_loop;

	/*
	 * Old CPUs leave the high bits of CS on the stack
	 * undefined.  I'm not sure which CPUs do this, but at least
	 * the 486 DX works this way.
	 * Xen pv domains are not using the default __KERNEL_CS.
	 */
	if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
		goto fail;

	/*
	 * The full exception fixup machinery is available as soon as
	 * the early IDT is loaded.  This means that it is the
	 * responsibility of extable users to either function correctly
	 * when handlers are invoked early or to simply avoid causing
	 * exceptions before they're ready to handle them.
	 *
	 * This is better than filtering which handlers can be used,
	 * because refusing to call a handler here is guaranteed to
	 * result in a hard-to-debug panic.
	 *
	 * Keep in mind that not all vectors actually get here.  Early
	 * page faults, for example, are special.
	 */
	if (fixup_exception(regs, trapnr, regs->orig_ax, 0))
		return;

	if (fixup_bug(regs, trapnr))
		return;

fail:
	early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
		     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
		     regs->orig_ax, read_cr2());

	show_regs(regs);

halt_loop:
	while (true)
		halt();
}