summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2016-11-17 16:49:15 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2016-11-17 16:49:15 +1100
commitffdda3dbba55ac65ae992592728fd7997eb0de42 (patch)
treedab56194286ebb0f6ed5b9fe96fdec6b47264537
parent85bc052106c0077af85ad6ade5a66971ef7c1be2 (diff)
parentb3b1e8fc4b177384736fbab4a2853fc344eb7ab5 (diff)
downloadlinux-ffdda3dbba55ac65ae992592728fd7997eb0de42.tar.gz
linux-ffdda3dbba55ac65ae992592728fd7997eb0de42.tar.xz
Merge branch 'akpm/master'
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/powerpc/Kconfig16
-rw-r--r--arch/powerpc/Makefile1
-rw-r--r--arch/powerpc/boot/string.S67
-rw-r--r--arch/powerpc/configs/powernv_defconfig2
-rw-r--r--arch/powerpc/configs/ppc64_defconfig2
-rw-r--r--arch/powerpc/configs/pseries_defconfig2
-rw-r--r--arch/powerpc/include/asm/debug.h2
-rw-r--r--arch/powerpc/include/asm/elf_util.h64
-rw-r--r--arch/powerpc/include/asm/ima.h29
-rw-r--r--arch/powerpc/include/asm/kexec.h29
-rw-r--r--arch/powerpc/include/asm/machdep.h4
-rw-r--r--arch/powerpc/include/asm/smp.h2
-rw-r--r--arch/powerpc/include/asm/systbl.h1
-rw-r--r--arch/powerpc/include/asm/unistd.h2
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/Makefile10
-rw-r--r--arch/powerpc/kernel/elf_util.c464
-rw-r--r--arch/powerpc/kernel/head_64.S2
-rw-r--r--arch/powerpc/kernel/ima_kexec.c223
-rw-r--r--arch/powerpc/kernel/kexec_elf_64.c280
-rw-r--r--arch/powerpc/kernel/machine_kexec_file_64.c588
-rw-r--r--arch/powerpc/kernel/misc_32.S2
-rw-r--r--arch/powerpc/kernel/misc_64.S6
-rw-r--r--arch/powerpc/kernel/module_64.c393
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c4
-rw-r--r--arch/powerpc/kernel/smp.c6
-rw-r--r--arch/powerpc/kernel/traps.c2
-rw-r--r--arch/powerpc/platforms/85xx/corenet_generic.c2
-rw-r--r--arch/powerpc/platforms/85xx/smp.c8
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c6
-rw-r--r--arch/powerpc/platforms/ps3/setup.c4
-rw-r--r--arch/powerpc/platforms/pseries/Makefile2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c4
-rw-r--r--arch/powerpc/purgatory/.gitignore2
-rw-r--r--arch/powerpc/purgatory/Makefile33
-rw-r--r--arch/powerpc/purgatory/console-ppc64.c37
-rw-r--r--arch/powerpc/purgatory/crtsavres.S5
-rw-r--r--arch/powerpc/purgatory/hvCall.S27
-rw-r--r--arch/powerpc/purgatory/hvCall.h8
-rw-r--r--arch/powerpc/purgatory/kexec-sha256.h11
-rw-r--r--arch/powerpc/purgatory/ppc64_asm.h20
-rw-r--r--arch/powerpc/purgatory/printf.c164
-rw-r--r--arch/powerpc/purgatory/purgatory-ppc64.c36
-rw-r--r--arch/powerpc/purgatory/purgatory.c62
-rw-r--r--arch/powerpc/purgatory/purgatory.h14
-rw-r--r--arch/powerpc/purgatory/sha256.c6
-rw-r--r--arch/powerpc/purgatory/sha256.h1
-rw-r--r--arch/powerpc/purgatory/string.S2
-rw-r--r--arch/powerpc/purgatory/v2wrap.S134
-rw-r--r--arch/sparc/kernel/nmi.c44
-rw-r--r--arch/x86/kernel/crash.c37
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c48
-rw-r--r--drivers/net/wireless/intel/iwlwifi/dvm/calib.c3
-rw-r--r--include/linux/ima.h12
-rw-r--r--include/linux/kexec.h36
-rw-r--r--include/linux/nmi.h24
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/kexec_file.c145
-rw-r--r--kernel/kexec_internal.h16
-rw-r--r--kernel/watchdog.c269
-rw-r--r--kernel/watchdog_hld.c227
-rw-r--r--security/integrity/ima/Kconfig12
-rw-r--r--security/integrity/ima/Makefile1
-rw-r--r--security/integrity/ima/ima.h31
-rw-r--r--security/integrity/ima/ima_crypto.c6
-rw-r--r--security/integrity/ima/ima_fs.c30
-rw-r--r--security/integrity/ima/ima_init.c2
-rw-r--r--security/integrity/ima/ima_kexec.c168
-rw-r--r--security/integrity/ima/ima_main.c1
-rw-r--r--security/integrity/ima/ima_queue.c76
-rw-r--r--security/integrity/ima/ima_template.c293
-rw-r--r--security/integrity/ima/ima_template_lib.c7
-rwxr-xr-xtools/testing/ktest/ktest.pl8
77 files changed, 3652 insertions, 648 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index baee8c7a1712..ae5c64c82027 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1441,6 +1441,10 @@
The builtin appraise policy appraises all files
owned by uid=0.
+ ima_canonical_fmt [IMA]
+ Use the canonical format for the binary runtime
+ measurements, instead of host native format.
+
ima_hash= [IMA]
Format: { md5 | sha1 | rmd160 | sha256 | sha384
| sha512 | ... }
diff --git a/arch/Kconfig b/arch/Kconfig
index abab6590f08f..13f27c1a39ba 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -5,6 +5,9 @@
config KEXEC_CORE
bool
+config HAVE_IMA_KEXEC
+ bool
+
config OPROFILE
tristate "OProfile system profiling"
depends on PROFILING
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2e1b47dbd7fb..e119acad8559 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -457,6 +457,20 @@ config KEXEC
interface is strongly in flux, so no good recommendation can be
made.
+config KEXEC_FILE
+ bool "kexec file based system call"
+ select KEXEC_CORE
+ select HAVE_IMA_KEXEC
+ select BUILD_BIN2C
+ depends on PPC64
+ depends on CRYPTO=y
+ depends on CRYPTO_SHA256=y
+ help
+ This is a new version of the kexec system call. This call is
+ file based and takes in file descriptors as system call arguments
+ for kernel and initramfs as opposed to a list of segments as is the
+ case for the older kexec call.
+
config RELOCATABLE
bool "Build a relocatable kernel"
depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE))
@@ -500,7 +514,7 @@ config CRASH_DUMP
config FA_DUMP
bool "Firmware-assisted dump"
- depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
+ depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC_CORE
help
A robust mechanism to get reliable kernel crash dump with
assistance from firmware. This approach does not use kexec,
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 041fda1e2a5d..426328b0b141 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -250,6 +250,7 @@ core-y += arch/powerpc/kernel/ \
core-$(CONFIG_XMON) += arch/powerpc/xmon/
core-$(CONFIG_KVM) += arch/powerpc/kvm/
core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/
+core-$(CONFIG_KEXEC_FILE) += arch/powerpc/purgatory/
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
diff --git a/arch/powerpc/boot/string.S b/arch/powerpc/boot/string.S
index acc9428f2789..b54bbad5f83d 100644
--- a/arch/powerpc/boot/string.S
+++ b/arch/powerpc/boot/string.S
@@ -11,9 +11,18 @@
#include "ppc_asm.h"
+/*
+ * The ppc64 kexec purgatory uses this file and packages it in ELF64,
+ * so it needs dot symbols for the ppc64 big endian ABI. This macro
+ * allows it to create those symbols.
+ */
+#ifndef DOTSYM
+#define DOTSYM(a) a
+#endif
+
.text
- .globl strcpy
-strcpy:
+ .globl DOTSYM(strcpy)
+DOTSYM(strcpy):
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r4)
@@ -22,8 +31,8 @@ strcpy:
bne 1b
blr
- .globl strncpy
-strncpy:
+ .globl DOTSYM(strncpy)
+DOTSYM(strncpy):
cmpwi 0,r5,0
beqlr
mtctr r5
@@ -35,8 +44,8 @@ strncpy:
bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
blr
- .globl strcat
-strcat:
+ .globl DOTSYM(strcat)
+DOTSYM(strcat):
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r5)
@@ -49,8 +58,8 @@ strcat:
bne 1b
blr
- .globl strchr
-strchr:
+ .globl DOTSYM(strchr)
+DOTSYM(strchr):
addi r3,r3,-1
1: lbzu r0,1(r3)
cmpw 0,r0,r4
@@ -60,8 +69,8 @@ strchr:
li r3,0
blr
- .globl strcmp
-strcmp:
+ .globl DOTSYM(strcmp)
+DOTSYM(strcmp):
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r5)
@@ -72,8 +81,8 @@ strcmp:
beq 1b
blr
- .globl strncmp
-strncmp:
+ .globl DOTSYM(strncmp)
+DOTSYM(strncmp):
mtctr r5
addi r5,r3,-1
addi r4,r4,-1
@@ -85,8 +94,8 @@ strncmp:
bdnzt eq,1b
blr
- .globl strlen
-strlen:
+ .globl DOTSYM(strlen)
+DOTSYM(strlen):
addi r4,r3,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
@@ -94,8 +103,8 @@ strlen:
subf r3,r3,r4
blr
- .globl memset
-memset:
+ .globl DOTSYM(memset)
+DOTSYM(memset):
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
addi r6,r3,-4
@@ -120,14 +129,14 @@ memset:
bdnz 8b
blr
- .globl memmove
-memmove:
+ .globl DOTSYM(memmove)
+DOTSYM(memmove):
cmplw 0,r3,r4
- bgt backwards_memcpy
+ bgt DOTSYM(backwards_memcpy)
/* fall through */
- .globl memcpy
-memcpy:
+ .globl DOTSYM(memcpy)
+DOTSYM(memcpy):
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
addi r6,r3,-4
addi r4,r4,-4
@@ -175,8 +184,8 @@ memcpy:
mtctr r7
b 1b
- .globl backwards_memcpy
-backwards_memcpy:
+ .globl DOTSYM(backwards_memcpy)
+DOTSYM(backwards_memcpy):
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
add r6,r3,r5
add r4,r4,r5
@@ -219,8 +228,8 @@ backwards_memcpy:
mtctr r7
b 1b
- .globl memchr
-memchr:
+ .globl DOTSYM(memchr)
+DOTSYM(memchr):
cmpwi 0,r5,0
blelr
mtctr r5
@@ -232,8 +241,8 @@ memchr:
li r3,0
blr
- .globl memcmp
-memcmp:
+ .globl DOTSYM(memcmp)
+DOTSYM(memcmp):
cmpwi 0,r5,0
ble 2f
mtctr r5
@@ -253,8 +262,8 @@ memcmp:
*
* flush_cache(addr, len)
*/
- .global flush_cache
-flush_cache:
+ .globl DOTSYM(flush_cache)
+DOTSYM(flush_cache):
addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */
rlwinm. 4,4,27,5,31
mtctr 4
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index d77af0eca967..a0359daac863 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -49,6 +49,7 @@ CONFIG_BINFMT_MISC=m
CONFIG_PPC_TRANSACTIONAL_MEM=y
CONFIG_HOTPLUG_CPU=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_NUMA=y
CONFIG_MEMORY_HOTPLUG=y
@@ -297,6 +298,7 @@ CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 58a98d40086f..0059d2088b9c 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -46,6 +46,7 @@ CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=m
CONFIG_PPC_TRANSACTIONAL_MEM=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_CRASH_DUMP=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -336,6 +337,7 @@ CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index c8cee26fd9a0..f7ea7c204c13 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -52,6 +52,7 @@ CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=m
CONFIG_PPC_TRANSACTIONAL_MEM=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -299,6 +300,7 @@ CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index a954e4975049..86308f177f2d 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -10,7 +10,7 @@ struct pt_regs;
extern struct dentry *powerpc_debugfs_root;
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
extern int (*__debugger)(struct pt_regs *regs);
extern int (*__debugger_ipi)(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/elf_util.h b/arch/powerpc/include/asm/elf_util.h
new file mode 100644
index 000000000000..3dbad8cc7179
--- /dev/null
+++ b/arch/powerpc/include/asm/elf_util.h
@@ -0,0 +1,64 @@
+/*
+ * Utility functions to work with ELF files.
+ *
+ * Copyright (C) 2016, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ASM_POWERPC_ELF_UTIL_H
+#define _ASM_POWERPC_ELF_UTIL_H
+
+#include <linux/elf.h>
+
+struct elf_info {
+ /*
+ * Where the ELF binary contents are kept.
+ * Memory managed by the user of the struct.
+ */
+ const char *buffer;
+
+ const struct elfhdr *ehdr;
+ const struct elf_phdr *proghdrs;
+ struct elf_shdr *sechdrs;
+};
+
+/*
+ * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
+ * gives the value maximum span in an instruction which uses a signed
+ * offset)
+ */
+static inline unsigned long elf_my_r2(const struct elf_shdr *sechdrs,
+ unsigned int toc_section)
+{
+ return sechdrs[toc_section].sh_addr + 0x8000;
+}
+
+unsigned int elf_toc_section(const struct elfhdr *ehdr,
+ const struct elf_shdr *sechdrs);
+
+int elf64_apply_relocate_add_item(const Elf64_Shdr *sechdrs, const char *strtab,
+ const Elf64_Rela *rela, const Elf64_Sym *sym,
+ unsigned long *location,
+ unsigned long address, unsigned long value,
+ unsigned long my_r2, const char *obj_name,
+ struct module *me);
+
+static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
+{
+ return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
+}
+
+int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
+ struct elf_info *elf_info);
+void elf_free_info(struct elf_info *elf_info);
+
+#endif /* _ASM_POWERPC_ELF_UTIL_H */
diff --git a/arch/powerpc/include/asm/ima.h b/arch/powerpc/include/asm/ima.h
new file mode 100644
index 000000000000..2313bdface34
--- /dev/null
+++ b/arch/powerpc/include/asm/ima.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_POWERPC_IMA_H
+#define _ASM_POWERPC_IMA_H
+
+struct kimage;
+
+int ima_get_kexec_buffer(void **addr, size_t *size);
+int ima_free_kexec_buffer(void);
+
+#ifdef CONFIG_IMA
+void remove_ima_buffer(void *fdt, int chosen_node);
+#else
+static inline void remove_ima_buffer(void *fdt, int chosen_node) {}
+#endif
+
+#ifdef CONFIG_IMA_KEXEC
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+ size_t size);
+
+int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node);
+#else
+static inline int setup_ima_buffer(const struct kimage *image, void *fdt,
+ int chosen_node)
+{
+ remove_ima_buffer(fdt, chosen_node);
+ return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
+
+#endif /* _ASM_POWERPC_IMA_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index a46f5f45570c..a49cab287acb 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -53,7 +53,7 @@
typedef void (*crash_shutdown_t)(void);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* This function is responsible for capturing register states if coming
@@ -91,7 +91,30 @@ static inline bool kdump_in_progress(void)
return crashing_cpu >= 0;
}
-#else /* !CONFIG_KEXEC */
+#ifdef CONFIG_KEXEC_FILE
+extern struct kexec_file_ops kexec_elf64_ops;
+
+#ifdef CONFIG_IMA_KEXEC
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ phys_addr_t ima_buffer_addr;
+ size_t ima_buffer_size;
+};
+#endif
+
+int setup_purgatory(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr, unsigned long stack_top,
+ int debug);
+int setup_new_fdt(const struct kimage *image, void *fdt,
+ unsigned long initrd_load_addr, unsigned long initrd_len,
+ const char *cmdline);
+bool find_debug_console(const void *fdt);
+int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
+#endif /* CONFIG_KEXEC_FILE */
+
+#else /* !CONFIG_KEXEC_CORE */
static inline void crash_kexec_secondary(struct pt_regs *regs) { }
static inline int overlaps_crashkernel(unsigned long start, unsigned long size)
@@ -116,7 +139,7 @@ static inline bool kdump_in_progress(void)
return false;
}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_KEXEC_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index e02cbc6a6c70..5011b69107a7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -183,7 +183,7 @@ struct machdep_calls {
*/
void (*machine_shutdown)(void);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void (*kexec_cpu_down)(int crash_shutdown, int secondary);
/* Called to do what every setup is needed on image and the
@@ -198,7 +198,7 @@ struct machdep_calls {
* no return.
*/
void (*machine_kexec)(struct kimage *image);
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_SUSPEND
/* These are called to disable and enable, respectively, IRQs when
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 0d02c11dc331..32db16d2e7ad 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -176,7 +176,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_PPC64 */
-#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC))
+#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE))
extern void smp_release_cpus(void);
#else
static inline void smp_release_cpus(void) { };
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 2fc5d4db503c..4b369d83fe9c 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -386,3 +386,4 @@ SYSCALL(mlock2)
SYSCALL(copy_file_range)
COMPAT_SYS_SPU(preadv2)
COMPAT_SYS_SPU(pwritev2)
+SYSCALL(kexec_file_load)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index e8cdfec8d512..eb1acee91a20 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 382
+#define NR_syscalls 383
#define __NR__exit __NR_exit
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index e9f5f41aa55a..2f26335a3c42 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -392,5 +392,6 @@
#define __NR_copy_file_range 379
#define __NR_preadv2 380
#define __NR_pwritev2 381
+#define __NR_kexec_file_load 382
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1925341dbb9c..c3b37171168c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -107,8 +107,14 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \
machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o elf_util.o \
+ kexec_elf_$(BITS).o
+ifeq ($(CONFIG_HAVE_IMA_KEXEC)$(CONFIG_IMA),yy)
+obj-y += ima_kexec.o
+endif
+
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
@@ -128,7 +134,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),)
obj-y += ppc_save_regs.o
endif
diff --git a/arch/powerpc/kernel/elf_util.c b/arch/powerpc/kernel/elf_util.c
new file mode 100644
index 000000000000..e57e7397f65c
--- /dev/null
+++ b/arch/powerpc/kernel/elf_util.c
@@ -0,0 +1,464 @@
+/*
+ * Utility functions to work with ELF files.
+ *
+ * Copyright (C) 2016, IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf.c. Heavily modified for the
+ * kernel by Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <asm/elf_util.h>
+#include <asm-generic/module.h>
+
+#if ELF_CLASS == ELFCLASS32
+#define elf_addr_to_cpu elf32_to_cpu
+
+#ifndef Elf_Rel
+#define Elf_Rel Elf32_Rel
+#endif /* Elf_Rel */
+#else /* ELF_CLASS == ELFCLASS32 */
+#define elf_addr_to_cpu elf64_to_cpu
+
+#ifndef Elf_Rel
+#define Elf_Rel Elf64_Rel
+#endif /* Elf_Rel */
+
+/**
+ * elf_toc_section - find the toc section in the file with the given ELF headers
+ * @ehdr: Pointer to already loaded ELF header.
+ * @sechdrs: Pointer to already loaded section headers contents.
+ *
+ * Return: TOC section index or 0 if one wasn't found.
+ */
+unsigned int elf_toc_section(const struct elfhdr *ehdr,
+ const struct elf_shdr *sechdrs)
+{
+ int i;
+ const char *shstrtab;
+
+ /* Section header string table. */
+ shstrtab = (const char *) sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_size == 0)
+ continue;
+
+ if (!strcmp(&shstrtab[sechdrs[i].sh_name], ".toc"))
+ return i;
+ }
+
+ return 0;
+}
+
+static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le64_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be64_to_cpu(value);
+
+ return value;
+}
+#endif /* ELF_CLASS == ELFCLASS32 */
+
+static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le16_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be16_to_cpu(value);
+
+ return value;
+}
+
+static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le32_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be32_to_cpu(value);
+
+ return value;
+}
+
+/**
+ * elf_is_ehdr_sane - check that it is safe to use the ELF header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
+{
+ if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
+ pr_debug("Bad program header size.\n");
+ return false;
+ } else if (ehdr->e_shnum > 0 &&
+ ehdr->e_shentsize != sizeof(struct elf_shdr)) {
+ pr_debug("Bad section header size.\n");
+ return false;
+ } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
+ ehdr->e_version != EV_CURRENT) {
+ pr_debug("Unknown ELF version.\n");
+ return false;
+ }
+
+ if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+ size_t phdr_size;
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+ /* Sanity check the program header table location. */
+ if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
+ pr_debug("Program headers at invalid location.\n");
+ return false;
+ } else if (ehdr->e_phoff + phdr_size > buf_len) {
+ pr_debug("Program headers truncated.\n");
+ return false;
+ }
+ }
+
+ if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+ size_t shdr_size;
+
+ /*
+ * e_shnum is at most 65536 so calculating
+ * the size of the section header cannot overflow.
+ */
+ shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
+
+ /* Sanity check the section header table location. */
+ if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
+ pr_debug("Section headers at invalid location.\n");
+ return false;
+ } else if (ehdr->e_shoff + shdr_size > buf_len) {
+ pr_debug("Section headers truncated.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
+{
+ struct elfhdr *buf_ehdr;
+
+ if (len < sizeof(*buf_ehdr)) {
+ pr_debug("Buffer is too small to hold ELF header.\n");
+ return -ENOEXEC;
+ }
+
+ memset(ehdr, 0, sizeof(*ehdr));
+ memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
+ if (!elf_is_elf_file(ehdr)) {
+ pr_debug("No ELF header magic.\n");
+ return -ENOEXEC;
+ }
+
+ if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
+ pr_debug("Not a supported ELF class.\n");
+ return -1;
+ } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
+ ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
+ pr_debug("Not a supported ELF data format.\n");
+ return -ENOEXEC;
+ }
+
+ buf_ehdr = (struct elfhdr *) buf;
+ if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
+ pr_debug("Bad ELF header size.\n");
+ return -ENOEXEC;
+ }
+
+ ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
+ ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
+ ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
+ ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
+ ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
+ ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
+ ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
+ ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
+ ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
+ ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
+ ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
+ ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
+
+ return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_is_phdr_sane - check that it is safe to use the program header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
+{
+
+ if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
+ pr_debug("ELF segment location wraps around.\n");
+ return false;
+ } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
+ pr_debug("ELF segment not in file.\n");
+ return false;
+ } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
+ pr_debug("ELF segment address wraps around.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
+ int idx)
+{
+ /* Override the const in proghdrs, we are the ones doing the loading. */
+ struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
+ const char *pbuf;
+ struct elf_phdr *buf_phdr;
+
+ pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
+ buf_phdr = (struct elf_phdr *) pbuf;
+
+ phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
+ phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
+ phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
+ phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
+ phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
+
+ /*
+ * The following fields have a type equivalent to Elf_Addr
+ * both in 32 bit and 64 bit ELF.
+ */
+ phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
+ phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
+ phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
+
+ return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_phdrs - read the program headers from the buffer
+ *
+ * This function assumes that the program header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_phdrs(const char *buf, size_t len,
+ struct elf_info *elf_info)
+{
+ size_t phdr_size, i;
+ const struct elfhdr *ehdr = elf_info->ehdr;
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+ elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
+ if (!elf_info->proghdrs)
+ return -ENOMEM;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ int ret;
+
+ ret = elf_read_phdr(buf, len, elf_info, i);
+ if (ret) {
+ kfree(elf_info->proghdrs);
+ elf_info->proghdrs = NULL;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_is_shdr_sane - check that it is safe to use the section header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
+{
+ bool size_ok;
+
+ /* SHT_NULL headers have undefined values, so we can't check them. */
+ if (shdr->sh_type == SHT_NULL)
+ return true;
+
+ /* Now verify sh_entsize */
+ switch (shdr->sh_type) {
+ case SHT_SYMTAB:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
+ break;
+ case SHT_RELA:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
+ break;
+ case SHT_DYNAMIC:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
+ break;
+ case SHT_REL:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
+ break;
+ case SHT_NOTE:
+ case SHT_PROGBITS:
+ case SHT_HASH:
+ case SHT_NOBITS:
+ default:
+ /*
+ * This is a section whose entsize requirements
+ * I don't care about. If I don't know about
+ * the section I can't care about it's entsize
+ * requirements.
+ */
+ size_ok = true;
+ break;
+ }
+
+ if (!size_ok) {
+ pr_debug("ELF section with wrong entry size.\n");
+ return false;
+ } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
+ pr_debug("ELF section address wraps around.\n");
+ return false;
+ }
+
+ if (shdr->sh_type != SHT_NOBITS) {
+ if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
+ pr_debug("ELF section location wraps around.\n");
+ return false;
+ } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
+ pr_debug("ELF section not in file.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
+ int idx)
+{
+ struct elf_shdr *shdr = &elf_info->sechdrs[idx];
+ const struct elfhdr *ehdr = elf_info->ehdr;
+ const char *sbuf;
+ struct elf_shdr *buf_shdr;
+
+ sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
+ buf_shdr = (struct elf_shdr *) sbuf;
+
+ shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
+ shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
+ shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
+ shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
+ shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
+ shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
+
+ /*
+ * The following fields have a type equivalent to Elf_Addr
+ * both in 32 bit and 64 bit ELF.
+ */
+ shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
+ shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
+ shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
+ shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
+
+ return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_shdrs - read the section headers from the buffer
+ *
+ * This function assumes that the section header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_shdrs(const char *buf, size_t len,
+ struct elf_info *elf_info)
+{
+ size_t shdr_size, i;
+
+ /*
+ * e_shnum is at most 65536 so calculating
+ * the size of the section header cannot overflow.
+ */
+ shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
+
+ elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
+ if (!elf_info->sechdrs)
+ return -ENOMEM;
+
+ for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
+ int ret;
+
+ ret = elf_read_shdr(buf, len, elf_info, i);
+ if (ret) {
+ kfree(elf_info->sechdrs);
+ elf_info->sechdrs = NULL;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
+ * @buf: Buffer to read ELF file from.
+ * @len: Size of @buf.
+ * @ehdr: Pointer to existing struct which will be populated.
+ * @elf_info: Pointer to existing struct which will be populated.
+ *
+ * This function allows reading ELF files with different byte order than
+ * the kernel, byte-swapping the fields as needed.
+ *
+ * Return:
+ * On success returns 0, and the caller should call elf_free_info(elf_info) to
+ * free the memory allocated for the section and program headers.
+ */
+int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
+ struct elf_info *elf_info)
+{
+ int ret;
+
+ ret = elf_read_ehdr(buf, len, ehdr);
+ if (ret)
+ return ret;
+
+ elf_info->buffer = buf;
+ elf_info->ehdr = ehdr;
+ if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+ ret = elf_read_phdrs(buf, len, elf_info);
+ if (ret)
+ return ret;
+ }
+ if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+ ret = elf_read_shdrs(buf, len, elf_info);
+ if (ret) {
+ kfree(elf_info->proghdrs);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_free_info - free memory allocated by elf_read_from_buffer
+ */
+void elf_free_info(struct elf_info *elf_info)
+{
+ kfree(elf_info->proghdrs);
+ kfree(elf_info->sechdrs);
+ memset(elf_info, 0, sizeof(*elf_info));
+}
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 451a8e1cf57b..b9c65d8fee4e 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -160,7 +160,7 @@ __secondary_hold:
cmpdi 0,r12,0
beq 100b
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
#ifdef CONFIG_PPC_BOOK3E
tovirt(r12,r12)
#endif
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c
new file mode 100644
index 000000000000..5ea42c937ca9
--- /dev/null
+++ b/arch/powerpc/kernel/ima_kexec.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Authors:
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+
+static int get_addr_size_cells(int *addr_cells, int *size_cells)
+{
+ struct device_node *root;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return -EINVAL;
+
+ *addr_cells = of_n_addr_cells(root);
+ *size_cells = of_n_size_cells(root);
+
+ of_node_put(root);
+
+ return 0;
+}
+
+static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
+ size_t *size)
+{
+ int ret, addr_cells, size_cells;
+
+ ret = get_addr_size_cells(&addr_cells, &size_cells);
+ if (ret)
+ return ret;
+
+ if (len < 4 * (addr_cells + size_cells))
+ return -ENOENT;
+
+ *addr = of_read_number(prop, addr_cells);
+ *size = of_read_number(prop + 4 * addr_cells, size_cells);
+
+ return 0;
+}
+
+/**
+ * ima_get_kexec_buffer - get IMA buffer from the previous kernel
+ * @addr: On successful return, set to point to the buffer contents.
+ * @size: On successful return, set to the buffer size.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int ima_get_kexec_buffer(void **addr, size_t *size)
+{
+ int ret, len;
+ unsigned long tmp_addr;
+ size_t tmp_size;
+ const void *prop;
+
+ prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
+ if (!prop)
+ return -ENOENT;
+
+ ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
+ if (ret)
+ return ret;
+
+ *addr = __va(tmp_addr);
+ *size = tmp_size;
+
+ return 0;
+}
+
+/**
+ * ima_free_kexec_buffer - free memory used by the IMA buffer
+ */
+int ima_free_kexec_buffer(void)
+{
+ int ret;
+ unsigned long addr;
+ size_t size;
+ struct property *prop;
+
+ prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
+ if (!prop)
+ return -ENOENT;
+
+ ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
+ if (ret)
+ return ret;
+
+ ret = of_remove_property(of_chosen, prop);
+ if (ret)
+ return ret;
+
+ return memblock_free(addr, size);
+
+}
+
+/**
+ * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
+ *
+ * The IMA measurement buffer is of no use to a subsequent kernel, so we always
+ * remove it from the device tree.
+ */
+void remove_ima_buffer(void *fdt, int chosen_node)
+{
+ int ret, len;
+ unsigned long addr;
+ size_t size;
+ const void *prop;
+
+ prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
+ if (!prop)
+ return;
+
+ ret = do_get_kexec_buffer(prop, len, &addr, &size);
+ fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
+ if (ret)
+ return;
+
+ ret = delete_fdt_mem_rsv(fdt, addr, size);
+ if (!ret)
+ pr_debug("Removed old IMA buffer reservation.\n");
+}
+
+#ifdef CONFIG_IMA_KEXEC
+/**
+ * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
+ *
+ * Architectures should use this function to pass on the IMA buffer
+ * information to the next kernel.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+ size_t size)
+{
+ image->arch.ima_buffer_addr = load_addr;
+ image->arch.ima_buffer_size = size;
+
+ return 0;
+}
+
+static int write_number(void *p, u64 value, int cells)
+{
+ if (cells == 1) {
+ u32 tmp;
+
+ if (value > U32_MAX)
+ return -EINVAL;
+
+ tmp = cpu_to_be32(value);
+ memcpy(p, &tmp, sizeof(tmp));
+ } else if (cells == 2) {
+ u64 tmp;
+
+ tmp = cpu_to_be64(value);
+ memcpy(p, &tmp, sizeof(tmp));
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * setup_ima_buffer - add IMA buffer information to the fdt
+ * @image: kexec image being loaded.
+ * @fdt: Flattened device tree for the next kernel.
+ * @chosen_node: Offset to the chosen node.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
+{
+ int ret, addr_cells, size_cells, entry_size;
+ u8 value[16];
+
+ remove_ima_buffer(fdt, chosen_node);
+ if (!image->arch.ima_buffer_size)
+ return 0;
+
+ ret = get_addr_size_cells(&addr_cells, &size_cells);
+ if (ret)
+ return ret;
+
+ entry_size = 4 * (addr_cells + size_cells);
+
+ if (entry_size > sizeof(value))
+ return -EINVAL;
+
+ ret = write_number(value, image->arch.ima_buffer_addr, addr_cells);
+ if (ret)
+ return ret;
+
+ ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size,
+ size_cells);
+ if (ret)
+ return ret;
+
+ ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value,
+ entry_size);
+ if (ret < 0)
+ return -EINVAL;
+
+ ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
+ image->arch.ima_buffer_size);
+ if (ret)
+ return -EINVAL;
+
+ pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
+ image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
+
+ return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
new file mode 100644
index 000000000000..1bd1e9865835
--- /dev/null
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -0,0 +1,280 @@
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "kexec_elf: " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <asm/elf_util.h>
+
+#define PURGATORY_STACK_SIZE (16 * 1024)
+
+/**
+ * build_elf_exec_info - read ELF executable and check that we can use it
+ */
+static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
+ struct elf_info *elf_info)
+{
+ int i;
+ int ret;
+
+ ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
+ if (ret)
+ return ret;
+
+ /* Big endian vmlinux has type ET_DYN. */
+ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
+ pr_err("Not an ELF executable.\n");
+ goto error;
+ } else if (!elf_info->proghdrs) {
+ pr_err("No ELF program header.\n");
+ goto error;
+ }
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ /*
+ * Kexec does not support loading interpreters.
+ * In addition this check keeps us from attempting
+ * to kexec ordinay executables.
+ */
+ if (elf_info->proghdrs[i].p_type == PT_INTERP) {
+ pr_err("Requires an ELF interpreter.\n");
+ goto error;
+ }
+ }
+
+ return 0;
+error:
+ elf_free_info(elf_info);
+ return -ENOEXEC;
+}
+
+static int elf64_probe(const char *buf, unsigned long len)
+{
+ struct elfhdr ehdr;
+ struct elf_info elf_info;
+ int ret;
+
+ ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
+ if (ret)
+ return ret;
+
+ elf_free_info(&elf_info);
+
+ return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_exec_load - load ELF executable image
+ * @lowest_load_addr: On return, will be the address where the first PT_LOAD
+ * section will be loaded in memory.
+ *
+ * Return:
+ * 0 on success, negative value on failure.
+ */
+static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
+ struct elf_info *elf_info,
+ unsigned long *lowest_load_addr)
+{
+ unsigned long base = 0, lowest_addr = UINT_MAX;
+ int ret;
+ size_t i;
+ struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
+ .top_down = false };
+
+ /* Read in the PT_LOAD segments. */
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ unsigned long load_addr;
+ size_t size;
+ const struct elf_phdr *phdr;
+
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.buf_min = phdr->p_paddr + base;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ load_addr = kbuf.mem;
+
+ if (load_addr < lowest_addr)
+ lowest_addr = load_addr;
+ }
+
+ /* Update entry point to reflect new load address. */
+ ehdr->e_entry += base;
+
+ *lowest_load_addr = lowest_addr;
+ ret = 0;
+ out:
+ return ret;
+}
+
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int i, ret;
+ unsigned int fdt_size;
+ unsigned long kernel_load_addr, purgatory_load_addr;
+ unsigned long initrd_load_addr = 0, fdt_load_addr, stack_top;
+ void *fdt;
+ const void *slave_code;
+ struct elfhdr ehdr;
+ struct elf_info elf_info;
+ struct fdt_reserve_entry *rsvmap;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size };
+
+ ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ goto out;
+
+ ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
+ if (ret)
+ goto out;
+
+ pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+ ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true,
+ &purgatory_load_addr);
+ if (ret) {
+ pr_err("Loading purgatory failed.\n");
+ goto out;
+ }
+
+ pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_load_addr = kbuf.mem;
+
+ pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+ }
+
+ fdt_size = fdt_totalsize(initial_boot_params) * 2;
+ fdt = kmalloc(fdt_size, GFP_KERNEL);
+ if (!fdt) {
+ pr_err("Not enough memory for the device tree.\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+ if (ret)
+ goto out;
+
+ /*
+ * Documentation/devicetree/booting-without-of.txt says we need to
+ * add a reservation entry for the device tree block, but
+ * early_init_fdt_reserve_self reserves the memory even if there's no
+ * such entry. We'll add a reservation entry anyway, to be safe and
+ * compliant.
+ *
+ * Use dummy values, we will correct them in a moment.
+ */
+ ret = fdt_add_mem_rsv(fdt, 1, 1);
+ if (ret) {
+ pr_err("Error reserving device tree memory: %s\n",
+ fdt_strerror(ret));
+ ret = -EINVAL;
+ goto out;
+ }
+ fdt_pack(fdt);
+
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_size;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ fdt_load_addr = kbuf.mem;
+
+ /*
+ * Fix fdt reservation, now that we now where it will be loaded
+ * and how big it is.
+ */
+ rsvmap = fdt + fdt_off_mem_rsvmap(fdt);
+ i = fdt_num_mem_rsv(fdt) - 1;
+ rsvmap[i].address = cpu_to_fdt64(fdt_load_addr);
+ rsvmap[i].size = cpu_to_fdt64(fdt_totalsize(fdt));
+
+ pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+ kbuf.memsz = PURGATORY_STACK_SIZE;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ ret = kexec_locate_mem_hole(&kbuf);
+ if (ret) {
+ pr_err("Couldn't find free memory for the purgatory stack.\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ stack_top = kbuf.mem + PURGATORY_STACK_SIZE - 1;
+ pr_debug("Purgatory stack is at 0x%lx\n", stack_top);
+
+ slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
+ ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr, stack_top,
+ find_debug_console(fdt));
+ if (ret)
+ pr_err("Error setting up the purgatory.\n");
+
+out:
+ elf_free_info(&elf_info);
+
+ /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
+ return ret ? ERR_PTR(ret) : fdt;
+}
+
+struct kexec_file_ops kexec_elf64_ops = {
+ .probe = elf64_probe,
+ .load = elf64_load,
+};
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
new file mode 100644
index 000000000000..e4da26dabbaf
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -0,0 +1,588 @@
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <asm/elf_util.h>
+#include <asm/ima.h>
+
+#define SLAVE_CODE_SIZE 256
+
+static struct kexec_file_ops *kexec_file_loaders[] = {
+ &kexec_elf64_ops,
+};
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int i, ret = -ENOEXEC;
+ struct kexec_file_ops *fops;
+
+ /* We don't support crash kernels yet. */
+ if (image->type == KEXEC_TYPE_CRASH)
+ return -ENOTSUPP;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+ fops = kexec_file_loaders[i];
+ if (!fops || !fops->probe)
+ continue;
+
+ ret = fops->probe(buf, buf_len);
+ if (!ret) {
+ image->fops = fops;
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+ return image->fops->load(image, image->kernel_buf,
+ image->kernel_buf_len, image->initrd_buf,
+ image->initrd_buf_len, image->cmdline_buf,
+ image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ if (!image->fops || !image->fops->cleanup)
+ return 0;
+
+ return image->fops->cleanup(image->image_loader_data);
+}
+
+/**
+ * arch_kexec_walk_mem - call func(data) for each unreserved memory block
+ * @kbuf: Context info for the search. Also passed to @func.
+ * @func: Function to call for each memory block.
+ *
+ * This function is used by kexec_add_buffer and kexec_locate_mem_hole
+ * to find unreserved memory to load kexec segments into.
+ *
+ * Return: The memory walk will stop when func returns a non-zero value
+ * and that value will be returned. If all free regions are visited without
+ * func returning non-zero, then zero will be returned.
+ */
+int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
+{
+ int ret = 0;
+ u64 i;
+ phys_addr_t mstart, mend;
+
+ if (kbuf->top_down) {
+ for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
+ &mstart, &mend, NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ ret = func(mstart, mend - 1, kbuf);
+ if (ret)
+ break;
+ }
+ } else {
+ for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend,
+ NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ ret = func(mstart, mend - 1, kbuf);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * arch_kexec_apply_relocations_add - apply purgatory relocations
+ * @ehdr: Pointer to ELF headers.
+ * @sechdrs: Pointer to section headers.
+ * @relsec: Section index of SHT_RELA section.
+ *
+ * Elf64_Shdr.sh_offset has been modified to keep the pointer to the section
+ * contents, while Elf64_Shdr.sh_addr points to the final address of the
+ * section in memory.
+ */
+int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
+ Elf64_Shdr *sechdrs, unsigned int relsec)
+{
+ unsigned int i;
+ int ret;
+ int reloc_type;
+ unsigned long *location;
+ unsigned long address;
+ unsigned long value;
+ const char *name;
+ Elf64_Sym *sym;
+ /* Section containing the relocation entries. */
+ Elf64_Shdr *rel_section = &sechdrs[relsec];
+ const Elf64_Rela *rela = (const Elf64_Rela *) rel_section->sh_offset;
+ /* Section to which relocations apply. */
+ Elf64_Shdr *target_section = &sechdrs[rel_section->sh_info];
+ /* Associated symbol table. */
+ Elf64_Shdr *symtabsec = &sechdrs[rel_section->sh_link];
+ void *syms_base = (void *) symtabsec->sh_offset;
+ void *loc_base = (void *) target_section->sh_offset;
+ Elf64_Addr addr_base = target_section->sh_addr;
+ unsigned long sec_base;
+ unsigned long r2;
+ unsigned int toc;
+ const char *strtab;
+
+ if (symtabsec->sh_link >= ehdr->e_shnum) {
+ /* Invalid strtab section number */
+ pr_err("Invalid string table section index %d\n",
+ symtabsec->sh_link);
+ return -ENOEXEC;
+ }
+
+ toc = elf_toc_section(ehdr, sechdrs);
+ if (!toc) {
+ pr_err("Purgatory TOC section not found.");
+ return -ENOEXEC;
+ }
+
+ r2 = elf_my_r2(sechdrs, toc);
+
+ /* String table for the associated symbol table. */
+ strtab = (const char *) sechdrs[symtabsec->sh_link].sh_offset;
+
+ for (i = 0; i < rel_section->sh_size / sizeof(Elf64_Rela); i++) {
+ /*
+ * rels[i].r_offset contains the byte offset from the beginning
+ * of section to the storage unit affected.
+ *
+ * This is the location to update in the temporary buffer where
+ * the section is currently loaded. The section will finally
+ * be loaded to a different address later, pointed to by
+ * addr_base.
+ */
+ location = loc_base + rela[i].r_offset;
+
+ /* Final address of the location. */
+ address = addr_base + rela[i].r_offset;
+
+ /* This is the symbol the relocation is referring to. */
+ sym = (Elf64_Sym *) syms_base + ELF64_R_SYM(rela[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = "<unnamed symbol>";
+
+ reloc_type = ELF64_R_TYPE(rela[i].r_info);
+
+ pr_debug("RELOC at %p: %i-type as %s (0x%lx) + %li\n",
+ location, reloc_type, name, (unsigned long)sym->st_value,
+ (long)rela[i].r_addend);
+
+ /*
+ * TOC symbols appear as undefined but should be
+ * resolved as well, so allow them to be processed.
+ */
+ if (sym->st_shndx == SHN_UNDEF && strcmp(name, ".TOC.") != 0 &&
+ reloc_type != R_PPC64_TOC) {
+ pr_err("Undefined symbol: %s\n", name);
+ return -ENOEXEC;
+ } else if (sym->st_shndx == SHN_COMMON) {
+ pr_err("Symbol '%s' in common section.\n",
+ name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx != SHN_ABS) {
+ if (sym->st_shndx >= ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ }
+
+ sec_base = sechdrs[sym->st_shndx].sh_addr;
+ } else
+ sec_base = 0;
+
+ /* `Everything is relative'. */
+ value = sym->st_value + sec_base + rela[i].r_addend;
+
+ ret = elf64_apply_relocate_add_item(sechdrs, strtab, &rela[i],
+ sym, location, address,
+ value, r2,
+ "kexec purgatory", NULL);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * setup_purgatory - initialize the purgatory's global variables
+ * @image: kexec image.
+ * @slave_code: Slave code for the purgatory.
+ * @fdt: Flattened device tree for the next kernel.
+ * @kernel_load_addr: Address where the kernel is loaded.
+ * @fdt_load_addr: Address where the flattened device tree is loaded.
+ * @stack_top: Address where the purgatory can place its stack.
+ * @debug: Can the purgatory print messages to the console?
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_purgatory(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr, unsigned long stack_top,
+ int debug)
+{
+ int ret, tree_node;
+ const void *prop;
+ unsigned long opal_base, opal_entry;
+ uint64_t toc;
+ unsigned int *slave_code_buf, master_entry;
+ unsigned int toc_section;
+
+ slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
+ if (!slave_code_buf)
+ return -ENOMEM;
+
+ /* Get the slave code from the new kernel and put it in purgatory. */
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+ slave_code_buf, SLAVE_CODE_SIZE,
+ true);
+ if (ret) {
+ kfree(slave_code_buf);
+ return ret;
+ }
+
+ master_entry = slave_code_buf[0];
+ memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
+ slave_code_buf[0] = master_entry;
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+ slave_code_buf, SLAVE_CODE_SIZE,
+ false);
+ kfree(slave_code_buf);
+
+ ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
+ sizeof(kernel_load_addr), false);
+ if (ret)
+ return ret;
+ ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
+ sizeof(fdt_load_addr), false);
+ if (ret)
+ return ret;
+
+ tree_node = fdt_path_offset(fdt, "/ibm,opal");
+ if (tree_node >= 0) {
+ prop = fdt_getprop(fdt, tree_node, "opal-base-address", NULL);
+ if (!prop) {
+ pr_err("OPAL address not found in the device tree.\n");
+ return -EINVAL;
+ }
+ opal_base = fdt64_to_cpu((const fdt64_t *) prop);
+
+ prop = fdt_getprop(fdt, tree_node, "opal-entry-address", NULL);
+ if (!prop) {
+ pr_err("OPAL address not found in the device tree.\n");
+ return -EINVAL;
+ }
+ opal_entry = fdt64_to_cpu((const fdt64_t *) prop);
+
+ ret = kexec_purgatory_get_set_symbol(image, "opal_base",
+ &opal_base,
+ sizeof(opal_base), false);
+ if (ret)
+ return ret;
+ ret = kexec_purgatory_get_set_symbol(image, "opal_entry",
+ &opal_entry,
+ sizeof(opal_entry), false);
+ if (ret)
+ return ret;
+ }
+
+ ret = kexec_purgatory_get_set_symbol(image, "stack", &stack_top,
+ sizeof(stack_top), false);
+ if (ret)
+ return ret;
+
+ toc_section = elf_toc_section(image->purgatory_info.ehdr,
+ image->purgatory_info.sechdrs);
+ if (!toc_section)
+ return -ENOEXEC;
+
+ toc = elf_my_r2(image->purgatory_info.sechdrs, toc_section);
+ ret = kexec_purgatory_get_set_symbol(image, "my_toc", &toc, sizeof(toc),
+ false);
+ if (ret)
+ return ret;
+
+ pr_debug("Purgatory TOC is at 0x%llx\n", toc);
+
+ ret = kexec_purgatory_get_set_symbol(image, "debug", &debug,
+ sizeof(debug), false);
+ if (ret)
+ return ret;
+ if (!debug)
+ pr_debug("Disabling purgatory output.\n");
+
+ return 0;
+}
+
+/**
+ * delete_fdt_mem_rsv - delete memory reservation with given address and size
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
+{
+ int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
+
+ for (i = 0; i < num_rsvs; i++) {
+ uint64_t rsv_start, rsv_size;
+
+ ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
+ if (ret) {
+ pr_err("Malformed device tree.\n");
+ return -EINVAL;
+ }
+
+ if (rsv_start == start && rsv_size == size) {
+ ret = fdt_del_mem_rsv(fdt, i);
+ if (ret) {
+ pr_err("Error deleting device tree reservation.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * setup_new_fdt - modify /chosen and memory reservation for the next kernel
+ * @image: kexec image being loaded.
+ * @fdt: Flattened device tree for the next kernel.
+ * @initrd_load_addr: Address where the next initrd will be loaded.
+ * @initrd_len: Size of the next initrd, or 0 if there will be none.
+ * @cmdline: Command line for the next kernel, or NULL if there will
+ * be none.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_new_fdt(const struct kimage *image, void *fdt,
+ unsigned long initrd_load_addr, unsigned long initrd_len,
+ const char *cmdline)
+{
+ int ret, chosen_node;
+ const void *prop;
+
+ /* Remove memory reservation for the current device tree. */
+ ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
+ fdt_totalsize(initial_boot_params));
+ if (ret == 0)
+ pr_debug("Removed old device tree reservation.\n");
+ else if (ret != -ENOENT)
+ return ret;
+
+ chosen_node = fdt_path_offset(fdt, "/chosen");
+ if (chosen_node == -FDT_ERR_NOTFOUND) {
+ chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+ "chosen");
+ if (chosen_node < 0) {
+ pr_err("Error creating /chosen.\n");
+ return -EINVAL;
+ }
+ } else if (chosen_node < 0) {
+ pr_err("Malformed device tree: error reading /chosen.\n");
+ return -EINVAL;
+ }
+
+ /* Did we boot using an initrd? */
+ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
+ if (prop) {
+ uint64_t tmp_start, tmp_end, tmp_size;
+
+ tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
+ if (!prop) {
+ pr_err("Malformed device tree.\n");
+ return -EINVAL;
+ }
+ tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+ /*
+ * kexec reserves exact initrd size, while firmware may
+ * reserve a multiple of PAGE_SIZE, so check for both.
+ */
+ tmp_size = tmp_end - tmp_start;
+ ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
+ if (ret == -ENOENT)
+ ret = delete_fdt_mem_rsv(fdt, tmp_start,
+ round_up(tmp_size, PAGE_SIZE));
+ if (ret == 0)
+ pr_debug("Removed old initrd reservation.\n");
+ else if (ret != -ENOENT)
+ return ret;
+
+ /* If there's no new initrd, delete the old initrd's info. */
+ if (initrd_len == 0) {
+ ret = fdt_delprop(fdt, chosen_node,
+ "linux,initrd-start");
+ if (ret) {
+ pr_err("Error deleting linux,initrd-start.\n");
+ return -EINVAL;
+ }
+
+ ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
+ if (ret) {
+ pr_err("Error deleting linux,initrd-end.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (initrd_len) {
+ ret = fdt_setprop_u64(fdt, chosen_node,
+ "linux,initrd-start",
+ initrd_load_addr);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ /* initrd-end is the first address after the initrd image. */
+ ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
+ initrd_load_addr + initrd_len);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
+ if (ret) {
+ pr_err("Error reserving initrd memory: %s\n",
+ fdt_strerror(ret));
+ return -EINVAL;
+ }
+ }
+
+ if (cmdline != NULL) {
+ ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+ } else {
+ ret = fdt_delprop(fdt, chosen_node, "bootargs");
+ if (ret && ret != -FDT_ERR_NOTFOUND) {
+ pr_err("Error deleting bootargs.\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = setup_ima_buffer(image, fdt, chosen_node);
+ if (ret) {
+ pr_err("Error setting up the new device tree.\n");
+ return ret;
+ }
+
+ ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
+ if (ret) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * find_debug_console - find out whether there is a console for the purgatory
+ * @fdt: Flattened device tree to search.
+ */
+bool find_debug_console(const void *fdt)
+{
+ int len;
+ int console_node, chosen_node;
+ const void *prop, *colon;
+
+ chosen_node = fdt_path_offset(fdt, "/chosen");
+ if (chosen_node < 0) {
+ pr_err("Malformed device tree: /chosen not found.\n");
+ return false;
+ }
+
+ prop = fdt_getprop(fdt, chosen_node, "stdout-path", &len);
+ if (prop == NULL) {
+ if (len == -FDT_ERR_NOTFOUND) {
+ prop = fdt_getprop(fdt, chosen_node,
+ "linux,stdout-path", &len);
+ if (prop == NULL) {
+ pr_debug("Unable to find [linux,]stdout-path.\n");
+ return false;
+ }
+ } else {
+ pr_debug("Error finding console: %s\n",
+ fdt_strerror(len));
+ return false;
+ }
+ }
+
+ /*
+ * stdout-path can have a ':' separating the path from device-specific
+ * information, so we should only consider what's before it.
+ */
+ colon = strchr(prop, ':');
+ if (colon != NULL)
+ len = colon - prop;
+ else
+ len -= 1; /* Ignore the terminating NUL. */
+
+ console_node = fdt_path_offset_namelen(fdt, prop, len);
+ if (console_node < 0) {
+ pr_debug("Error finding console: %s\n",
+ fdt_strerror(console_node));
+ return false;
+ }
+
+ if (fdt_node_check_compatible(fdt, console_node, "hvterm1") == 0)
+ return true;
+ else if (fdt_node_check_compatible(fdt, console_node,
+ "hvterm-protocol") == 0)
+ return true;
+
+ return false;
+}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 93cf7a5846a6..1863324c6a3c 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -614,7 +614,7 @@ _GLOBAL(start_secondary_resume)
_GLOBAL(__main)
blr
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* Must be relocatable PIC code callable as a C function.
*/
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 4f178671f230..32be2a844947 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -478,7 +478,7 @@ _GLOBAL(kexec_wait)
addi r5,r5,kexec_flag-1b
99: HMT_LOW
-#ifdef CONFIG_KEXEC /* use no memory without kexec */
+#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */
lwz r4,0(r5)
cmpwi 0,r4,0
beq 99b
@@ -503,7 +503,7 @@ kexec_flag:
.long 0
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
#ifdef CONFIG_PPC_BOOK3E
/*
* BOOK3E has no real MMU mode, so we have to setup the initial TLB
@@ -716,4 +716,4 @@ _GLOBAL(kexec_sequence)
mtlr 4
li r5,0
blr /* image->start(physid, image->start, 0); */
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index bb1807184bad..12c88c34e696 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -507,6 +507,224 @@ static int restore_r2(u32 *instruction, struct module *me)
return 1;
}
+int elf64_apply_relocate_add_item(const Elf64_Shdr *sechdrs, const char *strtab,
+ const Elf64_Rela *rela, const Elf64_Sym *sym,
+ unsigned long *location,
+ unsigned long address, unsigned long value,
+ unsigned long my_r2, const char *obj_name,
+ struct module *me)
+{
+ switch (ELF64_R_TYPE(rela->r_info)) {
+ case R_PPC64_ADDR32:
+ /* Simply set it */
+ *(u32 *)location = value;
+ break;
+
+ case R_PPC64_ADDR64:
+ /* Simply set it */
+ *(unsigned long *)location = value;
+ break;
+
+ case R_PPC64_TOC:
+ *(unsigned long *)location = my_r2;
+ break;
+
+ case R_PPC64_TOC16:
+ /* Subtract TOC pointer */
+ value -= my_r2;
+ if (value + 0x8000 > 0xffff) {
+ pr_err("%s: bad TOC16 relocation (0x%lx)\n",
+ obj_name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_TOC16_LO:
+ /* Subtract TOC pointer */
+ value -= my_r2;
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_TOC16_DS:
+ /* Subtract TOC pointer */
+ value -= my_r2;
+ if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
+ pr_err("%s: bad TOC16_DS relocation (0x%lx)\n",
+ obj_name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC64_TOC16_LO_DS:
+ /* Subtract TOC pointer */
+ value -= my_r2;
+ if ((value & 3) != 0) {
+ pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n",
+ obj_name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC64_TOC16_HA:
+ /* Subtract TOC pointer */
+ value -= my_r2;
+ value = ((value + 0x8000) >> 16);
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_REL14:
+ /* Convert value to relative */
+ value -= address;
+ if (value + 0x8000 > 0xffff || (value & 3) != 0) {
+ pr_err("%s: REL14 %li out of range!\n",
+ obj_name, (long int) value);
+ return -ENOEXEC;
+ }
+
+ /* Only replace bits 2 through 16 */
+ *(uint32_t *)location
+ = (*(uint32_t *)location & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC_REL24:
+ /* FIXME: Handle weak symbols here --RR */
+ if (sym->st_shndx == SHN_UNDEF) {
+ /*
+ * The purgatory relocation code passes NULL for me,
+ * but the purgatory doesn't have any REL24 relocations
+ * for undefined symbols, so if this happens it's a bug.
+ */
+ if (WARN_ON(!me))
+ return -ENOEXEC;
+
+ /* External: go via stub */
+ value = stub_for_addr(sechdrs, value, me);
+ if (!value)
+ return -ENOENT;
+ if (!restore_r2((u32 *)location + 1, me))
+ return -ENOEXEC;
+
+ squash_toc_save_inst(strtab + sym->st_name, value);
+ } else
+ value += local_entry_offset(sym);
+
+ /* Convert value to relative */
+ value -= address;
+ if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0) {
+ pr_err("%s: REL24 %li out of range!\n",
+ obj_name, (long int)value);
+ return -ENOEXEC;
+ }
+
+ /* Only replace bits 2 through 26 */
+ *(uint32_t *)location
+ = (*(uint32_t *)location & ~0x03fffffc)
+ | (value & 0x03fffffc);
+ break;
+
+ case R_PPC64_REL64:
+ /* 64 bits relative (used by features fixups) */
+ *location = value - address;
+ break;
+
+ case R_PPC64_REL32:
+ /* 32 bits relative (used by relative exception tables) */
+ *(u32 *)location = value - (unsigned long)location;
+ break;
+
+ case R_PPC64_TOCSAVE:
+ /*
+ * Marker reloc indicates we don't have to save r2.
+ * That would only save us one instruction, so ignore
+ * it.
+ */
+ break;
+
+ case R_PPC64_ENTRY:
+ /*
+ * Optimize ELFv2 large code model entry point if
+ * the TOC is within 2GB range of current location.
+ */
+ value = my_r2 - address;
+ if (value + 0x80008000 > 0xffffffff)
+ break;
+ /*
+ * Check for the large code model prolog sequence:
+ * ld r2, ...(r12)
+ * add r2, r2, r12
+ */
+ if ((((uint32_t *)location)[0] & ~0xfffc)
+ != 0xe84c0000)
+ break;
+ if (((uint32_t *)location)[1] != 0x7c426214)
+ break;
+ /*
+ * If found, replace it with:
+ * addis r2, r12, (.TOC.-func)@ha
+ * addi r2, r12, (.TOC.-func)@l
+ */
+ ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
+ ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
+ break;
+
+ case R_PPC64_ADDR16_LO:
+ *(uint16_t *)location = value & 0xffff;
+ break;
+
+ case R_PPC64_ADDR16_HI:
+ *(uint16_t *)location = (value >> 16) & 0xffff;
+ break;
+
+ case R_PPC64_ADDR16_HIGHER:
+ *(uint16_t *)location = (((uint64_t)value >> 32) &
+ 0xffff);
+ break;
+
+ case R_PPC64_ADDR16_HIGHEST:
+ *(uint16_t *)location = (((uint64_t)value >> 48) &
+ 0xffff);
+ break;
+
+ case R_PPC64_REL16_HA:
+ /* Subtract location pointer */
+ value -= address;
+ value = ((value + 0x8000) >> 16);
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_REL16_LO:
+ /* Subtract location pointer */
+ value -= address;
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ default:
+ pr_err("%s: Unknown ADD relocation: %lu\n", obj_name,
+ (unsigned long)ELF64_R_TYPE(rela->r_info));
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
int apply_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
@@ -514,6 +732,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
struct module *me)
{
unsigned int i;
+ int ret;
Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
Elf64_Sym *sym;
unsigned long *location;
@@ -548,173 +767,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* `Everything is relative'. */
value = sym->st_value + rela[i].r_addend;
- switch (ELF64_R_TYPE(rela[i].r_info)) {
- case R_PPC64_ADDR32:
- /* Simply set it */
- *(u32 *)location = value;
- break;
-
- case R_PPC64_ADDR64:
- /* Simply set it */
- *(unsigned long *)location = value;
- break;
-
- case R_PPC64_TOC:
- *(unsigned long *)location = my_r2(sechdrs, me);
- break;
-
- case R_PPC64_TOC16:
- /* Subtract TOC pointer */
- value -= my_r2(sechdrs, me);
- if (value + 0x8000 > 0xffff) {
- pr_err("%s: bad TOC16 relocation (0x%lx)\n",
- me->name, value);
- return -ENOEXEC;
- }
- *((uint16_t *) location)
- = (*((uint16_t *) location) & ~0xffff)
- | (value & 0xffff);
- break;
-
- case R_PPC64_TOC16_LO:
- /* Subtract TOC pointer */
- value -= my_r2(sechdrs, me);
- *((uint16_t *) location)
- = (*((uint16_t *) location) & ~0xffff)
- | (value & 0xffff);
- break;
-
- case R_PPC64_TOC16_DS:
- /* Subtract TOC pointer */
- value -= my_r2(sechdrs, me);
- if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
- pr_err("%s: bad TOC16_DS relocation (0x%lx)\n",
- me->name, value);
- return -ENOEXEC;
- }
- *((uint16_t *) location)
- = (*((uint16_t *) location) & ~0xfffc)
- | (value & 0xfffc);
- break;
-
- case R_PPC64_TOC16_LO_DS:
- /* Subtract TOC pointer */
- value -= my_r2(sechdrs, me);
- if ((value & 3) != 0) {
- pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n",
- me->name, value);
- return -ENOEXEC;
- }
- *((uint16_t *) location)
- = (*((uint16_t *) location) & ~0xfffc)
- | (value & 0xfffc);
- break;
-
- case R_PPC64_TOC16_HA:
- /* Subtract TOC pointer */
- value -= my_r2(sechdrs, me);
- value = ((value + 0x8000) >> 16);
- *((uint16_t *) location)
- = (*((uint16_t *) location) & ~0xffff)
- | (value & 0xffff);
- break;
-
- case R_PPC_REL24:
- /* FIXME: Handle weak symbols here --RR */
- if (sym->st_shndx == SHN_UNDEF) {
- /* External: go via stub */
- value = stub_for_addr(sechdrs, value, me);
- if (!value)
- return -ENOENT;
- if (!restore_r2((u32 *)location + 1, me))
- return -ENOEXEC;
-
- squash_toc_save_inst(strtab + sym->st_name, value);
- } else
- value += local_entry_offset(sym);
-
- /* Convert value to relative */
- value -= (unsigned long)location;
- if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
- pr_err("%s: REL24 %li out of range!\n",
- me->name, (long int)value);
- return -ENOEXEC;
- }
-
- /* Only replace bits 2 through 26 */
- *(uint32_t *)location
- = (*(uint32_t *)location & ~0x03fffffc)
- | (value & 0x03fffffc);
- break;
-
- case R_PPC64_REL64:
- /* 64 bits relative (used by features fixups) */
- *location = value - (unsigned long)location;
- break;
-
- case R_PPC64_REL32:
- /* 32 bits relative (used by relative exception tables) */
- *(u32 *)location = value - (unsigned long)location;
- break;
-
- case R_PPC64_TOCSAVE:
- /*
- * Marker reloc indicates we don't have to save r2.
- * That would only save us one instruction, so ignore
- * it.
- */
- break;
-
- case R_PPC64_ENTRY:
- /*
- * Optimize ELFv2 large code model entry point if
- * the TOC is within 2GB range of current location.
- */
- value = my_r2(sechdrs, me) - (unsigned long)location;
- if (value + 0x80008000 > 0xffffffff)
- break;
- /*
- * Check for the large code model prolog sequence:
- * ld r2, ...(r12)
- * add r2, r2, r12
- */
- if ((((uint32_t *)location)[0] & ~0xfffc)
- != 0xe84c0000)
- break;
- if (((uint32_t *)location)[1] != 0x7c426214)
- break;
- /*
- * If found, replace it with:
- * addis r2, r12, (.TOC.-func)@ha
- * addi r2, r12, (.TOC.-func)@l
- */
- ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
- ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
- break;
-
- case R_PPC64_REL16_HA:
- /* Subtract location pointer */
- value -= (unsigned long)location;
- value = ((value + 0x8000) >> 16);
- *((uint16_t *) location)
- = (*((uint16_t *) location) & ~0xffff)
- | (value & 0xffff);
- break;
-
- case R_PPC64_REL16_LO:
- /* Subtract location pointer */
- value -= (unsigned long)location;
- *((uint16_t *) location)
- = (*((uint16_t *) location) & ~0xffff)
- | (value & 0xffff);
- break;
-
- default:
- pr_err("%s: Unknown ADD relocation: %lu\n",
- me->name,
- (unsigned long)ELF64_R_TYPE(rela[i].r_info));
- return -ENOEXEC;
- }
+ ret = elf64_apply_relocate_add_item(sechdrs, strtab, &rela[i],
+ sym, location,
+ (unsigned long) location,
+ value, my_r2(sechdrs, me),
+ me->name, me);
+ if (ret)
+ return ret;
}
return 0;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b0245bed6f54..fa7aea479fba 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -427,7 +427,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
tce_alloc_end = *lprop;
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
if (lprop)
crashk_res.start = *lprop;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8d586cff8a41..6824157e4d2e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -354,7 +354,7 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
@@ -399,7 +399,7 @@ void smp_release_cpus(void)
DBG(" <- smp_release_cpus()\n");
}
-#endif /* CONFIG_SMP || CONFIG_KEXEC */
+#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
/*
* Initialize some remaining members of the ppc64_caches and systemcfg
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9c6f3fd58059..893bd7f79be6 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -193,7 +193,7 @@ int smp_request_message_ipi(int virq, int msg)
if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
return -EINVAL;
}
-#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE)
if (msg == PPC_MSG_DEBUGGER_BREAK) {
return 1;
}
@@ -325,7 +325,7 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
void smp_send_debugger_break(void)
{
int cpu;
@@ -340,7 +340,7 @@ void smp_send_debugger_break(void)
}
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
crash_ipi_function_ptr = crash_ipi_callback;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 91d278c9ab28..a95555e628b6 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -65,7 +65,7 @@
#include <asm/hmi.h>
#include <sysdev/fsl_pci.h>
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 1179115a4b5c..3803b0addf65 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -220,7 +220,7 @@ define_machine(corenet_generic) {
*
* Likewise, problems have been seen with kexec when coreint is enabled.
*/
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE)
.get_irq = mpic_get_irq,
#else
.get_irq = mpic_get_coreint_irq,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index fe9f19e5e935..a83a6d26090d 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -349,13 +349,13 @@ struct smp_ops_t smp_85xx_ops = {
.cpu_disable = generic_cpu_disable,
.cpu_die = generic_cpu_die,
#endif
-#if defined(CONFIG_KEXEC) && !defined(CONFIG_PPC64)
+#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64)
.give_timebase = smp_generic_give_timebase,
.take_timebase = smp_generic_take_timebase,
#endif
};
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
#ifdef CONFIG_PPC32
atomic_t kexec_down_cpus = ATOMIC_INIT(0);
@@ -458,7 +458,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
default_machine_kexec(image);
}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
static void smp_85xx_basic_setup(int cpu_nr)
{
@@ -512,7 +512,7 @@ void __init mpc85xx_smp_init(void)
#endif
smp_ops = &smp_85xx_ops;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down;
ppc_md.machine_kexec = mpc85xx_smp_machine_kexec;
#endif
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index e84d8fbc2e21..96c2b8a40630 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -676,7 +676,7 @@ static ssize_t spu_stat_show(struct device *dev,
static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
struct crash_spu_info {
struct spu *spu;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index efe8b6bb168b..d50c7d99baaf 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -174,7 +174,7 @@ static void pnv_shutdown(void)
opal_shutdown();
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static void pnv_kexec_wait_secondaries_down(void)
{
int my_cpu, i, notified = -1;
@@ -245,7 +245,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
}
}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static unsigned long pnv_memory_block_size(void)
@@ -311,7 +311,7 @@ define_machine(powernv) {
.machine_shutdown = pnv_shutdown,
.power_save = NULL,
.calibrate_decr = generic_calibrate_decr,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_cpu_down = pnv_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 3a487e7f4a5e..6244bc849469 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -250,7 +250,7 @@ static int __init ps3_probe(void)
return 1;
}
-#if defined(CONFIG_KEXEC)
+#if defined(CONFIG_KEXEC_CORE)
static void ps3_kexec_cpu_down(int crash_shutdown, int secondary)
{
int cpu = smp_processor_id();
@@ -276,7 +276,7 @@ define_machine(ps3) {
.progress = ps3_progress,
.restart = ps3_restart,
.halt = ps3_halt,
-#if defined(CONFIG_KEXEC)
+#if defined(CONFIG_KEXEC_CORE)
.kexec_cpu_down = ps3_kexec_cpu_down,
#endif
};
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fedc2ccf029d..dd9d9c2ba71b 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -8,7 +8,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
pci.o pci_dlpar.o eeh_pseries.o msi.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCANLOG) += scanlog.o
-obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_KEXEC_CORE) += kexec.o
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 97aa3f332f24..7736352f7279 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -367,7 +367,7 @@ void pseries_disable_reloc_on_exc(void)
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static void pSeries_machine_kexec(struct kimage *image)
{
if (firmware_has_feature(FW_FEATURE_SET_MODE))
@@ -725,7 +725,7 @@ define_machine(pseries) {
.progress = rtas_progress,
.system_reset_exception = pSeries_system_reset_exception,
.machine_check_exception = pSeries_machine_check_exception,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.machine_kexec = pSeries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
#endif
diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore
new file mode 100644
index 000000000000..e9e66f178a6d
--- /dev/null
+++ b/arch/powerpc/purgatory/.gitignore
@@ -0,0 +1,2 @@
+kexec-purgatory.c
+purgatory.ro
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
new file mode 100644
index 000000000000..32822234b049
--- /dev/null
+++ b/arch/powerpc/purgatory/Makefile
@@ -0,0 +1,33 @@
+OBJECT_FILES_NON_STANDARD := y
+
+purgatory-y := purgatory.o printf.o string.o v2wrap.o hvCall.o \
+ purgatory-ppc64.o console-ppc64.o crtsavres.o sha256.o
+
+targets += $(purgatory-y)
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostartfiles \
+ -nostdlib -nodefaultlibs
+targets += purgatory.ro
+
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE), $(KBUILD_CFLAGS))
+
+KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding \
+ -fno-stack-protector -fno-exceptions -fpie
+KBUILD_AFLAGS += -fno-exceptions -msoft-float -fpie
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+
+targets += kexec-purgatory.c
+
+CMD_BIN2C = $(objtree)/scripts/basic/bin2c
+quiet_cmd_bin2c = BIN2C $@
+ cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@
+
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
+ $(call if_changed,bin2c)
+ @:
+
+
+obj-$(CONFIG_KEXEC_FILE) += kexec-purgatory.o
diff --git a/arch/powerpc/purgatory/console-ppc64.c b/arch/powerpc/purgatory/console-ppc64.c
new file mode 100644
index 000000000000..8c89686fa340
--- /dev/null
+++ b/arch/powerpc/purgatory/console-ppc64.c
@@ -0,0 +1,37 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Created by: Mohan Kumar M (mohan@in.ibm.com)
+ *
+ * Copyright (C) IBM Corporation, 2005. All rights reserved
+ *
+ * Code taken from kexec-tools.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "hvCall.h"
+#include <asm/byteorder.h>
+#include "purgatory.h"
+
+void putchar(int c)
+{
+ char buff[8];
+ unsigned long *lbuf = (unsigned long *)buff;
+
+ if (!debug) /* running on non pseries */
+ return;
+
+ if (c == '\n')
+ putchar('\r');
+
+ buff[0] = c;
+ plpar_hcall_norets(H_PUT_TERM_CHAR, 0, 1, __cpu_to_be64(*lbuf), 0);
+}
diff --git a/arch/powerpc/purgatory/crtsavres.S b/arch/powerpc/purgatory/crtsavres.S
new file mode 100644
index 000000000000..5d17e1c0d575
--- /dev/null
+++ b/arch/powerpc/purgatory/crtsavres.S
@@ -0,0 +1,5 @@
+#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
+#define CONFIG_CC_OPTIMIZE_FOR_SIZE 1
+#endif
+
+#include "../lib/crtsavres.S"
diff --git a/arch/powerpc/purgatory/hvCall.S b/arch/powerpc/purgatory/hvCall.S
new file mode 100644
index 000000000000..a96c4898f1d8
--- /dev/null
+++ b/arch/powerpc/purgatory/hvCall.S
@@ -0,0 +1,27 @@
+/*
+ * This file contains the generic function to perform a call to the
+ * pSeries LPAR hypervisor.
+ *
+ * Taken from linux/arch/powerpc/platforms/pseries/hvCall.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ppc64_asm.h"
+
+#define HVSC .long 0x44000022
+.text
+ .machine ppc64
+.globl DOTSYM(plpar_hcall_norets)
+DOTSYM(plpar_hcall_norets):
+ or 6,6,6 # medium low priority
+ mfcr 0
+ stw 0,8(1)
+
+ HVSC /* invoke the hypervisor */
+
+ lwz 0,8(1)
+ mtcrf 0xff,0
+ blr /* return r3 = status */
diff --git a/arch/powerpc/purgatory/hvCall.h b/arch/powerpc/purgatory/hvCall.h
new file mode 100644
index 000000000000..187e24d8b964
--- /dev/null
+++ b/arch/powerpc/purgatory/hvCall.h
@@ -0,0 +1,8 @@
+#ifndef HVCALL_H
+#define HVCALL_H
+
+#define H_PUT_TERM_CHAR 0x58
+
+long plpar_hcall_norets(unsigned long opcode, ...);
+
+#endif
diff --git a/arch/powerpc/purgatory/kexec-sha256.h b/arch/powerpc/purgatory/kexec-sha256.h
new file mode 100644
index 000000000000..4418ed02c052
--- /dev/null
+++ b/arch/powerpc/purgatory/kexec-sha256.h
@@ -0,0 +1,11 @@
+#ifndef KEXEC_SHA256_H
+#define KEXEC_SHA256_H
+
+struct kexec_sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+#define SHA256_REGIONS 16
+
+#endif /* KEXEC_SHA256_H */
diff --git a/arch/powerpc/purgatory/ppc64_asm.h b/arch/powerpc/purgatory/ppc64_asm.h
new file mode 100644
index 000000000000..95d721718237
--- /dev/null
+++ b/arch/powerpc/purgatory/ppc64_asm.h
@@ -0,0 +1,20 @@
+/*
+ * ppc64_asm.h - common defines for PPC64 assembly parts
+ *
+ * Code taken from kexec-tools.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <asm/types.h>
+
+/*
+ * ABIv1 requires dot symbol while ABIv2 does not.
+ */
+#ifdef PPC64_ELF_ABI_v2
+#define DOTSYM(a) a
+#else
+#define GLUE(a, b) a##b
+#define DOTSYM(a) GLUE(., a)
+#endif
diff --git a/arch/powerpc/purgatory/printf.c b/arch/powerpc/purgatory/printf.c
new file mode 100644
index 000000000000..c5f425b55fd5
--- /dev/null
+++ b/arch/powerpc/purgatory/printf.c
@@ -0,0 +1,164 @@
+/*
+ * Code taken from kexec-tools.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdarg.h>
+#include "purgatory.h"
+#include "../boot/string.h"
+
+#define CHAR_BIT 8
+
+/*
+ * Output
+ * =============================================================================
+ */
+
+#define LONG_LONG_SHIFT ((int)((sizeof(unsigned long long)*CHAR_BIT) - 4))
+#define LONG_SHIFT ((int)((sizeof(unsigned long)*CHAR_BIT) - 4))
+#define INT_SHIFT ((int)((sizeof(unsigned int)*CHAR_BIT) - 4))
+#define SHRT_SHIFT ((int)((sizeof(unsigned short)*CHAR_BIT) - 4))
+#define CHAR_SHIFT ((int)((sizeof(unsigned char)*CHAR_BIT) - 4))
+
+/**************************************************************************
+PRINTF and friends
+
+ Formats:
+ %x - 4 bytes int (8 hex digits, lower case)
+ %X - 4 bytes int (8 hex digits, upper case)
+ %lx - 8 bytes long (16 hex digits, lower case)
+ %lX - 8 bytes long (16 hex digits, upper case)
+ %hx - 2 bytes int (4 hex digits, lower case)
+ %hX - 2 bytes int (4 hex digits, upper case)
+ %hhx - 1 byte int (2 hex digits, lower case)
+ %hhX - 1 byte int (2 hex digits, upper case)
+ - optional # prefixes 0x or 0X
+ %d - decimal int
+ %c - char
+ %s - string
+ Note: width specification not supported
+**************************************************************************/
+void vsprintf(char *buffer, const char *fmt, va_list args)
+{
+ char *p;
+
+ for ( ; *fmt != '\0'; ++fmt) {
+ if (*fmt != '%') {
+ if (buffer)
+ *buffer++ = *fmt;
+ else
+ putchar(*fmt);
+ continue;
+ }
+ if (*++fmt == 's') {
+ for (p = va_arg(args, char *); *p != '\0'; p++)
+ if (buffer)
+ *buffer++ = *p;
+ else
+ putchar(*p);
+ } else { /* Length of item is bounded */
+ char tmp[40], *q = tmp;
+ int shift = INT_SHIFT;
+
+ if (*fmt == 'L') {
+ shift = LONG_LONG_SHIFT;
+ fmt++;
+ } else if (*fmt == 'l') {
+ shift = LONG_SHIFT;
+ fmt++;
+ } else if (*fmt == 'h') {
+ shift = SHRT_SHIFT;
+ fmt++;
+ if (*fmt == 'h') {
+ shift = CHAR_SHIFT;
+ fmt++;
+ }
+ }
+
+ /*
+ * Before each format q points to tmp buffer
+ * After each format q points past end of item
+ */
+ if ((*fmt | 0x20) == 'x') {
+ /* With x86 gcc, sizeof(long) == sizeof(int) */
+ unsigned long long h;
+ int ncase;
+
+ if (shift > LONG_SHIFT)
+ h = va_arg(args, unsigned long long);
+ else if (shift > INT_SHIFT)
+ h = va_arg(args, unsigned long);
+ else
+ h = va_arg(args, unsigned int);
+
+ ncase = (*fmt & 0x20);
+ for ( ; shift >= 0; shift -= 4)
+ *q++ = "0123456789ABCDEF"[(h >> shift) & 0xF] | ncase;
+ } else if (*fmt == 'd') {
+ char *r;
+ long i;
+
+ if (shift > LONG_SHIFT)
+ i = va_arg(args, long long);
+ else if (shift > INT_SHIFT)
+ i = va_arg(args, long);
+ else
+ i = va_arg(args, int);
+
+ if (i < 0) {
+ *q++ = '-';
+ i = -i;
+ }
+ p = q; /* save beginning of digits */
+ do {
+ *q++ = '0' + (i % 10);
+ i /= 10;
+ } while (i);
+ /* reverse digits, stop in middle */
+ r = q; /* don't alter q */
+ while (--r > p) {
+ i = *r;
+ *r = *p;
+ *p++ = i;
+ }
+ } else if (*fmt == 'c')
+ *q++ = va_arg(args, int);
+ else
+ *q++ = *fmt;
+ /* now output the saved string */
+ for (p = tmp; p < q; ++p)
+ if (buffer)
+ *buffer++ = *p;
+ else
+ putchar(*p);
+ }
+ }
+ if (buffer)
+ *buffer = '\0';
+}
+
+void sprintf(char *buffer, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vsprintf(buffer, fmt, args);
+ va_end(args);
+}
+
+void printf(const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vsprintf(0, fmt, args);
+ va_end(args);
+}
diff --git a/arch/powerpc/purgatory/purgatory-ppc64.c b/arch/powerpc/purgatory/purgatory-ppc64.c
new file mode 100644
index 000000000000..2e9545bcdcb4
--- /dev/null
+++ b/arch/powerpc/purgatory/purgatory-ppc64.c
@@ -0,0 +1,36 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Created by: Mohan Kumar M (mohan@in.ibm.com)
+ *
+ * Copyright (C) IBM Corporation, 2005. All rights reserved
+ *
+ * Code taken from kexec-tools.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "purgatory.h"
+
+unsigned long stack __section(".data");
+unsigned long dt_offset __section(".data");
+unsigned long my_toc __section(".data");
+unsigned long kernel __section(".data");
+int debug __section(".data");
+unsigned long opal_base __section(".data");
+unsigned long opal_entry __section(".data");
+
+void setup_arch(void)
+{
+}
+
+void post_verification_setup_arch(void)
+{
+}
diff --git a/arch/powerpc/purgatory/purgatory.c b/arch/powerpc/purgatory/purgatory.c
new file mode 100644
index 000000000000..179f2da2b56f
--- /dev/null
+++ b/arch/powerpc/purgatory/purgatory.c
@@ -0,0 +1,62 @@
+/*
+ * Code taken from kexec-tools.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "purgatory.h"
+#include "sha256.h"
+#include "../boot/string.h"
+#include "kexec-sha256.h"
+
+struct kexec_sha_region sha_regions[SHA256_REGIONS] __section(".data");
+u8 sha256_digest[SHA256_DIGEST_SIZE] __section(".data");
+
+int verify_sha256_digest(void)
+{
+ struct kexec_sha_region *ptr, *end;
+ u8 digest[SHA256_DIGEST_SIZE];
+ size_t i;
+ struct sha256_state sctx;
+
+ sha256_init(&sctx);
+ end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
+ for (ptr = sha_regions; ptr < end; ptr++)
+ sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
+ sha256_final(&sctx, digest);
+
+ if (memcmp(digest, sha256_digest, sizeof(digest)) != 0) {
+ printf("sha256 digests do not match :(\n");
+ printf(" digest: ");
+ for (i = 0; i < sizeof(digest); i++)
+ printf("%hhx ", digest[i]);
+ printf("\n");
+
+ printf("sha256_digest: ");
+ for (i = 0; i < sizeof(sha256_digest); i++)
+ printf("%hhx ", sha256_digest[i]);
+
+ printf("\n");
+ return 1;
+ }
+ return 0;
+}
+
+void purgatory(void)
+{
+ printf("I'm in purgatory\n");
+ setup_arch();
+ if (verify_sha256_digest()) {
+ /* loop forever */
+ for (;;)
+ ;
+ }
+ post_verification_setup_arch();
+}
diff --git a/arch/powerpc/purgatory/purgatory.h b/arch/powerpc/purgatory/purgatory.h
new file mode 100644
index 000000000000..fde99e8b5043
--- /dev/null
+++ b/arch/powerpc/purgatory/purgatory.h
@@ -0,0 +1,14 @@
+#ifndef PURGATORY_H
+#define PURGATORY_H
+
+#include <linux/compiler.h>
+
+extern int debug;
+
+void putchar(int ch);
+void sprintf(char *buffer, const char *fmt, ...) __printf(2, 3);
+void printf(const char *fmt, ...) __printf(1, 2);
+void setup_arch(void);
+void post_verification_setup_arch(void);
+
+#endif /* PURGATORY_H */
diff --git a/arch/powerpc/purgatory/sha256.c b/arch/powerpc/purgatory/sha256.c
new file mode 100644
index 000000000000..6abee1877d56
--- /dev/null
+++ b/arch/powerpc/purgatory/sha256.c
@@ -0,0 +1,6 @@
+#include "../boot/string.h"
+
+/* Avoid including x86's boot/string.h in sha256.c. */
+#define BOOT_STRING_H
+
+#include "../../x86/purgatory/sha256.c"
diff --git a/arch/powerpc/purgatory/sha256.h b/arch/powerpc/purgatory/sha256.h
new file mode 100644
index 000000000000..72818f3a207e
--- /dev/null
+++ b/arch/powerpc/purgatory/sha256.h
@@ -0,0 +1 @@
+#include "../../x86/purgatory/sha256.h"
diff --git a/arch/powerpc/purgatory/string.S b/arch/powerpc/purgatory/string.S
new file mode 100644
index 000000000000..19d92e4e7554
--- /dev/null
+++ b/arch/powerpc/purgatory/string.S
@@ -0,0 +1,2 @@
+#include "ppc64_asm.h"
+#include "../boot/string.S"
diff --git a/arch/powerpc/purgatory/v2wrap.S b/arch/powerpc/purgatory/v2wrap.S
new file mode 100644
index 000000000000..c9a981c39a78
--- /dev/null
+++ b/arch/powerpc/purgatory/v2wrap.S
@@ -0,0 +1,134 @@
+#
+# kexec: Linux boots Linux
+#
+# Copyright (C) 2004 - 2005, Milton D Miller II, IBM Corporation
+# Copyright (C) 2006, Mohan Kumar M (mohan@in.ibm.com), IBM Corporation
+#
+# Code taken from kexec-tools.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation (version 2 of the License).
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+#include "ppc64_asm.h"
+
+# v2wrap.S
+# a wrapper to call purgatory code to backup first
+# 32kB of first kernel into the backup region
+# reserved by kexec-tools.
+# Invokes ppc64 kernel with the expected arguments
+# of kernel(device-tree, phys-offset, 0)
+
+#
+# calling convention:
+# r3 = physical number of this cpu (all cpus)
+# r4 = address of this chunk (master only)
+# master enters at purgatory_start (aka first byte of this chunk)
+# slaves (additional cpus), if any, enter a copy of the
+# first 0x100 bytes of this code relocated to 0x0
+#
+# in other words,
+# a copy of the first 0x100 bytes of this code is copied to 0
+# and the slaves are sent to address 0x60
+# with r3 = their physical cpu number.
+
+#define LOADADDR(rn,name) \
+ lis rn,name##@highest; \
+ ori rn,rn,name##@higher; \
+ rldicr rn,rn,32,31; \
+ oris rn,rn,name##@h; \
+ ori rn,rn,name##@l
+
+ .machine ppc64
+ .align 8
+ .globl purgatory_start
+purgatory_start: b master
+ .org purgatory_start + 0x5c # ABI: possible run_at_load flag at 0x5c
+ .globl run_at_load
+run_at_load:
+ .long 0
+ .size run_at_load, . - run_at_load
+ .org purgatory_start + 0x60 # ABI: slaves start at 60 with r3=phys
+slave: b $
+ .org purgatory_start + 0x100 # ABI: end of copied region
+ .size purgatory_start, . - purgatory_start
+
+#
+# The above 0x100 bytes at purgatory_start are replaced with the
+# code from the kernel (or next stage) by kexec/arch/ppc64/kexec-elf-ppc64.c
+#
+
+master:
+ or 1,1,1 # low priority to let other threads catchup
+ isync
+ mr 17,3 # save cpu id to r17
+ mr 15,4 # save physical address in reg15
+
+ LOADADDR(6,my_toc)
+ ld 2,0(6) #setup toc
+
+ LOADADDR(6,stack)
+ ld 1,0(6) #setup stack
+
+ subi 1,1,112
+ bl DOTSYM(purgatory)
+ nop
+
+ or 3,3,3 # ok now to high priority, lets boot
+ lis 6,0x1
+ mtctr 6 # delay a bit for slaves to catch up
+83: bdnz 83b # before we overwrite 0-100 again
+
+ LOADADDR(16, dt_offset)
+ ld 3,0(16) # load device-tree address
+ mr 16,3 # save dt address in reg16
+#ifdef __BIG_ENDIAN__
+ lwz 6,20(3) # fetch version number
+#else
+ li 4,20
+ lwbrx 6,3,4 # fetch BE version number
+#endif
+ cmpwi 0,6,2 # v2 ?
+ blt 80f
+#ifdef __BIG_ENDIAN__
+ stw 17,28(3) # save my cpu number as boot_cpu_phys
+#else
+ li 4,28
+ stwbrx 17,3,4 # Store my cpu as BE value
+#endif
+80:
+ LOADADDR(6,opal_base) # For OPAL early debug
+ ld 8,0(6) # load the OPAL base address in r8
+ LOADADDR(6,opal_entry) # For OPAL early debug
+ ld 9,0(6) # load the OPAL entry address in r9
+ LOADADDR(6,kernel)
+ ld 4,0(6) # load the kernel address
+ LOADADDR(6,run_at_load) # the load flag
+ lwz 7,0(6) # possibly patched by kexec-elf-ppc64
+ stw 7,0x5c(4) # and patch it into the kernel
+ mr 3,16 # restore dt address
+
+ mfmsr 5
+ andi. 10,5,1 # test MSR_LE
+ bne little_endian
+
+ li 5,0 # r5 will be 0 for kernel
+ mtctr 4 # prepare branch to
+ bctr # start kernel
+
+little_endian: # book3s-only
+ mtsrr0 4 # prepare branch to
+
+ clrrdi 5,5,1 # clear MSR_LE
+ mtsrr1 5
+
+ li 5,0 # r5 will be 0 for kernel
+
+ # skip cache flush, do we care?
+
+ rfid # update MSR and start kernel
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a9973bb4a1b2..95e73c63c99d 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -42,7 +42,7 @@ static int panic_on_timeout;
*/
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
-
+static int nmi_init_done;
static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;
@@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
void stop_nmi_watchdog(void *unused)
{
+ if (!__this_cpu_read(wd_enabled))
+ return;
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
__this_cpu_write(wd_enabled, 0);
atomic_dec(&nmi_active);
@@ -207,6 +209,9 @@ error:
void start_nmi_watchdog(void *unused)
{
+ if (__this_cpu_read(wd_enabled))
+ return;
+
__this_cpu_write(wd_enabled, 1);
atomic_inc(&nmi_active);
@@ -259,6 +264,8 @@ int __init nmi_init(void)
}
}
+ nmi_init_done = 1;
+
return err;
}
@@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str)
return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
+
+/*
+ * sparc specific NMI watchdog enable function.
+ * Enables watchdog if it is not enabled already.
+ */
+int watchdog_nmi_enable(unsigned int cpu)
+{
+ if (atomic_read(&nmi_active) == -1) {
+ pr_warn("NMI watchdog cannot be enabled or disabled\n");
+ return -1;
+ }
+
+ /*
+ * watchdog thread could start even before nmi_init is called.
+ * Just Return in that case. Let nmi_init finish the init
+ * process first.
+ */
+ if (!nmi_init_done)
+ return 0;
+
+ smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);
+
+ return 0;
+}
+/*
+ * sparc specific NMI watchdog disable function.
+ * Disables watchdog if it is not disabled already.
+ */
+void watchdog_nmi_disable(unsigned int cpu)
+{
+ if (atomic_read(&nmi_active) == -1)
+ pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
+ else
+ smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1);
+}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 650830e39e3a..3741461c63a0 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -631,9 +631,9 @@ static int determine_backup_region(u64 start, u64 end, void *arg)
int crash_load_segments(struct kimage *image)
{
- unsigned long src_start, src_sz, elf_sz;
- void *elf_addr;
int ret;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ULONG_MAX, .top_down = false };
/*
* Determine and load a segment for backup area. First 640K RAM
@@ -647,43 +647,44 @@ int crash_load_segments(struct kimage *image)
if (ret < 0)
return ret;
- src_start = image->arch.backup_src_start;
- src_sz = image->arch.backup_src_sz;
-
/* Add backup segment. */
- if (src_sz) {
+ if (image->arch.backup_src_sz) {
+ kbuf.buffer = &crash_zero_bytes;
+ kbuf.bufsz = sizeof(crash_zero_bytes);
+ kbuf.memsz = image->arch.backup_src_sz;
+ kbuf.buf_align = PAGE_SIZE;
/*
* Ideally there is no source for backup segment. This is
* copied in purgatory after crash. Just add a zero filled
* segment for now to make sure checksum logic works fine.
*/
- ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
- sizeof(crash_zero_bytes), src_sz,
- PAGE_SIZE, 0, -1, 0,
- &image->arch.backup_load_addr);
+ ret = kexec_add_buffer(&kbuf);
if (ret)
return ret;
+ image->arch.backup_load_addr = kbuf.mem;
pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
- image->arch.backup_load_addr, src_start, src_sz);
+ image->arch.backup_load_addr,
+ image->arch.backup_src_start, kbuf.memsz);
}
/* Prepare elf headers and add a segment */
- ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+ ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
if (ret)
return ret;
- image->arch.elf_headers = elf_addr;
- image->arch.elf_headers_sz = elf_sz;
+ image->arch.elf_headers = kbuf.buffer;
+ image->arch.elf_headers_sz = kbuf.bufsz;
- ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
- ELF_CORE_HEADER_ALIGN, 0, -1, 0,
- &image->arch.elf_load_addr);
+ kbuf.memsz = kbuf.bufsz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);
return ret;
}
+ image->arch.elf_load_addr = kbuf.mem;
pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->arch.elf_load_addr, elf_sz, elf_sz);
+ image->arch.elf_load_addr, kbuf.bufsz, kbuf.bufsz);
return ret;
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 3407b148c240..d0a814a9d96a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -331,17 +331,17 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
struct setup_header *header;
int setup_sects, kern16_size, ret = 0;
- unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+ unsigned long setup_header_size, params_cmdline_sz;
struct boot_params *params;
unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
unsigned long purgatory_load_addr;
- unsigned long kernel_bufsz, kernel_memsz, kernel_align;
- char *kernel_buf;
struct bzimage64_data *ldata;
struct kexec_entry64_regs regs64;
void *stack;
unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+ struct kexec_buf kbuf = { .image = image, .buf_max = ULONG_MAX,
+ .top_down = true };
header = (struct setup_header *)(kernel + setup_hdr_offset);
setup_sects = header->setup_sects;
@@ -402,11 +402,11 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
MAX_ELFCOREHDR_STR_LEN;
params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
- params_misc_sz = params_cmdline_sz + efi_map_sz +
+ kbuf.bufsz = params_cmdline_sz + efi_map_sz +
sizeof(struct setup_data) +
sizeof(struct efi_setup_data);
- params = kzalloc(params_misc_sz, GFP_KERNEL);
+ params = kzalloc(kbuf.bufsz, GFP_KERNEL);
if (!params)
return ERR_PTR(-ENOMEM);
efi_map_offset = params_cmdline_sz;
@@ -418,37 +418,41 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
/* Is there a limit on setup header size? */
memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
- ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
- params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
- ULONG_MAX, 1, &bootparam_load_addr);
+ kbuf.buffer = params;
+ kbuf.memsz = kbuf.bufsz;
+ kbuf.buf_align = 16;
+ kbuf.buf_min = MIN_BOOTPARAM_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ bootparam_load_addr = kbuf.mem;
pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- bootparam_load_addr, params_misc_sz, params_misc_sz);
+ bootparam_load_addr, kbuf.bufsz, kbuf.bufsz);
/* Load kernel */
- kernel_buf = kernel + kern16_size;
- kernel_bufsz = kernel_len - kern16_size;
- kernel_memsz = PAGE_ALIGN(header->init_size);
- kernel_align = header->kernel_alignment;
-
- ret = kexec_add_buffer(image, kernel_buf,
- kernel_bufsz, kernel_memsz, kernel_align,
- MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
- &kernel_load_addr);
+ kbuf.buffer = kernel + kern16_size;
+ kbuf.bufsz = kernel_len - kern16_size;
+ kbuf.memsz = PAGE_ALIGN(header->init_size);
+ kbuf.buf_align = header->kernel_alignment;
+ kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ kernel_load_addr = kbuf.mem;
pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kernel_load_addr, kernel_memsz, kernel_memsz);
+ kernel_load_addr, kbuf.bufsz, kbuf.memsz);
/* Load initrd high */
if (initrd) {
- ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
- PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
- ULONG_MAX, 1, &initrd_load_addr);
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ initrd_load_addr = kbuf.mem;
pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
initrd_load_addr, initrd_len, initrd_len);
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/calib.c b/drivers/net/wireless/intel/iwlwifi/dvm/calib.c
index e9cef9de9ed8..c96f9b1d948a 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/calib.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/calib.c
@@ -900,8 +900,7 @@ static void iwlagn_gain_computation(struct iwl_priv *priv,
/* bound gain by 2 bits value max, 3rd bit is sign */
data->delta_gain_code[i] =
- min(abs(delta_g),
- (s32) CHAIN_NOISE_MAX_DELTA_GAIN_CODE);
+ min(abs(delta_g), CHAIN_NOISE_MAX_DELTA_GAIN_CODE);
if (delta_g < 0)
/*
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0eb7c2e7f0d6..7f6952f8d6aa 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,6 +11,7 @@
#define _LINUX_IMA_H
#include <linux/fs.h>
+#include <linux/kexec.h>
struct linux_binprm;
#ifdef CONFIG_IMA
@@ -23,6 +24,10 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
enum kernel_read_file_id id);
extern void ima_post_path_mknod(struct dentry *dentry);
+#ifdef CONFIG_IMA_KEXEC
+extern void ima_add_kexec_buffer(struct kimage *image);
+#endif
+
#else
static inline int ima_bprm_check(struct linux_binprm *bprm)
{
@@ -62,6 +67,13 @@ static inline void ima_post_path_mknod(struct dentry *dentry)
#endif /* CONFIG_IMA */
+#ifndef CONFIG_IMA_KEXEC
+struct kimage;
+
+static inline void ima_add_kexec_buffer(struct kimage *image)
+{}
+#endif
+
#ifdef CONFIG_IMA_APPRAISE
extern void ima_inode_post_setattr(struct dentry *dentry);
extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d3ae4292931b..e98e546b543c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -148,7 +148,36 @@ struct kexec_file_ops {
kexec_verify_sig_t *verify_sig;
#endif
};
-#endif
+
+/**
+ * struct kexec_buf - parameters for finding a place for a buffer in memory
+ * @image: kexec image in which memory to search.
+ * @buffer: Contents which will be copied to the allocated memory.
+ * @bufsz: Size of @buffer.
+ * @mem: On return will have address of the buffer in memory.
+ * @memsz: Size for the buffer in memory.
+ * @buf_align: Minimum alignment needed.
+ * @buf_min: The buffer can't be placed below this address.
+ * @buf_max: The buffer can't be placed above this address.
+ * @top_down: Allocate from top of memory.
+ */
+struct kexec_buf {
+ struct kimage *image;
+ void *buffer;
+ unsigned long bufsz;
+ unsigned long mem;
+ unsigned long memsz;
+ unsigned long buf_align;
+ unsigned long buf_min;
+ unsigned long buf_max;
+ bool top_down;
+};
+
+int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
+ int (*func)(u64, u64, void *));
+extern int kexec_add_buffer(struct kexec_buf *kbuf);
+int kexec_locate_mem_hole(struct kexec_buf *kbuf);
+#endif /* CONFIG_KEXEC_FILE */
struct kimage {
kimage_entry_t head;
@@ -212,11 +241,6 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
struct kexec_segment __user *segments,
unsigned long flags);
extern int kernel_kexec(void);
-extern int kexec_add_buffer(struct kimage *image, char *buffer,
- unsigned long bufsz, unsigned long memsz,
- unsigned long buf_align, unsigned long buf_min,
- unsigned long buf_max, bool top_down,
- unsigned long *load_addr);
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a78c35cff1ae..aacca824a6ae 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -7,6 +7,23 @@
#include <linux/sched.h>
#include <asm/irq.h>
+/*
+ * The run state of the lockup detectors is controlled by the content of the
+ * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
+ * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
+ *
+ * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
+ * are variables that are only used as an 'interface' between the parameters
+ * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
+ * 'watchdog_thresh' variable is handled differently because its value is not
+ * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
+ * is equal zero.
+ */
+#define NMI_WATCHDOG_ENABLED_BIT 0
+#define SOFT_WATCHDOG_ENABLED_BIT 1
+#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
+#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
+
/**
* touch_nmi_watchdog - restart NMI watchdog timeout.
*
@@ -91,9 +108,16 @@ extern int nmi_watchdog_enabled;
extern int soft_watchdog_enabled;
extern int watchdog_user_enabled;
extern int watchdog_thresh;
+extern unsigned long watchdog_enabled;
extern unsigned long *watchdog_cpumask_bits;
+#ifdef CONFIG_SMP
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
+#endif
+extern bool is_hardlockup(void);
struct ctl_table;
extern int proc_watchdog(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
diff --git a/kernel/Makefile b/kernel/Makefile
index eb26e12c6c2a..314e7d62f5f0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 037c321c5618..b56a558e406d 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -19,6 +19,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/fs.h>
+#include <linux/ima.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/syscalls.h>
@@ -132,6 +133,9 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
return ret;
image->kernel_buf_len = size;
+ /* IMA needs to pass the measurement list to the next kernel. */
+ ima_add_kexec_buffer(image);
+
/* Call arch image probe handlers */
ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
image->kernel_buf_len);
@@ -428,25 +432,65 @@ static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
return locate_mem_hole_bottom_up(start, end, kbuf);
}
-/*
- * Helper function for placing a buffer in a kexec segment. This assumes
- * that kexec_mutex is held.
+/**
+ * arch_kexec_walk_mem - call func(data) on free memory regions
+ * @kbuf: Context info for the search. Also passed to @func.
+ * @func: Function to call for each memory region.
+ *
+ * Return: The memory walk will stop when func returns a non-zero value
+ * and that value will be returned. If all free regions are visited without
+ * func returning non-zero, then zero will be returned.
+ */
+int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
+ int (*func)(u64, u64, void *))
+{
+ if (kbuf->image->type == KEXEC_TYPE_CRASH)
+ return walk_iomem_res_desc(crashk_res.desc,
+ IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+ crashk_res.start, crashk_res.end,
+ kbuf, func);
+ else
+ return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
+}
+
+/**
+ * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel
+ * @kbuf: Parameters for the memory search.
+ *
+ * On success, kbuf->mem will have the start address of the memory region found.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+ int ret;
+
+ ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
+
+ return ret == 1 ? 0 : -EADDRNOTAVAIL;
+}
+
+/**
+ * kexec_add_buffer - place a buffer in a kexec segment
+ * @kbuf: Buffer contents and memory parameters.
+ *
+ * This function assumes that kexec_mutex is held.
+ * On successful return, @kbuf->mem will have the physical address of
+ * the buffer in memory.
+ *
+ * Return: 0 on success, negative errno on error.
*/
-int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
- unsigned long memsz, unsigned long buf_align,
- unsigned long buf_min, unsigned long buf_max,
- bool top_down, unsigned long *load_addr)
+int kexec_add_buffer(struct kexec_buf *kbuf)
{
struct kexec_segment *ksegment;
- struct kexec_buf buf, *kbuf;
int ret;
/* Currently adding segment this way is allowed only in file mode */
- if (!image->file_mode)
+ if (!kbuf->image->file_mode)
return -EINVAL;
- if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+ if (kbuf->image->nr_segments >= KEXEC_SEGMENT_MAX)
return -EINVAL;
/*
@@ -456,45 +500,27 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
* logic goes through list of segments to make sure there are
* no destination overlaps.
*/
- if (!list_empty(&image->control_pages)) {
+ if (!list_empty(&kbuf->image->control_pages)) {
WARN_ON(1);
return -EINVAL;
}
- memset(&buf, 0, sizeof(struct kexec_buf));
- kbuf = &buf;
- kbuf->image = image;
- kbuf->buffer = buffer;
- kbuf->bufsz = bufsz;
-
- kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
- kbuf->buf_align = max(buf_align, PAGE_SIZE);
- kbuf->buf_min = buf_min;
- kbuf->buf_max = buf_max;
- kbuf->top_down = top_down;
+ /* Ensure minimum alignment needed for segments. */
+ kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
+ kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
/* Walk the RAM ranges and allocate a suitable range for the buffer */
- if (image->type == KEXEC_TYPE_CRASH)
- ret = walk_iomem_res_desc(crashk_res.desc,
- IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
- crashk_res.start, crashk_res.end, kbuf,
- locate_mem_hole_callback);
- else
- ret = walk_system_ram_res(0, -1, kbuf,
- locate_mem_hole_callback);
- if (ret != 1) {
- /* A suitable memory range could not be found for buffer */
- return -EADDRNOTAVAIL;
- }
+ ret = kexec_locate_mem_hole(kbuf);
+ if (ret)
+ return ret;
/* Found a suitable memory range */
- ksegment = &image->segment[image->nr_segments];
+ ksegment = &kbuf->image->segment[kbuf->image->nr_segments];
ksegment->kbuf = kbuf->buffer;
ksegment->bufsz = kbuf->bufsz;
ksegment->mem = kbuf->mem;
ksegment->memsz = kbuf->memsz;
- image->nr_segments++;
- *load_addr = ksegment->mem;
+ kbuf->image->nr_segments++;
return 0;
}
@@ -616,13 +642,15 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
unsigned long max, int top_down)
{
struct purgatory_info *pi = &image->purgatory_info;
- unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
- unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+ unsigned long align, bss_align, bss_sz, bss_pad;
+ unsigned long entry, load_addr, curr_load_addr, bss_addr, offset;
unsigned char *buf_addr, *src;
int i, ret = 0, entry_sidx = -1;
const Elf_Shdr *sechdrs_c;
Elf_Shdr *sechdrs = NULL;
- void *purgatory_buf = NULL;
+ struct kexec_buf kbuf = { .image = image, .bufsz = 0, .buf_align = 1,
+ .buf_min = min, .buf_max = max,
+ .top_down = top_down };
/*
* sechdrs_c points to section headers in purgatory and are read
@@ -688,9 +716,7 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
}
/* Determine how much memory is needed to load relocatable object. */
- buf_align = 1;
bss_align = 1;
- buf_sz = 0;
bss_sz = 0;
for (i = 0; i < pi->ehdr->e_shnum; i++) {
@@ -699,10 +725,10 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
align = sechdrs[i].sh_addralign;
if (sechdrs[i].sh_type != SHT_NOBITS) {
- if (buf_align < align)
- buf_align = align;
- buf_sz = ALIGN(buf_sz, align);
- buf_sz += sechdrs[i].sh_size;
+ if (kbuf.buf_align < align)
+ kbuf.buf_align = align;
+ kbuf.bufsz = ALIGN(kbuf.bufsz, align);
+ kbuf.bufsz += sechdrs[i].sh_size;
} else {
/* bss section */
if (bss_align < align)
@@ -714,32 +740,31 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
/* Determine the bss padding required to align bss properly */
bss_pad = 0;
- if (buf_sz & (bss_align - 1))
- bss_pad = bss_align - (buf_sz & (bss_align - 1));
+ if (kbuf.bufsz & (bss_align - 1))
+ bss_pad = bss_align - (kbuf.bufsz & (bss_align - 1));
- memsz = buf_sz + bss_pad + bss_sz;
+ kbuf.memsz = kbuf.bufsz + bss_pad + bss_sz;
/* Allocate buffer for purgatory */
- purgatory_buf = vzalloc(buf_sz);
- if (!purgatory_buf) {
+ kbuf.buffer = vzalloc(kbuf.bufsz);
+ if (!kbuf.buffer) {
ret = -ENOMEM;
goto out;
}
- if (buf_align < bss_align)
- buf_align = bss_align;
+ if (kbuf.buf_align < bss_align)
+ kbuf.buf_align = bss_align;
/* Add buffer to segment list */
- ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
- buf_align, min, max, top_down,
- &pi->purgatory_load_addr);
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out;
+ pi->purgatory_load_addr = kbuf.mem;
/* Load SHF_ALLOC sections */
- buf_addr = purgatory_buf;
+ buf_addr = kbuf.buffer;
load_addr = curr_load_addr = pi->purgatory_load_addr;
- bss_addr = load_addr + buf_sz + bss_pad;
+ bss_addr = load_addr + kbuf.bufsz + bss_pad;
for (i = 0; i < pi->ehdr->e_shnum; i++) {
if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -785,11 +810,11 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
* Used later to identify which section is purgatory and skip it
* from checksumming.
*/
- pi->purgatory_buf = purgatory_buf;
+ pi->purgatory_buf = kbuf.buffer;
return ret;
out:
vfree(sechdrs);
- vfree(purgatory_buf);
+ vfree(kbuf.buffer);
return ret;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 0a52315d9c62..4cef7e4706b0 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -20,22 +20,6 @@ struct kexec_sha_region {
unsigned long len;
};
-/*
- * Keeps track of buffer parameters as provided by caller for requesting
- * memory placement of buffer.
- */
-struct kexec_buf {
- struct kimage *image;
- char *buffer;
- unsigned long bufsz;
- unsigned long mem;
- unsigned long memsz;
- unsigned long buf_align;
- unsigned long buf_min;
- unsigned long buf_max;
- bool top_down; /* allocate from top of memory hole */
-};
-
void kimage_file_post_load_cleanup(struct kimage *image);
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6d1020c03d41..d4b0fa01cae3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,32 +24,14 @@
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
-#include <linux/perf_event.h>
#include <linux/kthread.h>
-/*
- * The run state of the lockup detectors is controlled by the content of the
- * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
- * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
- *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
- */
-#define NMI_WATCHDOG_ENABLED_BIT 0
-#define SOFT_WATCHDOG_ENABLED_BIT 1
-#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
-#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
-
static DEFINE_MUTEX(watchdog_proc_mutex);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
@@ -59,9 +41,6 @@ int __read_mostly watchdog_thresh = 10;
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#else
-#define sysctl_softlockup_all_cpu_backtrace 0
-#define sysctl_hardlockup_all_cpu_backtrace 0
#endif
static struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -100,50 +79,9 @@ static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
-static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-#endif
static unsigned long soft_lockup_nmi_warn;
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-unsigned int __read_mostly hardlockup_panic =
- CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
-static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
- if (!strncmp(str, "panic", 5))
- hardlockup_panic = 1;
- else if (!strncmp(str, "nopanic", 7))
- hardlockup_panic = 0;
- else if (!strncmp(str, "0", 1))
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
- else if (!strncmp(str, "1", 1))
- watchdog_enabled |= NMI_WATCHDOG_ENABLED;
- return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-#endif
-
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
@@ -264,32 +202,14 @@ void touch_all_softlockup_watchdogs(void)
wq_watchdog_touch(-1);
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-void touch_nmi_watchdog(void)
-{
- /*
- * Using __raw here because some code paths have
- * preemption enabled. If preemption is enabled
- * then interrupts should be enabled too, in which
- * case we shouldn't have to worry about the watchdog
- * going off.
- */
- raw_cpu_write(watchdog_nmi_touch, true);
- touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-#endif
-
void touch_softlockup_watchdog_sync(void)
{
__this_cpu_write(softlockup_touch_sync, true);
__this_cpu_write(watchdog_touch_ts, 0);
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
-static bool is_hardlockup(void)
+bool is_hardlockup(void)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
@@ -299,7 +219,6 @@ static bool is_hardlockup(void)
__this_cpu_write(hrtimer_interrupts_saved, hrint);
return false;
}
-#endif
static int is_softlockup(unsigned long touch_ts)
{
@@ -313,77 +232,22 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-
-static struct perf_event_attr wd_hw_attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .size = sizeof(struct perf_event_attr),
- .pinned = 1,
- .disabled = 1,
-};
-
-/* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
- /* Ensure the watchdog never gets throttled */
- event->hw.interrupts = 0;
-
- if (__this_cpu_read(watchdog_nmi_touch) == true) {
- __this_cpu_write(watchdog_nmi_touch, false);
- return;
- }
-
- /* check for a hardlockup
- * This is done by making sure our timer interrupt
- * is incrementing. The timer interrupt should have
- * fired multiple times before we overflow'd. If it hasn't
- * then this is a good indication the cpu is stuck
- */
- if (is_hardlockup()) {
- int this_cpu = smp_processor_id();
-
- /* only print hardlockups once */
- if (__this_cpu_read(hard_watchdog_warn) == true)
- return;
-
- pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
- print_modules();
- print_irqtrace_events(current);
- if (regs)
- show_regs(regs);
- else
- dump_stack();
-
- /*
- * Perform all-CPU dump only once to avoid multiple hardlockups
- * generating interleaving traces
- */
- if (sysctl_hardlockup_all_cpu_backtrace &&
- !test_and_set_bit(0, &hardlockup_allcpu_dumped))
- trigger_allbutself_cpu_backtrace();
-
- if (hardlockup_panic)
- nmi_panic(regs, "Hard LOCKUP");
-
- __this_cpu_write(hard_watchdog_warn, true);
- return;
- }
-
- __this_cpu_write(hard_watchdog_warn, false);
- return;
-}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}
-static int watchdog_nmi_enable(unsigned int cpu);
-static void watchdog_nmi_disable(unsigned int cpu);
+/*
+ * These two functions are mostly architecture specific
+ * defining them as weak here.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+ return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);
@@ -576,109 +440,6 @@ static void watchdog(unsigned int cpu)
watchdog_nmi_disable(cpu);
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long cpu0_err;
-
-static int watchdog_nmi_enable(unsigned int cpu)
-{
- struct perf_event_attr *wd_attr;
- struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
- /* nothing to do if the hard lockup detector is disabled */
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- goto out;
-
- /* is it already setup and enabled? */
- if (event && event->state > PERF_EVENT_STATE_OFF)
- goto out;
-
- /* it is setup but not enabled */
- if (event != NULL)
- goto out_enable;
-
- wd_attr = &wd_hw_attr;
- wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
-
- /* Try to register using hardware perf events */
- event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
-
- /* save cpu0 error for future comparision */
- if (cpu == 0 && IS_ERR(event))
- cpu0_err = PTR_ERR(event);
-
- if (!IS_ERR(event)) {
- /* only print for cpu0 or different than cpu0 */
- if (cpu == 0 || cpu0_err)
- pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
- goto out_save;
- }
-
- /*
- * Disable the hard lockup detector if _any_ CPU fails to set up
- * set up the hardware perf event. The watchdog() function checks
- * the NMI_WATCHDOG_ENABLED bit periodically.
- *
- * The barriers are for syncing up watchdog_enabled across all the
- * cpus, as clear_bit() does not use barriers.
- */
- smp_mb__before_atomic();
- clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
- smp_mb__after_atomic();
-
- /* skip displaying the same error again */
- if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
- return PTR_ERR(event);
-
- /* vary the KERN level based on the returned errno */
- if (PTR_ERR(event) == -EOPNOTSUPP)
- pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
- else if (PTR_ERR(event) == -ENOENT)
- pr_warn("disabled (cpu%i): hardware events not enabled\n",
- cpu);
- else
- pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
- cpu, PTR_ERR(event));
-
- pr_info("Shutting down hard lockup detector on all cpus\n");
-
- return PTR_ERR(event);
-
- /* success path */
-out_save:
- per_cpu(watchdog_ev, cpu) = event;
-out_enable:
- perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
- return 0;
-}
-
-static void watchdog_nmi_disable(unsigned int cpu)
-{
- struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
- if (event) {
- perf_event_disable(event);
- per_cpu(watchdog_ev, cpu) = NULL;
-
- /* should be in cleanup, but blocks oprofile */
- perf_event_release_kernel(event);
- }
- if (cpu == 0) {
- /* watchdog_nmi_enable() expects this to be zero initially. */
- cpu0_err = 0;
- }
-}
-
-#else
-static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
-static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
static struct smp_hotplug_thread watchdog_threads = {
.store = &softlockup_watchdog,
.thread_should_run = watchdog_should_run,
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
new file mode 100644
index 000000000000..84016c8aee6b
--- /dev/null
+++ b/kernel/watchdog_hld.c
@@ -0,0 +1,227 @@
+/*
+ * Detect hard lockups on a system
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
+ * to those contributors as well.
+ */
+
+#define pr_fmt(fmt) "NMI watchdog: " fmt
+
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <asm/irq_regs.h>
+#include <linux/perf_event.h>
+
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
+static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+ if (!strncmp(str, "panic", 5))
+ hardlockup_panic = 1;
+ else if (!strncmp(str, "nopanic", 7))
+ hardlockup_panic = 0;
+ else if (!strncmp(str, "0", 1))
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+ else if (!strncmp(str, "1", 1))
+ watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+ return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+void touch_nmi_watchdog(void)
+{
+ /*
+ * Using __raw here because some code paths have
+ * preemption enabled. If preemption is enabled
+ * then interrupts should be enabled too, in which
+ * case we shouldn't have to worry about the watchdog
+ * going off.
+ */
+ raw_cpu_write(watchdog_nmi_touch, true);
+ touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+static struct perf_event_attr wd_hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 1,
+};
+
+/* Callback function for perf event subsystem */
+static void watchdog_overflow_callback(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ /* Ensure the watchdog never gets throttled */
+ event->hw.interrupts = 0;
+
+ if (__this_cpu_read(watchdog_nmi_touch) == true) {
+ __this_cpu_write(watchdog_nmi_touch, false);
+ return;
+ }
+
+ /* check for a hardlockup
+ * This is done by making sure our timer interrupt
+ * is incrementing. The timer interrupt should have
+ * fired multiple times before we overflow'd. If it hasn't
+ * then this is a good indication the cpu is stuck
+ */
+ if (is_hardlockup()) {
+ int this_cpu = smp_processor_id();
+
+ /* only print hardlockups once */
+ if (__this_cpu_read(hard_watchdog_warn) == true)
+ return;
+
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ /*
+ * Perform all-CPU dump only once to avoid multiple hardlockups
+ * generating interleaving traces
+ */
+ if (sysctl_hardlockup_all_cpu_backtrace &&
+ !test_and_set_bit(0, &hardlockup_allcpu_dumped))
+ trigger_allbutself_cpu_backtrace();
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
+
+ __this_cpu_write(hard_watchdog_warn, true);
+ return;
+ }
+
+ __this_cpu_write(hard_watchdog_warn, false);
+ return;
+}
+
+/*
+ * People like the simple clean cpu node info on boot.
+ * Reduce the watchdog noise by only printing messages
+ * that are different from what cpu0 displayed.
+ */
+static unsigned long cpu0_err;
+
+int watchdog_nmi_enable(unsigned int cpu)
+{
+ struct perf_event_attr *wd_attr;
+ struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+ /* nothing to do if the hard lockup detector is disabled */
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ goto out;
+
+ /* is it already setup and enabled? */
+ if (event && event->state > PERF_EVENT_STATE_OFF)
+ goto out;
+
+ /* it is setup but not enabled */
+ if (event != NULL)
+ goto out_enable;
+
+ wd_attr = &wd_hw_attr;
+ wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+
+ /* Try to register using hardware perf events */
+ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+
+ /* save cpu0 error for future comparision */
+ if (cpu == 0 && IS_ERR(event))
+ cpu0_err = PTR_ERR(event);
+
+ if (!IS_ERR(event)) {
+ /* only print for cpu0 or different than cpu0 */
+ if (cpu == 0 || cpu0_err)
+ pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
+ goto out_save;
+ }
+
+ /*
+ * Disable the hard lockup detector if _any_ CPU fails to set up
+ * set up the hardware perf event. The watchdog() function checks
+ * the NMI_WATCHDOG_ENABLED bit periodically.
+ *
+ * The barriers are for syncing up watchdog_enabled across all the
+ * cpus, as clear_bit() does not use barriers.
+ */
+ smp_mb__before_atomic();
+ clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
+ smp_mb__after_atomic();
+
+ /* skip displaying the same error again */
+ if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
+ return PTR_ERR(event);
+
+ /* vary the KERN level based on the returned errno */
+ if (PTR_ERR(event) == -EOPNOTSUPP)
+ pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
+ else if (PTR_ERR(event) == -ENOENT)
+ pr_warn("disabled (cpu%i): hardware events not enabled\n",
+ cpu);
+ else
+ pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
+ cpu, PTR_ERR(event));
+
+ pr_info("Shutting down hard lockup detector on all cpus\n");
+
+ return PTR_ERR(event);
+
+ /* success path */
+out_save:
+ per_cpu(watchdog_ev, cpu) = event;
+out_enable:
+ perf_event_enable(per_cpu(watchdog_ev, cpu));
+out:
+ return 0;
+}
+
+void watchdog_nmi_disable(unsigned int cpu)
+{
+ struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+ if (event) {
+ perf_event_disable(event);
+ per_cpu(watchdog_ev, cpu) = NULL;
+
+ /* should be in cleanup, but blocks oprofile */
+ perf_event_release_kernel(event);
+ }
+ if (cpu == 0) {
+ /* watchdog_nmi_enable() expects this to be zero initially. */
+ cpu0_err = 0;
+ }
+}
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 5487827fa86c..370eb2f4dd37 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -27,6 +27,18 @@ config IMA
to learn more about IMA.
If unsure, say N.
+config IMA_KEXEC
+ bool "Enable carrying the IMA measurement list across a soft boot"
+ depends on IMA && TCG_TPM && HAVE_IMA_KEXEC
+ default n
+ help
+ TPM PCRs are only reset on a hard reboot. In order to validate
+ a TPM's quote after a soft boot, the IMA measurement list of the
+ running kernel must be saved and restored on boot.
+
+ Depending on the IMA policy, the measurement list can grow to
+ be very large.
+
config IMA_MEASURE_PCR_IDX
int
depends on IMA
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile
index 9aeaedad1e2b..29f198bde02b 100644
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_IMA) += ima.o
ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
ima_policy.o ima_template.o ima_template_lib.o
ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
+ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o
obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index db25f54a04fe..5e6180a4da7d 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -28,6 +28,10 @@
#include "../integrity.h"
+#ifdef CONFIG_HAVE_IMA_KEXEC
+#include <asm/ima.h>
+#endif
+
enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN,
IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII };
enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 };
@@ -81,6 +85,7 @@ struct ima_template_field {
/* IMA template descriptor definition */
struct ima_template_desc {
+ struct list_head list;
char *name;
char *fmt;
int num_fields;
@@ -102,6 +107,27 @@ struct ima_queue_entry {
};
extern struct list_head ima_measurements; /* list of all measurements */
+/* Some details preceding the binary serialized measurement list */
+struct ima_kexec_hdr {
+ u16 version;
+ u16 _reserved0;
+ u32 _reserved1;
+ u64 buffer_size;
+ u64 count;
+};
+
+#ifdef CONFIG_HAVE_IMA_KEXEC
+void ima_load_kexec_buffer(void);
+#else
+static inline void ima_load_kexec_buffer(void) {}
+#endif /* CONFIG_HAVE_IMA_KEXEC */
+
+/*
+ * The default binary_runtime_measurements list format is defined as the
+ * platform native format. The canonical format is defined as little-endian.
+ */
+extern bool ima_canonical_fmt;
+
/* Internal IMA function definitions */
int ima_init(void);
int ima_fs_init(void);
@@ -122,7 +148,12 @@ int ima_init_crypto(void);
void ima_putc(struct seq_file *m, void *data, int datalen);
void ima_print_digest(struct seq_file *m, u8 *digest, u32 size);
struct ima_template_desc *ima_template_desc_current(void);
+int ima_restore_measurement_entry(struct ima_template_entry *entry);
+int ima_restore_measurement_list(loff_t bufsize, void *buf);
+int ima_measurements_show(struct seq_file *m, void *v);
+unsigned long ima_get_binary_runtime_size(void);
int ima_init_template(void);
+void ima_init_template_list(void);
/*
* used to protect h_table and sha_table
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 38f2ed830dd6..802d5d20f36f 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -477,11 +477,13 @@ static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data,
u8 buffer[IMA_EVENT_NAME_LEN_MAX + 1] = { 0 };
u8 *data_to_hash = field_data[i].data;
u32 datalen = field_data[i].len;
+ u32 datalen_to_hash =
+ !ima_canonical_fmt ? datalen : cpu_to_le32(datalen);
if (strcmp(td->name, IMA_TEMPLATE_IMA_NAME) != 0) {
rc = crypto_shash_update(shash,
- (const u8 *) &field_data[i].len,
- sizeof(field_data[i].len));
+ (const u8 *) &datalen_to_hash,
+ sizeof(datalen_to_hash));
if (rc)
break;
} else if (strcmp(td->fields[i]->field_id, "n") == 0) {
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 3df46906492d..ca303e5d2b94 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -28,6 +28,16 @@
static DEFINE_MUTEX(ima_write_mutex);
+bool ima_canonical_fmt;
+static int __init default_canonical_fmt_setup(char *str)
+{
+#ifdef __BIG_ENDIAN
+ ima_canonical_fmt = 1;
+#endif
+ return 1;
+}
+__setup("ima_canonical_fmt", default_canonical_fmt_setup);
+
static int valid_policy = 1;
#define TMPBUFLEN 12
static ssize_t ima_show_htable_value(char __user *buf, size_t count,
@@ -116,13 +126,13 @@ void ima_putc(struct seq_file *m, void *data, int datalen)
* [eventdata length]
* eventdata[n]=template specific data
*/
-static int ima_measurements_show(struct seq_file *m, void *v)
+int ima_measurements_show(struct seq_file *m, void *v)
{
/* the list never shrinks, so we don't need a lock here */
struct ima_queue_entry *qe = v;
struct ima_template_entry *e;
char *template_name;
- int namelen;
+ u32 pcr, namelen, template_data_len; /* temporary fields */
bool is_ima_template = false;
int i;
@@ -139,25 +149,29 @@ static int ima_measurements_show(struct seq_file *m, void *v)
* PCR used defaults to the same (config option) in
* little-endian format, unless set in policy
*/
- ima_putc(m, &e->pcr, sizeof(e->pcr));
+ pcr = !ima_canonical_fmt ? e->pcr : cpu_to_le32(e->pcr);
+ ima_putc(m, &pcr, sizeof(e->pcr));
/* 2nd: template digest */
ima_putc(m, e->digest, TPM_DIGEST_SIZE);
/* 3rd: template name size */
- namelen = strlen(template_name);
+ namelen = !ima_canonical_fmt ? strlen(template_name) :
+ cpu_to_le32(strlen(template_name));
ima_putc(m, &namelen, sizeof(namelen));
/* 4th: template name */
- ima_putc(m, template_name, namelen);
+ ima_putc(m, template_name, strlen(template_name));
/* 5th: template length (except for 'ima' template) */
if (strcmp(template_name, IMA_TEMPLATE_IMA_NAME) == 0)
is_ima_template = true;
- if (!is_ima_template)
- ima_putc(m, &e->template_data_len,
- sizeof(e->template_data_len));
+ if (!is_ima_template) {
+ template_data_len = !ima_canonical_fmt ? e->template_data_len :
+ cpu_to_le32(e->template_data_len);
+ ima_putc(m, &template_data_len, sizeof(e->template_data_len));
+ }
/* 6th: template specific data */
for (i = 0; i < e->template_desc->num_fields; i++) {
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 2ac1f41db5c0..2967d497a665 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -129,6 +129,8 @@ int __init ima_init(void)
if (rc != 0)
return rc;
+ ima_load_kexec_buffer();
+
rc = ima_add_boot_aggregate(); /* boot aggregate must be first entry */
if (rc != 0)
return rc;
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
new file mode 100644
index 000000000000..e473eee913cb
--- /dev/null
+++ b/security/integrity/ima/ima_kexec.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Authors:
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
+ * Mimi Zohar <zohar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include <linux/kexec.h>
+#include "ima.h"
+
+#ifdef CONFIG_IMA_KEXEC
+static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
+ unsigned long segment_size)
+{
+ struct ima_queue_entry *qe;
+ struct seq_file file;
+ struct ima_kexec_hdr khdr;
+ int ret = 0;
+
+ /* segment size can't change between kexec load and execute */
+ file.buf = vmalloc(segment_size);
+ if (!file.buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ file.size = segment_size;
+ file.read_pos = 0;
+ file.count = sizeof(khdr); /* reserved space */
+
+ memset(&khdr, 0, sizeof(khdr));
+ khdr.version = 1;
+ list_for_each_entry_rcu(qe, &ima_measurements, later) {
+ if (file.count < file.size) {
+ khdr.count++;
+ ima_measurements_show(&file, qe);
+ } else {
+ ret = -EINVAL;
+ break;
+ }
+ }
+
+ if (ret < 0)
+ goto out;
+
+ /*
+ * fill in reserved space with some buffer details
+ * (eg. version, buffer size, number of measurements)
+ */
+ khdr.buffer_size = file.count;
+ if (ima_canonical_fmt) {
+ khdr.version = cpu_to_le16(khdr.version);
+ khdr.count = cpu_to_le64(khdr.count);
+ khdr.buffer_size = cpu_to_le64(khdr.buffer_size);
+ }
+ memcpy(file.buf, &khdr, sizeof(khdr));
+
+ print_hex_dump(KERN_DEBUG, "ima dump: ", DUMP_PREFIX_NONE,
+ 16, 1, file.buf,
+ file.count < 100 ? file.count : 100, true);
+
+ *buffer_size = file.count;
+ *buffer = file.buf;
+out:
+ if (ret == -EINVAL)
+ vfree(file.buf);
+ return ret;
+}
+
+/*
+ * Called during kexec_file_load so that IMA can add a segment to the kexec
+ * image for the measurement list for the next kernel.
+ *
+ * This function assumes that kexec_mutex is held.
+ */
+void ima_add_kexec_buffer(struct kimage *image)
+{
+ struct kexec_buf kbuf = { .image = image, .buf_align = PAGE_SIZE,
+ .buf_min = 0, .buf_max = ULONG_MAX,
+ .top_down = true };
+ unsigned long binary_runtime_size;
+
+ /* use more understandable variable names than defined in kbuf */
+ void *kexec_buffer = NULL;
+ size_t kexec_buffer_size;
+ size_t kexec_segment_size;
+ int ret;
+
+ /*
+ * Reserve an extra half page of memory for additional measurements
+ * added during the kexec load.
+ */
+ binary_runtime_size = ima_get_binary_runtime_size();
+ if (binary_runtime_size >= ULONG_MAX - PAGE_SIZE)
+ kexec_segment_size = ULONG_MAX;
+ else
+ kexec_segment_size = ALIGN(ima_get_binary_runtime_size() +
+ PAGE_SIZE / 2, PAGE_SIZE);
+ if ((kexec_segment_size == ULONG_MAX) ||
+ ((kexec_segment_size >> PAGE_SHIFT) > totalram_pages / 2)) {
+ pr_err("Binary measurement list too large.\n");
+ return;
+ }
+
+ ima_dump_measurement_list(&kexec_buffer_size, &kexec_buffer,
+ kexec_segment_size);
+ if (!kexec_buffer) {
+ pr_err("Not enough memory for the kexec measurement buffer.\n");
+ return;
+ }
+
+ kbuf.buffer = kexec_buffer;
+ kbuf.bufsz = kexec_buffer_size;
+ kbuf.memsz = kexec_segment_size;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error passing over kexec measurement buffer.\n");
+ return;
+ }
+
+ ret = arch_ima_add_kexec_buffer(image, kbuf.mem, kexec_segment_size);
+ if (ret) {
+ pr_err("Error passing over kexec measurement buffer.\n");
+ return;
+ }
+
+ pr_debug("kexec measurement buffer for the loaded kernel at 0x%lx.\n",
+ kbuf.mem);
+}
+#endif /* IMA_KEXEC */
+
+/*
+ * Restore the measurement list from the previous kernel.
+ */
+void ima_load_kexec_buffer(void)
+{
+ void *kexec_buffer = NULL;
+ size_t kexec_buffer_size = 0;
+ int rc;
+
+ rc = ima_get_kexec_buffer(&kexec_buffer, &kexec_buffer_size);
+ switch (rc) {
+ case 0:
+ rc = ima_restore_measurement_list(kexec_buffer_size,
+ kexec_buffer);
+ if (rc != 0)
+ pr_err("Failed to restore the measurement list: %d\n",
+ rc);
+
+ ima_free_kexec_buffer();
+ break;
+ case -ENOTSUPP:
+ pr_debug("Restoring the measurement list not supported\n");
+ break;
+ case -ENOENT:
+ pr_debug("No measurement list to restore\n");
+ break;
+ default:
+ pr_debug("Error restoring the measurement list: %d\n", rc);
+ }
+}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 423d111b3b94..50818c60538b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -418,6 +418,7 @@ static int __init init_ima(void)
{
int error;
+ ima_init_template_list();
hash_setup(CONFIG_IMA_DEFAULT_HASH);
error = ima_init();
if (!error) {
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index 32f6ac0f96df..3a3cc2a45645 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -29,6 +29,11 @@
#define AUDIT_CAUSE_LEN_MAX 32
LIST_HEAD(ima_measurements); /* list of all measurements */
+#ifdef CONFIG_IMA_KEXEC
+static unsigned long binary_runtime_size;
+#else
+static unsigned long binary_runtime_size = ULONG_MAX;
+#endif
/* key: inode (before secure-hashing a file) */
struct ima_h_table ima_htable = {
@@ -64,12 +69,31 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value,
return ret;
}
+/*
+ * Calculate the memory required for serializing a single
+ * binary_runtime_measurement list entry, which contains a
+ * couple of variable length fields (e.g template name and data).
+ */
+static int get_binary_runtime_size(struct ima_template_entry *entry)
+{
+ int size = 0;
+
+ size += sizeof(u32); /* pcr */
+ size += sizeof(entry->digest);
+ size += sizeof(int); /* template name size field */
+ size += strlen(entry->template_desc->name);
+ size += sizeof(entry->template_data_len);
+ size += entry->template_data_len;
+ return size;
+}
+
/* ima_add_template_entry helper function:
- * - Add template entry to measurement list and hash table.
+ * - Add template entry to the measurement list and hash table, for
+ * all entries except those carried across kexec.
*
* (Called with ima_extend_list_mutex held.)
*/
-static int ima_add_digest_entry(struct ima_template_entry *entry)
+static int ima_add_digest_entry(struct ima_template_entry *entry, int flags)
{
struct ima_queue_entry *qe;
unsigned int key;
@@ -85,11 +109,34 @@ static int ima_add_digest_entry(struct ima_template_entry *entry)
list_add_tail_rcu(&qe->later, &ima_measurements);
atomic_long_inc(&ima_htable.len);
- key = ima_hash_key(entry->digest);
- hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]);
+ if (flags) {
+ key = ima_hash_key(entry->digest);
+ hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]);
+ }
+
+ if (binary_runtime_size != ULONG_MAX) {
+ int size;
+
+ size = get_binary_runtime_size(entry);
+ binary_runtime_size = (binary_runtime_size < ULONG_MAX - size) ?
+ binary_runtime_size + size : ULONG_MAX;
+ }
return 0;
}
+/*
+ * Return the amount of memory required for serializing the
+ * entire binary_runtime_measurement list, including the ima_kexec_hdr
+ * structure.
+ */
+unsigned long ima_get_binary_runtime_size(void)
+{
+ if (binary_runtime_size >= (ULONG_MAX - sizeof(struct ima_kexec_hdr)))
+ return ULONG_MAX;
+ else
+ return binary_runtime_size + sizeof(struct ima_kexec_hdr);
+};
+
static int ima_pcr_extend(const u8 *hash, int pcr)
{
int result = 0;
@@ -103,8 +150,13 @@ static int ima_pcr_extend(const u8 *hash, int pcr)
return result;
}
-/* Add template entry to the measurement list and hash table,
- * and extend the pcr.
+/*
+ * Add template entry to the measurement list and hash table, and
+ * extend the pcr.
+ *
+ * On systems which support carrying the IMA measurement list across
+ * kexec, maintain the total memory size required for serializing the
+ * binary_runtime_measurements.
*/
int ima_add_template_entry(struct ima_template_entry *entry, int violation,
const char *op, struct inode *inode,
@@ -126,7 +178,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
}
}
- result = ima_add_digest_entry(entry);
+ result = ima_add_digest_entry(entry, 1);
if (result < 0) {
audit_cause = "ENOMEM";
audit_info = 0;
@@ -149,3 +201,13 @@ out:
op, audit_cause, result, audit_info);
return result;
}
+
+int ima_restore_measurement_entry(struct ima_template_entry *entry)
+{
+ int result = 0;
+
+ mutex_lock(&ima_extend_list_mutex);
+ result = ima_add_digest_entry(entry, 0);
+ mutex_unlock(&ima_extend_list_mutex);
+ return result;
+}
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index febd12ed9b55..004723c4169f 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -15,16 +15,20 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/rculist.h>
#include "ima.h"
#include "ima_template_lib.h"
-static struct ima_template_desc defined_templates[] = {
+static struct ima_template_desc builtin_templates[] = {
{.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT},
{.name = "ima-ng", .fmt = "d-ng|n-ng"},
{.name = "ima-sig", .fmt = "d-ng|n-ng|sig"},
{.name = "", .fmt = ""}, /* placeholder for a custom format */
};
+static LIST_HEAD(defined_templates);
+static DEFINE_SPINLOCK(template_list);
+
static struct ima_template_field supported_fields[] = {
{.field_id = "d", .field_init = ima_eventdigest_init,
.field_show = ima_show_template_digest},
@@ -37,6 +41,7 @@ static struct ima_template_field supported_fields[] = {
{.field_id = "sig", .field_init = ima_eventsig_init,
.field_show = ima_show_template_sig},
};
+#define MAX_TEMPLATE_NAME_LEN 15
static struct ima_template_desc *ima_template;
static struct ima_template_desc *lookup_template_desc(const char *name);
@@ -52,6 +57,8 @@ static int __init ima_template_setup(char *str)
if (ima_template)
return 1;
+ ima_init_template_list();
+
/*
* Verify that a template with the supplied name exists.
* If not, use CONFIG_IMA_DEFAULT_TEMPLATE.
@@ -80,7 +87,7 @@ __setup("ima_template=", ima_template_setup);
static int __init ima_template_fmt_setup(char *str)
{
- int num_templates = ARRAY_SIZE(defined_templates);
+ int num_templates = ARRAY_SIZE(builtin_templates);
if (ima_template)
return 1;
@@ -91,22 +98,28 @@ static int __init ima_template_fmt_setup(char *str)
return 1;
}
- defined_templates[num_templates - 1].fmt = str;
- ima_template = defined_templates + num_templates - 1;
+ builtin_templates[num_templates - 1].fmt = str;
+ ima_template = builtin_templates + num_templates - 1;
+
return 1;
}
__setup("ima_template_fmt=", ima_template_fmt_setup);
static struct ima_template_desc *lookup_template_desc(const char *name)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(defined_templates); i++) {
- if (strcmp(defined_templates[i].name, name) == 0)
- return defined_templates + i;
+ struct ima_template_desc *template_desc;
+ int found = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(template_desc, &defined_templates, list) {
+ if ((strcmp(template_desc->name, name) == 0) ||
+ (strcmp(template_desc->fmt, name) == 0)) {
+ found = 1;
+ break;
+ }
}
-
- return NULL;
+ rcu_read_unlock();
+ return found ? template_desc : NULL;
}
static struct ima_template_field *lookup_template_field(const char *field_id)
@@ -142,9 +155,14 @@ static int template_desc_init_fields(const char *template_fmt,
{
const char *template_fmt_ptr;
struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX];
- int template_num_fields = template_fmt_size(template_fmt);
+ int template_num_fields;
int i, len;
+ if (num_fields && *num_fields > 0) /* already initialized? */
+ return 0;
+
+ template_num_fields = template_fmt_size(template_fmt);
+
if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) {
pr_err("format string '%s' contains too many fields\n",
template_fmt);
@@ -182,11 +200,29 @@ static int template_desc_init_fields(const char *template_fmt,
return 0;
}
+void ima_init_template_list(void)
+{
+ int i;
+
+ if (!list_empty(&defined_templates))
+ return;
+
+ spin_lock(&template_list);
+ for (i = 0; i < ARRAY_SIZE(builtin_templates); i++) {
+ list_add_tail_rcu(&builtin_templates[i].list,
+ &defined_templates);
+ }
+ spin_unlock(&template_list);
+ synchronize_rcu();
+}
+
struct ima_template_desc *ima_template_desc_current(void)
{
- if (!ima_template)
+ if (!ima_template) {
+ ima_init_template_list();
ima_template =
lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE);
+ }
return ima_template;
}
@@ -205,3 +241,234 @@ int __init ima_init_template(void)
return result;
}
+
+static struct ima_template_desc *restore_template_fmt(char *template_name)
+{
+ struct ima_template_desc *template_desc = NULL;
+ int ret;
+
+ ret = template_desc_init_fields(template_name, NULL, NULL);
+ if (ret < 0) {
+ pr_err("attempting to initialize the template \"%s\" failed\n",
+ template_name);
+ goto out;
+ }
+
+ template_desc = kzalloc(sizeof(*template_desc), GFP_KERNEL);
+ if (!template_desc)
+ goto out;
+
+ template_desc->name = "";
+ template_desc->fmt = kstrdup(template_name, GFP_KERNEL);
+ if (!template_desc->fmt)
+ goto out;
+
+ spin_lock(&template_list);
+ list_add_tail_rcu(&template_desc->list, &defined_templates);
+ spin_unlock(&template_list);
+ synchronize_rcu();
+out:
+ return template_desc;
+}
+
+static int ima_restore_template_data(struct ima_template_desc *template_desc,
+ void *template_data,
+ int template_data_size,
+ struct ima_template_entry **entry)
+{
+ struct binary_field_data {
+ u32 len;
+ u8 data[0];
+ } __packed;
+
+ struct binary_field_data *field_data;
+ int offset = 0;
+ int ret = 0;
+ int i;
+
+ *entry = kzalloc(sizeof(**entry) +
+ template_desc->num_fields * sizeof(struct ima_field_data),
+ GFP_NOFS);
+ if (!*entry)
+ return -ENOMEM;
+
+ (*entry)->template_desc = template_desc;
+ for (i = 0; i < template_desc->num_fields; i++) {
+ field_data = template_data + offset;
+
+ /* Each field of the template data is prefixed with a length. */
+ if (offset > (template_data_size - sizeof(field_data->len))) {
+ pr_err("Restoring the template field failed\n");
+ ret = -EINVAL;
+ break;
+ }
+ offset += sizeof(field_data->len);
+
+ if (ima_canonical_fmt)
+ field_data->len = le32_to_cpu(field_data->len);
+
+ if (offset > (template_data_size - field_data->len)) {
+ pr_err("Restoring the template field data failed\n");
+ ret = -EINVAL;
+ break;
+ }
+ offset += field_data->len;
+
+ (*entry)->template_data[i].len = field_data->len;
+ (*entry)->template_data_len += sizeof(field_data->len);
+
+ (*entry)->template_data[i].data =
+ kzalloc(field_data->len + 1, GFP_KERNEL);
+ if (!(*entry)->template_data[i].data) {
+ ret = -ENOMEM;
+ break;
+ }
+ memcpy((*entry)->template_data[i].data, field_data->data,
+ field_data->len);
+ (*entry)->template_data_len += field_data->len;
+ }
+
+ if (ret < 0) {
+ ima_free_template_entry(*entry);
+ *entry = NULL;
+ }
+
+ return ret;
+}
+
+/* Restore the serialized binary measurement list without extending PCRs. */
+int ima_restore_measurement_list(loff_t size, void *buf)
+{
+ struct binary_hdr_v1 {
+ u32 pcr;
+ u8 digest[TPM_DIGEST_SIZE];
+ u32 template_name_len;
+ char template_name[0];
+ } __packed;
+ char template_name[MAX_TEMPLATE_NAME_LEN];
+
+ struct binary_data_v1 {
+ u32 template_data_size;
+ char template_data[0];
+ } __packed;
+
+ struct ima_kexec_hdr *khdr = buf;
+ struct binary_hdr_v1 *hdr_v1;
+ struct binary_data_v1 *data_v1;
+
+ void *bufp = buf + sizeof(*khdr);
+ void *bufendp;
+ struct ima_template_entry *entry;
+ struct ima_template_desc *template_desc;
+ unsigned long count = 0;
+ int ret = 0;
+
+ if (!buf || size < sizeof(*khdr))
+ return 0;
+
+ if (ima_canonical_fmt) {
+ khdr->version = le16_to_cpu(khdr->version);
+ khdr->count = le64_to_cpu(khdr->count);
+ khdr->buffer_size = le64_to_cpu(khdr->buffer_size);
+ }
+
+ if (khdr->version != 1) {
+ pr_err("attempting to restore a incompatible measurement list");
+ return 0;
+ }
+
+ /*
+ * ima kexec buffer prefix: version, buffer size, count
+ * v1 format: pcr, digest, template-name-len, template-name,
+ * template-data-size, template-data
+ */
+ bufendp = buf + khdr->buffer_size;
+ while ((bufp < bufendp) && (count++ < khdr->count)) {
+ if (count > ULONG_MAX - 1) {
+ pr_err("attempting to restore too many measurements");
+ ret = -EINVAL;
+ }
+
+ hdr_v1 = bufp;
+
+ if (ima_canonical_fmt)
+ hdr_v1->template_name_len =
+ le32_to_cpu(hdr_v1->template_name_len);
+
+ if ((hdr_v1->template_name_len >= MAX_TEMPLATE_NAME_LEN) ||
+ ((bufp + hdr_v1->template_name_len) > bufendp)) {
+ pr_err("attempting to restore a template name \
+ that is too long\n");
+ ret = -EINVAL;
+ break;
+ }
+ bufp += sizeof(*hdr_v1);
+
+ /* template name is not null terminated */
+ memcpy(template_name, bufp, hdr_v1->template_name_len);
+ template_name[hdr_v1->template_name_len] = 0;
+
+ if (strcmp(template_name, "ima") == 0) {
+ pr_err("attempting to restore an unsupported \
+ template \"%s\" failed\n", template_name);
+ ret = -EINVAL;
+ break;
+ }
+ data_v1 = bufp += (u_int8_t)hdr_v1->template_name_len;
+
+ template_desc = lookup_template_desc(template_name);
+ if (!template_desc) {
+ template_desc = restore_template_fmt(template_name);
+ if (!template_desc)
+ break;
+ }
+
+ /*
+ * Only the running system's template format is initialized
+ * on boot. As needed, initialize the other template formats.
+ */
+ ret = template_desc_init_fields(template_desc->fmt,
+ &(template_desc->fields),
+ &(template_desc->num_fields));
+ if (ret < 0) {
+ pr_err("attempting to restore the template fmt \"%s\" \
+ failed\n", template_desc->fmt);
+ ret = -EINVAL;
+ break;
+ }
+
+ if (bufp > (bufendp - sizeof(data_v1->template_data_size))) {
+ pr_err("restoring the template data size failed\n");
+ ret = -EINVAL;
+ break;
+ }
+ bufp += (u_int8_t) sizeof(data_v1->template_data_size);
+
+ if (ima_canonical_fmt)
+ data_v1->template_data_size =
+ le32_to_cpu(data_v1->template_data_size);
+
+ if (bufp > (bufendp - data_v1->template_data_size)) {
+ pr_err("restoring the template data failed\n");
+ ret = -EINVAL;
+ break;
+ }
+
+ ret = ima_restore_template_data(template_desc,
+ data_v1->template_data,
+ data_v1->template_data_size,
+ &entry);
+ if (ret < 0)
+ break;
+
+ memcpy(entry->digest, hdr_v1->digest, TPM_DIGEST_SIZE);
+ entry->pcr =
+ !ima_canonical_fmt ? hdr_v1->pcr : le32_to_cpu(hdr_v1->pcr);
+ ret = ima_restore_measurement_entry(entry);
+ if (ret < 0)
+ break;
+
+ bufp += data_v1->template_data_size;
+ }
+ return ret;
+}
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index f9bae04ba176..f9ba37b3928d 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -103,8 +103,11 @@ static void ima_show_template_data_binary(struct seq_file *m,
u32 len = (show == IMA_SHOW_BINARY_OLD_STRING_FMT) ?
strlen(field_data->data) : field_data->len;
- if (show != IMA_SHOW_BINARY_NO_FIELD_LEN)
- ima_putc(m, &len, sizeof(len));
+ if (show != IMA_SHOW_BINARY_NO_FIELD_LEN) {
+ u32 field_len = !ima_canonical_fmt ? len : cpu_to_le32(len);
+
+ ima_putc(m, &field_len, sizeof(field_len));
+ }
if (!len)
return;
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index d08e214ec6e7..be93ab02b490 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -719,14 +719,14 @@ sub set_value {
if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") {
# Note if a test is something other than build, then we
- # will need other manditory options.
+ # will need other mandatory options.
if ($prvalue ne "install") {
# for bisect, we need to check BISECT_TYPE
if ($prvalue ne "bisect") {
$buildonly = 0;
}
} else {
- # install still limits some manditory options.
+ # install still limits some mandatory options.
$buildonly = 2;
}
}
@@ -735,7 +735,7 @@ sub set_value {
if ($prvalue ne "install") {
$buildonly = 0;
} else {
- # install still limits some manditory options.
+ # install still limits some mandatory options.
$buildonly = 2;
}
}
@@ -3989,7 +3989,7 @@ sub make_min_config {
}
}
- # Save off all the current mandidory configs
+ # Save off all the current mandatory configs
open (OUT, ">$temp_config")
or die "Can't write to $temp_config";
foreach my $config (keys %keep_configs) {