summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c32
-rw-r--r--arch/powerpc/kernel/btext.c2
-rw-r--r--arch/powerpc/kernel/cacheinfo.c37
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S4
-rw-r--r--arch/powerpc/kernel/crash_dump.c2
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c4
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c4
-rw-r--r--arch/powerpc/kernel/eeh.c42
-rw-r--r--arch/powerpc/kernel/eeh_dev.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c237
-rw-r--r--arch/powerpc/kernel/eeh_pe.c160
-rw-r--r--arch/powerpc/kernel/entry_32.S4
-rw-r--r--arch/powerpc/kernel/entry_64.S33
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S244
-rw-r--r--arch/powerpc/kernel/fadump.c4
-rw-r--r--arch/powerpc/kernel/head_8xx.S103
-rw-r--r--arch/powerpc/kernel/io-workarounds.c4
-rw-r--r--arch/powerpc/kernel/iommu.c2
-rw-r--r--arch/powerpc/kernel/isa-bridge.c6
-rw-r--r--arch/powerpc/kernel/kgdb.c43
-rw-r--r--arch/powerpc/kernel/mce.c9
-rw-r--r--arch/powerpc/kernel/mce_power.c9
-rw-r--r--arch/powerpc/kernel/module.c8
-rw-r--r--arch/powerpc/kernel/module_64.c14
-rw-r--r--arch/powerpc/kernel/paca.c2
-rw-r--r--arch/powerpc/kernel/pci_32.c6
-rw-r--r--arch/powerpc/kernel/pci_64.c2
-rw-r--r--arch/powerpc/kernel/process.c110
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/prom_init.c223
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh16
-rw-r--r--arch/powerpc/kernel/ptrace.c68
-rw-r--r--arch/powerpc/kernel/rtas.c13
-rw-r--r--arch/powerpc/kernel/rtasd.c25
-rw-r--r--arch/powerpc/kernel/setup-common.c5
-rw-r--r--arch/powerpc/kernel/setup_32.c10
-rw-r--r--arch/powerpc/kernel/setup_64.c29
-rw-r--r--arch/powerpc/kernel/smp.c245
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S2
-rw-r--r--arch/powerpc/kernel/time.c104
-rw-r--r--arch/powerpc/kernel/tm.S75
-rw-r--r--arch/powerpc/kernel/trace/Makefile4
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c261
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64.S12
-rw-r--r--arch/powerpc/kernel/traps.c157
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S1
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S1
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S1
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S1
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S18
51 files changed, 1493 insertions, 922 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 3b66f2c19c84e..53d4b8d5b54d4 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -5,7 +5,8 @@
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# Disable clang warning for using setjmp without setjmp.h header
+CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
@@ -20,12 +21,14 @@ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
endif
obj-y := cputable.o ptrace.o syscalls.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 89cf15566c4e8..9ffc72ded73ad 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -79,11 +79,16 @@ int main(void)
{
OFFSET(THREAD, task_struct, thread);
OFFSET(MM, task_struct, mm);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(TASK_CANARY, task_struct, stack_canary);
+#ifdef CONFIG_PPC64
+ OFFSET(PACA_CANARY, paca_struct, canary);
+#endif
+#endif
OFFSET(MMCONTEXTID, mm_struct, context.id);
#ifdef CONFIG_PPC64
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
- OFFSET(TASKTHREADPPR, task_struct, thread.ppr);
#else
OFFSET(THREAD_INFO, task_struct, stack);
DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
@@ -173,7 +178,6 @@ int main(void)
OFFSET(PACAKSAVE, paca_struct, kstack);
OFFSET(PACACURRENT, paca_struct, __current);
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
- OFFSET(PACASTABRR, paca_struct, stab_rr);
OFFSET(PACAR1, paca_struct, saved_r1);
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
@@ -212,6 +216,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACASLBCACHE, paca_struct, slb_cache);
OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
+ OFFSET(PACASTABRR, paca_struct, stab_rr);
OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
@@ -274,11 +279,6 @@ int main(void)
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-#ifdef CONFIG_PPC64
- /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
- DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
- DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-#endif /* CONFIG_PPC64 */
STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
@@ -322,10 +322,7 @@ int main(void)
STACK_PT_REGS_OFFSET(_ESR, dsisr);
#else /* CONFIG_PPC64 */
STACK_PT_REGS_OFFSET(SOFTE, softe);
-
- /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
- DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
- DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
+ STACK_PT_REGS_OFFSET(_PPR, ppr);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_PPC32)
@@ -387,12 +384,12 @@ int main(void)
OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
- OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec);
- OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec);
+ OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
+ OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
- OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec);
- OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec);
+ OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
+ OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
#else
OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
@@ -438,7 +435,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S
OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
#endif
- OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
@@ -503,6 +500,7 @@ int main(void)
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
+ OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif
@@ -695,7 +693,7 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */
- OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index b2072d5bbf2bb..b4241ed1456ec 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -163,7 +163,7 @@ void btext_map(void)
offset = ((unsigned long) dispDeviceBase) - base;
size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+ dispDeviceRect[0];
- vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
+ vbase = ioremap_wc(base, size);
if (!vbase)
return;
logicalDisplayBase = vbase + offset;
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index a8f20e5928e13..be57bd07596d9 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -20,6 +20,8 @@
#include <linux/percpu.h>
#include <linux/slab.h>
#include <asm/prom.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
#include "cacheinfo.h"
@@ -627,17 +629,48 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
static struct kobj_attribute cache_level_attr =
__ATTR(level, 0444, level_show, NULL);
+static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
+{
+ struct kobject *index_dir_kobj = &index->kobj;
+ struct kobject *cache_dir_kobj = index_dir_kobj->parent;
+ struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
+ struct device *dev = kobj_to_dev(cpu_dev_kobj);
+
+ return dev->id;
+}
+
+/*
+ * On big-core systems, each core has two groups of CPUs each of which
+ * has its own L1-cache. The thread-siblings which share l1-cache with
+ * @cpu can be obtained via cpu_smallcore_mask().
+ */
+static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache)
+{
+ if (cache->level == 1)
+ return cpu_smallcore_mask(cpu);
+
+ return &cache->shared_cpu_map;
+}
+
static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
{
struct cache_index_dir *index;
struct cache *cache;
- int ret;
+ const struct cpumask *mask;
+ int ret, cpu;
index = kobj_to_cache_index_dir(k);
cache = index->cache;
+ if (has_big_cores) {
+ cpu = index_dir_to_cpu(index);
+ mask = get_big_core_shared_cpu_map(cpu, cache);
+ } else {
+ mask = &cache->shared_cpu_map;
+ }
+
ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
- cpumask_pr_args(&cache->shared_cpu_map));
+ cpumask_pr_args(mask));
buf[ret++] = '\n';
buf[ret] = '\0';
return ret;
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 458b928dbd844..c317080db771e 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -147,8 +147,8 @@ __init_hvmode_206:
rldicl. r0,r3,4,63
bnelr
ld r5,CPU_SPEC_FEATURES(r4)
- LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
- xor r5,r5,r6
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
+ andc r5,r5,r6
std r5,CPU_SPEC_FEATURES(r4)
blr
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index d10ad258d41a3..bbdc4706c1599 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -110,7 +110,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
vaddr = __va(paddr);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
} else {
- vaddr = __ioremap(paddr, PAGE_SIZE, 0);
+ vaddr = ioremap_cache(paddr, PAGE_SIZE);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
iounmap(vaddr);
}
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 88f3963ca30f6..5fc335f4d9cd0 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -11,7 +11,7 @@
*
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/of_platform.h>
@@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
+ .mapping_error = dma_direct_mapping_error,
.get_required_mask = swiotlb_powerpc_get_required,
};
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index f432054234a47..8be3721d93026 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -1008,9 +1008,7 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
/* Count and allocate space for cpu features */
of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
&nr_dt_cpu_features);
- dt_cpu_features = __va(
- memblock_alloc(sizeof(struct dt_cpu_feature)*
- nr_dt_cpu_features, PAGE_SIZE));
+ dt_cpu_features = __va(memblock_phys_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE));
cpufeatures_setup_start(isa);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6ebba3e48b012..6cae6b56ffd66 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -169,6 +169,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
int n = 0, l = 0;
char buffer[128];
+ if (!pdn) {
+ pr_warn("EEH: Note: No error log for absent device.\n");
+ return 0;
+ }
+
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
@@ -399,7 +404,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
}
/* Isolate the PHB and send event */
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
pr_err("EEH: PHB#%x failure detected, location: %s\n",
@@ -558,7 +563,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* with other functions on this device, and functions under
* bridges.
*/
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
/* Most EEH events are due to device driver bugs. Having
@@ -676,7 +681,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
/* Check if the request is finished successfully */
if (active_flag) {
- rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (rc < 0)
return rc;
@@ -825,7 +830,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
break;
case pcie_hot_reset:
- eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -833,7 +839,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
- eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -913,16 +920,15 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
break;
/* Wait until the PE is in a functioning state */
- state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (eeh_state_active(state))
- break;
-
+ state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (state < 0) {
pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
__func__, pe->phb->global_number, pe->addr);
ret = -ENOTRECOVERABLE;
break;
}
+ if (eeh_state_active(state))
+ break;
/* Set error in case this is our last attempt */
ret = -EIO;
@@ -1036,6 +1042,11 @@ void eeh_probe_devices(void)
pdn = hose->pci_data;
traverse_pci_dn(pdn, eeh_ops->probe, NULL);
}
+ if (eeh_enabled())
+ pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ else
+ pr_info("EEH: No capable adapters found\n");
+
}
/**
@@ -1079,18 +1090,7 @@ static int eeh_init(void)
eeh_dev_phb_init_dynamic(hose);
/* Initialize EEH event */
- ret = eeh_event_init();
- if (ret)
- return ret;
-
- eeh_probe_devices();
-
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else if (!eeh_has_flag(EEH_POSTPONED_PROBE))
- pr_info("EEH: No capable adapters found\n");
-
- return ret;
+ return eeh_event_init();
}
core_initcall_sync(eeh_init);
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index a34e6912c15e2..d8c90f3284b51 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -60,8 +60,6 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
- INIT_LIST_HEAD(&edev->list);
- INIT_LIST_HEAD(&edev->rmv_list);
return edev;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 67619b4b3f96c..9446248eb6b81 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -35,8 +35,8 @@
#include <asm/rtas.h>
struct eeh_rmv_data {
- struct list_head edev_list;
- int removed;
+ struct list_head removed_vf_list;
+ int removed_dev_count;
};
static int eeh_result_priority(enum pci_ers_result result)
@@ -281,6 +281,10 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
struct pci_driver *driver;
enum pci_ers_result new_result;
+ if (!edev->pdev) {
+ eeh_edev_info(edev, "no device");
+ return;
+ }
device_lock(&edev->pdev->dev);
if (eeh_edev_actionable(edev)) {
driver = eeh_pcid_get(edev->pdev);
@@ -400,7 +404,7 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
* EEH device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
- if (list_is_last(&edev->list, &edev->pe->edevs))
+ if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
return NULL;
@@ -465,10 +469,9 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
return rc;
}
-static void *eeh_add_virt_device(void *data, void *userdata)
+static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
- struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@@ -499,7 +502,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
- int *removed = rmv_data ? &rmv_data->removed : NULL;
/*
* Actually, we should remove the PCI bridges as well.
@@ -521,7 +523,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
if (eeh_dev_removed(edev))
return NULL;
- if (removed) {
+ if (rmv_data) {
if (eeh_pe_passed(edev->pe))
return NULL;
driver = eeh_pcid_get(dev);
@@ -539,10 +541,9 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
/* Remove it from PCI subsystem */
pr_debug("EEH: Removing %s without EEH sensitive driver\n",
pci_name(dev));
- edev->bus = dev->bus;
edev->mode |= EEH_DEV_DISCONNECTED;
- if (removed)
- (*removed)++;
+ if (rmv_data)
+ rmv_data->removed_dev_count++;
if (edev->physfn) {
#ifdef CONFIG_PCI_IOV
@@ -558,7 +559,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pdn->pe_number = IODA_INVALID_PE;
#endif
if (rmv_data)
- list_add(&edev->rmv_list, &rmv_data->edev_list);
+ list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
} else {
pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(dev);
@@ -727,7 +728,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* the device up before the scripts have taken it down,
* potentially weird things happen.
*/
- if (!driver_eeh_aware || rmv_data->removed) {
+ if (!driver_eeh_aware || rmv_data->removed_dev_count) {
pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
(driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
@@ -737,10 +738,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* PE. We should disconnect it so the binding can be
* rebuilt when adding PCI devices.
*/
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
if (pe->type & EEH_PE_VF) {
- eeh_add_virt_device(edev, NULL);
+ eeh_add_virt_device(edev);
} else {
if (!driver_eeh_aware)
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
@@ -789,7 +790,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
struct eeh_pe *tmp_pe;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
- struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
+ struct eeh_rmv_data rmv_data =
+ {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
bus = eeh_pe_bus_get(pe);
if (!bus) {
@@ -806,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
}
- pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
- pe->freeze_count, eeh_max_freezes);
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -821,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* the error. Override the result if necessary to have partially
* hotplug for this case.
*/
- pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_set_channel_state(pe, pci_channel_io_frozen);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
- &result);
- if ((pe->type & EEH_PE_PHB) &&
- result != PCI_ERS_RESULT_NONE &&
- result != PCI_ERS_RESULT_NEED_RESET)
- result = PCI_ERS_RESULT_NEED_RESET;
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if ((pe->type & EEH_PE_PHB) &&
+ result != PCI_ERS_RESULT_NONE &&
+ result != PCI_ERS_RESULT_NEED_RESET)
+ result = PCI_ERS_RESULT_NEED_RESET;
+ }
/* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems.
*/
- rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
- pr_warn("EEH: Permanent failure\n");
- goto hard_fail;
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warn("EEH: Permanent failure\n");
+ result = PCI_ERS_RESULT_DISCONNECT;
+ }
}
/* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers
* have been informed.
*/
- pr_info("EEH: Collect temporary log\n");
- eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+ }
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
@@ -857,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
}
}
@@ -866,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
- if (rc < 0)
- goto hard_fail;
- if (rc) {
+ if (rc < 0) {
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -882,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
- if (rc < 0)
- goto hard_fail;
- if (rc) {
+ if (rc < 0) {
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
/*
@@ -897,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
}
- /* If any device has a hard failure, then shut off everything. */
- if (result == PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: Device driver gave up\n");
- goto hard_fail;
- }
-
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
@@ -910,88 +912,81 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
}
-
- pr_info("EEH: Notify device drivers "
- "the completion of reset\n");
- result = PCI_ERS_RESULT_NONE;
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
- }
-
- /* All devices should claim they have recovered by now. */
- if ((result != PCI_ERS_RESULT_RECOVERED) &&
- (result != PCI_ERS_RESULT_NONE)) {
- pr_warn("EEH: Not recovered\n");
- goto hard_fail;
- }
-
- /*
- * For those hot removed VFs, we should add back them after PF get
- * recovered properly.
- */
- list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
- eeh_add_virt_device(edev, NULL);
- list_del(&edev->rmv_list);
}
- /* Tell all device drivers that they can resume operations */
- pr_info("EEH: Notify device driver to resume\n");
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("resume", pe, eeh_report_resume, NULL);
- eeh_for_each_pe(pe, tmp_pe) {
- eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
- edev->mode &= ~EEH_DEV_NO_HANDLER;
- edev->in_error = false;
+ if ((result == PCI_ERS_RESULT_RECOVERED) ||
+ (result == PCI_ERS_RESULT_NONE)) {
+ /*
+ * For those hot removed VFs, we should add back them after PF
+ * get recovered properly.
+ */
+ list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
+ rmv_entry) {
+ eeh_add_virt_device(edev);
+ list_del(&edev->rmv_entry);
}
- }
- pr_info("EEH: Recovery successful.\n");
- goto final;
+ /* Tell all device drivers that they can resume operations */
+ pr_info("EEH: Notify device driver to resume\n");
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+ eeh_for_each_pe(pe, tmp_pe) {
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+ edev->in_error = false;
+ }
+ }
-hard_fail:
- /*
- * About 90% of all real-life EEH failures in the field
- * are due to poorly seated PCI cards. Only 10% or so are
- * due to actual, failed cards.
- */
- pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
- "Please try reseating or replacing it\n",
- pe->phb->global_number, pe->addr);
+ pr_info("EEH: Recovery successful.\n");
+ } else {
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
- eeh_slot_error_detail(pe, EEH_LOG_PERM);
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
- /* Notify all devices that they're about to go down. */
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
+ /* Notify all devices that they're about to go down. */
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
- /* Mark the PE to be removed permanently */
- eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+ /* Mark the PE to be removed permanently */
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
- /*
- * Shut down the device drivers for good. We mark
- * all removed devices correctly to avoid access
- * the their PCI config any more.
- */
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- /* The passed PE should no longer be used */
- return;
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
+ }
}
-final:
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
}
@@ -1026,7 +1021,7 @@ void eeh_handle_special_event(void)
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe) continue;
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
}
eeh_serialize_unlock(flags);
@@ -1041,11 +1036,9 @@ void eeh_handle_special_event(void)
/* Purge all events of the PHB */
eeh_remove_event(pe, true);
- if (rc == EEH_NEXT_ERR_DEAD_PHB)
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
- else
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+ if (rc != EEH_NEXT_ERR_DEAD_PHB)
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 1b238ecc553e2..6fa2032e05945 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -75,7 +75,6 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
pe->type = type;
pe->phb = phb;
INIT_LIST_HEAD(&pe->child_list);
- INIT_LIST_HEAD(&pe->child);
INIT_LIST_HEAD(&pe->edevs);
pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
@@ -110,6 +109,57 @@ int eeh_phb_pe_create(struct pci_controller *phb)
}
/**
+ * eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in millisecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+int eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+ int ret;
+ int mwait;
+
+ /*
+ * According to PAPR, the state of PE might be temporarily
+ * unavailable. Under the circumstance, we have to wait
+ * for indicated time determined by firmware. The maximal
+ * wait time is 5 minutes, which is acquired from the original
+ * EEH implementation. Also, the original implementation
+ * also defined the minimal wait time as 1 second.
+ */
+#define EEH_STATE_MIN_WAIT_TIME (1000)
+#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
+
+ while (1) {
+ ret = eeh_ops->get_state(pe, &mwait);
+
+ if (ret != EEH_STATE_UNAVAILABLE)
+ return ret;
+
+ if (max_wait <= 0) {
+ pr_warn("%s: Timeout when getting PE's state (%d)\n",
+ __func__, max_wait);
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (mwait < EEH_STATE_MIN_WAIT_TIME) {
+ pr_warn("%s: Firmware returned bad wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MIN_WAIT_TIME;
+ } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+ pr_warn("%s: Firmware returned too long wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MAX_WAIT_TIME;
+ }
+
+ msleep(min(mwait, max_wait));
+ max_wait -= mwait;
+ }
+}
+
+/**
* eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
* @phb: PCI controller
*
@@ -360,7 +410,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
edev->pe = pe;
/* Put the edev to PE */
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
pdn->phb->global_number,
pdn->busno,
@@ -369,7 +419,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
pe->addr);
return 0;
} else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
/*
* We're running to here because of PCI hotplug caused by
@@ -379,7 +429,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
while (parent) {
if (!(parent->type & EEH_PE_INVALID))
break;
- parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
+ parent->type &= ~EEH_PE_INVALID;
parent = parent->parent;
}
@@ -429,7 +479,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* link the EEH device accordingly.
*/
list_add_tail(&pe->child, &parent->child_list);
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
"Device PE#%x, Parent PE#%x\n",
@@ -457,7 +507,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
int cnt;
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- if (!edev->pe) {
+ pe = eeh_dev_to_pe(edev);
+ if (!pe) {
pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
__func__, pdn->phb->global_number,
pdn->busno,
@@ -467,9 +518,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
}
/* Remove the EEH device */
- pe = eeh_dev_to_pe(edev);
edev->pe = NULL;
- list_del(&edev->list);
+ list_del(&edev->entry);
/*
* Check if the parent PE includes any EEH devices.
@@ -541,56 +591,50 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe)
}
/**
- * __eeh_pe_state_mark - Mark the state for the PE
- * @data: EEH PE
- * @flag: state
+ * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * @pe: EEH PE
*
- * The function is used to mark the indicated state for the given
- * PE. Also, the associated PCI devices will be put into IO frozen
- * state as well.
+ * EEH error affects the current PE and its child PEs. The function
+ * is used to mark appropriate state for the affected PEs and the
+ * associated devices.
*/
-static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag)
+void eeh_pe_state_mark(struct eeh_pe *root, int state)
{
- int state = *((int *)flag);
- struct eeh_dev *edev, *tmp;
- struct pci_dev *pdev;
-
- /* Keep the state of permanently removed PE intact */
- if (pe->state & EEH_PE_REMOVED)
- return NULL;
-
- pe->state |= state;
-
- /* Offline PCI devices if applicable */
- if (!(state & EEH_PE_ISOLATED))
- return NULL;
-
- eeh_pe_for_each_dev(pe, edev, tmp) {
- pdev = eeh_dev_to_pci_dev(edev);
- if (pdev)
- pdev->error_state = pci_channel_io_frozen;
- }
-
- /* Block PCI config access if required */
- if (pe->state & EEH_PE_CFG_RESTRICTED)
- pe->state |= EEH_PE_CFG_BLOCKED;
+ struct eeh_pe *pe;
- return NULL;
+ eeh_for_each_pe(root, pe)
+ if (!(pe->state & EEH_PE_REMOVED))
+ pe->state |= state;
}
+EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
/**
- * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * eeh_pe_mark_isolated
* @pe: EEH PE
*
- * EEH error affects the current PE and its child PEs. The function
- * is used to mark appropriate state for the affected PEs and the
- * associated devices.
+ * Record that a PE has been isolated by marking the PE and it's children as
+ * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
+ * as pci_channel_io_frozen.
*/
-void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+void eeh_pe_mark_isolated(struct eeh_pe *root)
{
- eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+ struct eeh_pe *pe;
+ struct eeh_dev *edev;
+ struct pci_dev *pdev;
+
+ eeh_pe_state_mark(root, EEH_PE_ISOLATED);
+ eeh_for_each_pe(root, pe) {
+ list_for_each_entry(edev, &pe->edevs, entry) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ pdev->error_state = pci_channel_io_frozen;
+ }
+ /* Block PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state |= EEH_PE_CFG_BLOCKED;
+ }
}
-EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
+EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
@@ -671,28 +715,6 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state)
eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
}
-/**
- * eeh_pe_state_mark_with_cfg - Mark PE state with unblocked config space
- * @pe: PE
- * @state: PE state to be set
- *
- * Set specified flag to PE and its child PEs. The PCI config space
- * of some PEs is blocked automatically when EEH_PE_ISOLATED is set,
- * which isn't needed in some situations. The function allows to set
- * the specified flag to indicated PEs without blocking their PCI
- * config space.
- */
-void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state)
-{
- eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
- if (!(state & EEH_PE_ISOLATED))
- return;
-
- /* Clear EEH_PE_CFG_BLOCKED, which might be set just now */
- state = EEH_PE_CFG_BLOCKED;
- eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
-}
-
/*
* Some PCI bridges (e.g. PLX bridges) have primary/secondary
* buses assigned explicitly by firmware, and we probably have
@@ -945,7 +967,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
return pe->bus;
/* Retrieve the parent PCI bus of first (top) PCI device */
- edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
return pdev->bus;
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e58c3f467db51..77decded11756 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -794,7 +794,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
lis r10,MSR_KERNEL@h
ori r10,r10,MSR_KERNEL@l
bl transfer_to_handler_full
- .long nonrecoverable_exception
+ .long unrecoverable_exception
.long ret_from_except
#endif
@@ -1297,7 +1297,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
rlwinm r3,r3,0,0,30
stw r3,_TRAP(r1)
4: addi r3,r1,STACK_FRAME_OVERHEAD
- bl nonrecoverable_exception
+ bl unrecoverable_exception
/* shouldn't return */
b 4b
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2206912ea4f09..7b1693adff2a7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -171,7 +171,7 @@ system_call: /* label this so stack traces look sane */
* based on caller's run-mode / personality.
*/
ld r11,SYS_CALL_TABLE@toc(2)
- andi. r10,r10,_TIF_32BIT
+ andis. r10,r10,_TIF_32BIT@h
beq 15f
addi r11,r11,8 /* use 32-bit syscall entries */
clrldi r3,r3,32
@@ -386,10 +386,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
4: /* Anything else left to do? */
BEGIN_FTR_SECTION
- lis r3,INIT_PPR@highest /* Set thread.ppr = 3 */
- ld r10,PACACURRENT(r13)
+ lis r3,DEFAULT_PPR@highest /* Set default PPR */
sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,TASKTHREADPPR(r10)
+ std r3,_PPR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -624,6 +623,10 @@ _GLOBAL(_switch)
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
std r6,PACACURRENT(r13) /* Set new 'current' */
+#if defined(CONFIG_STACKPROTECTOR)
+ ld r6, TASK_CANARY(r6)
+ std r6, PACA_CANARY(r13)
+#endif
ld r8,KSP(r4) /* new stack pointer */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -672,7 +675,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
isync
slbie r6
+BEGIN_FTR_SECTION
slbie r6 /* Workaround POWER5 < DD2.1 issue */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
slbmte r7,r0
isync
2:
@@ -936,12 +941,6 @@ fast_exception_return:
andi. r0,r3,MSR_RI
beq- .Lunrecov_restore
- /* Load PPR from thread struct before we clear MSR:RI */
-BEGIN_FTR_SECTION
- ld r2,PACACURRENT(r13)
- ld r2,TASKTHREADPPR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
/*
* Clear RI before restoring r13. If we are returning to
* userspace and we take an exception after restoring r13,
@@ -962,7 +961,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
andi. r0,r3,MSR_PR
beq 1f
BEGIN_FTR_SECTION
- mtspr SPRN_PPR,r2 /* Restore PPR */
+ /* Restore PPR */
+ ld r2,_PPR(r1)
+ mtspr SPRN_PPR,r2
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
@@ -1118,7 +1119,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return);
_GLOBAL(enter_rtas)
mflr r0
std r0,16(r1)
- stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
/* Because RTAS is running in 32b mode, it clobbers the high order half
* of all registers that it saves. We therefore save those registers
@@ -1250,7 +1251,7 @@ rtas_restore_regs:
ld r8,_DSISR(r1)
mtdsisr r8
- addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
+ addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
ld r0,16(r1) /* get return address */
mtlr r0
@@ -1261,7 +1262,7 @@ rtas_restore_regs:
_GLOBAL(enter_prom)
mflr r0
std r0,16(r1)
- stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
/* Because PROM is running in 32b mode, it clobbers the high order half
* of all registers that it saves. We therefore save those registers
@@ -1318,8 +1319,8 @@ _GLOBAL(enter_prom)
REST_10GPRS(22, r1)
ld r4,_CCR(r1)
mtcr r4
-
- addi r1,r1,PROM_FRAME_SIZE
+
+ addi r1,r1,SWITCH_FRAME_SIZE
ld r0,16(r1)
mtlr r0
blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2d8fc8c9da7a1..89d32bb79d5ee 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -244,14 +244,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
- b machine_check_powernv_early
+ b machine_check_common_early
FTR_SECTION_ELSE
b machine_check_pSeries_0
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_powernv_early)
-BEGIN_FTR_SECTION
+TRAMP_REAL_BEGIN(machine_check_common_early)
EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
/*
* Register contents:
@@ -305,7 +304,9 @@ BEGIN_FTR_SECTION
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr r11 /* get MSR value */
+BEGIN_FTR_SECTION
ori r11,r11,MSR_ME /* turn on ME bit */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ori r11,r11,MSR_RI /* turn on RI bit */
LOAD_HANDLER(r12, machine_check_handle_early)
1: mtspr SPRN_SRR0,r12
@@ -324,13 +325,15 @@ BEGIN_FTR_SECTION
andc r11,r11,r10 /* Turn off MSR_ME */
b 1b
b . /* prevent speculative execution */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
TRAMP_REAL_BEGIN(machine_check_pSeries)
.globl machine_check_fwnmi
machine_check_fwnmi:
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+ b machine_check_common_early
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
/*
@@ -440,6 +443,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
bl machine_check_early
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
+BEGIN_FTR_SECTION
+ b 4f
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -463,11 +469,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
*/
rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
beq 5f
- andi. r11,r12,MSR_PR /* See if coming from user. */
+4: andi. r11,r12,MSR_PR /* See if coming from user. */
bne 9f /* continue in V mode if we are. */
5:
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+BEGIN_FTR_SECTION
/*
* We are coming from kernel context. Check if we are coming from
* guest. if yes, then we can continue. We will fall through
@@ -476,6 +483,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
lbz r11,HSTATE_IN_GUEST(r13)
cmpwi r11,0 /* Check if coming from guest */
bne 9f /* continue if we are. */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
#endif
/*
* At this point we are not sure about what context we come from.
@@ -510,6 +518,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
cmpdi r3,0 /* see if we handled MCE successfully */
beq 1b /* if !handled then panic */
+BEGIN_FTR_SECTION
/*
* Return from MC interrupt.
* Queue up the MCE event so that we can log it later, while
@@ -518,10 +527,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
RFI_TO_USER_OR_KERNEL
+FTR_SECTION_ELSE
+ /*
+ * pSeries: Return from MC interrupt. Before that stay on emergency
+ * stack and call machine_check_exception to log the MCE event.
+ */
+ LOAD_HANDLER(r10,mce_return)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ RFI_TO_KERNEL
+ b .
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
- b machine_check_pSeries
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_0(PACA_EXMC)
+ b machine_check_pSeries_0
EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
@@ -535,6 +558,13 @@ EXC_COMMON_BEGIN(unrecover_mce)
bl unrecoverable_exception
b 1b
+EXC_COMMON_BEGIN(mce_return)
+ /* Invoke machine_check_exception to print MCE event and return. */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+ b .
EXC_REAL(data_access, 0x300, 0x80)
EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
@@ -566,28 +596,36 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_DAR
- mfspr r11,SPRN_SRR1
- crset 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_DAR
- mfspr r11,SPRN_SRR1
- crset 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
+
TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
+EXC_COMMON_BEGIN(data_access_slb_common)
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXSLB+EX_DAR(r13)
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
+ ld r4,PACA_EXSLB+EX_DAR(r13)
+ std r4,_DAR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_slb_fault
+ cmpdi r3,0
+ bne- 1f
+ b fast_exception_return
+1: /* Error case */
+ std r3,RESULT(r1)
+ bl save_nvgprs
+ RECONCILE_IRQ_STATE(r10, r11)
+ ld r4,_DAR(r1)
+ ld r5,RESULT(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_bad_slb_fault
+ b ret_from_except
+
EXC_REAL(instruction_access, 0x400, 0x80)
EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
@@ -610,160 +648,34 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r11,SPRN_SRR1
- crclr 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480);
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r11,SPRN_SRR1
- crclr 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480);
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
-
-/*
- * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
- * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
- */
-EXC_COMMON_BEGIN(slb_miss_common)
- /*
- * r13 points to the PACA, r9 contains the saved CR,
- * r12 contains the saved r3,
- * r11 contain the saved SRR1, SRR0 is still ready for return
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
- * We assume we aren't going to take any exceptions during this
- * procedure.
- */
- mflr r10
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- andi. r9,r11,MSR_PR // Check for exception from userspace
- cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
-
- /*
- * Test MSR_RI before calling slb_allocate_realmode, because the
- * MSR in r11 gets clobbered. However we still want to allocate
- * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
- * recursive SLB faults. So use cr5 for this, which is preserved.
- */
- andi. r11,r11,MSR_RI /* check for unrecoverable exception */
- cmpdi cr5,r11,MSR_RI
-
- crset 4*cr0+eq
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_MMU_FTR_SECTION
- bl slb_allocate
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
-#endif
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
- mtlr r10
-
- /*
- * Large address, check whether we have to allocate new contexts.
- */
- beq- 8f
- bne- cr5,2f /* if unrecoverable exception, oops */
-
- /* All done -- return from exception. */
-
- bne cr4,1f /* returning to kernel */
-
- mtcrf 0x80,r9
- mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
- mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
- mtcrf 0x02,r9 /* I/D indication is in cr6 */
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-
- RESTORE_CTR(r9, PACA_EXSLB)
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- mr r3,r12
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- RFI_TO_USER
- b . /* prevent speculative execution */
-1:
- mtcrf 0x80,r9
- mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
- mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
- mtcrf 0x02,r9 /* I/D indication is in cr6 */
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-
- RESTORE_CTR(r9, PACA_EXSLB)
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- mr r3,r12
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-
-2: std r3,PACA_EXSLB+EX_DAR(r13)
- mr r3,r12
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10,unrecov_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-
-8: std r3,PACA_EXSLB+EX_DAR(r13)
- mr r3,r12
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10, large_addr_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
+TRAMP_KVM(PACA_EXSLB, 0x480)
-EXC_COMMON_BEGIN(unrecov_slb)
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- RECONCILE_IRQ_STATE(r10, r11)
+EXC_COMMON_BEGIN(instruction_access_slb_common)
+ EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
+ ld r4,_NIP(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_slb_fault
+ cmpdi r3,0
+ bne- 1f
+ b fast_exception_return
+1: /* Error case */
+ std r3,RESULT(r1)
bl save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-EXC_COMMON_BEGIN(large_addr_slb)
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
- ld r3, PACA_EXSLB+EX_DAR(r13)
- std r3, _DAR(r1)
- beq cr6, 2f
- li r10, 0x481 /* fix trap number for I-SLB miss */
- std r10, _TRAP(r1)
-2: bl save_nvgprs
- addi r3, r1, STACK_FRAME_OVERHEAD
- bl slb_miss_large_addr
+ ld r4,_NIP(r1)
+ ld r5,RESULT(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_bad_slb_fault
b ret_from_except
+
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
.globl hardware_interrupt_hv;
hardware_interrupt_hv:
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index a711d22339ea7..761b28b1427d8 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1444,8 +1444,8 @@ static ssize_t fadump_register_store(struct kobject *kobj,
break;
case 1:
if (fw_dump.dump_registered == 1) {
- ret = -EEXIST;
- goto unlock_out;
+ /* Un-register Firmware-assisted dump */
+ fadump_unregister_dump(&fdm);
}
/* Register Firmware-assisted dump */
ret = register_fadump();
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 6582f824d6206..3b67b9533c82f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -31,6 +31,7 @@
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/export.h>
+#include <asm/code-patching-asm.h>
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
@@ -318,8 +319,8 @@ InstructionTLBMiss:
cmpli cr0, r11, PAGE_OFFSET@h
#ifndef CONFIG_PIN_TLB_TEXT
/* It is assumed that kernel code fits into the first 8M page */
-_ENTRY(ITLBMiss_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+0: cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+ patch_site 0b, patch__itlbmiss_linmem_top
#endif
#endif
#endif
@@ -353,13 +354,14 @@ _ENTRY(ITLBMiss_cmp)
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtcr r12
#endif
-
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
/* Load the MI_TWC with the attributes for this "segment." */
mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
+#ifdef CONFIG_SWAP
+ rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ and r11, r11, r10
+ rlwimi r10, r11, 0, _PAGE_PRESENT
+#endif
li r11, RPN_PATTERN | 0x200
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
@@ -372,16 +374,17 @@ _ENTRY(ITLBMiss_cmp)
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-_ENTRY(itlb_miss_exit_1)
- mfspr r10, SPRN_SPRG_SCRATCH0
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r12, SPRN_SPRG_SCRATCH2
#endif
rfi
+ patch_site 0b, patch__itlbmiss_exit_1
+
#ifdef CONFIG_PERF_EVENTS
-_ENTRY(itlb_miss_perf)
- lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+ patch_site 0f, patch__itlbmiss_perf
+0: lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -435,11 +438,11 @@ DataStoreTLBMiss:
#ifndef CONFIG_PIN_TLB_IMMR
cmpli cr0, r11, VIRT_IMMR_BASE@h
#endif
-_ENTRY(DTLBMiss_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+ patch_site 0b, patch__dtlbmiss_linmem_top
#ifndef CONFIG_PIN_TLB_IMMR
-_ENTRY(DTLBMiss_jmp)
- beq- DTLBMissIMMR
+0: beq- DTLBMissIMMR
+ patch_site 0b, patch__dtlbmiss_immr_jmp
#endif
blt cr7, DTLBMissLinear
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
@@ -470,14 +473,22 @@ _ENTRY(DTLBMiss_jmp)
* above.
*/
rlwimi r11, r10, 0, _PAGE_GUARDED
-#ifdef CONFIG_SWAP
- /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0
- * on that bit will represent a Non Access group
- */
- rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
mtspr SPRN_MD_TWC, r11
+ /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
+ * We also need to know if the insn is a load/store, so:
+ * Clear _PAGE_PRESENT and load that which will
+ * trap into DTLB Error with store bit set accordinly.
+ */
+ /* PRESENT=0x1, ACCESSED=0x20
+ * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+ * r10 = (r10 & ~PRESENT) | r11;
+ */
+#ifdef CONFIG_SWAP
+ rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ and r11, r11, r10
+ rlwimi r10, r11, 0, _PAGE_PRESENT
+#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
@@ -489,14 +500,16 @@ _ENTRY(DTLBMiss_jmp)
/* Restore registers */
mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_1)
- mfspr r10, SPRN_SPRG_SCRATCH0
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__dtlbmiss_exit_1
+
#ifdef CONFIG_PERF_EVENTS
-_ENTRY(dtlb_miss_perf)
- lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+ patch_site 0f, patch__dtlbmiss_perf
+0: lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -637,57 +650,59 @@ InstructionBreakpoint:
*/
DTLBMissIMMR:
mtcr r12
- /* Set 512k byte guarded page and mark it valid and accessed */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+ /* Set 512k byte guarded page and mark it valid */
+ li r10, MD_PS512K | MD_GUARDED | MD_SVALID
mtspr SPRN_MD_TWC, r10
mfspr r10, SPRN_IMMR /* Get current IMMR */
rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT | _PAGE_NO_CACHE
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_2)
- mfspr r10, SPRN_SPRG_SCRATCH0
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__dtlbmiss_exit_2
DTLBMissLinear:
mtcr r12
- /* Set 8M byte page and mark it valid and accessed */
- li r11, MD_PS8MEG | MD_SVALID | M_APG2
+ /* Set 8M byte page and mark it valid */
+ li r11, MD_PS8MEG | MD_SVALID
mtspr SPRN_MD_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_3)
- mfspr r10, SPRN_SPRG_SCRATCH0
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__dtlbmiss_exit_3
#ifndef CONFIG_PIN_TLB_TEXT
ITLBMissLinear:
mtcr r12
- /* Set 8M byte page and mark it valid,accessed */
- li r11, MI_PS8MEG | MI_SVALID | M_APG2
+ /* Set 8M byte page and mark it valid */
+ li r11, MI_PS8MEG | MI_SVALID
mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
-_ENTRY(itlb_miss_exit_2)
- mfspr r10, SPRN_SPRG_SCRATCH0
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__itlbmiss_exit_2
#endif
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
@@ -705,8 +720,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
mfspr r11, SPRN_M_TW /* Get level 1 table */
blt+ 3f
rlwinm r11, r10, 16, 0xfff8
-_ENTRY(FixupDAR_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+
+0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+ patch_site 0b, patch__fixupdar_linmem_top
+
/* create physical page address from effective address */
tophys(r11, r10)
blt- cr7, 201f
@@ -960,7 +977,7 @@ initial_mmu:
ori r8, r8, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r8
li r8, MI_PS8MEG /* Set 8M byte page */
- ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */
+ ori r8, r8, MI_SVALID /* Make it valid */
mtspr SPRN_MI_TWC, r8
li r8, MI_BOOTINIT /* Create RPN for address 0 */
mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
@@ -987,7 +1004,7 @@ initial_mmu:
ori r8, r8, MD_EVALID /* Mark it valid */
mtspr SPRN_MD_EPN, r8
li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */
+ ori r8, r8, MD_SVALID /* Make it valid */
mtspr SPRN_MD_TWC, r8
mr r8, r9 /* Create paddr for TLB */
ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index aa9f1b8261dbf..7e89d02a84e13 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -153,10 +153,10 @@ static const struct ppc_pci_io iowa_pci_io = {
#ifdef CONFIG_PPC_INDIRECT_MMIO
static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller)
+ pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
- void __iomem *res = __ioremap_caller(addr, size, flags, caller);
+ void __iomem *res = __ioremap_caller(addr, size, prot, caller);
int busno;
bus = iowa_pci_find(0, (unsigned long)addr);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 19b4c628f3bec..f0dc680e659af 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -785,9 +785,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
vaddr = page_address(page) + offset;
uaddr = (unsigned long)vaddr;
- npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
if (tbl) {
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
align = 0;
if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
((unsigned long)vaddr & ~PAGE_MASK) == 0)
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 1df6c74aa7319..fda3ae48480c8 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -110,14 +110,14 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
size = 0x10000;
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, pgprot_val(pgprot_noncached(__pgprot(0))));
+ size, pgprot_noncached(PAGE_KERNEL));
return;
inval_range:
printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
"mapping 64k\n");
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, pgprot_val(pgprot_noncached(__pgprot(0))));
+ 0x10000, pgprot_noncached(PAGE_KERNEL));
}
@@ -253,7 +253,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
*/
isa_io_base = ISA_IO_BASE;
__ioremap_at(pbase, (void *)ISA_IO_BASE,
- size, pgprot_val(pgprot_noncached(__pgprot(0))));
+ size, pgprot_noncached(PAGE_KERNEL));
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 35e240a0a4087..59c578f865aa6 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -24,6 +24,7 @@
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/debug.h>
+#include <asm/code-patching.h>
#include <linux/slab.h>
/*
@@ -144,7 +145,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
return 0;
- if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+ if (*(u32 *)regs->nip == BREAK_INSTR)
regs->nip += BREAK_INSTR_SIZE;
return 1;
@@ -441,16 +442,42 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
return -1;
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned int instr;
+ unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+ err = probe_kernel_address(addr, instr);
+ if (err)
+ return err;
+
+ err = patch_instruction(addr, BREAK_INSTR);
+ if (err)
+ return -EFAULT;
+
+ *(unsigned int *)bpt->saved_instr = instr;
+
+ return 0;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned int instr = *(unsigned int *)bpt->saved_instr;
+ unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+ err = patch_instruction(addr, instr);
+ if (err)
+ return -EFAULT;
+
+ return 0;
+}
+
/*
* Global data
*/
-struct kgdb_arch arch_kgdb_ops = {
-#ifdef __LITTLE_ENDIAN__
- .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d},
-#else
- .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
-#endif
-};
+struct kgdb_arch arch_kgdb_ops;
static int kgdb_not_implemented(struct pt_regs *regs)
{
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index efdd16a79075f..bd933a75f0bcb 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -488,10 +488,11 @@ long machine_check_early(struct pt_regs *regs)
{
long handled = 0;
- __this_cpu_inc(irq_stat.mce_exceptions);
-
- if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
- handled = cur_cpu_spec->machine_check_early(regs);
+ /*
+ * See if platform is capable of handling machine check.
+ */
+ if (ppc_md.machine_check_early)
+ handled = ppc_md.machine_check_early(regs);
return handled;
}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 3497c8329c1d7..6b800eec31f2f 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -60,7 +60,7 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
/* flush SLBs and reload */
#ifdef CONFIG_PPC_BOOK3S_64
-static void flush_and_reload_slb(void)
+void flush_and_reload_slb(void)
{
/* Invalidate all SLBs */
slb_flush_all_realmode();
@@ -89,6 +89,13 @@ static void flush_and_reload_slb(void)
static void flush_erat(void)
{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ flush_and_reload_slb();
+ return;
+ }
+#endif
+ /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 77371c9ef3d8f..2d861a36662ed 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -74,6 +74,14 @@ int module_finalize(const Elf_Ehdr *hdr,
(void *)sect->sh_addr + sect->sh_size);
#endif /* CONFIG_PPC64 */
+#ifdef PPC64_ELF_ABI_v1
+ sect = find_section(hdr, sechdrs, ".opd");
+ if (sect != NULL) {
+ me->arch.start_opd = sect->sh_addr;
+ me->arch.end_opd = sect->sh_addr + sect->sh_size;
+ }
+#endif /* PPC64_ELF_ABI_v1 */
+
#ifdef CONFIG_PPC_BARRIER_NOSPEC
sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
if (sect != NULL)
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index b8d61e019d061..8661eea78503f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -360,11 +360,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size);
- else if (!strcmp(secstrings + sechdrs[i].sh_name, ".opd")) {
- me->arch.start_opd = sechdrs[i].sh_addr;
- me->arch.end_opd = sechdrs[i].sh_addr +
- sechdrs[i].sh_size;
- }
/* We don't handle .init for the moment: rename to _init */
while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
@@ -685,7 +680,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_PPC64_REL32:
/* 32 bits relative (used by relative exception tables) */
- *(u32 *)location = value - (unsigned long)location;
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+ if (value + 0x80000000 > 0xffffffff) {
+ pr_err("%s: REL32 %li out of range!\n",
+ me->name, (long int)value);
+ return -ENOEXEC;
+ }
+ *(u32 *)location = value;
break;
case R_PPC64_TOCSAVE:
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0ee3e6d50f288..913bfca09c4f1 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -198,7 +198,7 @@ void __init allocate_paca_ptrs(void)
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
- paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0));
+ paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, SMP_CACHE_BYTES));
memset(paca_ptrs, 0x88, paca_ptrs_size);
}
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d63b488d34d79..d3f04f2d82494 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -10,14 +10,13 @@
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/syscalls.h>
#include <linux/irq.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/export.h>
-#include <linux/syscalls.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -204,7 +203,8 @@ pci_create_OF_bus_map(void)
struct property* of_prop;
struct device_node *dn;
- of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0);
+ of_prop = memblock_alloc(sizeof(struct property) + 256,
+ SMP_CACHE_BYTES);
dn = of_find_node_by_path("/");
if (dn) {
memset(of_prop, -1, sizeof(struct property) + 256);
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index dff28f9035124..9d8c10d554075 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -159,7 +159,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
/* Establish the mapping */
if (__ioremap_at(phys_page, area->addr, size_page,
- pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)
+ pgprot_noncached(PAGE_KERNEL)) == NULL)
return -ENOMEM;
/* Fixup hose IO resource */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 913c5725cdb2a..96f34730010fe 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -43,6 +43,7 @@
#include <linux/uaccess.h>
#include <linux/elf-randomize.h>
#include <linux/pkeys.h>
+#include <linux/seq_buf.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -65,6 +66,7 @@
#include <asm/livepatch.h>
#include <asm/cpu_has_feature.h>
#include <asm/asm-prototypes.h>
+#include <asm/stacktrace.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -102,24 +104,18 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
}
}
-static inline bool msr_tm_active(unsigned long msr)
-{
- return MSR_TM_ACTIVE(msr);
-}
-
static bool tm_active_with_fp(struct task_struct *tsk)
{
- return msr_tm_active(tsk->thread.regs->msr) &&
+ return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
(tsk->thread.ckpt_regs.msr & MSR_FP);
}
static bool tm_active_with_altivec(struct task_struct *tsk)
{
- return msr_tm_active(tsk->thread.regs->msr) &&
+ return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
(tsk->thread.ckpt_regs.msr & MSR_VEC);
}
#else
-static inline bool msr_tm_active(unsigned long msr) { return false; }
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
@@ -247,7 +243,8 @@ void enable_kernel_fp(void)
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_fpu(current);
}
@@ -311,7 +308,8 @@ void enable_kernel_altivec(void)
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_altivec(current);
}
@@ -397,7 +395,8 @@ void enable_kernel_vsx(void)
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_vsx(current);
}
@@ -530,7 +529,7 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
- if (!msr_tm_active(regs->msr) &&
+ if (!MSR_TM_ACTIVE(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -591,12 +590,11 @@ void flush_all_to_thread(struct task_struct *tsk)
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
- save_all(tsk);
-
#ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
#endif
+ save_all(tsk);
preempt_enable();
}
@@ -620,8 +618,6 @@ void do_send_trap(struct pt_regs *regs, unsigned long address,
void do_break (struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
- siginfo_t info;
-
current->thread.trap_nr = TRAP_HWBKPT;
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
@@ -634,12 +630,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
hw_breakpoint_disable();
/* Deliver the signal to userspace */
- clear_siginfo(&info);
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address, current);
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -1259,17 +1250,16 @@ struct task_struct *__switch_to(struct task_struct *prev,
return last;
}
-static int instructions_to_print = 16;
+#define NR_INSN_TO_PRINT 16
static void show_instructions(struct pt_regs *regs)
{
int i;
- unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
- sizeof(int));
+ unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
printk("Instruction dump:");
- for (i = 0; i < instructions_to_print; i++) {
+ for (i = 0; i < NR_INSN_TO_PRINT; i++) {
int instr;
if (!(i % 8))
@@ -1284,7 +1274,7 @@ static void show_instructions(struct pt_regs *regs)
#endif
if (!__kernel_text_address(pc) ||
- probe_kernel_address((unsigned int __user *)pc, instr)) {
+ probe_kernel_address((const void *)pc, instr)) {
pr_cont("XXXXXXXX ");
} else {
if (regs->nip == pc)
@@ -1302,33 +1292,43 @@ static void show_instructions(struct pt_regs *regs)
void show_user_instructions(struct pt_regs *regs)
{
unsigned long pc;
- int i;
+ int n = NR_INSN_TO_PRINT;
+ struct seq_buf s;
+ char buf[96]; /* enough for 8 times 9 + 2 chars */
- pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int));
+ pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- pr_info("%s[%d]: code: ", current->comm, current->pid);
+ /*
+ * Make sure the NIP points at userspace, not kernel text/data or
+ * elsewhere.
+ */
+ if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
+ pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
+ current->comm, current->pid);
+ return;
+ }
- for (i = 0; i < instructions_to_print; i++) {
- int instr;
+ seq_buf_init(&s, buf, sizeof(buf));
- if (!(i % 8) && (i > 0)) {
- pr_cont("\n");
- pr_info("%s[%d]: code: ", current->comm, current->pid);
- }
+ while (n) {
+ int i;
- if (probe_kernel_address((unsigned int __user *)pc, instr)) {
- pr_cont("XXXXXXXX ");
- } else {
- if (regs->nip == pc)
- pr_cont("<%08x> ", instr);
- else
- pr_cont("%08x ", instr);
+ seq_buf_clear(&s);
+
+ for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
+ int instr;
+
+ if (probe_kernel_address((const void *)pc, instr)) {
+ seq_buf_printf(&s, "XXXXXXXX ");
+ continue;
+ }
+ seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
}
- pc += sizeof(int);
+ if (!seq_buf_has_overflowed(&s))
+ pr_info("%s[%d]: code: %s\n", current->comm,
+ current->pid, s.buffer);
}
-
- pr_cont("\n");
}
struct regbit {
@@ -1482,6 +1482,15 @@ void flush_thread(void)
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void arch_setup_new_exec(void)
+{
+ if (radix_enabled())
+ return;
+ hash__setup_new_exec();
+}
+#endif
+
int set_thread_uses_vas(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1702,7 +1711,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
p->thread.dscr = mfspr(SPRN_DSCR);
}
if (cpu_has_feature(CPU_FTR_HAS_PPR))
- p->thread.ppr = INIT_PPR;
+ childregs->ppr = DEFAULT_PPR;
p->thread.tidr = 0;
#endif
@@ -1710,6 +1719,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
return 0;
}
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
/*
* Set up a thread for executing a new program
*/
@@ -1717,6 +1728,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
{
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ preload_new_slb_context(start, sp);
+#endif
#endif
/*
@@ -1807,6 +1822,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
#endif
+ current->thread.load_slb = 0;
current->thread.load_fp = 0;
memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
current->thread.fp_save_area = NULL;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index c4d7078e5295f..fe758cedb93fd 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -126,7 +126,7 @@ static void __init move_device_tree(void)
if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
overlaps_crashkernel(start, size) ||
overlaps_initrd(start, size)) {
- p = __va(memblock_alloc(size, PAGE_SIZE));
+ p = __va(memblock_phys_alloc(size, PAGE_SIZE));
memcpy(p, initial_boot_params, size);
initial_boot_params = p;
DBG("Moved device tree to 0x%p\n", p);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 9b38a2e5dd357..f33ff4163a51c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -43,11 +43,13 @@
#include <asm/btext.h>
#include <asm/sections.h>
#include <asm/machdep.h>
-#include <asm/opal.h>
#include <asm/asm-prototypes.h>
#include <linux/linux_logo.h>
+/* All of prom_init bss lives here */
+#define __prombss __section(.bss.prominit)
+
/*
* Eventually bump that one up
*/
@@ -87,7 +89,7 @@
#define OF_WORKAROUNDS 0
#else
#define OF_WORKAROUNDS of_workarounds
-int of_workarounds;
+static int of_workarounds __prombss;
#endif
#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
@@ -148,29 +150,31 @@ extern void copy_and_flush(unsigned long dest, unsigned long src,
unsigned long size, unsigned long offset);
/* prom structure */
-static struct prom_t __initdata prom;
+static struct prom_t __prombss prom;
-static unsigned long prom_entry __initdata;
+static unsigned long __prombss prom_entry;
#define PROM_SCRATCH_SIZE 256
-static char __initdata of_stdout_device[256];
-static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
+static char __prombss of_stdout_device[256];
+static char __prombss prom_scratch[PROM_SCRATCH_SIZE];
-static unsigned long __initdata dt_header_start;
-static unsigned long __initdata dt_struct_start, dt_struct_end;
-static unsigned long __initdata dt_string_start, dt_string_end;
+static unsigned long __prombss dt_header_start;
+static unsigned long __prombss dt_struct_start, dt_struct_end;
+static unsigned long __prombss dt_string_start, dt_string_end;
-static unsigned long __initdata prom_initrd_start, prom_initrd_end;
+static unsigned long __prombss prom_initrd_start, prom_initrd_end;
#ifdef CONFIG_PPC64
-static int __initdata prom_iommu_force_on;
-static int __initdata prom_iommu_off;
-static unsigned long __initdata prom_tce_alloc_start;
-static unsigned long __initdata prom_tce_alloc_end;
+static int __prombss prom_iommu_force_on;
+static int __prombss prom_iommu_off;
+static unsigned long __prombss prom_tce_alloc_start;
+static unsigned long __prombss prom_tce_alloc_end;
#endif
-static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+#ifdef CONFIG_PPC_PSERIES
+static bool __prombss prom_radix_disable;
+#endif
struct platform_support {
bool hash_mmu;
@@ -188,26 +192,25 @@ struct platform_support {
#define PLATFORM_LPAR 0x0001
#define PLATFORM_POWERMAC 0x0400
#define PLATFORM_GENERIC 0x0500
-#define PLATFORM_OPAL 0x0600
-static int __initdata of_platform;
+static int __prombss of_platform;
-static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
+static char __prombss prom_cmd_line[COMMAND_LINE_SIZE];
-static unsigned long __initdata prom_memory_limit;
+static unsigned long __prombss prom_memory_limit;
-static unsigned long __initdata alloc_top;
-static unsigned long __initdata alloc_top_high;
-static unsigned long __initdata alloc_bottom;
-static unsigned long __initdata rmo_top;
-static unsigned long __initdata ram_top;
+static unsigned long __prombss alloc_top;
+static unsigned long __prombss alloc_top_high;
+static unsigned long __prombss alloc_bottom;
+static unsigned long __prombss rmo_top;
+static unsigned long __prombss ram_top;
-static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
-static int __initdata mem_reserve_cnt;
+static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE];
+static int __prombss mem_reserve_cnt;
-static cell_t __initdata regbuf[1024];
+static cell_t __prombss regbuf[1024];
-static bool rtas_has_query_cpu_stopped;
+static bool __prombss rtas_has_query_cpu_stopped;
/*
@@ -522,8 +525,8 @@ static void add_string(char **str, const char *q)
static char *tohex(unsigned int x)
{
- static char digits[] = "0123456789abcdef";
- static char result[9];
+ static const char digits[] __initconst = "0123456789abcdef";
+ static char result[9] __prombss;
int i;
result[8] = 0;
@@ -664,6 +667,8 @@ static void __init early_cmdline_parse(void)
#endif
}
+#ifdef CONFIG_PPC_PSERIES
+ prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
opt = strstr(prom_cmd_line, "disable_radix");
if (opt) {
opt += 13;
@@ -679,9 +684,10 @@ static void __init early_cmdline_parse(void)
}
if (prom_radix_disable)
prom_debug("Radix disabled from cmdline\n");
+#endif /* CONFIG_PPC_PSERIES */
}
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
/*
* The architecture vector has an array of PVR mask/value pairs,
* followed by # option vectors - 1, followed by the option vectors.
@@ -782,7 +788,7 @@ struct ibm_arch_vec {
struct option_vector6 vec6;
} __packed;
-struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
+static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.pvrs = {
{
.mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
@@ -920,9 +926,11 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
},
};
+static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned;
+
/* Old method - ELF header with PT_NOTE sections only works on BE */
#ifdef __BIG_ENDIAN__
-static struct fake_elf {
+static const struct fake_elf {
Elf32_Ehdr elfhdr;
Elf32_Phdr phdr[2];
struct chrpnote {
@@ -955,7 +963,7 @@ static struct fake_elf {
u32 ignore_me;
} rpadesc;
} rpanote;
-} fake_elf = {
+} fake_elf __initconst = {
.elfhdr = {
.e_ident = { 0x7f, 'E', 'L', 'F',
ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
@@ -1129,14 +1137,21 @@ static void __init prom_check_platform_support(void)
};
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
+
+ /* First copy the architecture vec template */
+ ibm_architecture_vec = ibm_architecture_vec_template;
+
if (prop_len > 1) {
int i;
- u8 vec[prop_len];
+ u8 vec[8];
prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
prop_len);
+ if (prop_len > sizeof(vec))
+ prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
+ prop_len);
prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
&vec, sizeof(vec));
- for (i = 0; i < prop_len; i += 2) {
+ for (i = 0; i < sizeof(vec); i += 2) {
prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
, vec[i]
, vec[i + 1]);
@@ -1225,7 +1240,7 @@ static void __init prom_send_capabilities(void)
}
#endif /* __BIG_ENDIAN__ */
}
-#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+#endif /* CONFIG_PPC_PSERIES */
/*
* Memory allocation strategy... our layout is normally:
@@ -1562,88 +1577,6 @@ static void __init prom_close_stdin(void)
}
}
-#ifdef CONFIG_PPC_POWERNV
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
-static u64 __initdata prom_opal_base;
-static u64 __initdata prom_opal_entry;
-#endif
-
-/*
- * Allocate room for and instantiate OPAL
- */
-static void __init prom_instantiate_opal(void)
-{
- phandle opal_node;
- ihandle opal_inst;
- u64 base, entry;
- u64 size = 0, align = 0x10000;
- __be64 val64;
- u32 rets[2];
-
- prom_debug("prom_instantiate_opal: start...\n");
-
- opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal"));
- prom_debug("opal_node: %x\n", opal_node);
- if (!PHANDLE_VALID(opal_node))
- return;
-
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64));
- size = be64_to_cpu(val64);
- if (size == 0)
- return;
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64));
- align = be64_to_cpu(val64);
-
- base = alloc_down(size, align, 0);
- if (base == 0) {
- prom_printf("OPAL allocation failed !\n");
- return;
- }
-
- opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal"));
- if (!IHANDLE_VALID(opal_inst)) {
- prom_printf("opening opal package failed (%x)\n", opal_inst);
- return;
- }
-
- prom_printf("instantiating opal at 0x%llx...", base);
-
- if (call_prom_ret("call-method", 4, 3, rets,
- ADDR("load-opal-runtime"),
- opal_inst,
- base >> 32, base & 0xffffffff) != 0
- || (rets[0] == 0 && rets[1] == 0)) {
- prom_printf(" failed\n");
- return;
- }
- entry = (((u64)rets[0]) << 32) | rets[1];
-
- prom_printf(" done\n");
-
- reserve_mem(base, size);
-
- prom_debug("opal base = 0x%llx\n", base);
- prom_debug("opal align = 0x%llx\n", align);
- prom_debug("opal entry = 0x%llx\n", entry);
- prom_debug("opal size = 0x%llx\n", size);
-
- prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
- &base, sizeof(base));
- prom_setprop(opal_node, "/ibm,opal", "opal-entry-address",
- &entry, sizeof(entry));
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- prom_opal_base = base;
- prom_opal_entry = entry;
-#endif
- prom_debug("prom_instantiate_opal: end...\n");
-}
-
-#endif /* CONFIG_PPC_POWERNV */
-
/*
* Allocate room for and instantiate RTAS
*/
@@ -2150,10 +2083,6 @@ static int __init prom_find_machine_type(void)
}
}
#ifdef CONFIG_PPC64
- /* Try to detect OPAL */
- if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal"))))
- return PLATFORM_OPAL;
-
/* Try to figure out if it's an IBM pSeries or any other
* PAPR compliant platform. We assume it is if :
* - /device_type is "chrp" (please, do NOT use that for future
@@ -2202,7 +2131,7 @@ static void __init prom_check_displays(void)
ihandle ih;
int i;
- static unsigned char default_colors[] = {
+ static const unsigned char default_colors[] __initconst = {
0x00, 0x00, 0x00,
0x00, 0x00, 0xaa,
0x00, 0xaa, 0x00,
@@ -2398,7 +2327,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
unsigned long soff;
unsigned char *valp;
- static char pname[MAX_PROPERTY_NAME];
+ static char pname[MAX_PROPERTY_NAME] __prombss;
int l, room, has_phandle = 0;
dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
@@ -2481,14 +2410,11 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
has_phandle = 1;
}
- /* Add a "linux,phandle" property if no "phandle" property already
- * existed (can happen with OPAL)
- */
+ /* Add a "phandle" property if none already exist */
if (!has_phandle) {
- soff = dt_find_string("linux,phandle");
+ soff = dt_find_string("phandle");
if (soff == 0)
- prom_printf("WARNING: Can't find string index for"
- " <linux-phandle> node %s\n", path);
+ prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path);
else {
dt_push_token(OF_DT_PROP, mem_start, mem_end);
dt_push_token(4, mem_start, mem_end);
@@ -2548,9 +2474,9 @@ static void __init flatten_device_tree(void)
dt_string_start = mem_start;
mem_start += 4; /* hole */
- /* Add "linux,phandle" in there, we'll need it */
+ /* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- strcpy(namep, "linux,phandle");
+ strcpy(namep, "phandle");
mem_start = (unsigned long)namep + strlen(namep) + 1;
/* Build string array */
@@ -3172,7 +3098,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
early_cmdline_parse();
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
@@ -3216,15 +3142,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* On non-powermacs, try to instantiate RTAS. PowerMacs don't
* have a usable RTAS implementation.
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_instantiate_rtas();
-#ifdef CONFIG_PPC_POWERNV
- if (of_platform == PLATFORM_OPAL)
- prom_instantiate_opal();
-#endif /* CONFIG_PPC_POWERNV */
-
#ifdef CONFIG_PPC64
/* instantiate sml */
prom_instantiate_sml();
@@ -3237,8 +3157,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*
* (This must be done after instanciating RTAS)
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_hold_cpus();
/*
@@ -3282,11 +3201,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
/*
* in case stdin is USB and still active on IBM machines...
* Unfortunately quiesce crashes on some powermacs if we have
- * closed stdin already (in particular the powerbook 101). It
- * appears that the OPAL version of OFW doesn't like it either.
+ * closed stdin already (in particular the powerbook 101).
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_close_stdin();
/*
@@ -3304,10 +3221,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
hdr = dt_header_start;
/* Don't print anything after quiesce under OPAL, it crashes OFW */
- if (of_platform != PLATFORM_OPAL) {
- prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
- prom_debug("->dt_header_start=0x%lx\n", hdr);
- }
+ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
+ prom_debug("->dt_header_start=0x%lx\n", hdr);
#ifdef CONFIG_PPC32
reloc_got2(-offset);
@@ -3315,13 +3230,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
unreloc_toc();
#endif
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- /* OPAL early debug gets the OPAL base & entry in r8 and r9 */
- __start(hdr, kbase, 0, 0, 0,
- prom_opal_base, prom_opal_entry);
-#else
__start(hdr, kbase, 0, 0, 0, 0, 0);
-#endif
return 0;
}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index acb6b9226352b..667df97d25954 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -28,6 +28,18 @@ OBJ="$2"
ERROR=0
+function check_section()
+{
+ file=$1
+ section=$2
+ size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}")
+ size=${size:-0}
+ if [ $size -ne 0 ]; then
+ ERROR=1
+ echo "Error: Section $section not empty in prom_init.c" >&2
+ fi
+}
+
for UNDEF in $($NM -u $OBJ | awk '{print $2}')
do
# On 64-bit nm gives us the function descriptors, which have
@@ -66,4 +78,8 @@ do
fi
done
+check_section $OBJ .data
+check_section $OBJ .bss
+check_section $OBJ .init.data
+
exit $ERROR
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 9667666eb18e5..afb819f4ca68b 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -297,7 +297,7 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
}
#endif
- if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
+ if (regno < (sizeof(struct user_pt_regs) / sizeof(unsigned long))) {
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
}
@@ -360,10 +360,10 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.regs->orig_gpr3,
offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct pt_regs));
+ sizeof(struct user_pt_regs));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct pt_regs), -1);
+ sizeof(struct user_pt_regs), -1);
return ret;
}
@@ -853,10 +853,10 @@ static int tm_cgpr_get(struct task_struct *target,
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.ckpt_regs.orig_gpr3,
offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct pt_regs));
+ sizeof(struct user_pt_regs));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct pt_regs), -1);
+ sizeof(struct user_pt_regs), -1);
return ret;
}
@@ -1609,7 +1609,7 @@ static int ppr_get(struct task_struct *target,
void *kbuf, void __user *ubuf)
{
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
+ &target->thread.regs->ppr, 0, sizeof(u64));
}
static int ppr_set(struct task_struct *target,
@@ -1618,7 +1618,7 @@ static int ppr_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
+ &target->thread.regs->ppr, 0, sizeof(u64));
}
static int dscr_get(struct task_struct *target,
@@ -2508,6 +2508,7 @@ void ptrace_disable(struct task_struct *child)
{
/* make sure the single step bit is not set. */
user_disable_single_step(child);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -3130,7 +3131,7 @@ long arch_ptrace(struct task_struct *child, long request,
case PTRACE_GETREGS: /* Get all pt_regs from the child. */
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_GPR,
- 0, sizeof(struct pt_regs),
+ 0, sizeof(struct user_pt_regs),
datavp);
#ifdef CONFIG_PPC64
@@ -3139,7 +3140,7 @@ long arch_ptrace(struct task_struct *child, long request,
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_GPR,
- 0, sizeof(struct pt_regs),
+ 0, sizeof(struct user_pt_regs),
datavp);
case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
@@ -3264,6 +3265,16 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
user_exit();
+ if (test_thread_flag(TIF_SYSCALL_EMU)) {
+ ptrace_report_syscall(regs);
+ /*
+ * Returning -1 will skip the syscall execution. We want to
+ * avoid clobbering any register also, thus, not 'gotoing'
+ * skip label.
+ */
+ return -1;
+ }
+
/*
* The tracer may decide to abort the syscall, if so tracehook
* will return !0. Note that the tracer may also just change
@@ -3324,3 +3335,42 @@ void do_syscall_trace_leave(struct pt_regs *regs)
user_enter();
}
+
+void __init pt_regs_check(void)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+ offsetof(struct user_pt_regs, gpr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+ offsetof(struct user_pt_regs, nip));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct user_pt_regs, orig_gpr3));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+ offsetof(struct user_pt_regs, ctr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+ offsetof(struct user_pt_regs, link));
+ BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+ offsetof(struct user_pt_regs, xer));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+ offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+ BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+ offsetof(struct user_pt_regs, softe));
+#else
+ BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+ offsetof(struct user_pt_regs, mq));
+#endif
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct user_pt_regs, trap));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+ offsetof(struct user_pt_regs, result));
+
+ BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8afd146bc9c70..de35bd8f047fc 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -981,7 +981,15 @@ int rtas_ibm_suspend_me(u64 handle)
goto out;
}
- stop_topology_update();
+ cpu_hotplug_disable();
+
+ /* Check if we raced with a CPU-Offline Operation */
+ if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
+ pr_err("%s: Raced against a concurrent CPU-Offline\n",
+ __func__);
+ atomic_set(&data.error, -EBUSY);
+ goto out_hotplug_enable;
+ }
/* Call function on all CPUs. One of us will make the
* rtas call
@@ -994,7 +1002,8 @@ int rtas_ibm_suspend_me(u64 handle)
if (atomic_read(&data.error) != 0)
printk(KERN_ERR "Error doing global join\n");
- start_topology_update();
+out_hotplug_enable:
+ cpu_hotplug_enable();
/* Take down CPUs not online prior to suspend */
cpuret = rtas_offline_cpus_mask(offline_mask);
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 44d66c33d59d5..38cadae4ca4f7 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -91,6 +91,8 @@ static char *rtas_event_type(int type)
return "Dump Notification Event";
case RTAS_TYPE_PRRN:
return "Platform Resource Reassignment Event";
+ case RTAS_TYPE_HOTPLUG:
+ return "Hotplug Event";
}
return rtas_type[0];
@@ -150,8 +152,10 @@ static void printk_log_rtas(char *buf, int len)
} else {
struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
- printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
- error_log_cnt, rtas_event_type(rtas_error_type(errlog)),
+ printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
+ error_log_cnt,
+ rtas_event_type(rtas_error_type(errlog)),
+ rtas_error_type(errlog),
rtas_error_severity(errlog));
}
}
@@ -274,27 +278,16 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
}
#ifdef CONFIG_PPC_PSERIES
-static s32 prrn_update_scope;
-
-static void prrn_work_fn(struct work_struct *work)
+static void handle_prrn_event(s32 scope)
{
/*
* For PRRN, we must pass the negative of the scope value in
* the RTAS event.
*/
- pseries_devicetree_update(-prrn_update_scope);
+ pseries_devicetree_update(-scope);
numa_update_cpu_topology(false);
}
-static DECLARE_WORK(prrn_work, prrn_work_fn);
-
-static void prrn_schedule_update(u32 scope)
-{
- flush_work(&prrn_work);
- prrn_update_scope = scope;
- schedule_work(&prrn_work);
-}
-
static void handle_rtas_event(const struct rtas_error_log *log)
{
if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
@@ -303,7 +296,7 @@ static void handle_rtas_event(const struct rtas_error_log *log)
/* For PRRN Events the extended log length is used to denote
* the scope for calling rtas update-nodes.
*/
- prrn_schedule_update(rtas_error_extended_log_length(log));
+ handle_prrn_event(rtas_error_extended_log_length(log));
}
#else
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 93fa0c99681e6..93ee3703b42f3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -459,8 +459,7 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
- cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
- __alignof__(u32)));
+ cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32)));
memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
for_each_node_by_type(dn, "cpu") {
@@ -966,6 +965,8 @@ void __init setup_arch(char **cmdline_p)
initmem_init();
+ early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 8c507be12c3c3..81909600013a1 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -206,9 +206,9 @@ void __init irqstack_early_init(void)
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
softirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
}
}
@@ -227,12 +227,12 @@ void __init exc_lvl_early_init(void)
#endif
critirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
#ifdef CONFIG_BOOKE
dbgirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
mcheckirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
#endif
}
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6a501b25dd85f..2a51e4cc8246d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -29,10 +29,9 @@
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
#include <linux/lockdep.h>
-#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/nmi.h>
@@ -243,13 +242,19 @@ static void cpu_ready_for_interrupts(void)
}
/*
- * Fixup HFSCR:TM based on CPU features. The bit is set by our
- * early asm init because at that point we haven't updated our
- * CPU features from firmware and device-tree. Here we have,
- * so let's do it.
+ * Set HFSCR:TM based on CPU features:
+ * In the special case of TM no suspend (P9N DD2.1), Linux is
+ * told TM is off via the dt-ftrs but told to (partially) use
+ * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
+ * will be off from dt-ftrs but we need to turn it on for the
+ * no suspend case.
*/
- if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
- mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cpu_has_feature(CPU_FTR_TM_COMP))
+ mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
+ else
+ mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+ }
/* Set IR and DR in PACA MSR */
get_paca()->kernel_msr = MSR_KERNEL;
@@ -757,13 +762,15 @@ void __init emergency_stack_init(void)
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
- return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
- __pa(MAX_DMA_ADDRESS));
+ return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ early_cpu_to_node(cpu));
+
}
static void __init pcpu_fc_free(void *ptr, size_t size)
{
- free_bootmem(__pa(ptr), size);
+ memblock_free(__pa(ptr), size);
}
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 61c1fadbc6444..3f15edf25a0d6 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -34,6 +34,8 @@
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -74,14 +76,32 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
struct thread_info *secondary_ti;
+bool has_big_cores;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+#define MAX_THREAD_LIST_SIZE 8
+#define THREAD_GROUP_SHARE_L1 1
+struct thread_groups {
+ unsigned int property;
+ unsigned int nr_groups;
+ unsigned int threads_per_group;
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/*
+ * On big-cores system, cpu_l1_cache_map for each CPU corresponds to
+ * the set its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
@@ -674,6 +694,185 @@ static void set_cpus_unrelated(int i, int j,
}
#endif
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ * property for the CPU device node @dn and stores
+ * the parsed output in the thread_groups
+ * structure @tg if the ibm,thread-groups[0]
+ * matches @property.
+ *
+ * @dn: The device node of the CPU device.
+ * @tg: Pointer to a thread group structure into which the parsed
+ * output of "ibm,thread-groups" is stored.
+ * @property: The property of the thread-group that the caller is
+ * interested in.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * ibm,thread-groups[0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share L1, translation cache.
+ *
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
+ *
+ * ibm,thread-groups[2] tells us the number of threads in each such
+ * group.
+ *
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
+ * implies that there are 2 groups of 4 threads each, where each group
+ * of threads share L1, translation cache.
+ *
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
+ * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
+ * 11, 12} structure
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+ struct thread_groups *tg,
+ unsigned int property)
+{
+ int i;
+ u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
+ u32 *thread_list;
+ size_t total_threads;
+ int ret;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array, 3);
+ if (ret)
+ return ret;
+
+ tg->property = thread_group_array[0];
+ tg->nr_groups = thread_group_array[1];
+ tg->threads_per_group = thread_group_array[2];
+ if (tg->property != property ||
+ tg->nr_groups < 1 ||
+ tg->threads_per_group < 1)
+ return -ENODATA;
+
+ total_threads = tg->nr_groups * tg->threads_per_group;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array,
+ 3 + total_threads);
+ if (ret)
+ return ret;
+
+ thread_list = &thread_group_array[3];
+
+ for (i = 0 ; i < total_threads; i++)
+ tg->thread_list[i] = thread_list[i];
+
+ return 0;
+}
+
+/*
+ * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
+ * that @cpu belongs to.
+ *
+ * @cpu : The logical CPU whose thread group is being searched.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ * to.
+ *
+ * Returns the index to tg->thread_list that points to the the start
+ * of the thread_group that @cpu belongs to.
+ *
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
+ * tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
+ int i, j;
+
+ for (i = 0; i < tg->nr_groups; i++) {
+ int group_start = i * tg->threads_per_group;
+
+ for (j = 0; j < tg->threads_per_group; j++) {
+ int idx = group_start + j;
+
+ if (tg->thread_list[idx] == hw_cpu_id)
+ return group_start;
+ }
+ }
+
+ return -1;
+}
+
+static int init_cpu_l1_cache_map(int cpu)
+
+{
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
+ struct thread_groups tg = {.property = 0,
+ .nr_groups = 0,
+ .threads_per_group = 0};
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int i, cpu_group_start = -1, err = 0;
+
+ if (!dn)
+ return -ENODATA;
+
+ err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
+ if (err)
+ goto out;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+
+ cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+
+ if (unlikely(cpu_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ int i_group_start = get_cpu_thread_group_start(i, &tg);
+
+ if (unlikely(i_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ if (i_group_start == cpu_group_start)
+ cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+ }
+
+out:
+ of_node_put(dn);
+ return err;
+}
+
+static int init_big_cores(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_cpu_l1_cache_map(cpu);
+
+ if (err)
+ return err;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ }
+
+ has_big_cores = true;
+ return 0;
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
@@ -712,6 +911,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+ init_big_cores();
+ if (has_big_cores) {
+ cpumask_set_cpu(boot_cpuid,
+ cpu_smallcore_mask(boot_cpuid));
+ }
+
if (smp_ops && smp_ops->probe)
smp_ops->probe();
}
@@ -995,10 +1200,28 @@ static void remove_cpu_from_masks(int cpu)
set_cpus_unrelated(cpu, i, cpu_core_mask);
set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ if (has_big_cores)
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
}
}
#endif
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+ struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
+ int i, first_thread = cpu_first_thread_sibling(cpu);
+
+ if (!has_big_cores)
+ return;
+
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
+ }
+}
+
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
@@ -1015,6 +1238,7 @@ static void add_cpu_to_masks(int cpu)
if (cpu_online(i))
set_cpus_related(i, cpu, cpu_sibling_mask);
+ add_cpu_to_smallcore_masks(cpu);
/*
* Copy the thread sibling mask into the cache sibling mask
* and mark any CPUs that share an L2 with this CPU.
@@ -1044,6 +1268,7 @@ static bool shared_caches;
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
mmgrab(&init_mm);
current->active_mm = &init_mm;
@@ -1069,11 +1294,13 @@ void start_secondary(void *unused)
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
/*
* Check for any shared caches. Note that this must be done on a
* per-core basis because one core in the pair might be disabled.
*/
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+ if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
shared_caches = true;
set_numa_node(numa_cpu_lookup_table[cpu]);
@@ -1083,6 +1310,8 @@ void start_secondary(void *unused)
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+ boot_init_stack_canary();
+
local_irq_enable();
/* We can enable ftrace for secondary cpus now */
@@ -1140,6 +1369,13 @@ static const struct cpumask *shared_cache_mask(int cpu)
return cpu_l2_cache_mask(cpu);
}
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
static struct sched_domain_topology_level power9_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
@@ -1167,6 +1403,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
shared_proc_topology_init();
dump_numa_cpu_topology();
+#ifdef CONFIG_SCHED_SMT
+ if (has_big_cores) {
+ pr_info("Using small cores at SMT level\n");
+ power9_topology[0].mask = smallcore_smt_mask;
+ powerpc_topology[0].mask = smallcore_smt_mask;
+ }
+#endif
/*
* If any CPU detects that it's sharing a cache with another CPU then
* use the deeper topology that is aware of this sharing.
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index f83bf6f72cb0e..185216becb8b3 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -262,7 +262,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
addi r1,r1,-128
#ifdef CONFIG_PPC_BOOK3S_64
- bl slb_flush_and_rebolt
+ bl slb_flush_and_restore_bolted
#endif
bl do_after_copyback
addi r1,r1,128
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 70f145e024877..3646affae963f 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -111,6 +111,7 @@ struct clock_event_device decrementer_clockevent = {
.rating = 200,
.irq = 0,
.set_next_event = decrementer_set_next_event,
+ .set_state_oneshot_stopped = decrementer_shutdown,
.set_state_shutdown = decrementer_shutdown,
.tick_resume = decrementer_shutdown,
.features = CLOCK_EVT_FEAT_ONESHOT |
@@ -175,7 +176,7 @@ static void calc_cputime_factors(void)
* Read the SPURR on systems that have it, otherwise the PURR,
* or if that doesn't exist return the timebase value passed in.
*/
-static unsigned long read_spurr(unsigned long tb)
+static inline unsigned long read_spurr(unsigned long tb)
{
if (cpu_has_feature(CPU_FTR_SPURR))
return mfspr(SPRN_SPURR);
@@ -281,26 +282,17 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
* Account time for a transition between system, hard irq
* or soft irq state.
*/
-static unsigned long vtime_delta(struct task_struct *tsk,
- unsigned long *stime_scaled,
- unsigned long *steal_time)
+static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
+ unsigned long now, unsigned long stime)
{
- unsigned long now, nowscaled, deltascaled;
- unsigned long stime;
+ unsigned long stime_scaled = 0;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ unsigned long nowscaled, deltascaled;
unsigned long utime, utime_scaled;
- struct cpu_accounting_data *acct = get_accounting(tsk);
- WARN_ON_ONCE(!irqs_disabled());
-
- now = mftb();
nowscaled = read_spurr(now);
- stime = now - acct->starttime;
- acct->starttime = now;
deltascaled = nowscaled - acct->startspurr;
acct->startspurr = nowscaled;
-
- *steal_time = calculate_stolen_time(now);
-
utime = acct->utime - acct->utime_sspurr;
acct->utime_sspurr = acct->utime;
@@ -314,17 +306,38 @@ static unsigned long vtime_delta(struct task_struct *tsk,
* the user ticks get saved up in paca->user_time_scaled to be
* used by account_process_tick.
*/
- *stime_scaled = stime;
+ stime_scaled = stime;
utime_scaled = utime;
if (deltascaled != stime + utime) {
if (utime) {
- *stime_scaled = deltascaled * stime / (stime + utime);
- utime_scaled = deltascaled - *stime_scaled;
+ stime_scaled = deltascaled * stime / (stime + utime);
+ utime_scaled = deltascaled - stime_scaled;
} else {
- *stime_scaled = deltascaled;
+ stime_scaled = deltascaled;
}
}
acct->utime_scaled += utime_scaled;
+#endif
+
+ return stime_scaled;
+}
+
+static unsigned long vtime_delta(struct task_struct *tsk,
+ unsigned long *stime_scaled,
+ unsigned long *steal_time)
+{
+ unsigned long now, stime;
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+
+ WARN_ON_ONCE(!irqs_disabled());
+
+ now = mftb();
+ stime = now - acct->starttime;
+ acct->starttime = now;
+
+ *stime_scaled = vtime_delta_scaled(acct, now, stime);
+
+ *steal_time = calculate_stolen_time(now);
return stime;
}
@@ -341,7 +354,9 @@ void vtime_account_system(struct task_struct *tsk)
if ((tsk->flags & PF_VCPU) && !irq_count()) {
acct->gtime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->utime_scaled += stime_scaled;
+#endif
} else {
if (hardirq_count())
acct->hardirq_time += stime;
@@ -350,7 +365,9 @@ void vtime_account_system(struct task_struct *tsk)
else
acct->stime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->stime_scaled += stime_scaled;
+#endif
}
}
EXPORT_SYMBOL_GPL(vtime_account_system);
@@ -364,6 +381,21 @@ void vtime_account_idle(struct task_struct *tsk)
acct->idle_time += stime + steal_time;
}
+static void vtime_flush_scaled(struct task_struct *tsk,
+ struct cpu_accounting_data *acct)
+{
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ if (acct->utime_scaled)
+ tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
+ if (acct->stime_scaled)
+ tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
+
+ acct->utime_scaled = 0;
+ acct->utime_sspurr = 0;
+ acct->stime_scaled = 0;
+#endif
+}
+
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
@@ -378,14 +410,13 @@ void vtime_flush(struct task_struct *tsk)
if (acct->utime)
account_user_time(tsk, cputime_to_nsecs(acct->utime));
- if (acct->utime_scaled)
- tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
-
if (acct->gtime)
account_guest_time(tsk, cputime_to_nsecs(acct->gtime));
- if (acct->steal_time)
+ if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
account_steal_time(cputime_to_nsecs(acct->steal_time));
+ acct->steal_time = 0;
+ }
if (acct->idle_time)
account_idle_time(cputime_to_nsecs(acct->idle_time));
@@ -393,8 +424,6 @@ void vtime_flush(struct task_struct *tsk)
if (acct->stime)
account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
CPUTIME_SYSTEM);
- if (acct->stime_scaled)
- tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
if (acct->hardirq_time)
account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
@@ -403,14 +432,12 @@ void vtime_flush(struct task_struct *tsk)
account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
CPUTIME_SOFTIRQ);
+ vtime_flush_scaled(tsk, acct);
+
acct->utime = 0;
- acct->utime_scaled = 0;
- acct->utime_sspurr = 0;
acct->gtime = 0;
- acct->steal_time = 0;
acct->idle_time = 0;
acct->stime = 0;
- acct->stime_scaled = 0;
acct->hardirq_time = 0;
acct->softirq_time = 0;
}
@@ -984,10 +1011,14 @@ static void register_decrementer_clockevent(int cpu)
*dec = decrementer_clockevent;
dec->cpumask = cpumask_of(cpu);
+ clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);
+
printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
dec->name, dec->mult, dec->shift, cpu);
- clockevents_register_device(dec);
+ /* Set values for KVM, see kvm_emulate_dec() */
+ decrementer_clockevent.mult = dec->mult;
+ decrementer_clockevent.shift = dec->shift;
}
static void enable_large_decrementer(void)
@@ -1035,18 +1066,7 @@ static void __init set_decrementer_max(void)
static void __init init_decrementer_clockevent(void)
{
- int cpu = smp_processor_id();
-
- clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
-
- decrementer_clockevent.max_delta_ns =
- clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
- decrementer_clockevent.max_delta_ticks = decrementer_max;
- decrementer_clockevent.min_delta_ns =
- clockevent_delta2ns(2, &decrementer_clockevent);
- decrementer_clockevent.min_delta_ticks = 2;
-
- register_decrementer_clockevent(cpu);
+ register_decrementer_clockevent(smp_processor_id());
}
void secondary_cpu_time_init(void)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 7716374786bd9..9fabdce255cdb 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -92,13 +92,14 @@ _GLOBAL(tm_abort)
blr
EXPORT_SYMBOL_GPL(tm_abort);
-/* void tm_reclaim(struct thread_struct *thread,
+/*
+ * void tm_reclaim(struct thread_struct *thread,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
- * transactions and updates thread->regs.tm_ckpt_* with the
- * original checkpointed state. Note that thread->regs is
- * unchanged.
+ * transactions and updates thread.ckpt_regs, thread.ckfp_state and
+ * thread.ckvr_state with the original checkpointed state. Note that
+ * thread->regs is unchanged.
*
* Purpose is to both abort transactions of, and preserve the state of,
* a transactions at a context switch. We preserve/restore both sets of process
@@ -163,15 +164,16 @@ _GLOBAL(tm_reclaim)
*/
TRECLAIM(R4) /* Cause in r4 */
- /* ******************** GPRs ******************** */
- /* Stash the checkpointed r13 away in the scratch SPR and get the real
- * paca
+ /*
+ * ******************** GPRs ********************
+ * Stash the checkpointed r13 in the scratch SPR and get the real paca.
*/
SET_SCRATCH0(r13)
GET_PACA(r13)
- /* Stash the checkpointed r1 away in paca tm_scratch and get the real
- * stack pointer back
+ /*
+ * Stash the checkpointed r1 away in paca->tm_scratch and get the real
+ * stack pointer back into r1.
*/
std r1, PACATMSCRATCH(r13)
ld r1, PACAR1(r13)
@@ -209,14 +211,15 @@ _GLOBAL(tm_reclaim)
addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is NOT a pt_regs ptr!
*/
subi r7, r7, STACK_FRAME_OVERHEAD
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
+ SAVE_GPR(2, r7) /* user r2 */
SAVE_4GPRS(3, r7) /* user r3-r6 */
SAVE_GPR(8, r7) /* user r8 */
SAVE_GPR(9, r7) /* user r9 */
@@ -237,7 +240,8 @@ _GLOBAL(tm_reclaim)
/* ******************** NIP ******************** */
mfspr r3, SPRN_TFHAR
std r3, _NIP(r7) /* Returns to failhandler */
- /* The checkpointed NIP is ignored when rescheduling/rechkpting,
+ /*
+ * The checkpointed NIP is ignored when rescheduling/rechkpting,
* but is used in signal return to 'wind back' to the abort handler.
*/
@@ -260,12 +264,13 @@ _GLOBAL(tm_reclaim)
std r3, THREAD_TM_TAR(r12)
std r4, THREAD_TM_DSCR(r12)
- /* MSR and flags: We don't change CRs, and we don't need to alter
- * MSR.
+ /*
+ * MSR and flags: We don't change CRs, and we don't need to alter MSR.
*/
- /* ******************** FPR/VR/VSRs ************
+ /*
+ * ******************** FPR/VR/VSRs ************
* After reclaiming, capture the checkpointed FPRs/VRs.
*
* We enabled VEC/FP/VSX in the msr above, so we can execute these
@@ -275,7 +280,7 @@ _GLOBAL(tm_reclaim)
/* Altivec (VEC/VMX/VR)*/
addi r7, r3, THREAD_CKVRSTATE
- SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
+ SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 ckvr_state */
mfvscr v0
li r6, VRSTATE_VSCR
stvx v0, r7, r6
@@ -286,12 +291,13 @@ _GLOBAL(tm_reclaim)
/* Floating Point (FP) */
addi r7, r3, THREAD_CKFPSTATE
- SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
+ SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 ckfp_state */
mffs fr0
stfd fr0,FPSTATE_FPSCR(r7)
- /* TM regs, incl TEXASR -- these live in thread_struct. Note they've
+ /*
+ * TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
* cause (aborted).
*/
@@ -327,7 +333,7 @@ _GLOBAL(tm_reclaim)
blr
- /*
+ /*
* void __tm_recheckpoint(struct thread_struct *thread)
* - Restore the checkpointed register state saved by tm_reclaim
* when we switch_to a process.
@@ -343,7 +349,8 @@ _GLOBAL(__tm_recheckpoint)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
- /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+ /*
+ * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
* This is used for backing up the NVGPRs:
*/
SAVE_NVGPRS(r1)
@@ -352,8 +359,9 @@ _GLOBAL(__tm_recheckpoint)
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
*/
subi r7, r7, STACK_FRAME_OVERHEAD
@@ -421,14 +429,15 @@ restore_gprs:
REST_NVGPRS(r7) /* GPR14-31 */
- /* Load up PPR and DSCR here so we don't run with user values for long
- */
+ /* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
mtspr SPRN_PPR, r6
- /* Do final sanity check on TEXASR to make sure FS is set. Do this
+ /*
+ * Do final sanity check on TEXASR to make sure FS is set. Do this
* here before we load up the userspace r1 so any bugs we hit will get
- * a call chain */
+ * a call chain.
+ */
mfspr r5, SPRN_TEXASR
srdi r5, r5, 16
li r6, (TEXASR_FS)@h
@@ -436,8 +445,9 @@ restore_gprs:
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* Do final sanity check on MSR to make sure we are not transactional
- * or suspended
+ /*
+ * Do final sanity check on MSR to make sure we are not transactional
+ * or suspended.
*/
mfmsr r6
li r5, (MSR_TS_MASK)@higher
@@ -453,8 +463,8 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them
- * after we clear MSR RI.
+ * Store r1 and r5 on the stack so that we can access them after we
+ * clear MSR RI.
*/
REST_GPR(5, r7)
@@ -484,7 +494,8 @@ restore_gprs:
HMT_MEDIUM
- /* Our transactional state has now changed.
+ /*
+ * Our transactional state has now changed.
*
* Now just get out of here. Transactional (current) state will be
* updated once restore is called on the return path in the _switch-ed
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index d22d8bafb6434..b1725ad3e13d4 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -3,11 +3,9 @@
# Makefile for the powerpc trace subsystem
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
ifdef CONFIG_FUNCTION_TRACER
# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
endif
obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 4bfbb54dee517..4bf051d3e21e7 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -30,6 +30,16 @@
#ifdef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS 8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
static unsigned int
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
@@ -85,13 +95,16 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr)
return create_branch((unsigned int *)ip, addr, 0);
}
-#ifdef CONFIG_MODULES
-
static int is_bl_op(unsigned int op)
{
return (op & 0xfc000003) == 0x48000001;
}
+static int is_b_op(unsigned int op)
+{
+ return (op & 0xfc000003) == 0x48000000;
+}
+
static unsigned long find_bl_target(unsigned long ip, unsigned int op)
{
static int offset;
@@ -104,6 +117,7 @@ static unsigned long find_bl_target(unsigned long ip, unsigned int op)
return ip + (long)offset;
}
+#ifdef CONFIG_MODULES
#ifdef CONFIG_PPC64
static int
__ftrace_make_nop(struct module *mod,
@@ -270,6 +284,146 @@ __ftrace_make_nop(struct module *mod,
#endif /* PPC64 */
#endif /* CONFIG_MODULES */
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+ int i;
+
+ /*
+ * We have the compiler generated long_branch tramps at the end
+ * and we prefer those
+ */
+ for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+ if (!ftrace_tramps[i])
+ continue;
+ else if (create_branch((void *)ip, ftrace_tramps[i], 0))
+ return ftrace_tramps[i];
+
+ return 0;
+}
+
+static int add_ftrace_tramp(unsigned long tramp)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i]) {
+ ftrace_tramps[i] = tramp;
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+ int i, op;
+ unsigned long ptr;
+ static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+ /* Is this a known long jump tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i])
+ break;
+ else if (ftrace_tramps[i] == tramp)
+ return 0;
+
+ /* Is this a known plt tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_plt_tramps[i])
+ break;
+ else if (ftrace_plt_tramps[i] == tramp)
+ return -1;
+
+ /* New trampoline -- read where this goes */
+ if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
+ pr_debug("Fetching opcode failed.\n");
+ return -1;
+ }
+
+ /* Is this a 24 bit branch? */
+ if (!is_b_op(op)) {
+ pr_debug("Trampoline is not a long branch tramp.\n");
+ return -1;
+ }
+
+ /* lets find where the pointer goes */
+ ptr = find_bl_target(tramp, op);
+
+ if (ptr != ppc_global_function_entry((void *)_mcount)) {
+ pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+ return -1;
+ }
+
+ /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+ ptr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+ if (!create_branch((void *)tramp, ptr, 0)) {
+ pr_debug("%ps is not reachable from existing mcount tramp\n",
+ (void *)ptr);
+ return -1;
+ }
+
+ if (patch_branch((unsigned int *)tramp, ptr, 0)) {
+ pr_debug("REL24 out of range!\n");
+ return -1;
+ }
+
+ if (add_ftrace_tramp(tramp)) {
+ pr_debug("No tramp locations left\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long tramp, ip = rec->ip;
+ unsigned int op;
+
+ /* Read where this goes */
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %x\n", op);
+ return -EINVAL;
+ }
+
+ /* Let's find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (setup_mcount_compiler_tramp(tramp)) {
+ /* Are other trampolines reachable? */
+ if (!find_ftrace_tramp(ip)) {
+ pr_err("No ftrace trampolines reachable from %ps\n",
+ (void *)ip);
+ return -EINVAL;
+ }
+ }
+
+ if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+ pr_err("Patching NOP failed.\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
@@ -286,7 +440,8 @@ int ftrace_make_nop(struct module *mod,
old = ftrace_call_replace(ip, addr, 1);
new = PPC_INST_NOP;
return ftrace_modify_code(ip, old, new);
- }
+ } else if (core_kernel_text(ip))
+ return __ftrace_make_nop_kernel(rec, addr);
#ifdef CONFIG_MODULES
/*
@@ -456,6 +611,53 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
#endif /* CONFIG_PPC64 */
#endif /* CONFIG_MODULES */
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ void *ip = (void *)rec->ip;
+ unsigned long tramp, entry, ptr;
+
+ /* Make sure we're being asked to patch branch to a known ftrace addr */
+ entry = ppc_global_function_entry((void *)ftrace_caller);
+ ptr = ppc_global_function_entry((void *)addr);
+
+ if (ptr != entry) {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ entry = ppc_global_function_entry((void *)ftrace_regs_caller);
+ if (ptr != entry) {
+#endif
+ pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
+ return -EINVAL;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ }
+#endif
+ }
+
+ /* Make sure we have a nop */
+ if (probe_kernel_read(&op, ip, sizeof(op))) {
+ pr_err("Unable to read ftrace location %p\n", ip);
+ return -EFAULT;
+ }
+
+ if (op != PPC_INST_NOP) {
+ pr_err("Unexpected call sequence at %p: %x\n", ip, op);
+ return -EINVAL;
+ }
+
+ tramp = find_ftrace_tramp((unsigned long)ip);
+ if (!tramp) {
+ pr_err("No ftrace trampolines reachable from %ps\n", ip);
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("Error patching branch to ftrace tramp!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
@@ -471,7 +673,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
old = PPC_INST_NOP;
new = ftrace_call_replace(ip, addr, 1);
return ftrace_modify_code(ip, old, new);
- }
+ } else if (core_kernel_text(ip))
+ return __ftrace_make_call_kernel(rec, addr);
#ifdef CONFIG_MODULES
/*
@@ -603,6 +806,12 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
old = ftrace_call_replace(ip, old_addr, 1);
new = ftrace_call_replace(ip, addr, 1);
return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ /*
+ * We always patch out of range locations to go to the regs
+ * variant, so there is nothing to do here
+ */
+ return 0;
}
#ifdef CONFIG_MODULES
@@ -654,10 +863,54 @@ void arch_ftrace_update_code(int command)
ftrace_modify_all_code(command);
}
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+int __init ftrace_dyn_arch_init(void)
+{
+ int i;
+ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+ u32 stub_insns[] = {
+ 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
+ 0x3d8c0000, /* addis r12,r12,<high> */
+ 0x398c0000, /* addi r12,r12,<low> */
+ 0x7d8903a6, /* mtctr r12 */
+ 0x4e800420, /* bctr */
+ };
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+ unsigned long addr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+ long reladdr = addr - kernel_toc_addr();
+
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+
+ return 0;
+}
+#else
int __init ftrace_dyn_arch_init(void)
{
return 0;
}
+#endif
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
index e25f77c10a72c..1782af2d14964 100644
--- a/arch/powerpc/kernel/trace/ftrace_64.S
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -14,6 +14,18 @@
#include <asm/ppc-opcode.h>
#include <asm/export.h>
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 64
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 64
+.popsection
+
_GLOBAL(mcount)
_GLOBAL(_mcount)
EXPORT_SYMBOL(_mcount)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c85adb8582713..9a86572db1ef3 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -247,8 +247,6 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
mdelay(MSEC_PER_SEC);
}
- if (in_interrupt())
- panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
do_exit(signr);
@@ -307,12 +305,9 @@ void die(const char *str, struct pt_regs *regs, long err)
}
NOKPROBE_SYMBOL(die);
-void user_single_step_siginfo(struct task_struct *tsk,
- struct pt_regs *regs, siginfo_t *info)
+void user_single_step_report(struct pt_regs *regs)
{
- info->si_signo = SIGTRAP;
- info->si_code = TRAP_TRACE;
- info->si_addr = (void __user *)regs->nip;
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current);
}
static void show_signal_msg(int signr, struct pt_regs *regs, int code,
@@ -341,14 +336,12 @@ static void show_signal_msg(int signr, struct pt_regs *regs, int code,
show_user_instructions(regs);
}
-void _exception_pkey(int signr, struct pt_regs *regs, int code,
- unsigned long addr, int key)
+static bool exception_common(int signr, struct pt_regs *regs, int code,
+ unsigned long addr)
{
- siginfo_t info;
-
if (!user_mode(regs)) {
die("Exception in kernel mode", regs, signr);
- return;
+ return false;
}
show_signal_msg(signr, regs, code, addr);
@@ -364,18 +357,23 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code,
*/
thread_pkey_regs_save(&current->thread);
- clear_siginfo(&info);
- info.si_signo = signr;
- info.si_code = code;
- info.si_addr = (void __user *) addr;
- info.si_pkey = key;
+ return true;
+}
- force_sig_info(signr, &info, current);
+void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
+{
+ if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
+ return;
+
+ force_sig_pkuerr((void __user *) addr, key);
}
void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
{
- _exception_pkey(signr, regs, code, addr, 0);
+ if (!exception_common(signr, regs, code, addr))
+ return;
+
+ force_sig_fault(signr, code, (void __user *)addr, current);
}
void system_reset_exception(struct pt_regs *regs)
@@ -535,10 +533,10 @@ int machine_check_e500mc(struct pt_regs *regs)
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR) {
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
/*
* This is recoverable by invalidating the i-cache.
@@ -556,7 +554,7 @@ int machine_check_e500mc(struct pt_regs *regs)
}
if (reason & MCSR_DCPERR_MC) {
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
/*
* In write shadow mode we auto-recover from the error, but it
@@ -575,38 +573,38 @@ int machine_check_e500mc(struct pt_regs *regs)
}
if (reason & MCSR_L2MMU_MHIT) {
- printk("Hit on multiple TLB entries\n");
+ pr_cont("Hit on multiple TLB entries\n");
recoverable = 0;
}
if (reason & MCSR_NMI)
- printk("Non-maskable interrupt\n");
+ pr_cont("Non-maskable interrupt\n");
if (reason & MCSR_IF) {
- printk("Instruction Fetch Error Report\n");
+ pr_cont("Instruction Fetch Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LD) {
- printk("Load Error Report\n");
+ pr_cont("Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_ST) {
- printk("Store Error Report\n");
+ pr_cont("Store Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LDG) {
- printk("Guarded Load Error Report\n");
+ pr_cont("Guarded Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_TLBSYNC)
- printk("Simultaneous tlbsync operations\n");
+ pr_cont("Simultaneous tlbsync operations\n");
if (reason & MCSR_BSL2_ERR) {
- printk("Level 2 Cache Error\n");
+ pr_cont("Level 2 Cache Error\n");
recoverable = 0;
}
@@ -616,7 +614,7 @@ int machine_check_e500mc(struct pt_regs *regs)
addr = mfspr(SPRN_MCAR);
addr |= (u64)mfspr(SPRN_MCARU) << 32;
- printk("Machine Check %s Address: %#llx\n",
+ pr_cont("Machine Check %s Address: %#llx\n",
reason & MCSR_MEA ? "Effective" : "Physical", addr);
}
@@ -640,29 +638,29 @@ int machine_check_e500(struct pt_regs *regs)
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR)
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
if (reason & MCSR_DCP_PERR)
- printk("Data Cache Push Parity Error\n");
+ pr_cont("Data Cache Push Parity Error\n");
if (reason & MCSR_DCPERR)
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
if (reason & MCSR_BUS_IAERR)
- printk("Bus - Instruction Address Error\n");
+ pr_cont("Bus - Instruction Address Error\n");
if (reason & MCSR_BUS_RAERR)
- printk("Bus - Read Address Error\n");
+ pr_cont("Bus - Read Address Error\n");
if (reason & MCSR_BUS_WAERR)
- printk("Bus - Write Address Error\n");
+ pr_cont("Bus - Write Address Error\n");
if (reason & MCSR_BUS_IBERR)
- printk("Bus - Instruction Data Error\n");
+ pr_cont("Bus - Instruction Data Error\n");
if (reason & MCSR_BUS_RBERR)
- printk("Bus - Read Data Bus Error\n");
+ pr_cont("Bus - Read Data Bus Error\n");
if (reason & MCSR_BUS_WBERR)
- printk("Bus - Write Data Bus Error\n");
+ pr_cont("Bus - Write Data Bus Error\n");
if (reason & MCSR_BUS_IPERR)
- printk("Bus - Instruction Parity Error\n");
+ pr_cont("Bus - Instruction Parity Error\n");
if (reason & MCSR_BUS_RPERR)
- printk("Bus - Read Parity Error\n");
+ pr_cont("Bus - Read Parity Error\n");
return 0;
}
@@ -680,19 +678,19 @@ int machine_check_e200(struct pt_regs *regs)
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_CP_PERR)
- printk("Cache Push Parity Error\n");
+ pr_cont("Cache Push Parity Error\n");
if (reason & MCSR_CPERR)
- printk("Cache Parity Error\n");
+ pr_cont("Cache Parity Error\n");
if (reason & MCSR_EXCP_ERR)
- printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
+ pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
if (reason & MCSR_BUS_IRERR)
- printk("Bus - Read Bus Error on instruction fetch\n");
+ pr_cont("Bus - Read Bus Error on instruction fetch\n");
if (reason & MCSR_BUS_DRERR)
- printk("Bus - Read Bus Error on data load\n");
+ pr_cont("Bus - Read Bus Error on data load\n");
if (reason & MCSR_BUS_WRERR)
- printk("Bus - Write Bus Error on buffered store or cache line push\n");
+ pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");
return 0;
}
@@ -705,30 +703,30 @@ int machine_check_generic(struct pt_regs *regs)
printk("Caused by (from SRR1=%lx): ", reason);
switch (reason & 0x601F0000) {
case 0x80000:
- printk("Machine check signal\n");
+ pr_cont("Machine check signal\n");
break;
case 0: /* for 601 */
case 0x40000:
case 0x140000: /* 7450 MSS error and TEA */
- printk("Transfer error ack signal\n");
+ pr_cont("Transfer error ack signal\n");
break;
case 0x20000:
- printk("Data parity error signal\n");
+ pr_cont("Data parity error signal\n");
break;
case 0x10000:
- printk("Address parity error signal\n");
+ pr_cont("Address parity error signal\n");
break;
case 0x20000000:
- printk("L1 Data Cache error\n");
+ pr_cont("L1 Data Cache error\n");
break;
case 0x40000000:
- printk("L1 Instruction Cache error\n");
+ pr_cont("L1 Instruction Cache error\n");
break;
case 0x00100000:
- printk("L2 data cache parity error\n");
+ pr_cont("L2 data cache parity error\n");
break;
default:
- printk("Unknown values in msr\n");
+ pr_cont("Unknown values in msr\n");
}
return 0;
}
@@ -741,9 +739,7 @@ void machine_check_exception(struct pt_regs *regs)
if (!nested)
nmi_enter();
- /* 64s accounts the mce in machine_check_early when in HVMODE */
- if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
- __this_cpu_inc(irq_stat.mce_exceptions);
+ __this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -767,12 +763,17 @@ void machine_check_exception(struct pt_regs *regs)
if (check_io_access(regs))
goto bail;
- die("Machine check", regs, SIGBUS);
-
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
nmi_panic(regs, "Unrecoverable Machine check");
+ if (!nested)
+ nmi_exit();
+
+ die("Machine check", regs, SIGBUS);
+
+ return;
+
bail:
if (!nested)
nmi_exit();
@@ -1433,7 +1434,7 @@ void program_check_exception(struct pt_regs *regs)
goto bail;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
- "at %lx (msr 0x%x)\n", regs->nip, reason);
+ "at %lx (msr 0x%lx)\n", regs->nip, regs->msr);
die("Unrecoverable exception", regs, SIGABRT);
}
}
@@ -1547,14 +1548,6 @@ void StackOverflow(struct pt_regs *regs)
panic("kernel stack overflow");
}
-void nonrecoverable_exception(struct pt_regs *regs)
-{
- printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
- regs->nip, regs->msr);
- debugger(regs);
- die("nonrecoverable exception", regs, SIGKILL);
-}
-
void kernel_fp_unavailable_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
@@ -1750,16 +1743,20 @@ void fp_unavailable_tm(struct pt_regs *regs)
* checkpointed FP registers need to be loaded.
*/
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- /* Reclaim didn't save out any FPRs to transact_fprs. */
+
+ /*
+ * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, and
+ * then it was overwrite by the thr->fp_state by tm_reclaim_thread().
+ *
+ * At this point, ck{fp,vr}_state contains the exact values we want to
+ * recheckpoint.
+ */
/* Enable FP for the task: */
current->thread.load_fp = 1;
- /* This loads and recheckpoints the FP registers from
- * thread.fpr[]. They will remain in registers after the
- * checkpoint so we don't need to reload them after.
- * If VMX is in use, the VRs now hold checkpointed values,
- * so we don't want to load the VRs from the thread_struct.
+ /*
+ * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
*/
tm_recheckpoint(&current->thread);
}
@@ -2086,8 +2083,8 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
*/
void unrecoverable_exception(struct pt_regs *regs)
{
- printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
- regs->trap, regs->nip);
+ pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
+ regs->trap, regs->nip, regs->msr);
die("Unrecoverable exception", regs, SIGABRT);
}
NOKPROBE_SYMBOL(unrecoverable_exception);
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 3745113fcc652..2a7eb5452aba7 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -37,6 +37,7 @@ data_page_branch:
mtlr r0
addi r3, r3, __kernel_datapage_offset-data_page_branch
lwz r0,0(r3)
+ .cfi_restore lr
add r3,r0,r3
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 769c2624e0a6b..1e0bc5955a400 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -139,6 +139,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
*/
99:
li r0,__NR_clock_gettime
+ .cfi_restore lr
sc
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index abf17feffe404..bf96686915116 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -37,6 +37,7 @@ data_page_branch:
mtlr r0
addi r3, r3, __kernel_datapage_offset-data_page_branch
lwz r0,0(r3)
+ .cfi_restore lr
add r3,r0,r3
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index c002adcc694c6..a4ed9edfd5f0b 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -169,6 +169,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
*/
99:
li r0,__NR_clock_gettime
+ .cfi_restore lr
sc
blr
.cfi_endproc
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 07ae018e550e1..434581bcd5b42 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -4,6 +4,9 @@
#else
#define PROVIDE32(x) PROVIDE(x)
#endif
+
+#define BSS_FIRST_SECTIONS *(.bss.prominit)
+
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
@@ -99,6 +102,9 @@ SECTIONS
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
*(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+#ifdef CONFIG_PPC64
+ *(.tramp.ftrace.text);
+#endif
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
@@ -181,7 +187,15 @@ SECTIONS
*/
. = ALIGN(STRICT_ALIGN_SIZE);
__init_begin = .;
- INIT_TEXT_SECTION(PAGE_SIZE) :kernel
+ . = ALIGN(PAGE_SIZE);
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+ _sinittext = .;
+ INIT_TEXT
+ _einittext = .;
+#ifdef CONFIG_PPC64
+ *(.tramp.ftrace.init);
+#endif
+ } :kernel
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
@@ -212,8 +226,6 @@ SECTIONS
CON_INITCALL
}
- SECURITY_INIT
-
. = ALIGN(8);
__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
__start___ftr_fixup = .;