summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/gpio.h4
-rw-r--r--arch/alpha/include/asm/pci.h8
-rw-r--r--arch/alpha/include/asm/serial.h8
-rw-r--r--arch/alpha/kernel/smp.c2
-rw-r--r--arch/arc/Kconfig26
-rw-r--r--arch/arc/include/asm/dma.h5
-rw-r--r--arch/arc/include/asm/hugepage.h3
-rw-r--r--arch/arc/include/asm/io.h9
-rw-r--r--arch/arc/include/asm/pci.h28
-rw-r--r--arch/arc/kernel/Makefile1
-rw-r--r--arch/arc/kernel/pcibios.c22
-rw-r--r--arch/arc/kernel/smp.c2
-rw-r--r--arch/arc/plat-axs10x/Kconfig1
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/dts/am57xx-beagle-x15.dts3
-rw-r--r--arch/arm/boot/dts/armada-xp-axpwifiap.dts4
-rw-r--r--arch/arm/boot/dts/armada-xp-db.dts4
-rw-r--r--arch/arm/boot/dts/armada-xp-gp.dts4
-rw-r--r--arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts4
-rw-r--r--arch/arm/boot/dts/armada-xp-linksys-mamba.dts4
-rw-r--r--arch/arm/boot/dts/armada-xp-matrix.dts4
-rw-r--r--arch/arm/boot/dts/armada-xp-netgear-rn2120.dts4
-rw-r--r--arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts6
-rw-r--r--arch/arm/boot/dts/armada-xp-synology-ds414.dts4
-rw-r--r--arch/arm/boot/dts/dra7.dtsi10
-rw-r--r--arch/arm/boot/dts/imx23.dtsi2
-rw-r--r--arch/arm/boot/dts/imx28.dtsi2
-rw-r--r--arch/arm/boot/dts/mt2701-pinfunc.h735
-rw-r--r--arch/arm/boot/dts/omap3-n900.dts6
-rw-r--r--arch/arm/boot/dts/socfpga.dtsi20
-rw-r--r--arch/arm/boot/dts/vfxxx.dtsi5
-rw-r--r--arch/arm/common/scoop.c10
-rw-r--r--arch/arm/configs/colibri_pxa270_defconfig1
-rw-r--r--arch/arm/configs/iop13xx_defconfig1
-rw-r--r--arch/arm/configs/iop32x_defconfig1
-rw-r--r--arch/arm/configs/trizeps4_defconfig1
-rw-r--r--arch/arm/crypto/aes-ce-glue.c5
-rw-r--r--arch/arm/crypto/aesbs-glue.c6
-rw-r--r--arch/arm/include/asm/cacheflush.h1
-rw-r--r--arch/arm/include/asm/kvm_asm.h43
-rw-r--r--arch/arm/include/asm/kvm_emulate.h20
-rw-r--r--arch/arm/include/asm/kvm_host.h80
-rw-r--r--arch/arm/include/asm/kvm_hyp.h139
-rw-r--r--arch/arm/include/asm/kvm_mmu.h2
-rw-r--r--arch/arm/include/asm/pci.h4
-rw-r--r--arch/arm/include/asm/virt.h9
-rw-r--r--arch/arm/kernel/asm-offsets.c40
-rw-r--r--arch/arm/kernel/setup.c6
-rw-r--r--arch/arm/kernel/smp.c2
-rw-r--r--arch/arm/kernel/vmlinux.lds.S6
-rw-r--r--arch/arm/kvm/Makefile1
-rw-r--r--arch/arm/kvm/arm.c258
-rw-r--r--arch/arm/kvm/coproc.c126
-rw-r--r--arch/arm/kvm/coproc.h24
-rw-r--r--arch/arm/kvm/emulate.c34
-rw-r--r--arch/arm/kvm/guest.c5
-rw-r--r--arch/arm/kvm/handle_exit.c7
-rw-r--r--arch/arm/kvm/hyp/Makefile17
-rw-r--r--arch/arm/kvm/hyp/banked-sr.c77
-rw-r--r--arch/arm/kvm/hyp/cp15-sr.c84
-rw-r--r--arch/arm/kvm/hyp/entry.S101
-rw-r--r--arch/arm/kvm/hyp/hyp-entry.S169
-rw-r--r--arch/arm/kvm/hyp/s2-setup.c33
-rw-r--r--arch/arm/kvm/hyp/switch.c232
-rw-r--r--arch/arm/kvm/hyp/tlb.c70
-rw-r--r--arch/arm/kvm/hyp/vfp.S68
-rw-r--r--arch/arm/kvm/init.S8
-rw-r--r--arch/arm/kvm/interrupts.S480
-rw-r--r--arch/arm/kvm/interrupts_head.S648
-rw-r--r--arch/arm/kvm/mmu.c23
-rw-r--r--arch/arm/kvm/psci.c4
-rw-r--r--arch/arm/kvm/reset.c2
-rw-r--r--arch/arm/mach-davinci/board-mityomapl138.c5
-rw-r--r--arch/arm/mach-davinci/common.c4
-rw-r--r--arch/arm/mach-gemini/gpio.c4
-rw-r--r--arch/arm/mach-imx/gpc.c9
-rw-r--r--arch/arm/mach-imx/mach-mx27ads.c4
-rw-r--r--arch/arm/mach-ixp4xx/common.c4
-rw-r--r--arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h198
-rw-r--r--arch/arm/mach-lpc32xx/phy3250.c13
-rw-r--r--arch/arm/mach-mvebu/Kconfig6
-rw-r--r--arch/arm/mach-netx/fb.c14
-rw-r--r--arch/arm/mach-nspire/clcd.c13
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c9
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.h3
-rw-r--r--arch/arm/mach-omap2/serial.c2
-rw-r--r--arch/arm/mach-pxa/include/mach/pxa25x-udc.h163
-rw-r--r--arch/arm/mach-pxa/raumfeld.c43
-rw-r--r--arch/arm/mach-s3c24xx/mach-h1940.c2
-rw-r--r--arch/arm/mach-sa1100/simpad.c4
-rw-r--r--arch/arm/mach-socfpga/Makefile2
-rw-r--r--arch/arm/mach-socfpga/core.h2
-rw-r--r--arch/arm/mach-socfpga/l2_cache.c41
-rw-r--r--arch/arm/mach-socfpga/ocram.c49
-rw-r--r--arch/arm/mach-socfpga/socfpga.c5
-rw-r--r--arch/arm/mach-w90x900/gpio.c13
-rw-r--r--arch/arm/mm/fault.c2
-rw-r--r--arch/arm/mm/mmu.c6
-rw-r--r--arch/arm/mm/pgd.c2
-rw-r--r--arch/arm/plat-orion/gpio.c24
-rw-r--r--arch/arm/plat-samsung/pm-check.c4
-rw-r--r--arch/arm/vdso/vdso.S3
-rw-r--r--arch/arm64/Kconfig119
-rw-r--r--arch/arm64/Kconfig.debug6
-rw-r--r--arch/arm64/Makefile10
-rw-r--r--arch/arm64/boot/dts/apm/apm-storm.dtsi6
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra132.dtsi2
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra210.dtsi2
-rw-r--r--arch/arm64/crypto/aes-glue.c5
-rw-r--r--arch/arm64/include/asm/Kbuild3
-rw-r--r--arch/arm64/include/asm/acpi.h19
-rw-r--r--arch/arm64/include/asm/alternative.h63
-rw-r--r--arch/arm64/include/asm/assembler.h26
-rw-r--r--arch/arm64/include/asm/atomic_lse.h38
-rw-r--r--arch/arm64/include/asm/boot.h6
-rw-r--r--arch/arm64/include/asm/brk-imm.h25
-rw-r--r--arch/arm64/include/asm/bug.h2
-rw-r--r--arch/arm64/include/asm/cacheflush.h4
-rw-r--r--arch/arm64/include/asm/cpu.h1
-rw-r--r--arch/arm64/include/asm/cpufeature.h42
-rw-r--r--arch/arm64/include/asm/cputype.h31
-rw-r--r--arch/arm64/include/asm/debug-monitors.h14
-rw-r--r--arch/arm64/include/asm/elf.h24
-rw-r--r--arch/arm64/include/asm/fixmap.h11
-rw-r--r--arch/arm64/include/asm/ftrace.h2
-rw-r--r--arch/arm64/include/asm/futex.h12
-rw-r--r--arch/arm64/include/asm/hardirq.h2
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h18
-rw-r--r--arch/arm64/include/asm/kasan.h5
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h12
-rw-r--r--arch/arm64/include/asm/kvm_arm.h8
-rw-r--r--arch/arm64/include/asm/kvm_asm.h8
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h8
-rw-r--r--arch/arm64/include/asm/kvm_host.h46
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h181
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h14
-rw-r--r--arch/arm64/include/asm/kvm_perf_event.h68
-rw-r--r--arch/arm64/include/asm/lse.h1
-rw-r--r--arch/arm64/include/asm/memory.h65
-rw-r--r--arch/arm64/include/asm/mmu_context.h64
-rw-r--r--arch/arm64/include/asm/module.h17
-rw-r--r--arch/arm64/include/asm/pci.h2
-rw-r--r--arch/arm64/include/asm/pgalloc.h26
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h92
-rw-r--r--arch/arm64/include/asm/pgtable.h183
-rw-r--r--arch/arm64/include/asm/processor.h9
-rw-r--r--arch/arm64/include/asm/ptrace.h33
-rw-r--r--arch/arm64/include/asm/smp.h46
-rw-r--r--arch/arm64/include/asm/sysreg.h23
-rw-r--r--arch/arm64/include/asm/uaccess.h82
-rw-r--r--arch/arm64/include/asm/virt.h10
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h7
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h2
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h6
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h1
-rw-r--r--arch/arm64/kernel/Makefile3
-rw-r--r--arch/arm64/kernel/acpi_parking_protocol.c141
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c7
-rw-r--r--arch/arm64/kernel/asm-offsets.c5
-rw-r--r--arch/arm64/kernel/cpu_errata.c27
-rw-r--r--arch/arm64/kernel/cpu_ops.c27
-rw-r--r--arch/arm64/kernel/cpufeature.c281
-rw-r--r--arch/arm64/kernel/cpuinfo.c3
-rw-r--r--arch/arm64/kernel/debug-monitors.c23
-rw-r--r--arch/arm64/kernel/efi-entry.S3
-rw-r--r--arch/arm64/kernel/fpsimd.c2
-rw-r--r--arch/arm64/kernel/head.S195
-rw-r--r--arch/arm64/kernel/image.h45
-rw-r--r--arch/arm64/kernel/kaslr.c177
-rw-r--r--arch/arm64/kernel/kgdb.c4
-rw-r--r--arch/arm64/kernel/module-plts.c201
-rw-r--r--arch/arm64/kernel/module.c25
-rw-r--r--arch/arm64/kernel/module.lds3
-rw-r--r--arch/arm64/kernel/pci.c2
-rw-r--r--arch/arm64/kernel/perf_event.c6
-rw-r--r--arch/arm64/kernel/process.c16
-rw-r--r--arch/arm64/kernel/ptrace.c80
-rw-r--r--arch/arm64/kernel/setup.c42
-rw-r--r--arch/arm64/kernel/signal.c4
-rw-r--r--arch/arm64/kernel/signal32.c4
-rw-r--r--arch/arm64/kernel/sleep.S4
-rw-r--r--arch/arm64/kernel/smp.c101
-rw-r--r--arch/arm64/kernel/suspend.c20
-rw-r--r--arch/arm64/kernel/vdso/vdso.S3
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S30
-rw-r--r--arch/arm64/kvm/Kconfig7
-rw-r--r--arch/arm64/kvm/Makefile1
-rw-r--r--arch/arm64/kvm/guest.c51
-rw-r--r--arch/arm64/kvm/hyp-init.S15
-rw-r--r--arch/arm64/kvm/hyp.S13
-rw-r--r--arch/arm64/kvm/hyp/Makefile8
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c5
-rw-r--r--arch/arm64/kvm/hyp/entry.S6
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S109
-rw-r--r--arch/arm64/kvm/hyp/hyp.h90
-rw-r--r--arch/arm64/kvm/hyp/s2-setup.c43
-rw-r--r--arch/arm64/kvm/hyp/switch.c206
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c149
-rw-r--r--arch/arm64/kvm/hyp/timer-sr.c71
-rw-r--r--arch/arm64/kvm/hyp/tlb.c2
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-sr.c84
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c341
-rw-r--r--arch/arm64/kvm/reset.c7
-rw-r--r--arch/arm64/kvm/sys_regs.c611
-rw-r--r--arch/arm64/lib/Makefile13
-rw-r--r--arch/arm64/lib/clear_user.S12
-rw-r--r--arch/arm64/lib/copy_from_user.S12
-rw-r--r--arch/arm64/lib/copy_in_user.S20
-rw-r--r--arch/arm64/lib/copy_page.S63
-rw-r--r--arch/arm64/lib/copy_to_user.S12
-rw-r--r--arch/arm64/lib/memcmp.S2
-rw-r--r--arch/arm64/mm/context.c54
-rw-r--r--arch/arm64/mm/dump.c21
-rw-r--r--arch/arm64/mm/extable.c2
-rw-r--r--arch/arm64/mm/fault.c36
-rw-r--r--arch/arm64/mm/hugetlbpage.c16
-rw-r--r--arch/arm64/mm/init.c130
-rw-r--r--arch/arm64/mm/kasan_init.c70
-rw-r--r--arch/arm64/mm/mmu.c526
-rw-r--r--arch/arm64/mm/pageattr.c46
-rw-r--r--arch/arm64/mm/proc.S40
-rw-r--r--arch/avr32/boards/merisc/setup.c1
-rw-r--r--arch/avr32/include/asm/cmpxchg.h2
-rw-r--r--arch/avr32/include/asm/pci.h2
-rw-r--r--arch/avr32/include/uapi/asm/unistd.h1
-rw-r--r--arch/avr32/kernel/setup.c6
-rw-r--r--arch/avr32/kernel/syscall-stubs.S9
-rw-r--r--arch/avr32/kernel/syscall_table.S1
-rw-r--r--arch/avr32/mach-at32ap/pio.c2
-rw-r--r--arch/blackfin/Kconfig2
-rw-r--r--arch/blackfin/include/asm/pci.h1
-rw-r--r--arch/blackfin/include/asm/pgtable.h2
-rw-r--r--arch/blackfin/kernel/bfin_gpio.c6
-rw-r--r--arch/blackfin/kernel/debug-mmrs.c2
-rw-r--r--arch/blackfin/mach-bf527/boards/ezbrd.c2
-rw-r--r--arch/blackfin/mach-bf527/boards/ezkit.c2
-rw-r--r--arch/blackfin/mach-bf527/boards/tll6527m.c2
-rw-r--r--arch/blackfin/mach-bf537/boards/stamp.c2
-rw-r--r--arch/blackfin/mach-bf538/boards/ezkit.c4
-rw-r--r--arch/blackfin/mach-bf538/ext-gpio.c8
-rw-r--r--arch/blackfin/mach-bf548/boards/cm_bf548.c2
-rw-r--r--arch/blackfin/mach-bf548/boards/ezkit.c2
-rw-r--r--arch/blackfin/mach-bf609/boards/ezkit.c2
-rw-r--r--arch/blackfin/mach-common/ints-priority.c2
-rw-r--r--arch/blackfin/mach-common/pm.c2
-rw-r--r--arch/blackfin/mach-common/smp.c2
-rw-r--r--arch/c6x/Kconfig1
-rw-r--r--arch/c6x/kernel/setup.c2
-rw-r--r--arch/cris/include/asm/pci.h3
-rw-r--r--arch/frv/include/asm/pci.h7
-rw-r--r--arch/frv/include/asm/serial.h2
-rw-r--r--arch/hexagon/kernel/smp.c2
-rw-r--r--arch/ia64/Kconfig4
-rw-r--r--arch/ia64/include/asm/gpio.h4
-rw-r--r--arch/ia64/include/asm/io.h1
-rw-r--r--arch/ia64/include/asm/pci.h2
-rw-r--r--arch/ia64/include/asm/rwsem.h2
-rw-r--r--arch/ia64/kernel/efi.c13
-rw-r--r--arch/ia64/kernel/setup.c6
-rw-r--r--arch/ia64/kernel/smpboot.c2
-rw-r--r--arch/ia64/kernel/unaligned.c7
-rw-r--r--arch/ia64/mm/hugetlbpage.c2
-rw-r--r--arch/ia64/pci/fixup.c21
-rw-r--r--arch/ia64/sn/kernel/io_acpi_init.c22
-rw-r--r--arch/ia64/sn/kernel/io_init.c51
-rw-r--r--arch/m32r/Kconfig2
-rw-r--r--arch/m32r/kernel/setup.c4
-rw-r--r--arch/m32r/kernel/smpboot.c2
-rw-r--r--arch/m32r/mm/init.c27
-rw-r--r--arch/m68k/68360/Makefile12
-rw-r--r--arch/m68k/68360/commproc.c309
-rw-r--r--arch/m68k/68360/config.c169
-rw-r--r--arch/m68k/68360/entry.S164
-rw-r--r--arch/m68k/68360/head-ram.S402
-rw-r--r--arch/m68k/68360/head-rom.S413
-rw-r--r--arch/m68k/68360/ints.c138
-rw-r--r--arch/m68k/Kconfig.cpu7
-rw-r--r--arch/m68k/Kconfig.debug2
-rw-r--r--arch/m68k/Kconfig.machine6
-rw-r--r--arch/m68k/Makefile3
-rw-r--r--arch/m68k/coldfire/device.c4
-rw-r--r--arch/m68k/coldfire/gpio.c2
-rw-r--r--arch/m68k/include/asm/MC68328.h6
-rw-r--r--arch/m68k/include/asm/MC68EZ328.h6
-rw-r--r--arch/m68k/include/asm/MC68VZ328.h6
-rw-r--r--arch/m68k/include/asm/commproc.h664
-rw-r--r--arch/m68k/include/asm/m54xxacr.h4
-rw-r--r--arch/m68k/include/asm/m68360.h13
-rw-r--r--arch/m68k/include/asm/m68360_enet.h177
-rw-r--r--arch/m68k/include/asm/m68360_pram.h431
-rw-r--r--arch/m68k/include/asm/m68360_quicc.h362
-rw-r--r--arch/m68k/include/asm/m68360_regs.h408
-rw-r--r--arch/m68k/include/asm/mac_iop.h2
-rw-r--r--arch/m68k/include/asm/mcftimer.h2
-rw-r--r--arch/m68k/include/asm/pci.h1
-rw-r--r--arch/m68k/include/asm/serial.h8
-rw-r--r--arch/m68k/kernel/early_printk.c8
-rw-r--r--arch/m68k/kernel/entry.S6
-rw-r--r--arch/m68k/kernel/setup_no.c7
-rw-r--r--arch/m68k/kernel/signal.c8
-rw-r--r--arch/m68k/mac/via.c2
-rw-r--r--arch/metag/include/asm/gpio.h4
-rw-r--r--arch/metag/kernel/smp.c2
-rw-r--r--arch/metag/mm/hugetlbpage.c2
-rw-r--r--arch/microblaze/Kconfig3
-rw-r--r--arch/microblaze/configs/mmu_defconfig1
-rw-r--r--arch/microblaze/configs/nommu_defconfig1
-rw-r--r--arch/microblaze/include/asm/gpio.h4
-rw-r--r--arch/microblaze/include/asm/pci.h2
-rw-r--r--arch/microblaze/kernel/setup.c2
-rw-r--r--arch/microblaze/pci/pci-common.c56
-rw-r--r--arch/mips/Kconfig11
-rw-r--r--arch/mips/alchemy/common/gpiolib.c8
-rw-r--r--arch/mips/ar7/gpio.c26
-rw-r--r--arch/mips/ath79/irq.c244
-rw-r--r--arch/mips/bcm63xx/gpio.c4
-rw-r--r--arch/mips/bmips/irq.c10
-rw-r--r--arch/mips/boot/compressed/uart-16550.c2
-rw-r--r--arch/mips/boot/dts/brcm/bcm6328.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm6368.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7125.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7346.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7358.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7360.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7362.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7420.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7425.dtsi2
-rw-r--r--arch/mips/boot/dts/brcm/bcm7435.dtsi2
-rw-r--r--arch/mips/configs/bigsur_defconfig1
-rw-r--r--arch/mips/configs/ip22_defconfig1
-rw-r--r--arch/mips/configs/ip27_defconfig1
-rw-r--r--arch/mips/configs/ip32_defconfig1
-rw-r--r--arch/mips/configs/jazz_defconfig1
-rw-r--r--arch/mips/configs/lemote2f_defconfig1
-rw-r--r--arch/mips/configs/rm200_defconfig1
-rw-r--r--arch/mips/configs/sb1250_swarm_defconfig1
-rw-r--r--arch/mips/include/asm/mach-ath79/ath79.h4
-rw-r--r--arch/mips/include/asm/octeon/cvmx.h9
-rw-r--r--arch/mips/include/asm/pci.h4
-rw-r--r--arch/mips/include/asm/smp-ops.h5
-rw-r--r--arch/mips/jz4740/gpio.c10
-rw-r--r--arch/mips/kernel/Makefile1
-rw-r--r--arch/mips/kernel/gpio_txx9.c4
-rw-r--r--arch/mips/kernel/setup.c10
-rw-r--r--arch/mips/kernel/smp-cmp.c4
-rw-r--r--arch/mips/kernel/smp-cps.c4
-rw-r--r--arch/mips/kernel/smp-mt.c2
-rw-r--r--arch/mips/kernel/smp.c139
-rw-r--r--arch/mips/kvm/mips.c8
-rw-r--r--arch/mips/mm/gup.c2
-rw-r--r--arch/mips/pci/fixup-loongson3.c19
-rw-r--r--arch/mips/pmcs-msp71xx/msp_serial.c2
-rw-r--r--arch/mips/rb532/gpio.c12
-rw-r--r--arch/mips/txx9/generic/setup.c10
-rw-r--r--arch/mips/txx9/rbtx4938/setup.c3
-rw-r--r--arch/mn10300/Kconfig1
-rw-r--r--arch/mn10300/include/asm/pci.h3
-rw-r--r--arch/mn10300/include/asm/serial.h10
-rw-r--r--arch/mn10300/kernel/fpu-nofpu.c1
-rw-r--r--arch/mn10300/kernel/smp.c2
-rw-r--r--arch/openrisc/include/asm/gpio.h4
-rw-r--r--arch/parisc/configs/712_defconfig1
-rw-r--r--arch/parisc/configs/a500_defconfig1
-rw-r--r--arch/parisc/configs/default_defconfig1
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig1
-rw-r--r--arch/parisc/include/asm/cache.h3
-rw-r--r--arch/parisc/include/asm/cacheflush.h4
-rw-r--r--arch/parisc/include/asm/pci.h3
-rw-r--r--arch/parisc/kernel/smp.c2
-rw-r--r--arch/parisc/mm/hugetlbpage.c2
-rw-r--r--arch/parisc/mm/init.c6
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/configs/c2k_defconfig1
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c6
-rw-r--r--arch/powerpc/include/asm/gpio.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h9
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h51
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h1
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/pgtable.h3
-rw-r--r--arch/powerpc/include/asm/smp.h4
-rw-r--r--arch/powerpc/include/asm/xics.h1
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h9
-rw-r--r--arch/powerpc/kernel/rtasd.c9
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c3
-rw-r--r--arch/powerpc/kernel/smp.c30
-rw-r--r--arch/powerpc/kernel/traps.c5
-rw-r--r--arch/powerpc/kvm/Makefile2
-rw-r--r--arch/powerpc/kvm/book3s.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c156
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c330
-rw-r--r--arch/powerpc/kvm/book3s_hv.c215
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c131
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S18
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c35
-rw-r--r--arch/powerpc/kvm/powerpc.c38
-rw-r--r--arch/powerpc/mm/hash_utils_64.c36
-rw-r--r--arch/powerpc/mm/init_32.c8
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c3
-rw-r--r--arch/powerpc/mm/pgtable.c8
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/powerpc/perf/hv-24x7.c8
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c10
-rw-r--r--arch/powerpc/sysdev/ppc4xx_gpio.c2
-rw-r--r--arch/powerpc/sysdev/simple_gpio.c2
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c21
-rw-r--r--arch/s390/Kconfig10
-rw-r--r--arch/s390/crypto/aes_s390.c6
-rw-r--r--arch/s390/include/asm/clp.h27
-rw-r--r--arch/s390/include/asm/gmap.h64
-rw-r--r--arch/s390/include/asm/kvm_host.h43
-rw-r--r--arch/s390/include/asm/livepatch.h4
-rw-r--r--arch/s390/include/asm/mmu_context.h16
-rw-r--r--arch/s390/include/asm/pci.h6
-rw-r--r--arch/s390/include/asm/pci_clp.h30
-rw-r--r--arch/s390/include/asm/percpu.h1
-rw-r--r--arch/s390/include/asm/perf_event.h2
-rw-r--r--arch/s390/include/asm/pgalloc.h28
-rw-r--r--arch/s390/include/asm/pgtable.h636
-rw-r--r--arch/s390/include/asm/processor.h12
-rw-r--r--arch/s390/include/asm/rwsem.h2
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/asm/xor.h21
-rw-r--r--arch/s390/include/uapi/asm/clp.h28
-rw-r--r--arch/s390/include/uapi/asm/kvm.h8
-rw-r--r--arch/s390/include/uapi/asm/sie.h1
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/cpcmd.c3
-rw-r--r--arch/s390/kernel/debug.c6
-rw-r--r--arch/s390/kernel/dis.c17
-rw-r--r--arch/s390/kernel/dumpstack.c99
-rw-r--r--arch/s390/kernel/early.c1
-rw-r--r--arch/s390/kernel/entry.S107
-rw-r--r--arch/s390/kernel/head64.S2
-rw-r--r--arch/s390/kernel/irq.c3
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c2
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c9
-rw-r--r--arch/s390/kernel/perf_event.c58
-rw-r--r--arch/s390/kernel/setup.c10
-rw-r--r--arch/s390/kernel/smp.c2
-rw-r--r--arch/s390/kernel/stacktrace.c93
-rw-r--r--arch/s390/kernel/time.c14
-rw-r--r--arch/s390/kernel/topology.c7
-rw-r--r--arch/s390/kernel/traps.c21
-rw-r--r--arch/s390/kvm/diag.c1
-rw-r--r--arch/s390/kvm/gaccess.c57
-rw-r--r--arch/s390/kvm/gaccess.h38
-rw-r--r--arch/s390/kvm/guestdbg.c4
-rw-r--r--arch/s390/kvm/intercept.c78
-rw-r--r--arch/s390/kvm/interrupt.c98
-rw-r--r--arch/s390/kvm/kvm-s390.c240
-rw-r--r--arch/s390/kvm/kvm-s390.h28
-rw-r--r--arch/s390/kvm/priv.c16
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/xor.c134
-rw-r--r--arch/s390/mm/Makefile4
-rw-r--r--arch/s390/mm/extmem.c16
-rw-r--r--arch/s390/mm/fault.c16
-rw-r--r--arch/s390/mm/gmap.c774
-rw-r--r--arch/s390/mm/hugetlbpage.c7
-rw-r--r--arch/s390/mm/pageattr.c8
-rw-r--r--arch/s390/mm/pgalloc.c360
-rw-r--r--arch/s390/mm/pgtable.c1549
-rw-r--r--arch/s390/mm/vmem.c10
-rw-r--r--arch/s390/oprofile/Makefile2
-rw-r--r--arch/s390/oprofile/backtrace.c78
-rw-r--r--arch/s390/oprofile/init.c21
-rw-r--r--arch/s390/pci/pci.c11
-rw-r--r--arch/s390/pci/pci_clp.c247
-rw-r--r--arch/s390/pci/pci_debug.c5
-rw-r--r--arch/s390/pci/pci_dma.c21
-rw-r--r--arch/s390/pci/pci_event.c13
-rw-r--r--arch/score/configs/spct6600_defconfig1
-rw-r--r--arch/score/kernel/setup.c2
-rw-r--r--arch/sh/Kconfig4
-rw-r--r--arch/sh/include/asm/pci.h3
-rw-r--r--arch/sh/include/mach-common/mach/magicpanelr2.h2
-rw-r--r--arch/sh/kernel/setup.c8
-rw-r--r--arch/sh/kernel/smp.c2
-rw-r--r--arch/sh/mm/hugetlbpage.c2
-rw-r--r--arch/sparc/include/asm/gpio.h4
-rw-r--r--arch/sparc/include/asm/pci.h3
-rw-r--r--arch/sparc/kernel/head_64.S8
-rw-r--r--arch/sparc/kernel/smp_32.c2
-rw-r--r--arch/sparc/kernel/smp_64.c2
-rw-r--r--arch/sparc/mm/hugetlbpage.c2
-rw-r--r--arch/sparc/mm/init_64.c8
-rw-r--r--arch/tile/Kconfig4
-rw-r--r--arch/tile/configs/tilegx_defconfig1
-rw-r--r--arch/tile/configs/tilepro_defconfig1
-rw-r--r--arch/tile/include/asm/pci.h3
-rw-r--r--arch/tile/kernel/setup.c11
-rw-r--r--arch/tile/kernel/smpboot.c2
-rw-r--r--arch/tile/mm/hugetlbpage.c2
-rw-r--r--arch/tile/mm/init.c11
-rw-r--r--arch/um/kernel/skas/mmu.c2
-rw-r--r--arch/unicore32/include/asm/pci.h3
-rw-r--r--arch/unicore32/include/mach/hardware.h5
-rw-r--r--arch/unicore32/kernel/gpio.c2
-rw-r--r--arch/unicore32/kernel/setup.c6
-rw-r--r--arch/unicore32/mm/fault.c2
-rw-r--r--arch/unicore32/mm/pgd.c2
-rw-r--r--arch/x86/Kbuild3
-rw-r--r--arch/x86/Kconfig30
-rw-r--r--arch/x86/Kconfig.debug28
-rw-r--r--arch/x86/boot/cpuflags.h2
-rw-r--r--arch/x86/boot/mkcpustr.c2
-rw-r--r--arch/x86/boot/tools/build.c1
-rw-r--r--arch/x86/configs/i386_defconfig3
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c11
-rw-r--r--arch/x86/crypto/camellia_glue.c10
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c10
-rw-r--r--arch/x86/crypto/crc32-pclmul_glue.c2
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c2
-rw-r--r--arch/x86/crypto/crct10dif-pclmul_glue.c2
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c11
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c11
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb.c35
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S2
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c10
-rw-r--r--arch/x86/entry/calling.h31
-rw-r--r--arch/x86/entry/common.c106
-rw-r--r--arch/x86/entry/entry_32.S268
-rw-r--r--arch/x86/entry/entry_64.S286
-rw-r--r--arch/x86/entry/entry_64_compat.S102
-rw-r--r--arch/x86/entry/syscall_32.c10
-rw-r--r--arch/x86/entry/syscall_64.c13
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl20
-rw-r--r--arch/x86/entry/syscalls/syscalltbl.sh58
-rw-r--r--arch/x86/entry/vdso/vdso2c.h9
-rw-r--r--arch/x86/entry/vdso/vdso32-setup.c1
-rw-r--r--arch/x86/entry/vdso/vdso32/system_call.S2
-rw-r--r--arch/x86/entry/vdso/vma.c127
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c9
-rw-r--r--arch/x86/events/Makefile13
-rw-r--r--arch/x86/events/amd/core.c (renamed from arch/x86/kernel/cpu/perf_event_amd.c)2
-rw-r--r--arch/x86/events/amd/ibs.c (renamed from arch/x86/kernel/cpu/perf_event_amd_ibs.c)12
-rw-r--r--arch/x86/events/amd/iommu.c (renamed from arch/x86/kernel/cpu/perf_event_amd_iommu.c)4
-rw-r--r--arch/x86/events/amd/iommu.h (renamed from arch/x86/kernel/cpu/perf_event_amd_iommu.h)0
-rw-r--r--arch/x86/events/amd/uncore.c (renamed from arch/x86/kernel/cpu/perf_event_amd_uncore.c)4
-rw-r--r--arch/x86/events/core.c (renamed from arch/x86/kernel/cpu/perf_event.c)22
-rw-r--r--arch/x86/events/intel/bts.c (renamed from arch/x86/kernel/cpu/perf_event_intel_bts.c)2
-rw-r--r--arch/x86/events/intel/core.c (renamed from arch/x86/kernel/cpu/perf_event_intel.c)31
-rw-r--r--arch/x86/events/intel/cqm.c (renamed from arch/x86/kernel/cpu/perf_event_intel_cqm.c)34
-rw-r--r--arch/x86/events/intel/cstate.c (renamed from arch/x86/kernel/cpu/perf_event_intel_cstate.c)2
-rw-r--r--arch/x86/events/intel/ds.c (renamed from arch/x86/kernel/cpu/perf_event_intel_ds.c)56
-rw-r--r--arch/x86/events/intel/knc.c (renamed from arch/x86/kernel/cpu/perf_event_knc.c)6
-rw-r--r--arch/x86/events/intel/lbr.c (renamed from arch/x86/kernel/cpu/perf_event_intel_lbr.c)2
-rw-r--r--arch/x86/events/intel/p4.c (renamed from arch/x86/kernel/cpu/perf_event_p4.c)2
-rw-r--r--arch/x86/events/intel/p6.c (renamed from arch/x86/kernel/cpu/perf_event_p6.c)2
-rw-r--r--arch/x86/events/intel/pt.c (renamed from arch/x86/kernel/cpu/perf_event_intel_pt.c)4
-rw-r--r--arch/x86/events/intel/pt.h (renamed from arch/x86/kernel/cpu/intel_pt.h)0
-rw-r--r--arch/x86/events/intel/rapl.c (renamed from arch/x86/kernel/cpu/perf_event_intel_rapl.c)412
-rw-r--r--arch/x86/events/intel/uncore.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore.c)677
-rw-r--r--arch/x86/events/intel/uncore.h (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore.h)55
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c)8
-rw-r--r--arch/x86/events/intel/uncore_snb.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c)16
-rw-r--r--arch/x86/events/intel/uncore_snbep.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c)21
-rw-r--r--arch/x86/events/msr.c (renamed from arch/x86/kernel/cpu/perf_event_msr.c)0
-rw-r--r--arch/x86/events/perf_event.h (renamed from arch/x86/kernel/cpu/perf_event.h)5
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/amd_nb.h26
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/arch_hweight.h2
-rw-r--r--arch/x86/include/asm/asm.h40
-rw-r--r--arch/x86/include/asm/barrier.h15
-rw-r--r--arch/x86/include/asm/bitops.h36
-rw-r--r--arch/x86/include/asm/cacheflush.h6
-rw-r--r--arch/x86/include/asm/clocksource.h9
-rw-r--r--arch/x86/include/asm/cmpxchg.h1
-rw-r--r--arch/x86/include/asm/cpufeature.h448
-rw-r--r--arch/x86/include/asm/cpufeatures.h300
-rw-r--r--arch/x86/include/asm/desc_defs.h23
-rw-r--r--arch/x86/include/asm/dmi.h2
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/elf.h2
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/fpu/internal.h18
-rw-r--r--arch/x86/include/asm/fpu/xstate.h9
-rw-r--r--arch/x86/include/asm/frame.h59
-rw-r--r--arch/x86/include/asm/gpio.h4
-rw-r--r--arch/x86/include/asm/imr.h2
-rw-r--r--arch/x86/include/asm/ipi.h58
-rw-r--r--arch/x86/include/asm/irq_work.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h31
-rw-r--r--arch/x86/include/asm/kvm_page_track.h61
-rw-r--r--arch/x86/include/asm/kvm_para.h7
-rw-r--r--arch/x86/include/asm/livepatch.h4
-rw-r--r--arch/x86/include/asm/mce.h70
-rw-r--r--arch/x86/include/asm/microcode.h26
-rw-r--r--arch/x86/include/asm/microcode_intel.h1
-rw-r--r--arch/x86/include/asm/mmu.h3
-rw-r--r--arch/x86/include/asm/msr-index.h15
-rw-r--r--arch/x86/include/asm/mwait.h2
-rw-r--r--arch/x86/include/asm/pci.h18
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/pmem.h5
-rw-r--r--arch/x86/include/asm/processor.h10
-rw-r--r--arch/x86/include/asm/proto.h15
-rw-r--r--arch/x86/include/asm/sections.h2
-rw-r--r--arch/x86/include/asm/sighandling.h1
-rw-r--r--arch/x86/include/asm/smap.h2
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/string_64.h13
-rw-r--r--arch/x86/include/asm/thread_info.h9
-rw-r--r--arch/x86/include/asm/tlbflush.h58
-rw-r--r--arch/x86/include/asm/topology.h11
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/include/asm/uaccess.h16
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/vdso.h3
-rw-r--r--arch/x86/include/asm/vgtod.h6
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h4
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h32
-rw-r--r--arch/x86/include/uapi/asm/ucontext.h53
-rw-r--r--arch/x86/kernel/aperture_64.c12
-rw-r--r--arch/x86/kernel/apic/apic.c14
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c4
-rw-r--r--arch/x86/kernel/apic/ipi.c60
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/asm-offsets_32.c7
-rw-r--r--arch/x86/kernel/asm-offsets_64.c10
-rw-r--r--arch/x86/kernel/cpu/Makefile26
-rw-r--r--arch/x86/kernel/cpu/amd.c23
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c2
-rw-r--r--arch/x86/kernel/cpu/centaur.c12
-rw-r--r--arch/x86/kernel/cpu/common.c99
-rw-r--r--arch/x86/kernel/cpu/cyrix.c11
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c25
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/match.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c22
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c85
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c231
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c18
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c17
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c19
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c285
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_lib.c58
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh6
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/centaur.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c44
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c23
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c22
-rw-r--r--arch/x86/kernel/cpu/rdrand.c2
-rw-r--r--arch/x86/kernel/cpu/topology.c4
-rw-r--r--arch/x86/kernel/cpu/transmeta.c10
-rw-r--r--arch/x86/kernel/cpu/vmware.c5
-rw-r--r--arch/x86/kernel/crash.c41
-rw-r--r--arch/x86/kernel/dumpstack.c5
-rw-r--r--arch/x86/kernel/e820.c39
-rw-r--r--arch/x86/kernel/fpu/core.c56
-rw-r--r--arch/x86/kernel/fpu/init.c35
-rw-r--r--arch/x86/kernel/fpu/xstate.c3
-rw-r--r--arch/x86/kernel/ftrace.c17
-rw-r--r--arch/x86/kernel/head64.c14
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/head_64.S5
-rw-r--r--arch/x86/kernel/hpet.c1
-rw-r--r--arch/x86/kernel/kgdb.c8
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/mcount_64.S14
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c3
-rw-r--r--arch/x86/kernel/pmem.c4
-rw-r--r--arch/x86/kernel/process.c7
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/kernel/signal.c127
-rw-r--r--arch/x86/kernel/smpboot.c102
-rw-r--r--arch/x86/kernel/tboot.c2
-rw-r--r--arch/x86/kernel/test_nx.c2
-rw-r--r--arch/x86/kernel/test_rodata.c2
-rw-r--r--arch/x86/kernel/traps.c147
-rw-r--r--arch/x86/kernel/tsc.c67
-rw-r--r--arch/x86/kernel/verify_cpu.S2
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S36
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c2
-rw-r--r--arch/x86/kvm/Makefile3
-rw-r--r--arch/x86/kvm/assigned-dev.c14
-rw-r--r--arch/x86/kvm/cpuid.c14
-rw-r--r--arch/x86/kvm/cpuid.h9
-rw-r--r--arch/x86/kvm/hyperv.c50
-rw-r--r--arch/x86/kvm/i8254.c350
-rw-r--r--arch/x86/kvm/i8254.h17
-rw-r--r--arch/x86/kvm/ioapic.c30
-rw-r--r--arch/x86/kvm/ioapic.h17
-rw-r--r--arch/x86/kvm/irq.c9
-rw-r--r--arch/x86/kvm/irq.h8
-rw-r--r--arch/x86/kvm/irq_comm.c27
-rw-r--r--arch/x86/kvm/lapic.c162
-rw-r--r--arch/x86/kvm/lapic.h17
-rw-r--r--arch/x86/kvm/mmu.c506
-rw-r--r--arch/x86/kvm/mmu.h5
-rw-r--r--arch/x86/kvm/page_track.c222
-rw-r--r--arch/x86/kvm/paging_tmpl.h35
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm.c3
-rw-r--r--arch/x86/kvm/trace.h12
-rw-r--r--arch/x86/kvm/vmx.c128
-rw-r--r--arch/x86/kvm/x86.c158
-rw-r--r--arch/x86/kvm/x86.h16
-rw-r--r--arch/x86/lguest/boot.c8
-rw-r--r--arch/x86/lib/clear_page_64.S2
-rw-r--r--arch/x86/lib/cmdline.c60
-rw-r--r--arch/x86/lib/copy_page_64.S2
-rw-r--r--arch/x86/lib/copy_user_64.S2
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/memcpy_64.S119
-rw-r--r--arch/x86/lib/memmove_64.S2
-rw-r--r--arch/x86/lib/memset_64.S2
-rw-r--r--arch/x86/mm/dump_pagetables.c11
-rw-r--r--arch/x86/mm/extable.c100
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/gup.c2
-rw-r--r--arch/x86/mm/init.c36
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/init_64.c27
-rw-r--r--arch/x86/mm/kasan_init_64.c17
-rw-r--r--arch/x86/mm/kmmio.c88
-rw-r--r--arch/x86/mm/mmap.c14
-rw-r--r--arch/x86/mm/numa.c67
-rw-r--r--arch/x86/mm/pageattr.c20
-rw-r--r--arch/x86/mm/pat.c4
-rw-r--r--arch/x86/mm/setup_nx.c6
-rw-r--r--arch/x86/oprofile/op_model_amd.c1
-rw-r--r--arch/x86/pci/common.c1
-rw-r--r--arch/x86/pci/fixup.c28
-rw-r--r--arch/x86/pci/vmd.c35
-rw-r--r--arch/x86/platform/efi/quirks.c79
-rw-r--r--arch/x86/platform/geode/alix.c14
-rw-r--r--arch/x86/platform/geode/geos.c8
-rw-r--r--arch/x86/platform/geode/net5501.c8
-rw-r--r--arch/x86/platform/intel-mid/mfld.c5
-rw-r--r--arch/x86/platform/intel-mid/mrfl.c5
-rw-r--r--arch/x86/platform/intel-quark/imr.c59
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c30
-rw-r--r--arch/x86/um/asm/barrier.h2
-rw-r--r--arch/x86/um/sys_call_table_32.c4
-rw-r--r--arch/x86/um/sys_call_table_64.c7
-rw-r--r--arch/x86/um/user-offsets.c6
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/pmu.c2
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/xtensa/Kconfig2
-rw-r--r--arch/xtensa/include/asm/gpio.h4
-rw-r--r--arch/xtensa/include/asm/pci.h3
-rw-r--r--arch/xtensa/kernel/smp.c2
-rw-r--r--arch/xtensa/mm/fault.c2
-rw-r--r--arch/xtensa/platforms/iss/console.c4
-rw-r--r--arch/xtensa/platforms/xt2000/setup.c2
766 files changed, 16569 insertions, 13888 deletions
diff --git a/arch/alpha/include/asm/gpio.h b/arch/alpha/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/alpha/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index 98f2eeee8f68..a06c24b3a2e1 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -7,7 +7,6 @@
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <asm/machvec.h>
-#include <asm-generic/pci-bridge.h>
/*
* The following structure is used to manage multiple PCI busses.
@@ -66,13 +65,6 @@ extern void pcibios_set_master(struct pci_dev *dev);
decisions. */
#define PCI_DMA_BUS_IS_PHYS 0
-#ifdef CONFIG_PCI
-
-/* implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
-#endif
-
/* TODO: integrate with include/asm-generic/pci.h ? */
static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
{
diff --git a/arch/alpha/include/asm/serial.h b/arch/alpha/include/asm/serial.h
index 22909b83f473..e31557fc06cc 100644
--- a/arch/alpha/include/asm/serial.h
+++ b/arch/alpha/include/asm/serial.h
@@ -14,11 +14,11 @@
/* Standard COM flags (except for COM4, because of the 8514 problem) */
#ifdef CONFIG_SERIAL_8250_DETECT_IRQ
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
-#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ)
+#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ)
+#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ)
#else
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF
+#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST)
+#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF
#endif
#define SERIAL_PORT_DFNS \
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 2f24447fef92..46bf263c3153 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -168,7 +168,7 @@ smp_callin(void)
cpuid, current, current->active_mm));
preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 8a188bc1786a..07a5cb944d4f 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -17,6 +17,7 @@ config ARC
select GENERIC_FIND_FIRST_BIT
# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
select GENERIC_IRQ_SHOW
+ select GENERIC_PCI_IOMAP
select GENERIC_PENDING_IRQ if SMP
select GENERIC_SMP_IDLE_THREAD
select HAVE_ARCH_KGDB
@@ -37,6 +38,9 @@ config ARC
select PERF_USE_VMALLOC
select HAVE_DEBUG_STACKOVERFLOW
+config MIGHT_HAVE_PCI
+ bool
+
config TRACE_IRQFLAGS_SUPPORT
def_bool y
@@ -569,6 +573,28 @@ config FORCE_MAX_ZONEORDER
source "net/Kconfig"
source "drivers/Kconfig"
+
+menu "Bus Support"
+
+config PCI
+ bool "PCI support" if MIGHT_HAVE_PCI
+ help
+ PCI is the name of a bus system, i.e., the way the CPU talks to
+ the other stuff inside your box. Find out if your board/platform
+ has PCI.
+
+ Note: PCIe support for Synopsys Device will be available only
+ when HAPS DX is configured with PCIe RC bitmap. If you have PCI,
+ say Y, otherwise N.
+
+config PCI_SYSCALL
+ def_bool PCI
+
+source "drivers/pci/Kconfig"
+source "drivers/pci/pcie/Kconfig"
+
+endmenu
+
source "fs/Kconfig"
source "arch/arc/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/arc/include/asm/dma.h b/arch/arc/include/asm/dma.h
index ca7c45181de9..01e47a69b034 100644
--- a/arch/arc/include/asm/dma.h
+++ b/arch/arc/include/asm/dma.h
@@ -10,5 +10,10 @@
#define ASM_ARC_DMA_H
#define MAX_DMA_ADDRESS 0xC0000000
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy 0
+#endif
#endif
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index c5094de86403..7afe3356b770 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -30,19 +30,16 @@ static inline pmd_t pte_pmd(pte_t pte)
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
-#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd)))
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
-#define pmd_special(pmd) pte_special(pmd_pte(pmd))
#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot))
#define pmd_trans_huge(pmd) (pmd_val(pmd) & _PAGE_HW_SZ)
-#define pmd_trans_splitting(pmd) (pmd_trans_huge(pmd) && pmd_special(pmd))
#define pfn_pmd(pfn, prot) (__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 694ece8a0243..947bf0cfdec0 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -16,6 +16,15 @@
extern void __iomem *ioremap(unsigned long physaddr, unsigned long size);
extern void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
unsigned long flags);
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+ return (void __iomem *)port;
+}
+
+static inline void ioport_unmap(void __iomem *addr)
+{
+}
+
extern void iounmap(const void __iomem *addr);
#define ioremap_nocache(phy, sz) ioremap(phy, sz)
diff --git a/arch/arc/include/asm/pci.h b/arch/arc/include/asm/pci.h
new file mode 100644
index 000000000000..ba56c23c1b20
--- /dev/null
+++ b/arch/arc/include/asm/pci.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015-2016 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ARC_PCI_H
+#define _ASM_ARC_PCI_H
+
+#ifdef __KERNEL__
+#include <linux/ioport.h>
+
+#define PCIBIOS_MIN_IO 0x100
+#define PCIBIOS_MIN_MEM 0x100000
+
+#define pcibios_assign_all_busses() 1
+/*
+ * The PCI address space does equal the physical memory address space.
+ * The networking and block device layers use this boolean for bounce
+ * buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS 1
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_ARC_PCI_H */
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index e7f3625a19b5..1bc2036b19d7 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -12,6 +12,7 @@ obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o
obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o
obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o
obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o
+obj-$(CONFIG_PCI) += pcibios.o
obj-$(CONFIG_MODULES) += arcksyms.o module.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/arc/kernel/pcibios.c b/arch/arc/kernel/pcibios.c
new file mode 100644
index 000000000000..72e1d73d0bd6
--- /dev/null
+++ b/arch/arc/kernel/pcibios.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/pci.h>
+
+/*
+ * We don't have to worry about legacy ISA devices, so nothing to do here
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size, resource_size_t align)
+{
+ return res->start;
+}
+
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+}
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 424e937da5c8..4cb3add77c75 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -142,7 +142,7 @@ void start_kernel_secondary(void)
local_irq_enable();
preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/*
diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig
index d475f9d4847c..426ac4b8bb39 100644
--- a/arch/arc/plat-axs10x/Kconfig
+++ b/arch/arc/plat-axs10x/Kconfig
@@ -11,6 +11,7 @@ menuconfig ARC_PLAT_AXS10X
select DW_APB_ICTL
select GPIO_DWAPB
select OF_GPIO
+ select MIGHT_HAVE_PCI
select GENERIC_IRQ_CHIP
select ARCH_REQUIRE_GPIOLIB
help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4f799e567fc8..1d00da16d980 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1212,7 +1212,6 @@ config PCI_HOST_ITE8152
select DMABOUNCE
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
source "drivers/pcmcia/Kconfig"
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index a0986c65be0c..592e65c3a4e0 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -562,8 +562,7 @@
extcon_usb2: tps659038_usb {
compatible = "ti,palmas-usb-vid";
ti,enable-vbus-detection;
- ti,enable-id-detection;
- id-gpios = <&gpio7 24 GPIO_ACTIVE_HIGH>;
+ vbus-gpio = <&gpio4 21 GPIO_ACTIVE_HIGH>;
};
};
diff --git a/arch/arm/boot/dts/armada-xp-axpwifiap.dts b/arch/arm/boot/dts/armada-xp-axpwifiap.dts
index 23fc670c0427..5c21b236721f 100644
--- a/arch/arm/boot/dts/armada-xp-axpwifiap.dts
+++ b/arch/arm/boot/dts/armada-xp-axpwifiap.dts
@@ -70,8 +70,8 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts
index 30657302305d..cca366590561 100644
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts
@@ -76,8 +76,8 @@
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
devbus-bootcs {
diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts
index a1ded01d0c07..061f4237760e 100644
--- a/arch/arm/boot/dts/armada-xp-gp.dts
+++ b/arch/arm/boot/dts/armada-xp-gp.dts
@@ -95,8 +95,8 @@
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
devbus-bootcs {
diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
index fb9e1bbf2338..8af463f26ea1 100644
--- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
+++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
@@ -65,8 +65,8 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
index 6e9820e141f8..b89e6cf1271a 100644
--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
@@ -70,8 +70,8 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts
index 6ab33837a2b6..6522b04f4a8e 100644
--- a/arch/arm/boot/dts/armada-xp-matrix.dts
+++ b/arch/arm/boot/dts/armada-xp-matrix.dts
@@ -68,8 +68,8 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
internal-regs {
serial@12000 {
diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
index 62175a8848bc..d19f44c70925 100644
--- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
+++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
@@ -64,8 +64,8 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index 3aa29a91c7b8..ed3b889d16ce 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -65,9 +65,9 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
- MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+ MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
MBUS_ID(0x0c, 0x04) 0 0 0xd1200000 0x100000>;
devbus-bootcs {
diff --git a/arch/arm/boot/dts/armada-xp-synology-ds414.dts b/arch/arm/boot/dts/armada-xp-synology-ds414.dts
index 2391b11dc546..d17dab0a6f51 100644
--- a/arch/arm/boot/dts/armada-xp-synology-ds414.dts
+++ b/arch/arm/boot/dts/armada-xp-synology-ds414.dts
@@ -78,8 +78,8 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
- MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index c4d9175b90dc..f82aa44c3cee 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1500,6 +1500,16 @@
0x48485200 0x2E00>;
#address-cells = <1>;
#size-cells = <1>;
+
+ /*
+ * Do not allow gating of cpsw clock as workaround
+ * for errata i877. Keeping internal clock disabled
+ * causes the device switching characteristics
+ * to degrade over time and eventually fail to meet
+ * the data manual delay time/skew specs.
+ */
+ ti,no-idle;
+
/*
* rx_thresh_pend
* rx_pend
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index 1c6c07538a78..302d1168f424 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -569,7 +569,7 @@
};
};
- iio_hwmon {
+ iio-hwmon {
compatible = "iio-hwmon";
io-channels = <&lradc 8>;
};
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index fae7b9069fc4..f637ec900cc8 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -1256,7 +1256,7 @@
};
};
- iio_hwmon {
+ iio-hwmon {
compatible = "iio-hwmon";
io-channels = <&lradc 8>;
};
diff --git a/arch/arm/boot/dts/mt2701-pinfunc.h b/arch/arm/boot/dts/mt2701-pinfunc.h
new file mode 100644
index 000000000000..e24ebc8d928e
--- /dev/null
+++ b/arch/arm/boot/dts/mt2701-pinfunc.h
@@ -0,0 +1,735 @@
+/*
+ * Copyright (c) 2015 MediaTek Inc.
+ * Author: Biao Huang <biao.huang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DTS_MT2701_PINFUNC_H
+#define __DTS_MT2701_PINFUNC_H
+
+#include <dt-bindings/pinctrl/mt65xx.h>
+
+#define MT2701_PIN_0_PWRAP_SPI0_MI__FUNC_GPIO0 (MTK_PIN_NO(0) | 0)
+#define MT2701_PIN_0_PWRAP_SPI0_MI__FUNC_PWRAP_SPIDO (MTK_PIN_NO(0) | 1)
+#define MT2701_PIN_0_PWRAP_SPI0_MI__FUNC_PWRAP_SPIDI (MTK_PIN_NO(0) | 2)
+
+#define MT2701_PIN_1_PWRAP_SPI0_MO__FUNC_GPIO1 (MTK_PIN_NO(1) | 0)
+#define MT2701_PIN_1_PWRAP_SPI0_MO__FUNC_PWRAP_SPIDI (MTK_PIN_NO(1) | 1)
+#define MT2701_PIN_1_PWRAP_SPI0_MO__FUNC_PWRAP_SPIDO (MTK_PIN_NO(1) | 2)
+
+#define MT2701_PIN_2_PWRAP_INT__FUNC_GPIO2 (MTK_PIN_NO(2) | 0)
+#define MT2701_PIN_2_PWRAP_INT__FUNC_PWRAP_INT (MTK_PIN_NO(2) | 1)
+
+#define MT2701_PIN_3_PWRAP_SPI0_CK__FUNC_GPIO3 (MTK_PIN_NO(3) | 0)
+#define MT2701_PIN_3_PWRAP_SPI0_CK__FUNC_PWRAP_SPICK_I (MTK_PIN_NO(3) | 1)
+
+#define MT2701_PIN_4_PWRAP_SPI0_CSN__FUNC_GPIO4 (MTK_PIN_NO(4) | 0)
+#define MT2701_PIN_4_PWRAP_SPI0_CSN__FUNC_PWRAP_SPICS_B_I (MTK_PIN_NO(4) | 1)
+
+#define MT2701_PIN_5_PWRAP_SPI0_CK2__FUNC_GPIO5 (MTK_PIN_NO(5) | 0)
+#define MT2701_PIN_5_PWRAP_SPI0_CK2__FUNC_PWRAP_SPICK2_I (MTK_PIN_NO(5) | 1)
+#define MT2701_PIN_5_PWRAP_SPI0_CK2__FUNC_ANT_SEL1 (MTK_PIN_NO(5) | 5)
+
+#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_GPIO6 (MTK_PIN_NO(6) | 0)
+#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_PWRAP_SPICS2_B_I (MTK_PIN_NO(6) | 1)
+#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_ANT_SEL0 (MTK_PIN_NO(6) | 5)
+#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_DBG_MON_A_0 (MTK_PIN_NO(6) | 7)
+
+#define MT2701_PIN_7_SPI1_CSN__FUNC_GPIO7 (MTK_PIN_NO(7) | 0)
+#define MT2701_PIN_7_SPI1_CSN__FUNC_SPI1_CS (MTK_PIN_NO(7) | 1)
+#define MT2701_PIN_7_SPI1_CSN__FUNC_KCOL0 (MTK_PIN_NO(7) | 4)
+#define MT2701_PIN_7_SPI1_CSN__FUNC_DBG_MON_B_12 (MTK_PIN_NO(7) | 7)
+
+#define MT2701_PIN_8_SPI1_MI__FUNC_GPIO8 (MTK_PIN_NO(8) | 0)
+#define MT2701_PIN_8_SPI1_MI__FUNC_SPI1_MI (MTK_PIN_NO(8) | 1)
+#define MT2701_PIN_8_SPI1_MI__FUNC_SPI1_MO (MTK_PIN_NO(8) | 2)
+#define MT2701_PIN_8_SPI1_MI__FUNC_KCOL1 (MTK_PIN_NO(8) | 4)
+#define MT2701_PIN_8_SPI1_MI__FUNC_DBG_MON_B_13 (MTK_PIN_NO(8) | 7)
+
+#define MT2701_PIN_9_SPI1_MO__FUNC_GPIO9 (MTK_PIN_NO(9) | 0)
+#define MT2701_PIN_9_SPI1_MO__FUNC_SPI1_MO (MTK_PIN_NO(9) | 1)
+#define MT2701_PIN_9_SPI1_MO__FUNC_SPI1_MI (MTK_PIN_NO(9) | 2)
+#define MT2701_PIN_9_SPI1_MO__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(9) | 3)
+#define MT2701_PIN_9_SPI1_MO__FUNC_KCOL2 (MTK_PIN_NO(9) | 4)
+#define MT2701_PIN_9_SPI1_MO__FUNC_DBG_MON_B_14 (MTK_PIN_NO(9) | 7)
+
+#define MT2701_PIN_10_RTC32K_CK__FUNC_GPIO10 (MTK_PIN_NO(10) | 0)
+#define MT2701_PIN_10_RTC32K_CK__FUNC_RTC32K_CK (MTK_PIN_NO(10) | 1)
+
+#define MT2701_PIN_11_WATCHDOG__FUNC_GPIO11 (MTK_PIN_NO(11) | 0)
+#define MT2701_PIN_11_WATCHDOG__FUNC_WATCHDOG (MTK_PIN_NO(11) | 1)
+
+#define MT2701_PIN_12_SRCLKENA__FUNC_GPIO12 (MTK_PIN_NO(12) | 0)
+#define MT2701_PIN_12_SRCLKENA__FUNC_SRCLKENA (MTK_PIN_NO(12) | 1)
+
+#define MT2701_PIN_13_SRCLKENAI__FUNC_GPIO13 (MTK_PIN_NO(13) | 0)
+#define MT2701_PIN_13_SRCLKENAI__FUNC_SRCLKENAI (MTK_PIN_NO(13) | 1)
+
+#define MT2701_PIN_14_URXD2__FUNC_GPIO14 (MTK_PIN_NO(14) | 0)
+#define MT2701_PIN_14_URXD2__FUNC_URXD2 (MTK_PIN_NO(14) | 1)
+#define MT2701_PIN_14_URXD2__FUNC_UTXD2 (MTK_PIN_NO(14) | 2)
+#define MT2701_PIN_14_URXD2__FUNC_SRCCLKENAI2 (MTK_PIN_NO(14) | 5)
+#define MT2701_PIN_14_URXD2__FUNC_DBG_MON_B_30 (MTK_PIN_NO(14) | 7)
+
+#define MT2701_PIN_15_UTXD2__FUNC_GPIO15 (MTK_PIN_NO(15) | 0)
+#define MT2701_PIN_15_UTXD2__FUNC_UTXD2 (MTK_PIN_NO(15) | 1)
+#define MT2701_PIN_15_UTXD2__FUNC_URXD2 (MTK_PIN_NO(15) | 2)
+#define MT2701_PIN_15_UTXD2__FUNC_DBG_MON_B_31 (MTK_PIN_NO(15) | 7)
+
+#define MT2701_PIN_18_PCM_CLK__FUNC_GPIO18 (MTK_PIN_NO(18) | 0)
+#define MT2701_PIN_18_PCM_CLK__FUNC_PCM_CLK0 (MTK_PIN_NO(18) | 1)
+#define MT2701_PIN_18_PCM_CLK__FUNC_MRG_CLK (MTK_PIN_NO(18) | 2)
+#define MT2701_PIN_18_PCM_CLK__FUNC_MM_TEST_CK (MTK_PIN_NO(18) | 4)
+#define MT2701_PIN_18_PCM_CLK__FUNC_CONN_DSP_JCK (MTK_PIN_NO(18) | 5)
+#define MT2701_PIN_18_PCM_CLK__FUNC_WCN_PCM_CLKO (MTK_PIN_NO(18) | 6)
+#define MT2701_PIN_18_PCM_CLK__FUNC_DBG_MON_A_3 (MTK_PIN_NO(18) | 7)
+
+#define MT2701_PIN_19_PCM_SYNC__FUNC_GPIO19 (MTK_PIN_NO(19) | 0)
+#define MT2701_PIN_19_PCM_SYNC__FUNC_PCM_SYNC (MTK_PIN_NO(19) | 1)
+#define MT2701_PIN_19_PCM_SYNC__FUNC_MRG_SYNC (MTK_PIN_NO(19) | 2)
+#define MT2701_PIN_19_PCM_SYNC__FUNC_CONN_DSP_JINTP (MTK_PIN_NO(19) | 5)
+#define MT2701_PIN_19_PCM_SYNC__FUNC_WCN_PCM_SYNC (MTK_PIN_NO(19) | 6)
+#define MT2701_PIN_19_PCM_SYNC__FUNC_DBG_MON_A_5 (MTK_PIN_NO(19) | 7)
+
+#define MT2701_PIN_20_PCM_RX__FUNC_GPIO20 (MTK_PIN_NO(20) | 0)
+#define MT2701_PIN_20_PCM_RX__FUNC_PCM_RX (MTK_PIN_NO(20) | 1)
+#define MT2701_PIN_20_PCM_RX__FUNC_MRG_RX (MTK_PIN_NO(20) | 2)
+#define MT2701_PIN_20_PCM_RX__FUNC_MRG_TX (MTK_PIN_NO(20) | 3)
+#define MT2701_PIN_20_PCM_RX__FUNC_PCM_TX (MTK_PIN_NO(20) | 4)
+#define MT2701_PIN_20_PCM_RX__FUNC_CONN_DSP_JDI (MTK_PIN_NO(20) | 5)
+#define MT2701_PIN_20_PCM_RX__FUNC_WCN_PCM_RX (MTK_PIN_NO(20) | 6)
+#define MT2701_PIN_20_PCM_RX__FUNC_DBG_MON_A_4 (MTK_PIN_NO(20) | 7)
+
+#define MT2701_PIN_21_PCM_TX__FUNC_GPIO21 (MTK_PIN_NO(21) | 0)
+#define MT2701_PIN_21_PCM_TX__FUNC_PCM_TX (MTK_PIN_NO(21) | 1)
+#define MT2701_PIN_21_PCM_TX__FUNC_MRG_TX (MTK_PIN_NO(21) | 2)
+#define MT2701_PIN_21_PCM_TX__FUNC_MRG_RX (MTK_PIN_NO(21) | 3)
+#define MT2701_PIN_21_PCM_TX__FUNC_PCM_RX (MTK_PIN_NO(21) | 4)
+#define MT2701_PIN_21_PCM_TX__FUNC_CONN_DSP_JMS (MTK_PIN_NO(21) | 5)
+#define MT2701_PIN_21_PCM_TX__FUNC_WCN_PCM_TX (MTK_PIN_NO(21) | 6)
+#define MT2701_PIN_21_PCM_TX__FUNC_DBG_MON_A_2 (MTK_PIN_NO(21) | 7)
+
+#define MT2701_PIN_22_EINT0__FUNC_GPIO22 (MTK_PIN_NO(22) | 0)
+#define MT2701_PIN_22_EINT0__FUNC_UCTS0 (MTK_PIN_NO(22) | 1)
+#define MT2701_PIN_22_EINT0__FUNC_KCOL3 (MTK_PIN_NO(22) | 3)
+#define MT2701_PIN_22_EINT0__FUNC_CONN_DSP_JDO (MTK_PIN_NO(22) | 4)
+#define MT2701_PIN_22_EINT0__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(22) | 5)
+#define MT2701_PIN_22_EINT0__FUNC_DBG_MON_A_30 (MTK_PIN_NO(22) | 7)
+#define MT2701_PIN_22_EINT0__FUNC_PCIE0_PERST_N (MTK_PIN_NO(22) | 10)
+
+#define MT2701_PIN_23_EINT1__FUNC_GPIO23 (MTK_PIN_NO(23) | 0)
+#define MT2701_PIN_23_EINT1__FUNC_URTS0 (MTK_PIN_NO(23) | 1)
+#define MT2701_PIN_23_EINT1__FUNC_KCOL2 (MTK_PIN_NO(23) | 3)
+#define MT2701_PIN_23_EINT1__FUNC_CONN_MCU_TDO (MTK_PIN_NO(23) | 4)
+#define MT2701_PIN_23_EINT1__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(23) | 5)
+#define MT2701_PIN_23_EINT1__FUNC_DBG_MON_A_29 (MTK_PIN_NO(23) | 7)
+#define MT2701_PIN_23_EINT1__FUNC_PCIE1_PERST_N (MTK_PIN_NO(23) | 10)
+
+#define MT2701_PIN_24_EINT2__FUNC_GPIO24 (MTK_PIN_NO(24) | 0)
+#define MT2701_PIN_24_EINT2__FUNC_UCTS1 (MTK_PIN_NO(24) | 1)
+#define MT2701_PIN_24_EINT2__FUNC_KCOL1 (MTK_PIN_NO(24) | 3)
+#define MT2701_PIN_24_EINT2__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(24) | 4)
+#define MT2701_PIN_24_EINT2__FUNC_DBG_MON_A_28 (MTK_PIN_NO(24) | 7)
+#define MT2701_PIN_24_EINT2__FUNC_PCIE2_PERST_N (MTK_PIN_NO(24) | 10)
+
+#define MT2701_PIN_25_EINT3__FUNC_GPIO25 (MTK_PIN_NO(25) | 0)
+#define MT2701_PIN_25_EINT3__FUNC_URTS1 (MTK_PIN_NO(25) | 1)
+#define MT2701_PIN_25_EINT3__FUNC_KCOL0 (MTK_PIN_NO(25) | 3)
+#define MT2701_PIN_25_EINT3__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(25) | 4)
+#define MT2701_PIN_25_EINT3__FUNC_DBG_MON_A_27 (MTK_PIN_NO(25) | 7)
+
+#define MT2701_PIN_26_EINT4__FUNC_GPIO26 (MTK_PIN_NO(26) | 0)
+#define MT2701_PIN_26_EINT4__FUNC_UCTS3 (MTK_PIN_NO(26) | 1)
+#define MT2701_PIN_26_EINT4__FUNC_DRV_VBUS_P1 (MTK_PIN_NO(26) | 2)
+#define MT2701_PIN_26_EINT4__FUNC_KROW3 (MTK_PIN_NO(26) | 3)
+#define MT2701_PIN_26_EINT4__FUNC_CONN_MCU_TCK0 (MTK_PIN_NO(26) | 4)
+#define MT2701_PIN_26_EINT4__FUNC_CONN_MCU_AICE_JCKC (MTK_PIN_NO(26) | 5)
+#define MT2701_PIN_26_EINT4__FUNC_PCIE2_WAKE_N (MTK_PIN_NO(26) | 6)
+#define MT2701_PIN_26_EINT4__FUNC_DBG_MON_A_26 (MTK_PIN_NO(26) | 7)
+
+#define MT2701_PIN_27_EINT5__FUNC_GPIO27 (MTK_PIN_NO(27) | 0)
+#define MT2701_PIN_27_EINT5__FUNC_URTS3 (MTK_PIN_NO(27) | 1)
+#define MT2701_PIN_27_EINT5__FUNC_IDDIG_P1 (MTK_PIN_NO(27) | 2)
+#define MT2701_PIN_27_EINT5__FUNC_KROW2 (MTK_PIN_NO(27) | 3)
+#define MT2701_PIN_27_EINT5__FUNC_CONN_MCU_TDI (MTK_PIN_NO(27) | 4)
+#define MT2701_PIN_27_EINT5__FUNC_PCIE1_WAKE_N (MTK_PIN_NO(27) | 6)
+#define MT2701_PIN_27_EINT5__FUNC_DBG_MON_A_25 (MTK_PIN_NO(27) | 7)
+
+#define MT2701_PIN_28_EINT6__FUNC_GPIO28 (MTK_PIN_NO(28) | 0)
+#define MT2701_PIN_28_EINT6__FUNC_DRV_VBUS (MTK_PIN_NO(28) | 1)
+#define MT2701_PIN_28_EINT6__FUNC_KROW1 (MTK_PIN_NO(28) | 3)
+#define MT2701_PIN_28_EINT6__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(28) | 4)
+#define MT2701_PIN_28_EINT6__FUNC_PCIE0_WAKE_N (MTK_PIN_NO(28) | 6)
+#define MT2701_PIN_28_EINT6__FUNC_DBG_MON_A_24 (MTK_PIN_NO(28) | 7)
+
+#define MT2701_PIN_29_EINT7__FUNC_GPIO29 (MTK_PIN_NO(29) | 0)
+#define MT2701_PIN_29_EINT7__FUNC_IDDIG (MTK_PIN_NO(29) | 1)
+#define MT2701_PIN_29_EINT7__FUNC_MSDC1_WP (MTK_PIN_NO(29) | 2)
+#define MT2701_PIN_29_EINT7__FUNC_KROW0 (MTK_PIN_NO(29) | 3)
+#define MT2701_PIN_29_EINT7__FUNC_CONN_MCU_TMS (MTK_PIN_NO(29) | 4)
+#define MT2701_PIN_29_EINT7__FUNC_CONN_MCU_AICE_JMSC (MTK_PIN_NO(29) | 5)
+#define MT2701_PIN_29_EINT7__FUNC_DBG_MON_A_23 (MTK_PIN_NO(29) | 7)
+#define MT2701_PIN_29_EINT7__FUNC_PCIE2_PERST_N (MTK_PIN_NO(29) | 14)
+
+#define MT2701_PIN_33_I2S1_DATA__FUNC_GPIO33 (MTK_PIN_NO(33) | 0)
+#define MT2701_PIN_33_I2S1_DATA__FUNC_I2S1_DATA (MTK_PIN_NO(33) | 1)
+#define MT2701_PIN_33_I2S1_DATA__FUNC_I2S1_DATA_BYPS (MTK_PIN_NO(33) | 2)
+#define MT2701_PIN_33_I2S1_DATA__FUNC_PCM_TX (MTK_PIN_NO(33) | 3)
+#define MT2701_PIN_33_I2S1_DATA__FUNC_IMG_TEST_CK (MTK_PIN_NO(33) | 4)
+#define MT2701_PIN_33_I2S1_DATA__FUNC_G1_RXD0 (MTK_PIN_NO(33) | 5)
+#define MT2701_PIN_33_I2S1_DATA__FUNC_WCN_PCM_TX (MTK_PIN_NO(33) | 6)
+#define MT2701_PIN_33_I2S1_DATA__FUNC_DBG_MON_B_8 (MTK_PIN_NO(33) | 7)
+
+#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_GPIO34 (MTK_PIN_NO(34) | 0)
+#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_I2S1_DATA_IN (MTK_PIN_NO(34) | 1)
+#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_PCM_RX (MTK_PIN_NO(34) | 3)
+#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_VDEC_TEST_CK (MTK_PIN_NO(34) | 4)
+#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_G1_RXD1 (MTK_PIN_NO(34) | 5)
+#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_WCN_PCM_RX (MTK_PIN_NO(34) | 6)
+#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_DBG_MON_B_7 (MTK_PIN_NO(34) | 7)
+
+#define MT2701_PIN_35_I2S1_BCK__FUNC_GPIO35 (MTK_PIN_NO(35) | 0)
+#define MT2701_PIN_35_I2S1_BCK__FUNC_I2S1_BCK (MTK_PIN_NO(35) | 1)
+#define MT2701_PIN_35_I2S1_BCK__FUNC_PCM_CLK0 (MTK_PIN_NO(35) | 3)
+#define MT2701_PIN_35_I2S1_BCK__FUNC_G1_RXD2 (MTK_PIN_NO(35) | 5)
+#define MT2701_PIN_35_I2S1_BCK__FUNC_WCN_PCM_CLKO (MTK_PIN_NO(35) | 6)
+#define MT2701_PIN_35_I2S1_BCK__FUNC_DBG_MON_B_9 (MTK_PIN_NO(35) | 7)
+
+#define MT2701_PIN_36_I2S1_LRCK__FUNC_GPIO36 (MTK_PIN_NO(36) | 0)
+#define MT2701_PIN_36_I2S1_LRCK__FUNC_I2S1_LRCK (MTK_PIN_NO(36) | 1)
+#define MT2701_PIN_36_I2S1_LRCK__FUNC_PCM_SYNC (MTK_PIN_NO(36) | 3)
+#define MT2701_PIN_36_I2S1_LRCK__FUNC_G1_RXD3 (MTK_PIN_NO(36) | 5)
+#define MT2701_PIN_36_I2S1_LRCK__FUNC_WCN_PCM_SYNC (MTK_PIN_NO(36) | 6)
+#define MT2701_PIN_36_I2S1_LRCK__FUNC_DBG_MON_B_10 (MTK_PIN_NO(36) | 7)
+
+#define MT2701_PIN_37_I2S1_MCLK__FUNC_GPIO37 (MTK_PIN_NO(37) | 0)
+#define MT2701_PIN_37_I2S1_MCLK__FUNC_I2S1_MCLK (MTK_PIN_NO(37) | 1)
+#define MT2701_PIN_37_I2S1_MCLK__FUNC_G1_RXDV (MTK_PIN_NO(37) | 5)
+#define MT2701_PIN_37_I2S1_MCLK__FUNC_DBG_MON_B_11 (MTK_PIN_NO(37) | 7)
+
+#define MT2701_PIN_39_JTMS__FUNC_GPIO39 (MTK_PIN_NO(39) | 0)
+#define MT2701_PIN_39_JTMS__FUNC_JTMS (MTK_PIN_NO(39) | 1)
+#define MT2701_PIN_39_JTMS__FUNC_CONN_MCU_TMS (MTK_PIN_NO(39) | 2)
+#define MT2701_PIN_39_JTMS__FUNC_CONN_MCU_AICE_JMSC (MTK_PIN_NO(39) | 3)
+#define MT2701_PIN_39_JTMS__FUNC_DFD_TMS_XI (MTK_PIN_NO(39) | 4)
+
+#define MT2701_PIN_40_JTCK__FUNC_GPIO40 (MTK_PIN_NO(40) | 0)
+#define MT2701_PIN_40_JTCK__FUNC_JTCK (MTK_PIN_NO(40) | 1)
+#define MT2701_PIN_40_JTCK__FUNC_CONN_MCU_TCK1 (MTK_PIN_NO(40) | 2)
+#define MT2701_PIN_40_JTCK__FUNC_CONN_MCU_AICE_JCKC (MTK_PIN_NO(40) | 3)
+#define MT2701_PIN_40_JTCK__FUNC_DFD_TCK_XI (MTK_PIN_NO(40) | 4)
+
+#define MT2701_PIN_41_JTDI__FUNC_GPIO41 (MTK_PIN_NO(41) | 0)
+#define MT2701_PIN_41_JTDI__FUNC_JTDI (MTK_PIN_NO(41) | 1)
+#define MT2701_PIN_41_JTDI__FUNC_CONN_MCU_TDI (MTK_PIN_NO(41) | 2)
+#define MT2701_PIN_41_JTDI__FUNC_DFD_TDI_XI (MTK_PIN_NO(41) | 4)
+
+#define MT2701_PIN_42_JTDO__FUNC_GPIO42 (MTK_PIN_NO(42) | 0)
+#define MT2701_PIN_42_JTDO__FUNC_JTDO (MTK_PIN_NO(42) | 1)
+#define MT2701_PIN_42_JTDO__FUNC_CONN_MCU_TDO (MTK_PIN_NO(42) | 2)
+#define MT2701_PIN_42_JTDO__FUNC_DFD_TDO (MTK_PIN_NO(42) | 4)
+
+#define MT2701_PIN_43_NCLE__FUNC_GPIO43 (MTK_PIN_NO(43) | 0)
+#define MT2701_PIN_43_NCLE__FUNC_NCLE (MTK_PIN_NO(43) | 1)
+#define MT2701_PIN_43_NCLE__FUNC_EXT_XCS2 (MTK_PIN_NO(43) | 2)
+
+#define MT2701_PIN_44_NCEB1__FUNC_GPIO44 (MTK_PIN_NO(44) | 0)
+#define MT2701_PIN_44_NCEB1__FUNC_NCEB1 (MTK_PIN_NO(44) | 1)
+#define MT2701_PIN_44_NCEB1__FUNC_IDDIG (MTK_PIN_NO(44) | 2)
+
+#define MT2701_PIN_45_NCEB0__FUNC_GPIO45 (MTK_PIN_NO(45) | 0)
+#define MT2701_PIN_45_NCEB0__FUNC_NCEB0 (MTK_PIN_NO(45) | 1)
+#define MT2701_PIN_45_NCEB0__FUNC_DRV_VBUS (MTK_PIN_NO(45) | 2)
+
+#define MT2701_PIN_46_IR__FUNC_GPIO46 (MTK_PIN_NO(46) | 0)
+#define MT2701_PIN_46_IR__FUNC_IR (MTK_PIN_NO(46) | 1)
+
+#define MT2701_PIN_47_NREB__FUNC_GPIO47 (MTK_PIN_NO(47) | 0)
+#define MT2701_PIN_47_NREB__FUNC_NREB (MTK_PIN_NO(47) | 1)
+#define MT2701_PIN_47_NREB__FUNC_IDDIG_P1 (MTK_PIN_NO(47) | 2)
+
+#define MT2701_PIN_48_NRNB__FUNC_GPIO48 (MTK_PIN_NO(48) | 0)
+#define MT2701_PIN_48_NRNB__FUNC_NRNB (MTK_PIN_NO(48) | 1)
+#define MT2701_PIN_48_NRNB__FUNC_DRV_VBUS_P1 (MTK_PIN_NO(48) | 2)
+
+#define MT2701_PIN_49_I2S0_DATA__FUNC_GPIO49 (MTK_PIN_NO(49) | 0)
+#define MT2701_PIN_49_I2S0_DATA__FUNC_I2S0_DATA (MTK_PIN_NO(49) | 1)
+#define MT2701_PIN_49_I2S0_DATA__FUNC_I2S0_DATA_BYPS (MTK_PIN_NO(49) | 2)
+#define MT2701_PIN_49_I2S0_DATA__FUNC_PCM_TX (MTK_PIN_NO(49) | 3)
+#define MT2701_PIN_49_I2S0_DATA__FUNC_WCN_I2S_DO (MTK_PIN_NO(49) | 6)
+#define MT2701_PIN_49_I2S0_DATA__FUNC_DBG_MON_B_3 (MTK_PIN_NO(49) | 7)
+
+#define MT2701_PIN_53_SPI0_CSN__FUNC_GPIO53 (MTK_PIN_NO(53) | 0)
+#define MT2701_PIN_53_SPI0_CSN__FUNC_SPI0_CS (MTK_PIN_NO(53) | 1)
+#define MT2701_PIN_53_SPI0_CSN__FUNC_SPDIF (MTK_PIN_NO(53) | 3)
+#define MT2701_PIN_53_SPI0_CSN__FUNC_ADC_CK (MTK_PIN_NO(53) | 4)
+#define MT2701_PIN_53_SPI0_CSN__FUNC_PWM1 (MTK_PIN_NO(53) | 5)
+#define MT2701_PIN_53_SPI0_CSN__FUNC_DBG_MON_A_7 (MTK_PIN_NO(53) | 7)
+
+#define MT2701_PIN_54_SPI0_CK__FUNC_GPIO54 (MTK_PIN_NO(54) | 0)
+#define MT2701_PIN_54_SPI0_CK__FUNC_SPI0_CK (MTK_PIN_NO(54) | 1)
+#define MT2701_PIN_54_SPI0_CK__FUNC_SPDIF_IN1 (MTK_PIN_NO(54) | 3)
+#define MT2701_PIN_54_SPI0_CK__FUNC_ADC_DAT_IN (MTK_PIN_NO(54) | 4)
+#define MT2701_PIN_54_SPI0_CK__FUNC_DBG_MON_A_10 (MTK_PIN_NO(54) | 7)
+
+#define MT2701_PIN_55_SPI0_MI__FUNC_GPIO55 (MTK_PIN_NO(55) | 0)
+#define MT2701_PIN_55_SPI0_MI__FUNC_SPI0_MI (MTK_PIN_NO(55) | 1)
+#define MT2701_PIN_55_SPI0_MI__FUNC_SPI0_MO (MTK_PIN_NO(55) | 2)
+#define MT2701_PIN_55_SPI0_MI__FUNC_MSDC1_WP (MTK_PIN_NO(55) | 3)
+#define MT2701_PIN_55_SPI0_MI__FUNC_ADC_WS (MTK_PIN_NO(55) | 4)
+#define MT2701_PIN_55_SPI0_MI__FUNC_PWM2 (MTK_PIN_NO(55) | 5)
+#define MT2701_PIN_55_SPI0_MI__FUNC_DBG_MON_A_8 (MTK_PIN_NO(55) | 7)
+
+#define MT2701_PIN_56_SPI0_MO__FUNC_GPIO56 (MTK_PIN_NO(56) | 0)
+#define MT2701_PIN_56_SPI0_MO__FUNC_SPI0_MO (MTK_PIN_NO(56) | 1)
+#define MT2701_PIN_56_SPI0_MO__FUNC_SPI0_MI (MTK_PIN_NO(56) | 2)
+#define MT2701_PIN_56_SPI0_MO__FUNC_SPDIF_IN0 (MTK_PIN_NO(56) | 3)
+#define MT2701_PIN_56_SPI0_MO__FUNC_DBG_MON_A_9 (MTK_PIN_NO(56) | 7)
+
+#define MT2701_PIN_57_SDA1__FUNC_GPIO57 (MTK_PIN_NO(57) | 0)
+#define MT2701_PIN_57_SDA1__FUNC_SDA1 (MTK_PIN_NO(57) | 1)
+
+#define MT2701_PIN_58_SCL1__FUNC_GPIO58 (MTK_PIN_NO(58) | 0)
+#define MT2701_PIN_58_SCL1__FUNC_SCL1 (MTK_PIN_NO(58) | 1)
+
+#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_GPIO72 (MTK_PIN_NO(72) | 0)
+#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_I2S0_DATA_IN (MTK_PIN_NO(72) | 1)
+#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_PCM_RX (MTK_PIN_NO(72) | 3)
+#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_PWM0 (MTK_PIN_NO(72) | 4)
+#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_DISP_PWM (MTK_PIN_NO(72) | 5)
+#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_WCN_I2S_DI (MTK_PIN_NO(72) | 6)
+#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_DBG_MON_B_2 (MTK_PIN_NO(72) | 7)
+
+#define MT2701_PIN_73_I2S0_LRCK__FUNC_GPIO73 (MTK_PIN_NO(73) | 0)
+#define MT2701_PIN_73_I2S0_LRCK__FUNC_I2S0_LRCK (MTK_PIN_NO(73) | 1)
+#define MT2701_PIN_73_I2S0_LRCK__FUNC_PCM_SYNC (MTK_PIN_NO(73) | 3)
+#define MT2701_PIN_73_I2S0_LRCK__FUNC_WCN_I2S_LRCK (MTK_PIN_NO(73) | 6)
+#define MT2701_PIN_73_I2S0_LRCK__FUNC_DBG_MON_B_5 (MTK_PIN_NO(73) | 7)
+
+#define MT2701_PIN_74_I2S0_BCK__FUNC_GPIO74 (MTK_PIN_NO(74) | 0)
+#define MT2701_PIN_74_I2S0_BCK__FUNC_I2S0_BCK (MTK_PIN_NO(74) | 1)
+#define MT2701_PIN_74_I2S0_BCK__FUNC_PCM_CLK0 (MTK_PIN_NO(74) | 3)
+#define MT2701_PIN_74_I2S0_BCK__FUNC_WCN_I2S_BCK (MTK_PIN_NO(74) | 6)
+#define MT2701_PIN_74_I2S0_BCK__FUNC_DBG_MON_B_4 (MTK_PIN_NO(74) | 7)
+
+#define MT2701_PIN_75_SDA0__FUNC_GPIO75 (MTK_PIN_NO(75) | 0)
+#define MT2701_PIN_75_SDA0__FUNC_SDA0 (MTK_PIN_NO(75) | 1)
+
+#define MT2701_PIN_76_SCL0__FUNC_GPIO76 (MTK_PIN_NO(76) | 0)
+#define MT2701_PIN_76_SCL0__FUNC_SCL0 (MTK_PIN_NO(76) | 1)
+
+#define MT2701_PIN_77_SDA2__FUNC_GPIO77 (MTK_PIN_NO(77) | 0)
+#define MT2701_PIN_77_SDA2__FUNC_SDA2 (MTK_PIN_NO(77) | 1)
+
+#define MT2701_PIN_78_SCL2__FUNC_GPIO78 (MTK_PIN_NO(78) | 0)
+#define MT2701_PIN_78_SCL2__FUNC_SCL2 (MTK_PIN_NO(78) | 1)
+
+#define MT2701_PIN_79_URXD0__FUNC_GPIO79 (MTK_PIN_NO(79) | 0)
+#define MT2701_PIN_79_URXD0__FUNC_URXD0 (MTK_PIN_NO(79) | 1)
+#define MT2701_PIN_79_URXD0__FUNC_UTXD0 (MTK_PIN_NO(79) | 2)
+#define MT2701_PIN_79_URXD0__FUNC_ (MTK_PIN_NO(79) | 5)
+
+#define MT2701_PIN_80_UTXD0__FUNC_GPIO80 (MTK_PIN_NO(80) | 0)
+#define MT2701_PIN_80_UTXD0__FUNC_UTXD0 (MTK_PIN_NO(80) | 1)
+#define MT2701_PIN_80_UTXD0__FUNC_URXD0 (MTK_PIN_NO(80) | 2)
+
+#define MT2701_PIN_81_URXD1__FUNC_GPIO81 (MTK_PIN_NO(81) | 0)
+#define MT2701_PIN_81_URXD1__FUNC_URXD1 (MTK_PIN_NO(81) | 1)
+#define MT2701_PIN_81_URXD1__FUNC_UTXD1 (MTK_PIN_NO(81) | 2)
+
+#define MT2701_PIN_82_UTXD1__FUNC_GPIO82 (MTK_PIN_NO(82) | 0)
+#define MT2701_PIN_82_UTXD1__FUNC_UTXD1 (MTK_PIN_NO(82) | 1)
+#define MT2701_PIN_82_UTXD1__FUNC_URXD1 (MTK_PIN_NO(82) | 2)
+
+#define MT2701_PIN_83_LCM_RST__FUNC_GPIO83 (MTK_PIN_NO(83) | 0)
+#define MT2701_PIN_83_LCM_RST__FUNC_LCM_RST (MTK_PIN_NO(83) | 1)
+#define MT2701_PIN_83_LCM_RST__FUNC_VDAC_CK_XI (MTK_PIN_NO(83) | 2)
+#define MT2701_PIN_83_LCM_RST__FUNC_DBG_MON_B_1 (MTK_PIN_NO(83) | 7)
+
+#define MT2701_PIN_84_DSI_TE__FUNC_GPIO84 (MTK_PIN_NO(84) | 0)
+#define MT2701_PIN_84_DSI_TE__FUNC_DSI_TE (MTK_PIN_NO(84) | 1)
+#define MT2701_PIN_84_DSI_TE__FUNC_DBG_MON_B_0 (MTK_PIN_NO(84) | 7)
+
+#define MT2701_PIN_91_TDN3__FUNC_GPI91 (MTK_PIN_NO(91) | 0)
+#define MT2701_PIN_91_TDN3__FUNC_TDN3 (MTK_PIN_NO(91) | 1)
+
+#define MT2701_PIN_92_TDP3__FUNC_GPI92 (MTK_PIN_NO(92) | 0)
+#define MT2701_PIN_92_TDP3__FUNC_TDP3 (MTK_PIN_NO(92) | 1)
+
+#define MT2701_PIN_93_TDN2__FUNC_GPI93 (MTK_PIN_NO(93) | 0)
+#define MT2701_PIN_93_TDN2__FUNC_TDN2 (MTK_PIN_NO(93) | 1)
+
+#define MT2701_PIN_94_TDP2__FUNC_GPI94 (MTK_PIN_NO(94) | 0)
+#define MT2701_PIN_94_TDP2__FUNC_TDP2 (MTK_PIN_NO(94) | 1)
+
+#define MT2701_PIN_95_TCN__FUNC_GPI95 (MTK_PIN_NO(95) | 0)
+#define MT2701_PIN_95_TCN__FUNC_TCN (MTK_PIN_NO(95) | 1)
+
+#define MT2701_PIN_96_TCP__FUNC_GPI96 (MTK_PIN_NO(96) | 0)
+#define MT2701_PIN_96_TCP__FUNC_TCP (MTK_PIN_NO(96) | 1)
+
+#define MT2701_PIN_97_TDN1__FUNC_GPI97 (MTK_PIN_NO(97) | 0)
+#define MT2701_PIN_97_TDN1__FUNC_TDN1 (MTK_PIN_NO(97) | 1)
+
+#define MT2701_PIN_98_TDP1__FUNC_GPI98 (MTK_PIN_NO(98) | 0)
+#define MT2701_PIN_98_TDP1__FUNC_TDP1 (MTK_PIN_NO(98) | 1)
+
+#define MT2701_PIN_99_TDN0__FUNC_GPI99 (MTK_PIN_NO(99) | 0)
+#define MT2701_PIN_99_TDN0__FUNC_TDN0 (MTK_PIN_NO(99) | 1)
+
+#define MT2701_PIN_100_TDP0__FUNC_GPI100 (MTK_PIN_NO(100) | 0)
+#define MT2701_PIN_100_TDP0__FUNC_TDP0 (MTK_PIN_NO(100) | 1)
+
+#define MT2701_PIN_101_SPI2_CSN__FUNC_GPIO101 (MTK_PIN_NO(101) | 0)
+#define MT2701_PIN_101_SPI2_CSN__FUNC_SPI2_CS (MTK_PIN_NO(101) | 1)
+#define MT2701_PIN_101_SPI2_CSN__FUNC_SCL3 (MTK_PIN_NO(101) | 3)
+#define MT2701_PIN_101_SPI2_CSN__FUNC_KROW0 (MTK_PIN_NO(101) | 4)
+
+#define MT2701_PIN_102_SPI2_MI__FUNC_GPIO102 (MTK_PIN_NO(102) | 0)
+#define MT2701_PIN_102_SPI2_MI__FUNC_SPI2_MI (MTK_PIN_NO(102) | 1)
+#define MT2701_PIN_102_SPI2_MI__FUNC_SPI2_MO (MTK_PIN_NO(102) | 2)
+#define MT2701_PIN_102_SPI2_MI__FUNC_SDA3 (MTK_PIN_NO(102) | 3)
+#define MT2701_PIN_102_SPI2_MI__FUNC_KROW1 (MTK_PIN_NO(102) | 4)
+
+#define MT2701_PIN_103_SPI2_MO__FUNC_GPIO103 (MTK_PIN_NO(103) | 0)
+#define MT2701_PIN_103_SPI2_MO__FUNC_SPI2_MO (MTK_PIN_NO(103) | 1)
+#define MT2701_PIN_103_SPI2_MO__FUNC_SPI2_MI (MTK_PIN_NO(103) | 2)
+#define MT2701_PIN_103_SPI2_MO__FUNC_SCL3 (MTK_PIN_NO(103) | 3)
+#define MT2701_PIN_103_SPI2_MO__FUNC_KROW2 (MTK_PIN_NO(103) | 4)
+
+#define MT2701_PIN_104_SPI2_CLK__FUNC_GPIO104 (MTK_PIN_NO(104) | 0)
+#define MT2701_PIN_104_SPI2_CLK__FUNC_SPI2_CK (MTK_PIN_NO(104) | 1)
+#define MT2701_PIN_104_SPI2_CLK__FUNC_SDA3 (MTK_PIN_NO(104) | 3)
+#define MT2701_PIN_104_SPI2_CLK__FUNC_KROW3 (MTK_PIN_NO(104) | 4)
+
+#define MT2701_PIN_105_MSDC1_CMD__FUNC_GPIO105 (MTK_PIN_NO(105) | 0)
+#define MT2701_PIN_105_MSDC1_CMD__FUNC_MSDC1_CMD (MTK_PIN_NO(105) | 1)
+#define MT2701_PIN_105_MSDC1_CMD__FUNC_ANT_SEL0 (MTK_PIN_NO(105) | 2)
+#define MT2701_PIN_105_MSDC1_CMD__FUNC_SDA1 (MTK_PIN_NO(105) | 3)
+#define MT2701_PIN_105_MSDC1_CMD__FUNC_I2SOUT_BCK (MTK_PIN_NO(105) | 6)
+#define MT2701_PIN_105_MSDC1_CMD__FUNC_DBG_MON_B_27 (MTK_PIN_NO(105) | 7)
+
+#define MT2701_PIN_106_MSDC1_CLK__FUNC_GPIO106 (MTK_PIN_NO(106) | 0)
+#define MT2701_PIN_106_MSDC1_CLK__FUNC_MSDC1_CLK (MTK_PIN_NO(106) | 1)
+#define MT2701_PIN_106_MSDC1_CLK__FUNC_ANT_SEL1 (MTK_PIN_NO(106) | 2)
+#define MT2701_PIN_106_MSDC1_CLK__FUNC_SCL1 (MTK_PIN_NO(106) | 3)
+#define MT2701_PIN_106_MSDC1_CLK__FUNC_I2SOUT_LRCK (MTK_PIN_NO(106) | 6)
+#define MT2701_PIN_106_MSDC1_CLK__FUNC_DBG_MON_B_28 (MTK_PIN_NO(106) | 7)
+
+#define MT2701_PIN_107_MSDC1_DAT0__FUNC_GPIO107 (MTK_PIN_NO(107) | 0)
+#define MT2701_PIN_107_MSDC1_DAT0__FUNC_MSDC1_DAT0 (MTK_PIN_NO(107) | 1)
+#define MT2701_PIN_107_MSDC1_DAT0__FUNC_ANT_SEL2 (MTK_PIN_NO(107) | 2)
+#define MT2701_PIN_107_MSDC1_DAT0__FUNC_UTXD0 (MTK_PIN_NO(107) | 5)
+#define MT2701_PIN_107_MSDC1_DAT0__FUNC_I2SOUT_DATA_OUT (MTK_PIN_NO(107) | 6)
+#define MT2701_PIN_107_MSDC1_DAT0__FUNC_DBG_MON_B_26 (MTK_PIN_NO(107) | 7)
+
+#define MT2701_PIN_108_MSDC1_DAT1__FUNC_GPIO108 (MTK_PIN_NO(108) | 0)
+#define MT2701_PIN_108_MSDC1_DAT1__FUNC_MSDC1_DAT1 (MTK_PIN_NO(108) | 1)
+#define MT2701_PIN_108_MSDC1_DAT1__FUNC_ANT_SEL3 (MTK_PIN_NO(108) | 2)
+#define MT2701_PIN_108_MSDC1_DAT1__FUNC_PWM0 (MTK_PIN_NO(108) | 3)
+#define MT2701_PIN_108_MSDC1_DAT1__FUNC_URXD0 (MTK_PIN_NO(108) | 5)
+#define MT2701_PIN_108_MSDC1_DAT1__FUNC_PWM1 (MTK_PIN_NO(108) | 6)
+#define MT2701_PIN_108_MSDC1_DAT1__FUNC_DBG_MON_B_25 (MTK_PIN_NO(108) | 7)
+
+#define MT2701_PIN_109_MSDC1_DAT2__FUNC_GPIO109 (MTK_PIN_NO(109) | 0)
+#define MT2701_PIN_109_MSDC1_DAT2__FUNC_MSDC1_DAT2 (MTK_PIN_NO(109) | 1)
+#define MT2701_PIN_109_MSDC1_DAT2__FUNC_ANT_SEL4 (MTK_PIN_NO(109) | 2)
+#define MT2701_PIN_109_MSDC1_DAT2__FUNC_SDA2 (MTK_PIN_NO(109) | 3)
+#define MT2701_PIN_109_MSDC1_DAT2__FUNC_UTXD1 (MTK_PIN_NO(109) | 5)
+#define MT2701_PIN_109_MSDC1_DAT2__FUNC_PWM2 (MTK_PIN_NO(109) | 6)
+#define MT2701_PIN_109_MSDC1_DAT2__FUNC_DBG_MON_B_24 (MTK_PIN_NO(109) | 7)
+
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_GPIO110 (MTK_PIN_NO(110) | 0)
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_MSDC1_DAT3 (MTK_PIN_NO(110) | 1)
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_ANT_SEL5 (MTK_PIN_NO(110) | 2)
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_SCL2 (MTK_PIN_NO(110) | 3)
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(110) | 4)
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_URXD1 (MTK_PIN_NO(110) | 5)
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_PWM3 (MTK_PIN_NO(110) | 6)
+#define MT2701_PIN_110_MSDC1_DAT3__FUNC_DBG_MON_B_23 (MTK_PIN_NO(110) | 7)
+
+#define MT2701_PIN_111_MSDC0_DAT7__FUNC_GPIO111 (MTK_PIN_NO(111) | 0)
+#define MT2701_PIN_111_MSDC0_DAT7__FUNC_MSDC0_DAT7 (MTK_PIN_NO(111) | 1)
+#define MT2701_PIN_111_MSDC0_DAT7__FUNC_NLD7 (MTK_PIN_NO(111) | 4)
+
+#define MT2701_PIN_112_MSDC0_DAT6__FUNC_GPIO112 (MTK_PIN_NO(112) | 0)
+#define MT2701_PIN_112_MSDC0_DAT6__FUNC_MSDC0_DAT6 (MTK_PIN_NO(112) | 1)
+#define MT2701_PIN_112_MSDC0_DAT6__FUNC_NLD6 (MTK_PIN_NO(112) | 4)
+
+#define MT2701_PIN_113_MSDC0_DAT5__FUNC_GPIO113 (MTK_PIN_NO(113) | 0)
+#define MT2701_PIN_113_MSDC0_DAT5__FUNC_MSDC0_DAT5 (MTK_PIN_NO(113) | 1)
+#define MT2701_PIN_113_MSDC0_DAT5__FUNC_NLD5 (MTK_PIN_NO(113) | 4)
+
+#define MT2701_PIN_114_MSDC0_DAT4__FUNC_GPIO114 (MTK_PIN_NO(114) | 0)
+#define MT2701_PIN_114_MSDC0_DAT4__FUNC_MSDC0_DAT4 (MTK_PIN_NO(114) | 1)
+#define MT2701_PIN_114_MSDC0_DAT4__FUNC_NLD4 (MTK_PIN_NO(114) | 4)
+
+#define MT2701_PIN_115_MSDC0_RSTB__FUNC_GPIO115 (MTK_PIN_NO(115) | 0)
+#define MT2701_PIN_115_MSDC0_RSTB__FUNC_MSDC0_RSTB (MTK_PIN_NO(115) | 1)
+#define MT2701_PIN_115_MSDC0_RSTB__FUNC_NLD8 (MTK_PIN_NO(115) | 4)
+
+#define MT2701_PIN_116_MSDC0_CMD__FUNC_GPIO116 (MTK_PIN_NO(116) | 0)
+#define MT2701_PIN_116_MSDC0_CMD__FUNC_MSDC0_CMD (MTK_PIN_NO(116) | 1)
+#define MT2701_PIN_116_MSDC0_CMD__FUNC_NALE (MTK_PIN_NO(116) | 4)
+
+#define MT2701_PIN_117_MSDC0_CLK__FUNC_GPIO117 (MTK_PIN_NO(117) | 0)
+#define MT2701_PIN_117_MSDC0_CLK__FUNC_MSDC0_CLK (MTK_PIN_NO(117) | 1)
+#define MT2701_PIN_117_MSDC0_CLK__FUNC_NWEB (MTK_PIN_NO(117) | 4)
+
+#define MT2701_PIN_118_MSDC0_DAT3__FUNC_GPIO118 (MTK_PIN_NO(118) | 0)
+#define MT2701_PIN_118_MSDC0_DAT3__FUNC_MSDC0_DAT3 (MTK_PIN_NO(118) | 1)
+#define MT2701_PIN_118_MSDC0_DAT3__FUNC_NLD3 (MTK_PIN_NO(118) | 4)
+
+#define MT2701_PIN_119_MSDC0_DAT2__FUNC_GPIO119 (MTK_PIN_NO(119) | 0)
+#define MT2701_PIN_119_MSDC0_DAT2__FUNC_MSDC0_DAT2 (MTK_PIN_NO(119) | 1)
+#define MT2701_PIN_119_MSDC0_DAT2__FUNC_NLD2 (MTK_PIN_NO(119) | 4)
+
+#define MT2701_PIN_120_MSDC0_DAT1__FUNC_GPIO120 (MTK_PIN_NO(120) | 0)
+#define MT2701_PIN_120_MSDC0_DAT1__FUNC_MSDC0_DAT1 (MTK_PIN_NO(120) | 1)
+#define MT2701_PIN_120_MSDC0_DAT1__FUNC_NLD1 (MTK_PIN_NO(120) | 4)
+
+#define MT2701_PIN_121_MSDC0_DAT0__FUNC_GPIO121 (MTK_PIN_NO(121) | 0)
+#define MT2701_PIN_121_MSDC0_DAT0__FUNC_MSDC0_DAT0 (MTK_PIN_NO(121) | 1)
+#define MT2701_PIN_121_MSDC0_DAT0__FUNC_NLD0 (MTK_PIN_NO(121) | 4)
+#define MT2701_PIN_121_MSDC0_DAT0__FUNC_WATCHDOG (MTK_PIN_NO(121) | 5)
+
+#define MT2701_PIN_122_CEC__FUNC_GPIO122 (MTK_PIN_NO(122) | 0)
+#define MT2701_PIN_122_CEC__FUNC_CEC (MTK_PIN_NO(122) | 1)
+#define MT2701_PIN_122_CEC__FUNC_SDA2 (MTK_PIN_NO(122) | 4)
+#define MT2701_PIN_122_CEC__FUNC_URXD0 (MTK_PIN_NO(122) | 5)
+
+#define MT2701_PIN_123_HTPLG__FUNC_GPIO123 (MTK_PIN_NO(123) | 0)
+#define MT2701_PIN_123_HTPLG__FUNC_HTPLG (MTK_PIN_NO(123) | 1)
+#define MT2701_PIN_123_HTPLG__FUNC_SCL2 (MTK_PIN_NO(123) | 4)
+#define MT2701_PIN_123_HTPLG__FUNC_UTXD0 (MTK_PIN_NO(123) | 5)
+
+#define MT2701_PIN_124_HDMISCK__FUNC_GPIO124 (MTK_PIN_NO(124) | 0)
+#define MT2701_PIN_124_HDMISCK__FUNC_HDMISCK (MTK_PIN_NO(124) | 1)
+#define MT2701_PIN_124_HDMISCK__FUNC_SDA1 (MTK_PIN_NO(124) | 4)
+#define MT2701_PIN_124_HDMISCK__FUNC_PWM3 (MTK_PIN_NO(124) | 5)
+
+#define MT2701_PIN_125_HDMISD__FUNC_GPIO125 (MTK_PIN_NO(125) | 0)
+#define MT2701_PIN_125_HDMISD__FUNC_HDMISD (MTK_PIN_NO(125) | 1)
+#define MT2701_PIN_125_HDMISD__FUNC_SCL1 (MTK_PIN_NO(125) | 4)
+#define MT2701_PIN_125_HDMISD__FUNC_PWM4 (MTK_PIN_NO(125) | 5)
+
+#define MT2701_PIN_126_I2S0_MCLK__FUNC_GPIO126 (MTK_PIN_NO(126) | 0)
+#define MT2701_PIN_126_I2S0_MCLK__FUNC_I2S0_MCLK (MTK_PIN_NO(126) | 1)
+#define MT2701_PIN_126_I2S0_MCLK__FUNC_WCN_I2S_MCLK (MTK_PIN_NO(126) | 6)
+#define MT2701_PIN_126_I2S0_MCLK__FUNC_DBG_MON_B_6 (MTK_PIN_NO(126) | 7)
+
+#define MT2701_PIN_199_SPI1_CLK__FUNC_GPIO199 (MTK_PIN_NO(199) | 0)
+#define MT2701_PIN_199_SPI1_CLK__FUNC_SPI1_CK (MTK_PIN_NO(199) | 1)
+#define MT2701_PIN_199_SPI1_CLK__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(199) | 3)
+#define MT2701_PIN_199_SPI1_CLK__FUNC_KCOL3 (MTK_PIN_NO(199) | 4)
+#define MT2701_PIN_199_SPI1_CLK__FUNC_DBG_MON_B_15 (MTK_PIN_NO(199) | 7)
+
+#define MT2701_PIN_200_SPDIF_OUT__FUNC_GPIO200 (MTK_PIN_NO(200) | 0)
+#define MT2701_PIN_200_SPDIF_OUT__FUNC_SPDIF_OUT (MTK_PIN_NO(200) | 1)
+#define MT2701_PIN_200_SPDIF_OUT__FUNC_G1_TXD3 (MTK_PIN_NO(200) | 5)
+#define MT2701_PIN_200_SPDIF_OUT__FUNC_URXD2 (MTK_PIN_NO(200) | 6)
+#define MT2701_PIN_200_SPDIF_OUT__FUNC_DBG_MON_B_16 (MTK_PIN_NO(200) | 7)
+
+#define MT2701_PIN_201_SPDIF_IN0__FUNC_GPIO201 (MTK_PIN_NO(201) | 0)
+#define MT2701_PIN_201_SPDIF_IN0__FUNC_SPDIF_IN0 (MTK_PIN_NO(201) | 1)
+#define MT2701_PIN_201_SPDIF_IN0__FUNC_G1_TXEN (MTK_PIN_NO(201) | 5)
+#define MT2701_PIN_201_SPDIF_IN0__FUNC_UTXD2 (MTK_PIN_NO(201) | 6)
+#define MT2701_PIN_201_SPDIF_IN0__FUNC_DBG_MON_B_17 (MTK_PIN_NO(201) | 7)
+
+#define MT2701_PIN_202_SPDIF_IN1__FUNC_GPIO202 (MTK_PIN_NO(202) | 0)
+#define MT2701_PIN_202_SPDIF_IN1__FUNC_SPDIF_IN1 (MTK_PIN_NO(202) | 1)
+
+#define MT2701_PIN_203_PWM0__FUNC_GPIO203 (MTK_PIN_NO(203) | 0)
+#define MT2701_PIN_203_PWM0__FUNC_PWM0 (MTK_PIN_NO(203) | 1)
+#define MT2701_PIN_203_PWM0__FUNC_DISP_PWM (MTK_PIN_NO(203) | 2)
+#define MT2701_PIN_203_PWM0__FUNC_G1_TXD2 (MTK_PIN_NO(203) | 5)
+#define MT2701_PIN_203_PWM0__FUNC_DBG_MON_B_18 (MTK_PIN_NO(203) | 7)
+#define MT2701_PIN_203_PWM0__FUNC_I2S2_DATA (MTK_PIN_NO(203) | 9)
+
+#define MT2701_PIN_204_PWM1__FUNC_GPIO204 (MTK_PIN_NO(204) | 0)
+#define MT2701_PIN_204_PWM1__FUNC_PWM1 (MTK_PIN_NO(204) | 1)
+#define MT2701_PIN_204_PWM1__FUNC_CLKM3 (MTK_PIN_NO(204) | 2)
+#define MT2701_PIN_204_PWM1__FUNC_G1_TXD1 (MTK_PIN_NO(204) | 5)
+#define MT2701_PIN_204_PWM1__FUNC_DBG_MON_B_19 (MTK_PIN_NO(204) | 7)
+#define MT2701_PIN_204_PWM1__FUNC_I2S3_DATA (MTK_PIN_NO(204) | 9)
+
+#define MT2701_PIN_205_PWM2__FUNC_GPIO205 (MTK_PIN_NO(205) | 0)
+#define MT2701_PIN_205_PWM2__FUNC_PWM2 (MTK_PIN_NO(205) | 1)
+#define MT2701_PIN_205_PWM2__FUNC_CLKM2 (MTK_PIN_NO(205) | 2)
+#define MT2701_PIN_205_PWM2__FUNC_G1_TXD0 (MTK_PIN_NO(205) | 5)
+#define MT2701_PIN_205_PWM2__FUNC_DBG_MON_B_20 (MTK_PIN_NO(205) | 7)
+
+#define MT2701_PIN_206_PWM3__FUNC_GPIO206 (MTK_PIN_NO(206) | 0)
+#define MT2701_PIN_206_PWM3__FUNC_PWM3 (MTK_PIN_NO(206) | 1)
+#define MT2701_PIN_206_PWM3__FUNC_CLKM1 (MTK_PIN_NO(206) | 2)
+#define MT2701_PIN_206_PWM3__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(206) | 3)
+#define MT2701_PIN_206_PWM3__FUNC_G1_TXC (MTK_PIN_NO(206) | 5)
+#define MT2701_PIN_206_PWM3__FUNC_DBG_MON_B_21 (MTK_PIN_NO(206) | 7)
+
+#define MT2701_PIN_207_PWM4__FUNC_GPIO207 (MTK_PIN_NO(207) | 0)
+#define MT2701_PIN_207_PWM4__FUNC_PWM4 (MTK_PIN_NO(207) | 1)
+#define MT2701_PIN_207_PWM4__FUNC_CLKM0 (MTK_PIN_NO(207) | 2)
+#define MT2701_PIN_207_PWM4__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(207) | 3)
+#define MT2701_PIN_207_PWM4__FUNC_G1_RXC (MTK_PIN_NO(207) | 5)
+#define MT2701_PIN_207_PWM4__FUNC_DBG_MON_B_22 (MTK_PIN_NO(207) | 7)
+
+#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_GPIO208 (MTK_PIN_NO(208) | 0)
+#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_AUD_EXT_CK1 (MTK_PIN_NO(208) | 1)
+#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_PWM0 (MTK_PIN_NO(208) | 2)
+#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_ANT_SEL5 (MTK_PIN_NO(208) | 4)
+#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_DISP_PWM (MTK_PIN_NO(208) | 5)
+#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_DBG_MON_A_31 (MTK_PIN_NO(208) | 7)
+#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_PCIE0_PERST_N (MTK_PIN_NO(208) | 11)
+
+#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_GPIO209 (MTK_PIN_NO(209) | 0)
+#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_AUD_EXT_CK2 (MTK_PIN_NO(209) | 1)
+#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_MSDC1_WP (MTK_PIN_NO(209) | 2)
+#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_PWM1 (MTK_PIN_NO(209) | 5)
+#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_DBG_MON_A_32 (MTK_PIN_NO(209) | 7)
+#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_PCIE1_PERST_N (MTK_PIN_NO(209) | 11)
+
+#define MT2701_PIN_236_EXT_SDIO3__FUNC_GPIO236 (MTK_PIN_NO(236) | 0)
+#define MT2701_PIN_236_EXT_SDIO3__FUNC_EXT_SDIO3 (MTK_PIN_NO(236) | 1)
+#define MT2701_PIN_236_EXT_SDIO3__FUNC_IDDIG (MTK_PIN_NO(236) | 2)
+#define MT2701_PIN_236_EXT_SDIO3__FUNC_DBG_MON_A_1 (MTK_PIN_NO(236) | 7)
+
+#define MT2701_PIN_237_EXT_SDIO2__FUNC_GPIO237 (MTK_PIN_NO(237) | 0)
+#define MT2701_PIN_237_EXT_SDIO2__FUNC_EXT_SDIO2 (MTK_PIN_NO(237) | 1)
+#define MT2701_PIN_237_EXT_SDIO2__FUNC_DRV_VBUS (MTK_PIN_NO(237) | 2)
+
+#define MT2701_PIN_238_EXT_SDIO1__FUNC_GPIO238 (MTK_PIN_NO(238) | 0)
+#define MT2701_PIN_238_EXT_SDIO1__FUNC_EXT_SDIO1 (MTK_PIN_NO(238) | 1)
+#define MT2701_PIN_238_EXT_SDIO1__FUNC_IDDIG_P1 (MTK_PIN_NO(238) | 2)
+
+#define MT2701_PIN_239_EXT_SDIO0__FUNC_GPIO239 (MTK_PIN_NO(239) | 0)
+#define MT2701_PIN_239_EXT_SDIO0__FUNC_EXT_SDIO0 (MTK_PIN_NO(239) | 1)
+#define MT2701_PIN_239_EXT_SDIO0__FUNC_DRV_VBUS_P1 (MTK_PIN_NO(239) | 2)
+
+#define MT2701_PIN_240_EXT_XCS__FUNC_GPIO240 (MTK_PIN_NO(240) | 0)
+#define MT2701_PIN_240_EXT_XCS__FUNC_EXT_XCS (MTK_PIN_NO(240) | 1)
+
+#define MT2701_PIN_241_EXT_SCK__FUNC_GPIO241 (MTK_PIN_NO(241) | 0)
+#define MT2701_PIN_241_EXT_SCK__FUNC_EXT_SCK (MTK_PIN_NO(241) | 1)
+
+#define MT2701_PIN_242_URTS2__FUNC_GPIO242 (MTK_PIN_NO(242) | 0)
+#define MT2701_PIN_242_URTS2__FUNC_URTS2 (MTK_PIN_NO(242) | 1)
+#define MT2701_PIN_242_URTS2__FUNC_UTXD3 (MTK_PIN_NO(242) | 2)
+#define MT2701_PIN_242_URTS2__FUNC_URXD3 (MTK_PIN_NO(242) | 3)
+#define MT2701_PIN_242_URTS2__FUNC_SCL1 (MTK_PIN_NO(242) | 4)
+#define MT2701_PIN_242_URTS2__FUNC_DBG_MON_B_32 (MTK_PIN_NO(242) | 7)
+
+#define MT2701_PIN_243_UCTS2__FUNC_GPIO243 (MTK_PIN_NO(243) | 0)
+#define MT2701_PIN_243_UCTS2__FUNC_UCTS2 (MTK_PIN_NO(243) | 1)
+#define MT2701_PIN_243_UCTS2__FUNC_URXD3 (MTK_PIN_NO(243) | 2)
+#define MT2701_PIN_243_UCTS2__FUNC_UTXD3 (MTK_PIN_NO(243) | 3)
+#define MT2701_PIN_243_UCTS2__FUNC_SDA1 (MTK_PIN_NO(243) | 4)
+#define MT2701_PIN_243_UCTS2__FUNC_DBG_MON_A_6 (MTK_PIN_NO(243) | 7)
+
+#define MT2701_PIN_244_HDMI_SDA_RX__FUNC_GPIO244 (MTK_PIN_NO(244) | 0)
+#define MT2701_PIN_244_HDMI_SDA_RX__FUNC_HDMI_SDA_RX (MTK_PIN_NO(244) | 1)
+
+#define MT2701_PIN_245_HDMI_SCL_RX__FUNC_GPIO245 (MTK_PIN_NO(245) | 0)
+#define MT2701_PIN_245_HDMI_SCL_RX__FUNC_HDMI_SCL_RX (MTK_PIN_NO(245) | 1)
+
+#define MT2701_PIN_246_MHL_SENCE__FUNC_GPIO246 (MTK_PIN_NO(246) | 0)
+
+#define MT2701_PIN_247_HDMI_HPD_CBUS_RX__FUNC_GPIO247 (MTK_PIN_NO(247) | 0)
+#define MT2701_PIN_247_HDMI_HPD_CBUS_RX__FUNC_HDMI_HPD_RX (MTK_PIN_NO(247) | 1)
+
+#define MT2701_PIN_248_HDMI_TESTOUTP_RX__FUNC_GPIO248 (MTK_PIN_NO(248) | 0)
+#define MT2701_PIN_248_HDMI_TESTOUTP_RX__FUNC_HDMI_TESTOUTP_RX (MTK_PIN_NO(248) | 1)
+
+#define MT2701_PIN_249_MSDC0E_RSTB__FUNC_MSDC0E_RSTB (MTK_PIN_NO(249) | 9)
+
+#define MT2701_PIN_250_MSDC0E_DAT7__FUNC_MSDC3_DAT7 (MTK_PIN_NO(250) | 9)
+#define MT2701_PIN_250_MSDC0E_DAT7__FUNC_PCIE0_CLKREQ_N (MTK_PIN_NO(250) | 14)
+
+#define MT2701_PIN_251_MSDC0E_DAT6__FUNC_MSDC3_DAT6 (MTK_PIN_NO(251) | 9)
+#define MT2701_PIN_251_MSDC0E_DAT6__FUNC_PCIE0_WAKE_N (MTK_PIN_NO(251) | 14)
+
+#define MT2701_PIN_252_MSDC0E_DAT5__FUNC_MSDC3_DAT5 (MTK_PIN_NO(252) | 9)
+#define MT2701_PIN_252_MSDC0E_DAT5__FUNC_PCIE1_CLKREQ_N (MTK_PIN_NO(252) | 14)
+
+#define MT2701_PIN_253_MSDC0E_DAT4__FUNC_MSDC3_DAT4 (MTK_PIN_NO(253) | 9)
+#define MT2701_PIN_253_MSDC0E_DAT4__FUNC_PCIE1_WAKE_N (MTK_PIN_NO(253) | 14)
+
+#define MT2701_PIN_254_MSDC0E_DAT3__FUNC_MSDC3_DAT3 (MTK_PIN_NO(254) | 9)
+#define MT2701_PIN_254_MSDC0E_DAT3__FUNC_PCIE2_CLKREQ_N (MTK_PIN_NO(254) | 14)
+
+#define MT2701_PIN_255_MSDC0E_DAT2__FUNC_MSDC3_DAT2 (MTK_PIN_NO(255) | 9)
+#define MT2701_PIN_255_MSDC0E_DAT2__FUNC_PCIE2_WAKE_N (MTK_PIN_NO(255) | 14)
+
+#define MT2701_PIN_256_MSDC0E_DAT1__FUNC_MSDC3_DAT1 (MTK_PIN_NO(256) | 9)
+
+#define MT2701_PIN_257_MSDC0E_DAT0__FUNC_MSDC3_DAT0 (MTK_PIN_NO(257) | 9)
+
+#define MT2701_PIN_258_MSDC0E_CMD__FUNC_MSDC3_CMD (MTK_PIN_NO(258) | 9)
+
+#define MT2701_PIN_259_MSDC0E_CLK__FUNC_MSDC3_CLK (MTK_PIN_NO(259) | 9)
+
+#define MT2701_PIN_260_MSDC0E_DSL__FUNC_MSDC3_DSL (MTK_PIN_NO(260) | 9)
+
+#define MT2701_PIN_261_MSDC1_INS__FUNC_GPIO261 (MTK_PIN_NO(261) | 0)
+#define MT2701_PIN_261_MSDC1_INS__FUNC_MSDC1_INS (MTK_PIN_NO(261) | 1)
+#define MT2701_PIN_261_MSDC1_INS__FUNC_DBG_MON_B_29 (MTK_PIN_NO(261) | 7)
+
+#define MT2701_PIN_262_G2_TXEN__FUNC_GPIO262 (MTK_PIN_NO(262) | 0)
+#define MT2701_PIN_262_G2_TXEN__FUNC_G2_TXEN (MTK_PIN_NO(262) | 1)
+
+#define MT2701_PIN_263_G2_TXD3__FUNC_GPIO263 (MTK_PIN_NO(263) | 0)
+#define MT2701_PIN_263_G2_TXD3__FUNC_G2_TXD3 (MTK_PIN_NO(263) | 1)
+#define MT2701_PIN_263_G2_TXD3__FUNC_ANT_SEL5 (MTK_PIN_NO(263) | 6)
+
+#define MT2701_PIN_264_G2_TXD2__FUNC_GPIO264 (MTK_PIN_NO(264) | 0)
+#define MT2701_PIN_264_G2_TXD2__FUNC_G2_TXD2 (MTK_PIN_NO(264) | 1)
+#define MT2701_PIN_264_G2_TXD2__FUNC_ANT_SEL4 (MTK_PIN_NO(264) | 6)
+
+#define MT2701_PIN_265_G2_TXD1__FUNC_GPIO265 (MTK_PIN_NO(265) | 0)
+#define MT2701_PIN_265_G2_TXD1__FUNC_G2_TXD1 (MTK_PIN_NO(265) | 1)
+#define MT2701_PIN_265_G2_TXD1__FUNC_ANT_SEL3 (MTK_PIN_NO(265) | 6)
+
+#define MT2701_PIN_266_G2_TXD0__FUNC_GPIO266 (MTK_PIN_NO(266) | 0)
+#define MT2701_PIN_266_G2_TXD0__FUNC_G2_TXD0 (MTK_PIN_NO(266) | 1)
+#define MT2701_PIN_266_G2_TXD0__FUNC_ANT_SEL2 (MTK_PIN_NO(266) | 6)
+
+#define MT2701_PIN_267_G2_TXC__FUNC_GPIO267 (MTK_PIN_NO(267) | 0)
+#define MT2701_PIN_267_G2_TXC__FUNC_G2_TXC (MTK_PIN_NO(267) | 1)
+
+#define MT2701_PIN_268_G2_RXC__FUNC_GPIO268 (MTK_PIN_NO(268) | 0)
+#define MT2701_PIN_268_G2_RXC__FUNC_G2_RXC (MTK_PIN_NO(268) | 1)
+
+#define MT2701_PIN_269_G2_RXD0__FUNC_GPIO269 (MTK_PIN_NO(269) | 0)
+#define MT2701_PIN_269_G2_RXD0__FUNC_G2_RXD0 (MTK_PIN_NO(269) | 1)
+
+#define MT2701_PIN_270_G2_RXD1__FUNC_GPIO270 (MTK_PIN_NO(270) | 0)
+#define MT2701_PIN_270_G2_RXD1__FUNC_G2_RXD1 (MTK_PIN_NO(270) | 1)
+
+#define MT2701_PIN_271_G2_RXD2__FUNC_GPIO271 (MTK_PIN_NO(271) | 0)
+#define MT2701_PIN_271_G2_RXD2__FUNC_G2_RXD2 (MTK_PIN_NO(271) | 1)
+
+#define MT2701_PIN_272_G2_RXD3__FUNC_GPIO272 (MTK_PIN_NO(272) | 0)
+#define MT2701_PIN_272_G2_RXD3__FUNC_G2_RXD3 (MTK_PIN_NO(272) | 1)
+
+#define MT2701_PIN_274_G2_RXDV__FUNC_GPIO274 (MTK_PIN_NO(274) | 0)
+#define MT2701_PIN_274_G2_RXDV__FUNC_G2_RXDV (MTK_PIN_NO(274) | 1)
+
+#define MT2701_PIN_275_MDC__FUNC_GPIO275 (MTK_PIN_NO(275) | 0)
+#define MT2701_PIN_275_MDC__FUNC_MDC (MTK_PIN_NO(275) | 1)
+#define MT2701_PIN_275_MDC__FUNC_ANT_SEL0 (MTK_PIN_NO(275) | 6)
+
+#define MT2701_PIN_276_MDIO__FUNC_GPIO276 (MTK_PIN_NO(276) | 0)
+#define MT2701_PIN_276_MDIO__FUNC_MDIO (MTK_PIN_NO(276) | 1)
+#define MT2701_PIN_276_MDIO__FUNC_ANT_SEL1 (MTK_PIN_NO(276) | 6)
+
+#define MT2701_PIN_278_JTAG_RESET__FUNC_GPIO278 (MTK_PIN_NO(278) | 0)
+#define MT2701_PIN_278_JTAG_RESET__FUNC_JTAG_RESET (MTK_PIN_NO(278) | 1)
+
+#endif /* __DTS_MT2701_PINFUNC_H */
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 74d8f7eb5563..e3bdcf819643 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -107,8 +107,8 @@
};
};
- isp1704: isp1704 {
- compatible = "nxp,isp1704";
+ isp1707: isp1707 {
+ compatible = "nxp,isp1707";
nxp,enable-gpio = <&gpio3 3 GPIO_ACTIVE_HIGH>;
usb-phy = <&usb2_phy>;
};
@@ -618,7 +618,7 @@
ti,termination-current = <100>;
ti,resistor-sense = <68>;
- ti,usb-charger-detection = <&isp1704>;
+ ti,usb-charger-detection = <&isp1707>;
};
};
diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi
index 3ed4abdaaa9c..15cbc747c242 100644
--- a/arch/arm/boot/dts/socfpga.dtsi
+++ b/arch/arm/boot/dts/socfpga.dtsi
@@ -656,6 +656,26 @@
status = "disabled";
};
+ eccmgr: eccmgr@ffd08140 {
+ compatible = "altr,socfpga-ecc-manager";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ l2-ecc@ffd08140 {
+ compatible = "altr,socfpga-l2-ecc";
+ reg = <0xffd08140 0x4>;
+ interrupts = <0 36 1>, <0 37 1>;
+ };
+
+ ocram-ecc@ffd08144 {
+ compatible = "altr,socfpga-ocram-ecc";
+ reg = <0xffd08144 0x4>;
+ iram = <&ocram>;
+ interrupts = <0 178 1>, <0 179 1>;
+ };
+ };
+
L2: l2-cache@fffef000 {
compatible = "arm,pl310-cache";
reg = <0xfffef000 0x1000>;
diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
index a9ceb5bac40e..4539f8d909a5 100644
--- a/arch/arm/boot/dts/vfxxx.dtsi
+++ b/arch/arm/boot/dts/vfxxx.dtsi
@@ -629,5 +629,10 @@
status = "disabled";
};
};
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc0 16>, <&adc1 16>;
+ };
};
};
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index e0df333202b8..9ba45ade5f48 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -69,7 +69,7 @@ static void __scoop_gpio_set(struct scoop_dev *sdev,
static void scoop_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
{
- struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio);
+ struct scoop_dev *sdev = gpiochip_get_data(chip);
unsigned long flags;
spin_lock_irqsave(&sdev->scoop_lock, flags);
@@ -81,7 +81,7 @@ static void scoop_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
static int scoop_gpio_get(struct gpio_chip *chip, unsigned offset)
{
- struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio);
+ struct scoop_dev *sdev = gpiochip_get_data(chip);
/* XXX: I'm unsure, but it seems so */
return !!(ioread16(sdev->base + SCOOP_GPRR) & (1 << (offset + 1)));
@@ -90,7 +90,7 @@ static int scoop_gpio_get(struct gpio_chip *chip, unsigned offset)
static int scoop_gpio_direction_input(struct gpio_chip *chip,
unsigned offset)
{
- struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio);
+ struct scoop_dev *sdev = gpiochip_get_data(chip);
unsigned long flags;
unsigned short gpcr;
@@ -108,7 +108,7 @@ static int scoop_gpio_direction_input(struct gpio_chip *chip,
static int scoop_gpio_direction_output(struct gpio_chip *chip,
unsigned offset, int value)
{
- struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio);
+ struct scoop_dev *sdev = gpiochip_get_data(chip);
unsigned long flags;
unsigned short gpcr;
@@ -224,7 +224,7 @@ static int scoop_probe(struct platform_device *pdev)
devptr->gpio.direction_input = scoop_gpio_direction_input;
devptr->gpio.direction_output = scoop_gpio_direction_output;
- ret = gpiochip_add(&devptr->gpio);
+ ret = gpiochip_add_data(&devptr->gpio, devptr);
if (ret)
goto err_gpio;
}
diff --git a/arch/arm/configs/colibri_pxa270_defconfig b/arch/arm/configs/colibri_pxa270_defconfig
index 18c311ae1113..0b9211b2b73b 100644
--- a/arch/arm/configs/colibri_pxa270_defconfig
+++ b/arch/arm/configs/colibri_pxa270_defconfig
@@ -166,7 +166,6 @@ CONFIG_DEBUG_USER=y
CONFIG_DEBUG_ERRORS=y
CONFIG_DEBUG_LL=y
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_SHA1=m
diff --git a/arch/arm/configs/iop13xx_defconfig b/arch/arm/configs/iop13xx_defconfig
index 4fa94a1f115b..652b7bd9e544 100644
--- a/arch/arm/configs/iop13xx_defconfig
+++ b/arch/arm/configs/iop13xx_defconfig
@@ -95,7 +95,6 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS=y
CONFIG_DEBUG_USER=y
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_LRW=y
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig
index c3058da631da..aa3af0a6b8f7 100644
--- a/arch/arm/configs/iop32x_defconfig
+++ b/arch/arm/configs/iop32x_defconfig
@@ -108,7 +108,6 @@ CONFIG_DEBUG_USER=y
CONFIG_DEBUG_LL=y
CONFIG_DEBUG_LL_UART_8250=y
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_LRW=y
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig
index 4bc870028035..0ada29d568ec 100644
--- a/arch/arm/configs/trizeps4_defconfig
+++ b/arch/arm/configs/trizeps4_defconfig
@@ -214,7 +214,6 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_USER=y
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_SHA256=m
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 89a3a3e592d6..da3c0428507b 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -15,6 +15,7 @@
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <linux/module.h>
+#include <crypto/xts.h>
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@@ -152,6 +153,10 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
+ ret = xts_check_key(tfm, in_key, key_len);
+ if (ret)
+ return ret;
+
ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2);
if (!ret)
ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2],
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 6d685298690e..0511a6cafe24 100644
--- a/arch/arm/crypto/aesbs-glue.c
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -13,6 +13,7 @@
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <linux/module.h>
+#include <crypto/xts.h>
#include "aes_glue.h"
@@ -89,6 +90,11 @@ static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
int bits = key_len * 4;
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d5525bfc7e3e..9156fc303afd 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
#else
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 194c91b610ff..3d5a5cd071bd 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -19,38 +19,7 @@
#ifndef __ARM_KVM_ASM_H__
#define __ARM_KVM_ASM_H__
-/* 0 is reserved as an invalid value. */
-#define c0_MPIDR 1 /* MultiProcessor ID Register */
-#define c0_CSSELR 2 /* Cache Size Selection Register */
-#define c1_SCTLR 3 /* System Control Register */
-#define c1_ACTLR 4 /* Auxiliary Control Register */
-#define c1_CPACR 5 /* Coprocessor Access Control */
-#define c2_TTBR0 6 /* Translation Table Base Register 0 */
-#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */
-#define c2_TTBR1 8 /* Translation Table Base Register 1 */
-#define c2_TTBR1_high 9 /* TTBR1 top 32 bits */
-#define c2_TTBCR 10 /* Translation Table Base Control R. */
-#define c3_DACR 11 /* Domain Access Control Register */
-#define c5_DFSR 12 /* Data Fault Status Register */
-#define c5_IFSR 13 /* Instruction Fault Status Register */
-#define c5_ADFSR 14 /* Auxilary Data Fault Status R */
-#define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */
-#define c6_DFAR 16 /* Data Fault Address Register */
-#define c6_IFAR 17 /* Instruction Fault Address Register */
-#define c7_PAR 18 /* Physical Address Register */
-#define c7_PAR_high 19 /* PAR top 32 bits */
-#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
-#define c10_PRRR 21 /* Primary Region Remap Register */
-#define c10_NMRR 22 /* Normal Memory Remap Register */
-#define c12_VBAR 23 /* Vector Base Address Register */
-#define c13_CID 24 /* Context ID Register */
-#define c13_TID_URW 25 /* Thread ID, User R/W */
-#define c13_TID_URO 26 /* Thread ID, User R/O */
-#define c13_TID_PRIV 27 /* Thread ID, Privileged */
-#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */
-#define c10_AMAIR0 29 /* Auxilary Memory Attribute Indirection Reg0 */
-#define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */
-#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */
+#include <asm/virt.h>
#define ARM_EXCEPTION_RESET 0
#define ARM_EXCEPTION_UNDEFINED 1
@@ -79,6 +48,8 @@
#define rr_lo_hi(a1, a2) a1, a2
#endif
+#define kvm_ksym_ref(kva) (kva)
+
#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
@@ -86,19 +57,15 @@ struct kvm_vcpu;
extern char __kvm_hyp_init[];
extern char __kvm_hyp_init_end[];
-extern char __kvm_hyp_exit[];
-extern char __kvm_hyp_exit_end[];
-
extern char __kvm_hyp_vector[];
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
-
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern void __init_stage2_translation(void);
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 3095df091ff8..ee5328fc4b06 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -68,12 +68,12 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
{
- return &vcpu->arch.regs.usr_regs.ARM_pc;
+ return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_pc;
}
static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu)
{
- return &vcpu->arch.regs.usr_regs.ARM_cpsr;
+ return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr;
}
static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
@@ -83,13 +83,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
{
- unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK;
+ unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE);
}
static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
{
- unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK;
+ unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
return cpsr_mode > USR_MODE;;
}
@@ -108,11 +108,6 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu)
return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8;
}
-static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fault.hyp_pc;
-}
-
static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
@@ -143,6 +138,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
}
+static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & HSR_DABT_CM);
+}
+
/* Get Access Size from a data abort */
static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
{
@@ -192,7 +192,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
+ return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f9f27792d8ed..385070180c25 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -85,20 +85,61 @@ struct kvm_vcpu_fault_info {
u32 hsr; /* Hyp Syndrome Register */
u32 hxfar; /* Hyp Data/Inst. Fault Address Register */
u32 hpfar; /* Hyp IPA Fault Address Register */
- u32 hyp_pc; /* PC when exception was taken from Hyp mode */
};
-typedef struct vfp_hard_struct kvm_cpu_context_t;
+/*
+ * 0 is reserved as an invalid value.
+ * Order should be kept in sync with the save/restore code.
+ */
+enum vcpu_sysreg {
+ __INVALID_SYSREG__,
+ c0_MPIDR, /* MultiProcessor ID Register */
+ c0_CSSELR, /* Cache Size Selection Register */
+ c1_SCTLR, /* System Control Register */
+ c1_ACTLR, /* Auxiliary Control Register */
+ c1_CPACR, /* Coprocessor Access Control */
+ c2_TTBR0, /* Translation Table Base Register 0 */
+ c2_TTBR0_high, /* TTBR0 top 32 bits */
+ c2_TTBR1, /* Translation Table Base Register 1 */
+ c2_TTBR1_high, /* TTBR1 top 32 bits */
+ c2_TTBCR, /* Translation Table Base Control R. */
+ c3_DACR, /* Domain Access Control Register */
+ c5_DFSR, /* Data Fault Status Register */
+ c5_IFSR, /* Instruction Fault Status Register */
+ c5_ADFSR, /* Auxilary Data Fault Status R */
+ c5_AIFSR, /* Auxilary Instrunction Fault Status R */
+ c6_DFAR, /* Data Fault Address Register */
+ c6_IFAR, /* Instruction Fault Address Register */
+ c7_PAR, /* Physical Address Register */
+ c7_PAR_high, /* PAR top 32 bits */
+ c9_L2CTLR, /* Cortex A15/A7 L2 Control Register */
+ c10_PRRR, /* Primary Region Remap Register */
+ c10_NMRR, /* Normal Memory Remap Register */
+ c12_VBAR, /* Vector Base Address Register */
+ c13_CID, /* Context ID Register */
+ c13_TID_URW, /* Thread ID, User R/W */
+ c13_TID_URO, /* Thread ID, User R/O */
+ c13_TID_PRIV, /* Thread ID, Privileged */
+ c14_CNTKCTL, /* Timer Control Register (PL1) */
+ c10_AMAIR0, /* Auxilary Memory Attribute Indirection Reg0 */
+ c10_AMAIR1, /* Auxilary Memory Attribute Indirection Reg1 */
+ NR_CP15_REGS /* Number of regs (incl. invalid) */
+};
+
+struct kvm_cpu_context {
+ struct kvm_regs gp_regs;
+ struct vfp_hard_struct vfp;
+ u32 cp15[NR_CP15_REGS];
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
struct kvm_vcpu_arch {
- struct kvm_regs regs;
+ struct kvm_cpu_context ctxt;
int target; /* Processor target */
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
- /* System control coprocessor (cp15) */
- u32 cp15[NR_CP15_REGS];
-
/* The CPU type we expose to the VM */
u32 midr;
@@ -111,9 +152,6 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Floating point registers (VFP and Advanced SIMD/NEON) */
- struct vfp_hard_struct vfp_guest;
-
/* Host FP context */
kvm_cpu_context_t *host_cpu_context;
@@ -158,12 +196,14 @@ struct kvm_vcpu_stat {
u64 exits;
};
+#define vcpu_cp15(v,r) (v)->arch.ctxt.cp15[r]
+
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
-u64 kvm_call_hyp(void *hypfn, ...);
+unsigned long kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -220,6 +260,11 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
}
+static inline void __cpu_init_stage2(void)
+{
+ kvm_call_hyp(__init_stage2_translation);
+}
+
static inline int kvm_arch_dev_ioctl_check_extension(long ext)
{
return 0;
@@ -242,5 +287,20 @@ static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ return -ENXIO;
+}
+static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ return -ENXIO;
+}
+static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ return -ENXIO;
+}
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
new file mode 100644
index 000000000000..f0e860761380
--- /dev/null
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_HYP_H__
+#define __ARM_KVM_HYP_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/vfp.h>
+
+#define __hyp_text __section(.hyp.text) notrace
+
+#define kern_hyp_va(v) (v)
+#define hyp_kern_va(v) (v)
+
+#define __ACCESS_CP15(CRn, Op1, CRm, Op2) \
+ "mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32
+#define __ACCESS_CP15_64(Op1, CRm) \
+ "mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64
+#define __ACCESS_VFP(CRn) \
+ "mrc", "mcr", __stringify(p10, 7, %0, CRn, cr0, 0), u32
+
+#define __write_sysreg(v, r, w, c, t) asm volatile(w " " c : : "r" ((t)(v)))
+#define write_sysreg(v, ...) __write_sysreg(v, __VA_ARGS__)
+
+#define __read_sysreg(r, w, c, t) ({ \
+ t __val; \
+ asm volatile(r " " c : "=r" (__val)); \
+ __val; \
+})
+#define read_sysreg(...) __read_sysreg(__VA_ARGS__)
+
+#define write_special(v, r) \
+ asm volatile("msr " __stringify(r) ", %0" : : "r" (v))
+#define read_special(r) ({ \
+ u32 __val; \
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
+ __val; \
+})
+
+#define TTBR0 __ACCESS_CP15_64(0, c2)
+#define TTBR1 __ACCESS_CP15_64(1, c2)
+#define VTTBR __ACCESS_CP15_64(6, c2)
+#define PAR __ACCESS_CP15_64(0, c7)
+#define CNTV_CVAL __ACCESS_CP15_64(3, c14)
+#define CNTVOFF __ACCESS_CP15_64(4, c14)
+
+#define MIDR __ACCESS_CP15(c0, 0, c0, 0)
+#define CSSELR __ACCESS_CP15(c0, 2, c0, 0)
+#define VPIDR __ACCESS_CP15(c0, 4, c0, 0)
+#define VMPIDR __ACCESS_CP15(c0, 4, c0, 5)
+#define SCTLR __ACCESS_CP15(c1, 0, c0, 0)
+#define CPACR __ACCESS_CP15(c1, 0, c0, 2)
+#define HCR __ACCESS_CP15(c1, 4, c1, 0)
+#define HDCR __ACCESS_CP15(c1, 4, c1, 1)
+#define HCPTR __ACCESS_CP15(c1, 4, c1, 2)
+#define HSTR __ACCESS_CP15(c1, 4, c1, 3)
+#define TTBCR __ACCESS_CP15(c2, 0, c0, 2)
+#define HTCR __ACCESS_CP15(c2, 4, c0, 2)
+#define VTCR __ACCESS_CP15(c2, 4, c1, 2)
+#define DACR __ACCESS_CP15(c3, 0, c0, 0)
+#define DFSR __ACCESS_CP15(c5, 0, c0, 0)
+#define IFSR __ACCESS_CP15(c5, 0, c0, 1)
+#define ADFSR __ACCESS_CP15(c5, 0, c1, 0)
+#define AIFSR __ACCESS_CP15(c5, 0, c1, 1)
+#define HSR __ACCESS_CP15(c5, 4, c2, 0)
+#define DFAR __ACCESS_CP15(c6, 0, c0, 0)
+#define IFAR __ACCESS_CP15(c6, 0, c0, 2)
+#define HDFAR __ACCESS_CP15(c6, 4, c0, 0)
+#define HIFAR __ACCESS_CP15(c6, 4, c0, 2)
+#define HPFAR __ACCESS_CP15(c6, 4, c0, 4)
+#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
+#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
+#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
+#define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4)
+#define PRRR __ACCESS_CP15(c10, 0, c2, 0)
+#define NMRR __ACCESS_CP15(c10, 0, c2, 1)
+#define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0)
+#define AMAIR1 __ACCESS_CP15(c10, 0, c3, 1)
+#define VBAR __ACCESS_CP15(c12, 0, c0, 0)
+#define CID __ACCESS_CP15(c13, 0, c0, 1)
+#define TID_URW __ACCESS_CP15(c13, 0, c0, 2)
+#define TID_URO __ACCESS_CP15(c13, 0, c0, 3)
+#define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4)
+#define HTPIDR __ACCESS_CP15(c13, 4, c0, 2)
+#define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0)
+#define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1)
+#define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0)
+
+#define VFP_FPEXC __ACCESS_VFP(FPEXC)
+
+/* AArch64 compatibility macros, only for the timer so far */
+#define read_sysreg_el0(r) read_sysreg(r##_el0)
+#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0)
+
+#define cntv_ctl_el0 CNTV_CTL
+#define cntv_cval_el0 CNTV_CVAL
+#define cntvoff_el2 CNTVOFF
+#define cnthctl_el2 CNTHCTL
+
+void __timer_save_state(struct kvm_vcpu *vcpu);
+void __timer_restore_state(struct kvm_vcpu *vcpu);
+
+void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
+void __sysreg_save_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
+
+void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
+void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
+static inline bool __vfp_enabled(void)
+{
+ return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
+}
+
+void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
+void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
+
+int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_context *host);
+int asmlinkage __hyp_do_panic(const char *, int, u32);
+
+#endif /* __ARM_KVM_HYP_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a520b7987a29..da44be9db4fa 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -179,7 +179,7 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+ return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
}
static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index a5635444ca41..057d381f4e57 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -2,9 +2,6 @@
#define ASMARM_PCI_H
#ifdef __KERNEL__
-#include <asm-generic/pci-dma-compat.h>
-#include <asm-generic/pci-bridge.h>
-
#include <asm/mach/pci.h> /* for pci_sys_data */
extern unsigned long pcibios_min_io;
@@ -41,5 +38,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
}
#endif /* __KERNEL__ */
-
#endif
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index 4371f45c5784..d4ceaf5f299b 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -74,6 +74,15 @@ static inline bool is_hyp_mode_mismatched(void)
{
return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
}
+
+static inline bool is_kernel_in_hyp_mode(void)
+{
+ return false;
+}
+
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 871b8267d211..27d05813ff09 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -170,41 +170,11 @@ int main(void)
DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE);
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
- DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
- DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr));
- DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15));
- DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest));
- DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context));
- DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
- DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
- DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
- DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs));
- DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs));
- DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs));
- DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
- DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
- DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
- DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr));
- DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
- DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr));
- DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar));
- DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar));
- DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
- DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
- DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
- DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
- DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
- DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
- DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
- DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
- DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
- DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
- DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
- DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
- DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
- DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
- DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
- DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
+ DEFINE(VCPU_GUEST_CTXT, offsetof(struct kvm_vcpu, arch.ctxt));
+ DEFINE(VCPU_HOST_CTXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
+ DEFINE(CPU_CTXT_VFP, offsetof(struct kvm_cpu_context, vfp));
+ DEFINE(CPU_CTXT_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
+ DEFINE(GP_REGS_USR, offsetof(struct kvm_regs, usr_regs));
#endif
BLANK();
#ifdef CONFIG_VDSO
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 7d0cba6f1cc5..139791ed473d 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -176,13 +176,13 @@ static struct resource mem_res[] = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
},
{
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
}
};
@@ -851,7 +851,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
res->name = "System RAM";
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 37312f6749f3..baee70267f29 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8b60fde5ce48..b4139cbbbdd9 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -18,6 +18,11 @@
*(.proc.info.init) \
VMLINUX_SYMBOL(__proc_info_end) = .;
+#define HYPERVISOR_TEXT \
+ VMLINUX_SYMBOL(__hyp_text_start) = .; \
+ *(.hyp.text) \
+ VMLINUX_SYMBOL(__hyp_text_end) = .;
+
#define IDMAP_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__idmap_text_start) = .; \
@@ -108,6 +113,7 @@ SECTIONS
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
+ HYPERVISOR_TEXT
KPROBES_TEXT
*(.gnu.warning)
*(.glue_7)
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index c5eef02c52ba..eb1bf4309c13 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -17,6 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
KVM := ../../../virt/kvm
kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp/
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dda1959f0dde..3e0fb66d8e05 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -28,6 +28,7 @@
#include <linux/sched.h>
#include <linux/kvm.h>
#include <trace/events/kvm.h>
+#include <kvm/arm_pmu.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -265,6 +266,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_vgic_vcpu_destroy(vcpu);
+ kvm_pmu_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -320,6 +322,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
kvm_arm_set_running_vcpu(NULL);
+ kvm_timer_vcpu_put(vcpu);
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -506,18 +509,18 @@ static void kvm_arm_resume_guest(struct kvm *kvm)
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
vcpu->arch.pause = false;
- wake_up_interruptible(wq);
+ swake_up(wq);
}
}
static void vcpu_sleep(struct kvm_vcpu *vcpu)
{
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
- wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+ swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
(!vcpu->arch.pause)));
}
@@ -577,6 +580,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* non-preemptible context.
*/
preempt_disable();
+ kvm_pmu_flush_hwstate(vcpu);
kvm_timer_flush_hwstate(vcpu);
kvm_vgic_flush_hwstate(vcpu);
@@ -593,6 +597,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
vcpu->arch.power_off || vcpu->arch.pause) {
local_irq_enable();
+ kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
preempt_enable();
@@ -642,10 +647,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/*
- * We must sync the timer state before the vgic state so that
- * the vgic can properly sample the updated state of the
+ * We must sync the PMU and timer state before the vgic state so
+ * that the vgic can properly sample the updated state of the
* interrupt line.
*/
+ kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
@@ -823,11 +829,54 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
return 0;
}
+static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ default:
+ ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ default:
+ ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ default:
+ ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
+ break;
+ }
+
+ return ret;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
+ struct kvm_device_attr attr;
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
@@ -870,6 +919,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
return -E2BIG;
return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
}
+ case KVM_SET_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+ return kvm_arm_vcpu_set_attr(vcpu, &attr);
+ }
+ case KVM_GET_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+ return kvm_arm_vcpu_get_attr(vcpu, &attr);
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+ return kvm_arm_vcpu_has_attr(vcpu, &attr);
+ }
default:
return -EINVAL;
}
@@ -967,6 +1031,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
+static void cpu_init_stage2(void *dummy)
+{
+ __cpu_init_stage2();
+}
+
static void cpu_init_hyp_mode(void *dummy)
{
phys_addr_t boot_pgd_ptr;
@@ -982,9 +1051,10 @@ static void cpu_init_hyp_mode(void *dummy)
pgd_ptr = kvm_mmu_get_httbr();
stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
- vector_ptr = (unsigned long)__kvm_hyp_vector;
+ vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
+ __cpu_init_stage2();
kvm_arm_init_debug();
}
@@ -1035,6 +1105,82 @@ static inline void hyp_cpu_pm_init(void)
}
#endif
+static void teardown_common_resources(void)
+{
+ free_percpu(kvm_host_cpu_state);
+}
+
+static int init_common_resources(void)
+{
+ kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
+ if (!kvm_host_cpu_state) {
+ kvm_err("Cannot allocate host CPU state\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int init_subsystems(void)
+{
+ int err;
+
+ /*
+ * Init HYP view of VGIC
+ */
+ err = kvm_vgic_hyp_init();
+ switch (err) {
+ case 0:
+ vgic_present = true;
+ break;
+ case -ENODEV:
+ case -ENXIO:
+ vgic_present = false;
+ break;
+ default:
+ return err;
+ }
+
+ /*
+ * Init HYP architected timer support
+ */
+ err = kvm_timer_hyp_init();
+ if (err)
+ return err;
+
+ kvm_perf_init();
+ kvm_coproc_table_init();
+
+ return 0;
+}
+
+static void teardown_hyp_mode(void)
+{
+ int cpu;
+
+ if (is_kernel_in_hyp_mode())
+ return;
+
+ free_hyp_pgds();
+ for_each_possible_cpu(cpu)
+ free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+}
+
+static int init_vhe_mode(void)
+{
+ /*
+ * Execute the init code on each CPU.
+ */
+ on_each_cpu(cpu_init_stage2, NULL, 1);
+
+ /* set size of VMID supported by CPU */
+ kvm_vmid_bits = kvm_get_vmid_bits();
+ kvm_info("%d-bit VMID\n", kvm_vmid_bits);
+
+ kvm_info("VHE mode initialized successfully\n");
+ return 0;
+}
+
/**
* Inits Hyp-mode on all online CPUs
*/
@@ -1065,7 +1211,7 @@ static int init_hyp_mode(void)
stack_page = __get_free_page(GFP_KERNEL);
if (!stack_page) {
err = -ENOMEM;
- goto out_free_stack_pages;
+ goto out_err;
}
per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
@@ -1074,16 +1220,18 @@ static int init_hyp_mode(void)
/*
* Map the Hyp-code called directly from the host
*/
- err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
+ kvm_ksym_ref(__hyp_text_end));
if (err) {
kvm_err("Cannot map world-switch code\n");
- goto out_free_mappings;
+ goto out_err;
}
- err = create_hyp_mappings(__start_rodata, __end_rodata);
+ err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
+ kvm_ksym_ref(__end_rodata));
if (err) {
kvm_err("Cannot map rodata section\n");
- goto out_free_mappings;
+ goto out_err;
}
/*
@@ -1095,20 +1243,10 @@ static int init_hyp_mode(void)
if (err) {
kvm_err("Cannot map hyp stack\n");
- goto out_free_mappings;
+ goto out_err;
}
}
- /*
- * Map the host CPU structures
- */
- kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
- if (!kvm_host_cpu_state) {
- err = -ENOMEM;
- kvm_err("Cannot allocate host CPU state\n");
- goto out_free_mappings;
- }
-
for_each_possible_cpu(cpu) {
kvm_cpu_context_t *cpu_ctxt;
@@ -1117,7 +1255,7 @@ static int init_hyp_mode(void)
if (err) {
kvm_err("Cannot map host CPU state: %d\n", err);
- goto out_free_context;
+ goto out_err;
}
}
@@ -1126,34 +1264,22 @@ static int init_hyp_mode(void)
*/
on_each_cpu(cpu_init_hyp_mode, NULL, 1);
- /*
- * Init HYP view of VGIC
- */
- err = kvm_vgic_hyp_init();
- switch (err) {
- case 0:
- vgic_present = true;
- break;
- case -ENODEV:
- case -ENXIO:
- vgic_present = false;
- break;
- default:
- goto out_free_context;
- }
-
- /*
- * Init HYP architected timer support
- */
- err = kvm_timer_hyp_init();
- if (err)
- goto out_free_context;
-
#ifndef CONFIG_HOTPLUG_CPU
free_boot_hyp_pgd();
#endif
- kvm_perf_init();
+ cpu_notifier_register_begin();
+
+ err = __register_cpu_notifier(&hyp_init_cpu_nb);
+
+ cpu_notifier_register_done();
+
+ if (err) {
+ kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
+ goto out_err;
+ }
+
+ hyp_cpu_pm_init();
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
@@ -1162,14 +1288,9 @@ static int init_hyp_mode(void)
kvm_info("Hyp mode initialized successfully\n");
return 0;
-out_free_context:
- free_percpu(kvm_host_cpu_state);
-out_free_mappings:
- free_hyp_pgds();
-out_free_stack_pages:
- for_each_possible_cpu(cpu)
- free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+
out_err:
+ teardown_hyp_mode();
kvm_err("error initializing Hyp mode: %d\n", err);
return err;
}
@@ -1213,26 +1334,27 @@ int kvm_arch_init(void *opaque)
}
}
- cpu_notifier_register_begin();
-
- err = init_hyp_mode();
+ err = init_common_resources();
if (err)
- goto out_err;
+ return err;
- err = __register_cpu_notifier(&hyp_init_cpu_nb);
- if (err) {
- kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
+ if (is_kernel_in_hyp_mode())
+ err = init_vhe_mode();
+ else
+ err = init_hyp_mode();
+ if (err)
goto out_err;
- }
-
- cpu_notifier_register_done();
- hyp_cpu_pm_init();
+ err = init_subsystems();
+ if (err)
+ goto out_hyp;
- kvm_coproc_table_init();
return 0;
+
+out_hyp:
+ teardown_hyp_mode();
out_err:
- cpu_notifier_register_done();
+ teardown_common_resources();
return err;
}
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index f3d88dc388bc..1bb2b79c01ff 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -16,6 +16,8 @@
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+
+#include <linux/bsearch.h>
#include <linux/mm.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
@@ -54,8 +56,8 @@ static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
const struct coproc_reg *r,
u64 val)
{
- vcpu->arch.cp15[r->reg] = val & 0xffffffff;
- vcpu->arch.cp15[r->reg + 1] = val >> 32;
+ vcpu_cp15(vcpu, r->reg) = val & 0xffffffff;
+ vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
}
static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
@@ -63,9 +65,9 @@ static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
{
u64 val;
- val = vcpu->arch.cp15[r->reg + 1];
+ val = vcpu_cp15(vcpu, r->reg + 1);
val = val << 32;
- val = val | vcpu->arch.cp15[r->reg];
+ val = val | vcpu_cp15(vcpu, r->reg);
return val;
}
@@ -104,7 +106,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
* vcpu_id, but we read the 'U' bit from the underlying
* hardware directly.
*/
- vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+ vcpu_cp15(vcpu, c0_MPIDR) = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
(vcpu->vcpu_id & 3));
}
@@ -117,7 +119,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+ *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c1_ACTLR);
return true;
}
@@ -139,7 +141,7 @@ static bool access_l2ctlr(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+ *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c9_L2CTLR);
return true;
}
@@ -156,7 +158,7 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
ncores = min(ncores, 3U);
l2ctlr |= (ncores & 3) << 24;
- vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+ vcpu_cp15(vcpu, c9_L2CTLR) = l2ctlr;
}
static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
@@ -171,7 +173,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
else
actlr &= ~(1U << 6);
- vcpu->arch.cp15[c1_ACTLR] = actlr;
+ vcpu_cp15(vcpu, c1_ACTLR) = actlr;
}
/*
@@ -218,9 +220,9 @@ bool access_vm_reg(struct kvm_vcpu *vcpu,
BUG_ON(!p->is_write);
- vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
+ vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt1);
if (p->is_64bit)
- vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
+ vcpu_cp15(vcpu, r->reg + 1) = *vcpu_reg(vcpu, p->Rt2);
kvm_toggle_cache(vcpu, was_enabled);
return true;
@@ -381,17 +383,26 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
+static int check_reg_table(const struct coproc_reg *table, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 1; i < n; i++) {
+ if (cmp_reg(&table[i-1], &table[i]) >= 0) {
+ kvm_err("reg table %p out of order (%d)\n", table, i - 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
/* Target specific emulation tables */
static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
{
- unsigned int i;
-
- for (i = 1; i < table->num; i++)
- BUG_ON(cmp_reg(&table->table[i-1],
- &table->table[i]) >= 0);
-
+ BUG_ON(check_reg_table(table->table, table->num));
target_tables[table->target] = table;
}
@@ -405,29 +416,32 @@ static const struct coproc_reg *get_target_table(unsigned target, size_t *num)
return table->table;
}
+#define reg_to_match_value(x) \
+ ({ \
+ unsigned long val; \
+ val = (x)->CRn << 11; \
+ val |= (x)->CRm << 7; \
+ val |= (x)->Op1 << 4; \
+ val |= (x)->Op2 << 1; \
+ val |= !(x)->is_64bit; \
+ val; \
+ })
+
+static int match_reg(const void *key, const void *elt)
+{
+ const unsigned long pval = (unsigned long)key;
+ const struct coproc_reg *r = elt;
+
+ return pval - reg_to_match_value(r);
+}
+
static const struct coproc_reg *find_reg(const struct coproc_params *params,
const struct coproc_reg table[],
unsigned int num)
{
- unsigned int i;
-
- for (i = 0; i < num; i++) {
- const struct coproc_reg *r = &table[i];
-
- if (params->is_64bit != r->is_64)
- continue;
- if (params->CRn != r->CRn)
- continue;
- if (params->CRm != r->CRm)
- continue;
- if (params->Op1 != r->Op1)
- continue;
- if (params->Op2 != r->Op2)
- continue;
+ unsigned long pval = reg_to_match_value(params);
- return r;
- }
- return NULL;
+ return bsearch((void *)pval, table, num, sizeof(table[0]), match_reg);
}
static int emulate_cp15(struct kvm_vcpu *vcpu,
@@ -645,6 +659,9 @@ static struct coproc_reg invariant_cp15[] = {
{ CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR },
{ CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR },
+ { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
+ { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
+
{ CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 },
{ CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 },
{ CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 },
@@ -660,9 +677,6 @@ static struct coproc_reg invariant_cp15[] = {
{ CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 },
{ CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 },
{ CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 },
-
- { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
- { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
};
/*
@@ -901,7 +915,7 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
if (vfpid < num_fp_regs()) {
if (KVM_REG_SIZE(id) != 8)
return -ENOENT;
- return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpregs[vfpid],
+ return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpregs[vfpid],
id);
}
@@ -911,13 +925,13 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
switch (vfpid) {
case KVM_REG_ARM_VFP_FPEXC:
- return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpexc, id);
+ return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpexc, id);
case KVM_REG_ARM_VFP_FPSCR:
- return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpscr, id);
+ return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpscr, id);
case KVM_REG_ARM_VFP_FPINST:
- return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst, id);
+ return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst, id);
case KVM_REG_ARM_VFP_FPINST2:
- return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst2, id);
+ return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst2, id);
case KVM_REG_ARM_VFP_MVFR0:
val = fmrx(MVFR0);
return reg_to_user(uaddr, &val, id);
@@ -945,7 +959,7 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
if (vfpid < num_fp_regs()) {
if (KVM_REG_SIZE(id) != 8)
return -ENOENT;
- return reg_from_user(&vcpu->arch.vfp_guest.fpregs[vfpid],
+ return reg_from_user(&vcpu->arch.ctxt.vfp.fpregs[vfpid],
uaddr, id);
}
@@ -955,13 +969,13 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
switch (vfpid) {
case KVM_REG_ARM_VFP_FPEXC:
- return reg_from_user(&vcpu->arch.vfp_guest.fpexc, uaddr, id);
+ return reg_from_user(&vcpu->arch.ctxt.vfp.fpexc, uaddr, id);
case KVM_REG_ARM_VFP_FPSCR:
- return reg_from_user(&vcpu->arch.vfp_guest.fpscr, uaddr, id);
+ return reg_from_user(&vcpu->arch.ctxt.vfp.fpscr, uaddr, id);
case KVM_REG_ARM_VFP_FPINST:
- return reg_from_user(&vcpu->arch.vfp_guest.fpinst, uaddr, id);
+ return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst, uaddr, id);
case KVM_REG_ARM_VFP_FPINST2:
- return reg_from_user(&vcpu->arch.vfp_guest.fpinst2, uaddr, id);
+ return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst2, uaddr, id);
/* These are invariant. */
case KVM_REG_ARM_VFP_MVFR0:
if (reg_from_user(&val, uaddr, id))
@@ -1030,7 +1044,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
val = vcpu_cp15_reg64_get(vcpu, r);
ret = reg_to_user(uaddr, &val, reg->id);
} else if (KVM_REG_SIZE(reg->id) == 4) {
- ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+ ret = reg_to_user(uaddr, &vcpu_cp15(vcpu, r->reg), reg->id);
}
return ret;
@@ -1060,7 +1074,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!ret)
vcpu_cp15_reg64_set(vcpu, r, val);
} else if (KVM_REG_SIZE(reg->id) == 4) {
- ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+ ret = reg_from_user(&vcpu_cp15(vcpu, r->reg), uaddr, reg->id);
}
return ret;
@@ -1096,7 +1110,7 @@ static int write_demux_regids(u64 __user *uindices)
static u64 cp15_to_index(const struct coproc_reg *reg)
{
u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT);
- if (reg->is_64) {
+ if (reg->is_64bit) {
val |= KVM_REG_SIZE_U64;
val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
/*
@@ -1210,8 +1224,8 @@ void kvm_coproc_table_init(void)
unsigned int i;
/* Make sure tables are unique and in order. */
- for (i = 1; i < ARRAY_SIZE(cp15_regs); i++)
- BUG_ON(cmp_reg(&cp15_regs[i-1], &cp15_regs[i]) >= 0);
+ BUG_ON(check_reg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+ BUG_ON(check_reg_table(invariant_cp15, ARRAY_SIZE(invariant_cp15)));
/* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++)
@@ -1248,7 +1262,7 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
const struct coproc_reg *table;
/* Catch someone adding a register without putting in reset entry. */
- memset(vcpu->arch.cp15, 0x42, sizeof(vcpu->arch.cp15));
+ memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15));
/* Generic chip reset first (so target could override). */
reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
@@ -1257,6 +1271,6 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
reset_coproc_regs(vcpu, table, num);
for (num = 1; num < NR_CP15_REGS; num++)
- if (vcpu->arch.cp15[num] == 0x42424242)
- panic("Didn't reset vcpu->arch.cp15[%zi]", num);
+ if (vcpu_cp15(vcpu, num) == 0x42424242)
+ panic("Didn't reset vcpu_cp15(vcpu, %zi)", num);
}
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 88d24a3a9778..eef1759c2b65 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -37,7 +37,7 @@ struct coproc_reg {
unsigned long Op1;
unsigned long Op2;
- bool is_64;
+ bool is_64bit;
/* Trapped access from guest, if non-NULL. */
bool (*access)(struct kvm_vcpu *,
@@ -47,7 +47,7 @@ struct coproc_reg {
/* Initialization for vcpu. */
void (*reset)(struct kvm_vcpu *, const struct coproc_reg *);
- /* Index into vcpu->arch.cp15[], or 0 if we don't need to save it. */
+ /* Index into vcpu_cp15(vcpu, ...), or 0 if we don't need to save it. */
unsigned long reg;
/* Value (usually reset value) */
@@ -104,25 +104,25 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
const struct coproc_reg *r)
{
BUG_ON(!r->reg);
- BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
- vcpu->arch.cp15[r->reg] = 0xdecafbad;
+ BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
+ vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
}
static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
{
BUG_ON(!r->reg);
- BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
- vcpu->arch.cp15[r->reg] = r->val;
+ BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
+ vcpu_cp15(vcpu, r->reg) = r->val;
}
static inline void reset_unknown64(struct kvm_vcpu *vcpu,
const struct coproc_reg *r)
{
BUG_ON(!r->reg);
- BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.cp15));
+ BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
- vcpu->arch.cp15[r->reg] = 0xdecafbad;
- vcpu->arch.cp15[r->reg+1] = 0xd0c0ffee;
+ vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
+ vcpu_cp15(vcpu, r->reg+1) = 0xd0c0ffee;
}
static inline int cmp_reg(const struct coproc_reg *i1,
@@ -141,7 +141,7 @@ static inline int cmp_reg(const struct coproc_reg *i1,
return i1->Op1 - i2->Op1;
if (i1->Op2 != i2->Op2)
return i1->Op2 - i2->Op2;
- return i2->is_64 - i1->is_64;
+ return i2->is_64bit - i1->is_64bit;
}
@@ -150,8 +150,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
#define CRm64(_x) .CRn = _x, .CRm = 0
#define Op1(_x) .Op1 = _x
#define Op2(_x) .Op2 = _x
-#define is64 .is_64 = true
-#define is32 .is_64 = false
+#define is64 .is_64bit = true
+#define is32 .is_64bit = false
bool access_vm_reg(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index dc99159857b4..a494def3f195 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -112,7 +112,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = {
*/
unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
{
- unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs;
+ unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs;
unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
@@ -147,15 +147,15 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
case SVC_MODE:
- return &vcpu->arch.regs.KVM_ARM_SVC_spsr;
+ return &vcpu->arch.ctxt.gp_regs.KVM_ARM_SVC_spsr;
case ABT_MODE:
- return &vcpu->arch.regs.KVM_ARM_ABT_spsr;
+ return &vcpu->arch.ctxt.gp_regs.KVM_ARM_ABT_spsr;
case UND_MODE:
- return &vcpu->arch.regs.KVM_ARM_UND_spsr;
+ return &vcpu->arch.ctxt.gp_regs.KVM_ARM_UND_spsr;
case IRQ_MODE:
- return &vcpu->arch.regs.KVM_ARM_IRQ_spsr;
+ return &vcpu->arch.ctxt.gp_regs.KVM_ARM_IRQ_spsr;
case FIQ_MODE:
- return &vcpu->arch.regs.KVM_ARM_FIQ_spsr;
+ return &vcpu->arch.ctxt.gp_regs.KVM_ARM_FIQ_spsr;
default:
BUG();
}
@@ -266,8 +266,8 @@ void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
static u32 exc_vector_base(struct kvm_vcpu *vcpu)
{
- u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
- u32 vbar = vcpu->arch.cp15[c12_VBAR];
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+ u32 vbar = vcpu_cp15(vcpu, c12_VBAR);
if (sctlr & SCTLR_V)
return 0xffff0000;
@@ -282,7 +282,7 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
- u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode;
@@ -357,22 +357,22 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
if (is_pabt) {
/* Set IFAR and IFSR */
- vcpu->arch.cp15[c6_IFAR] = addr;
- is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
+ vcpu_cp15(vcpu, c6_IFAR) = addr;
+ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
/* Always give debug fault for now - should give guest a clue */
if (is_lpae)
- vcpu->arch.cp15[c5_IFSR] = 1 << 9 | 0x22;
+ vcpu_cp15(vcpu, c5_IFSR) = 1 << 9 | 0x22;
else
- vcpu->arch.cp15[c5_IFSR] = 2;
+ vcpu_cp15(vcpu, c5_IFSR) = 2;
} else { /* !iabt */
/* Set DFAR and DFSR */
- vcpu->arch.cp15[c6_DFAR] = addr;
- is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
+ vcpu_cp15(vcpu, c6_DFAR) = addr;
+ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
/* Always give debug fault for now - should give guest a clue */
if (is_lpae)
- vcpu->arch.cp15[c5_DFSR] = 1 << 9 | 0x22;
+ vcpu_cp15(vcpu, c5_DFSR) = 1 << 9 | 0x22;
else
- vcpu->arch.cp15[c5_DFSR] = 2;
+ vcpu_cp15(vcpu, c5_DFSR) = 2;
}
}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 99361f11354a..9093ed0f8b2a 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -25,7 +25,6 @@
#include <asm/cputype.h>
#include <asm/uaccess.h>
#include <asm/kvm.h>
-#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
@@ -55,7 +54,7 @@ static u64 core_reg_offset_from_id(u64 id)
static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
u32 __user *uaddr = (u32 __user *)(long)reg->addr;
- struct kvm_regs *regs = &vcpu->arch.regs;
+ struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
u64 off;
if (KVM_REG_SIZE(reg->id) != 4)
@@ -72,7 +71,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
u32 __user *uaddr = (u32 __user *)(long)reg->addr;
- struct kvm_regs *regs = &vcpu->arch.regs;
+ struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
u64 off, val;
if (KVM_REG_SIZE(reg->id) != 4)
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3ede90d8b20b..3f1ef0dbc899 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -147,13 +147,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
switch (exception_index) {
case ARM_EXCEPTION_IRQ:
return 1;
- case ARM_EXCEPTION_UNDEFINED:
- kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
- kvm_vcpu_get_hyp_pc(vcpu));
- BUG();
- panic("KVM: Hypervisor undefined exception!\n");
- case ARM_EXCEPTION_DATA_ABORT:
- case ARM_EXCEPTION_PREF_ABORT:
case ARM_EXCEPTION_HVC:
/*
* See ARM ARM B1.14.1: "Hyp traps on instructions
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
new file mode 100644
index 000000000000..8dfa5f7f9290
--- /dev/null
+++ b/arch/arm/kvm/hyp/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for Kernel-based Virtual Machine module, HYP part
+#
+
+KVM=../../../../virt/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+
+obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
+obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
+obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c
new file mode 100644
index 000000000000..111bda8cdebd
--- /dev/null
+++ b/arch/arm/kvm/hyp/banked-sr.c
@@ -0,0 +1,77 @@
+/*
+ * Original code:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/kvm_hyp.h>
+
+__asm__(".arch_extension virt");
+
+void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->gp_regs.usr_regs.ARM_sp = read_special(SP_usr);
+ ctxt->gp_regs.usr_regs.ARM_pc = read_special(ELR_hyp);
+ ctxt->gp_regs.usr_regs.ARM_cpsr = read_special(SPSR);
+ ctxt->gp_regs.KVM_ARM_SVC_sp = read_special(SP_svc);
+ ctxt->gp_regs.KVM_ARM_SVC_lr = read_special(LR_svc);
+ ctxt->gp_regs.KVM_ARM_SVC_spsr = read_special(SPSR_svc);
+ ctxt->gp_regs.KVM_ARM_ABT_sp = read_special(SP_abt);
+ ctxt->gp_regs.KVM_ARM_ABT_lr = read_special(LR_abt);
+ ctxt->gp_regs.KVM_ARM_ABT_spsr = read_special(SPSR_abt);
+ ctxt->gp_regs.KVM_ARM_UND_sp = read_special(SP_und);
+ ctxt->gp_regs.KVM_ARM_UND_lr = read_special(LR_und);
+ ctxt->gp_regs.KVM_ARM_UND_spsr = read_special(SPSR_und);
+ ctxt->gp_regs.KVM_ARM_IRQ_sp = read_special(SP_irq);
+ ctxt->gp_regs.KVM_ARM_IRQ_lr = read_special(LR_irq);
+ ctxt->gp_regs.KVM_ARM_IRQ_spsr = read_special(SPSR_irq);
+ ctxt->gp_regs.KVM_ARM_FIQ_r8 = read_special(R8_fiq);
+ ctxt->gp_regs.KVM_ARM_FIQ_r9 = read_special(R9_fiq);
+ ctxt->gp_regs.KVM_ARM_FIQ_r10 = read_special(R10_fiq);
+ ctxt->gp_regs.KVM_ARM_FIQ_fp = read_special(R11_fiq);
+ ctxt->gp_regs.KVM_ARM_FIQ_ip = read_special(R12_fiq);
+ ctxt->gp_regs.KVM_ARM_FIQ_sp = read_special(SP_fiq);
+ ctxt->gp_regs.KVM_ARM_FIQ_lr = read_special(LR_fiq);
+ ctxt->gp_regs.KVM_ARM_FIQ_spsr = read_special(SPSR_fiq);
+}
+
+void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt)
+{
+ write_special(ctxt->gp_regs.usr_regs.ARM_sp, SP_usr);
+ write_special(ctxt->gp_regs.usr_regs.ARM_pc, ELR_hyp);
+ write_special(ctxt->gp_regs.usr_regs.ARM_cpsr, SPSR_cxsf);
+ write_special(ctxt->gp_regs.KVM_ARM_SVC_sp, SP_svc);
+ write_special(ctxt->gp_regs.KVM_ARM_SVC_lr, LR_svc);
+ write_special(ctxt->gp_regs.KVM_ARM_SVC_spsr, SPSR_svc);
+ write_special(ctxt->gp_regs.KVM_ARM_ABT_sp, SP_abt);
+ write_special(ctxt->gp_regs.KVM_ARM_ABT_lr, LR_abt);
+ write_special(ctxt->gp_regs.KVM_ARM_ABT_spsr, SPSR_abt);
+ write_special(ctxt->gp_regs.KVM_ARM_UND_sp, SP_und);
+ write_special(ctxt->gp_regs.KVM_ARM_UND_lr, LR_und);
+ write_special(ctxt->gp_regs.KVM_ARM_UND_spsr, SPSR_und);
+ write_special(ctxt->gp_regs.KVM_ARM_IRQ_sp, SP_irq);
+ write_special(ctxt->gp_regs.KVM_ARM_IRQ_lr, LR_irq);
+ write_special(ctxt->gp_regs.KVM_ARM_IRQ_spsr, SPSR_irq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_r8, R8_fiq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_r9, R9_fiq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_r10, R10_fiq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_fp, R11_fiq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_ip, R12_fiq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_sp, SP_fiq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_lr, LR_fiq);
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_spsr, SPSR_fiq);
+}
diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c
new file mode 100644
index 000000000000..c4782812714c
--- /dev/null
+++ b/arch/arm/kvm/hyp/cp15-sr.c
@@ -0,0 +1,84 @@
+/*
+ * Original code:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/kvm_hyp.h>
+
+static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx)
+{
+ return (u64 *)(ctxt->cp15 + idx);
+}
+
+void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR);
+ ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR);
+ ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR);
+ ctxt->cp15[c1_CPACR] = read_sysreg(CPACR);
+ *cp15_64(ctxt, c2_TTBR0) = read_sysreg(TTBR0);
+ *cp15_64(ctxt, c2_TTBR1) = read_sysreg(TTBR1);
+ ctxt->cp15[c2_TTBCR] = read_sysreg(TTBCR);
+ ctxt->cp15[c3_DACR] = read_sysreg(DACR);
+ ctxt->cp15[c5_DFSR] = read_sysreg(DFSR);
+ ctxt->cp15[c5_IFSR] = read_sysreg(IFSR);
+ ctxt->cp15[c5_ADFSR] = read_sysreg(ADFSR);
+ ctxt->cp15[c5_AIFSR] = read_sysreg(AIFSR);
+ ctxt->cp15[c6_DFAR] = read_sysreg(DFAR);
+ ctxt->cp15[c6_IFAR] = read_sysreg(IFAR);
+ *cp15_64(ctxt, c7_PAR) = read_sysreg(PAR);
+ ctxt->cp15[c10_PRRR] = read_sysreg(PRRR);
+ ctxt->cp15[c10_NMRR] = read_sysreg(NMRR);
+ ctxt->cp15[c10_AMAIR0] = read_sysreg(AMAIR0);
+ ctxt->cp15[c10_AMAIR1] = read_sysreg(AMAIR1);
+ ctxt->cp15[c12_VBAR] = read_sysreg(VBAR);
+ ctxt->cp15[c13_CID] = read_sysreg(CID);
+ ctxt->cp15[c13_TID_URW] = read_sysreg(TID_URW);
+ ctxt->cp15[c13_TID_URO] = read_sysreg(TID_URO);
+ ctxt->cp15[c13_TID_PRIV] = read_sysreg(TID_PRIV);
+ ctxt->cp15[c14_CNTKCTL] = read_sysreg(CNTKCTL);
+}
+
+void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->cp15[c0_MPIDR], VMPIDR);
+ write_sysreg(ctxt->cp15[c0_CSSELR], CSSELR);
+ write_sysreg(ctxt->cp15[c1_SCTLR], SCTLR);
+ write_sysreg(ctxt->cp15[c1_CPACR], CPACR);
+ write_sysreg(*cp15_64(ctxt, c2_TTBR0), TTBR0);
+ write_sysreg(*cp15_64(ctxt, c2_TTBR1), TTBR1);
+ write_sysreg(ctxt->cp15[c2_TTBCR], TTBCR);
+ write_sysreg(ctxt->cp15[c3_DACR], DACR);
+ write_sysreg(ctxt->cp15[c5_DFSR], DFSR);
+ write_sysreg(ctxt->cp15[c5_IFSR], IFSR);
+ write_sysreg(ctxt->cp15[c5_ADFSR], ADFSR);
+ write_sysreg(ctxt->cp15[c5_AIFSR], AIFSR);
+ write_sysreg(ctxt->cp15[c6_DFAR], DFAR);
+ write_sysreg(ctxt->cp15[c6_IFAR], IFAR);
+ write_sysreg(*cp15_64(ctxt, c7_PAR), PAR);
+ write_sysreg(ctxt->cp15[c10_PRRR], PRRR);
+ write_sysreg(ctxt->cp15[c10_NMRR], NMRR);
+ write_sysreg(ctxt->cp15[c10_AMAIR0], AMAIR0);
+ write_sysreg(ctxt->cp15[c10_AMAIR1], AMAIR1);
+ write_sysreg(ctxt->cp15[c12_VBAR], VBAR);
+ write_sysreg(ctxt->cp15[c13_CID], CID);
+ write_sysreg(ctxt->cp15[c13_TID_URW], TID_URW);
+ write_sysreg(ctxt->cp15[c13_TID_URO], TID_URO);
+ write_sysreg(ctxt->cp15[c13_TID_PRIV], TID_PRIV);
+ write_sysreg(ctxt->cp15[c14_CNTKCTL], CNTKCTL);
+}
diff --git a/arch/arm/kvm/hyp/entry.S b/arch/arm/kvm/hyp/entry.S
new file mode 100644
index 000000000000..21c238871c9e
--- /dev/null
+++ b/arch/arm/kvm/hyp/entry.S
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm_arm.h>
+
+ .arch_extension virt
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+#define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR)
+
+/* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */
+ENTRY(__guest_enter)
+ @ Save host registers
+ add r1, r1, #(USR_REGS_OFFSET + S_R4)
+ stm r1!, {r4-r12}
+ str lr, [r1, #4] @ Skip SP_usr (already saved)
+
+ @ Restore guest registers
+ add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
+ ldr lr, [r0, #S_LR]
+ ldm r0, {r0-r12}
+
+ clrex
+ eret
+ENDPROC(__guest_enter)
+
+ENTRY(__guest_exit)
+ /*
+ * return convention:
+ * guest r0, r1, r2 saved on the stack
+ * r0: vcpu pointer
+ * r1: exception code
+ */
+
+ add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3)
+ stm r2!, {r3-r12}
+ str lr, [r2, #4]
+ add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
+ pop {r3, r4, r5} @ r0, r1, r2
+ stm r2, {r3-r5}
+
+ ldr r0, [r0, #VCPU_HOST_CTXT]
+ add r0, r0, #(USR_REGS_OFFSET + S_R4)
+ ldm r0!, {r4-r12}
+ ldr lr, [r0, #4]
+
+ mov r0, r1
+ bx lr
+ENDPROC(__guest_exit)
+
+/*
+ * If VFPv3 support is not available, then we will not switch the VFP
+ * registers; however cp10 and cp11 accesses will still trap and fallback
+ * to the regular coprocessor emulation code, which currently will
+ * inject an undefined exception to the guest.
+ */
+#ifdef CONFIG_VFPv3
+ENTRY(__vfp_guest_restore)
+ push {r3, r4, lr}
+
+ @ NEON/VFP used. Turn on VFP access.
+ mrc p15, 4, r1, c1, c1, 2 @ HCPTR
+ bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11))
+ mcr p15, 4, r1, c1, c1, 2 @ HCPTR
+ isb
+
+ @ Switch VFP/NEON hardware state to the guest's
+ mov r4, r0
+ ldr r0, [r0, #VCPU_HOST_CTXT]
+ add r0, r0, #CPU_CTXT_VFP
+ bl __vfp_save_state
+ add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP)
+ bl __vfp_restore_state
+
+ pop {r3, r4, lr}
+ pop {r0, r1, r2}
+ clrex
+ eret
+ENDPROC(__vfp_guest_restore)
+#endif
+
+ .popsection
+
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S
new file mode 100644
index 000000000000..78091383a5d9
--- /dev/null
+++ b/arch/arm/kvm/hyp/hyp-entry.S
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/linkage.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+
+ .arch_extension virt
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+.macro load_vcpu reg
+ mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR
+.endm
+
+/********************************************************************
+ * Hypervisor exception vector and handlers
+ *
+ *
+ * The KVM/ARM Hypervisor ABI is defined as follows:
+ *
+ * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
+ * instruction is issued since all traps are disabled when running the host
+ * kernel as per the Hyp-mode initialization at boot time.
+ *
+ * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
+ * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
+ * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
+ * instructions are called from within Hyp-mode.
+ *
+ * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
+ * Switching to Hyp mode is done through a simple HVC #0 instruction. The
+ * exception vector code will check that the HVC comes from VMID==0.
+ * - r0 contains a pointer to a HYP function
+ * - r1, r2, and r3 contain arguments to the above function.
+ * - The HYP function will be called with its arguments in r0, r1 and r2.
+ * On HYP function return, we return directly to SVC.
+ *
+ * Note that the above is used to execute code in Hyp-mode from a host-kernel
+ * point of view, and is a different concept from performing a world-switch and
+ * executing guest code SVC mode (with a VMID != 0).
+ */
+
+ .align 5
+__kvm_hyp_vector:
+ .global __kvm_hyp_vector
+
+ @ Hyp-mode exception vector
+ W(b) hyp_reset
+ W(b) hyp_undef
+ W(b) hyp_svc
+ W(b) hyp_pabt
+ W(b) hyp_dabt
+ W(b) hyp_hvc
+ W(b) hyp_irq
+ W(b) hyp_fiq
+
+.macro invalid_vector label, cause
+ .align
+\label: mov r0, #\cause
+ b __hyp_panic
+.endm
+
+ invalid_vector hyp_reset ARM_EXCEPTION_RESET
+ invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED
+ invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE
+ invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT
+ invalid_vector hyp_dabt ARM_EXCEPTION_DATA_ABORT
+ invalid_vector hyp_fiq ARM_EXCEPTION_FIQ
+
+ENTRY(__hyp_do_panic)
+ mrs lr, cpsr
+ bic lr, lr, #MODE_MASK
+ orr lr, lr, #SVC_MODE
+THUMB( orr lr, lr, #PSR_T_BIT )
+ msr spsr_cxsf, lr
+ ldr lr, =panic
+ msr ELR_hyp, lr
+ ldr lr, =kvm_call_hyp
+ clrex
+ eret
+ENDPROC(__hyp_do_panic)
+
+hyp_hvc:
+ /*
+ * Getting here is either because of a trap from a guest,
+ * or from executing HVC from the host kernel, which means
+ * "do something in Hyp mode".
+ */
+ push {r0, r1, r2}
+
+ @ Check syndrome register
+ mrc p15, 4, r1, c5, c2, 0 @ HSR
+ lsr r0, r1, #HSR_EC_SHIFT
+ cmp r0, #HSR_EC_HVC
+ bne guest_trap @ Not HVC instr.
+
+ /*
+ * Let's check if the HVC came from VMID 0 and allow simple
+ * switch to Hyp mode
+ */
+ mrrc p15, 6, r0, r2, c2
+ lsr r2, r2, #16
+ and r2, r2, #0xff
+ cmp r2, #0
+ bne guest_trap @ Guest called HVC
+
+ /*
+ * Getting here means host called HVC, we shift parameters and branch
+ * to Hyp function.
+ */
+ pop {r0, r1, r2}
+
+ /* Check for __hyp_get_vectors */
+ cmp r0, #-1
+ mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
+ beq 1f
+
+ push {lr}
+
+ mov lr, r0
+ mov r0, r1
+ mov r1, r2
+ mov r2, r3
+
+THUMB( orr lr, #1)
+ blx lr @ Call the HYP function
+
+ pop {lr}
+1: eret
+
+guest_trap:
+ load_vcpu r0 @ Load VCPU pointer to r0
+
+#ifdef CONFIG_VFPv3
+ @ Check for a VFP access
+ lsr r1, r1, #HSR_EC_SHIFT
+ cmp r1, #HSR_EC_CP_0_13
+ beq __vfp_guest_restore
+#endif
+
+ mov r1, #ARM_EXCEPTION_HVC
+ b __guest_exit
+
+hyp_irq:
+ push {r0, r1, r2}
+ mov r1, #ARM_EXCEPTION_IRQ
+ load_vcpu r0 @ Load VCPU pointer to r0
+ b __guest_exit
+
+ .ltorg
+
+ .popsection
diff --git a/arch/arm/kvm/hyp/s2-setup.c b/arch/arm/kvm/hyp/s2-setup.c
new file mode 100644
index 000000000000..7be39af2ed6c
--- /dev/null
+++ b/arch/arm/kvm/hyp/s2-setup.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/types.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+
+void __hyp_text __init_stage2_translation(void)
+{
+ u64 val;
+
+ val = read_sysreg(VTCR) & ~VTCR_MASK;
+
+ val |= read_sysreg(HTCR) & VTCR_HTCR_SH;
+ val |= KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S;
+
+ write_sysreg(val, VTCR);
+}
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
new file mode 100644
index 000000000000..b13caa90cd44
--- /dev/null
+++ b/arch/arm/kvm/hyp/switch.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+
+__asm__(".arch_extension virt");
+
+/*
+ * Activate the traps, saving the host's fpexc register before
+ * overwriting it. We'll restore it on VM exit.
+ */
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
+{
+ u32 val;
+
+ /*
+ * We are about to set HCPTR.TCP10/11 to trap all floating point
+ * register accesses to HYP, however, the ARM ARM clearly states that
+ * traps are only taken to HYP if the operation would not otherwise
+ * trap to SVC. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to SVC, when setting the TCP bits.
+ */
+ val = read_sysreg(VFP_FPEXC);
+ *fpexc_host = val;
+ if (!(val & FPEXC_EN)) {
+ write_sysreg(val | FPEXC_EN, VFP_FPEXC);
+ isb();
+ }
+
+ write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR);
+ /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
+ write_sysreg(HSTR_T(15), HSTR);
+ write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
+ val = read_sysreg(HDCR);
+ write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR);
+}
+
+static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ u32 val;
+
+ write_sysreg(0, HCR);
+ write_sysreg(0, HSTR);
+ val = read_sysreg(HDCR);
+ write_sysreg(val & ~(HDCR_TPM | HDCR_TPMCR), HDCR);
+ write_sysreg(0, HCPTR);
+}
+
+static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+ write_sysreg(kvm->arch.vttbr, VTTBR);
+ write_sysreg(vcpu->arch.midr, VPIDR);
+}
+
+static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+ write_sysreg(0, VTTBR);
+ write_sysreg(read_sysreg(MIDR), VPIDR);
+}
+
+static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+{
+ __vgic_v2_save_state(vcpu);
+}
+
+static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+ __vgic_v2_restore_state(vcpu);
+}
+
+static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u32 hsr = read_sysreg(HSR);
+ u8 ec = hsr >> HSR_EC_SHIFT;
+ u32 hpfar, far;
+
+ vcpu->arch.fault.hsr = hsr;
+
+ if (ec == HSR_EC_IABT)
+ far = read_sysreg(HIFAR);
+ else if (ec == HSR_EC_DABT)
+ far = read_sysreg(HDFAR);
+ else
+ return true;
+
+ /*
+ * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
+ *
+ * Abort on the stage 2 translation for a memory access from a
+ * Non-secure PL1 or PL0 mode:
+ *
+ * For any Access flag fault or Translation fault, and also for any
+ * Permission fault on the stage 2 translation of a memory access
+ * made as part of a translation table walk for a stage 1 translation,
+ * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
+ * is UNKNOWN.
+ */
+ if (!(hsr & HSR_DABT_S1PTW) && (hsr & HSR_FSC_TYPE) == FSC_PERM) {
+ u64 par, tmp;
+
+ par = read_sysreg(PAR);
+ write_sysreg(far, ATS1CPR);
+ isb();
+
+ tmp = read_sysreg(PAR);
+ write_sysreg(par, PAR);
+
+ if (unlikely(tmp & 1))
+ return false; /* Translation failed, back to guest */
+
+ hpfar = ((tmp >> 12) & ((1UL << 28) - 1)) << 4;
+ } else {
+ hpfar = read_sysreg(HPFAR);
+ }
+
+ vcpu->arch.fault.hxfar = far;
+ vcpu->arch.fault.hpfar = hpfar;
+ return true;
+}
+
+static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool fp_enabled;
+ u64 exit_code;
+ u32 fpexc;
+
+ vcpu = kern_hyp_va(vcpu);
+ write_sysreg(vcpu, HTPIDR);
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ __sysreg_save_state(host_ctxt);
+ __banked_save_state(host_ctxt);
+
+ __activate_traps(vcpu, &fpexc);
+ __activate_vm(vcpu);
+
+ __vgic_restore_state(vcpu);
+ __timer_restore_state(vcpu);
+
+ __sysreg_restore_state(guest_ctxt);
+ __banked_restore_state(guest_ctxt);
+
+ /* Jump in the fire! */
+again:
+ exit_code = __guest_enter(vcpu, host_ctxt);
+ /* And we're baaack! */
+
+ if (exit_code == ARM_EXCEPTION_HVC && !__populate_fault_info(vcpu))
+ goto again;
+
+ fp_enabled = __vfp_enabled();
+
+ __banked_save_state(guest_ctxt);
+ __sysreg_save_state(guest_ctxt);
+ __timer_save_state(vcpu);
+ __vgic_save_state(vcpu);
+
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+
+ __banked_restore_state(host_ctxt);
+ __sysreg_restore_state(host_ctxt);
+
+ if (fp_enabled) {
+ __vfp_save_state(&guest_ctxt->vfp);
+ __vfp_restore_state(&host_ctxt->vfp);
+ }
+
+ write_sysreg(fpexc, VFP_FPEXC);
+
+ return exit_code;
+}
+
+__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+static const char * const __hyp_panic_string[] = {
+ [ARM_EXCEPTION_RESET] = "\nHYP panic: RST PC:%08x CPSR:%08x",
+ [ARM_EXCEPTION_UNDEFINED] = "\nHYP panic: UNDEF PC:%08x CPSR:%08x",
+ [ARM_EXCEPTION_SOFTWARE] = "\nHYP panic: SVC PC:%08x CPSR:%08x",
+ [ARM_EXCEPTION_PREF_ABORT] = "\nHYP panic: PABRT PC:%08x CPSR:%08x",
+ [ARM_EXCEPTION_DATA_ABORT] = "\nHYP panic: DABRT PC:%08x ADDR:%08x",
+ [ARM_EXCEPTION_IRQ] = "\nHYP panic: IRQ PC:%08x CPSR:%08x",
+ [ARM_EXCEPTION_FIQ] = "\nHYP panic: FIQ PC:%08x CPSR:%08x",
+ [ARM_EXCEPTION_HVC] = "\nHYP panic: HVC PC:%08x CPSR:%08x",
+};
+
+void __hyp_text __noreturn __hyp_panic(int cause)
+{
+ u32 elr = read_special(ELR_hyp);
+ u32 val;
+
+ if (cause == ARM_EXCEPTION_DATA_ABORT)
+ val = read_sysreg(HDFAR);
+ else
+ val = read_special(SPSR);
+
+ if (read_sysreg(VTTBR)) {
+ struct kvm_vcpu *vcpu;
+ struct kvm_cpu_context *host_ctxt;
+
+ vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR);
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state(host_ctxt);
+ }
+
+ /* Call panic for real */
+ __hyp_do_panic(__hyp_panic_string[cause], elr, val);
+
+ unreachable();
+}
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
new file mode 100644
index 000000000000..a2636001e616
--- /dev/null
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -0,0 +1,70 @@
+/*
+ * Original code:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/kvm_hyp.h>
+
+/**
+ * Flush per-VMID TLBs
+ *
+ * __kvm_tlb_flush_vmid(struct kvm *kvm);
+ *
+ * We rely on the hardware to broadcast the TLB invalidation to all CPUs
+ * inside the inner-shareable domain (which is the case for all v7
+ * implementations). If we come across a non-IS SMP implementation, we'll
+ * have to use an IPI based mechanism. Until then, we stick to the simple
+ * hardware assisted version.
+ *
+ * As v7 does not support flushing per IPA, just nuke the whole TLB
+ * instead, ignoring the ipa value.
+ */
+static void __hyp_text __tlb_flush_vmid(struct kvm *kvm)
+{
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ kvm = kern_hyp_va(kvm);
+ write_sysreg(kvm->arch.vttbr, VTTBR);
+ isb();
+
+ write_sysreg(0, TLBIALLIS);
+ dsb(ish);
+ isb();
+
+ write_sysreg(0, VTTBR);
+}
+
+__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
+static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+{
+ __tlb_flush_vmid(kvm);
+}
+
+__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm,
+ phys_addr_t ipa);
+
+static void __hyp_text __tlb_flush_vm_context(void)
+{
+ write_sysreg(0, TLBIALLNSNHIS);
+ write_sysreg(0, ICIALLUIS);
+ dsb(ish);
+}
+
+__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void);
diff --git a/arch/arm/kvm/hyp/vfp.S b/arch/arm/kvm/hyp/vfp.S
new file mode 100644
index 000000000000..7c297e87eb8b
--- /dev/null
+++ b/arch/arm/kvm/hyp/vfp.S
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/vfpmacros.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+/* void __vfp_save_state(struct vfp_hard_struct *vfp); */
+ENTRY(__vfp_save_state)
+ push {r4, r5}
+ VFPFMRX r1, FPEXC
+
+ @ Make sure *really* VFP is enabled so we can touch the registers.
+ orr r5, r1, #FPEXC_EN
+ tst r5, #FPEXC_EX @ Check for VFP Subarchitecture
+ bic r5, r5, #FPEXC_EX @ FPEXC_EX disable
+ VFPFMXR FPEXC, r5
+ isb
+
+ VFPFMRX r2, FPSCR
+ beq 1f
+
+ @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are upredictable, so
+ @ we only need to save them if FPEXC_EX is set.
+ VFPFMRX r3, FPINST
+ tst r5, #FPEXC_FP2V
+ VFPFMRX r4, FPINST2, ne @ vmrsne
+1:
+ VFPFSTMIA r0, r5 @ Save VFP registers
+ stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2
+ pop {r4, r5}
+ bx lr
+ENDPROC(__vfp_save_state)
+
+/* void __vfp_restore_state(struct vfp_hard_struct *vfp);
+ * Assume FPEXC_EN is on and FPEXC_EX is off */
+ENTRY(__vfp_restore_state)
+ VFPFLDMIA r0, r1 @ Load VFP registers
+ ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2
+
+ VFPFMXR FPSCR, r1
+ tst r0, #FPEXC_EX @ Check for VFP Subarchitecture
+ beq 1f
+ VFPFMXR FPINST, r2
+ tst r0, #FPEXC_FP2V
+ VFPFMXR FPINST2, r3, ne
+1:
+ VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN)
+ bx lr
+ENDPROC(__vfp_restore_state)
+
+ .popsection
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 3988e72d16ff..1f9ae17476f9 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -84,14 +84,6 @@ __do_hyp_init:
orr r0, r0, r1
mcr p15, 4, r0, c2, c0, 2 @ HTCR
- mrc p15, 4, r1, c2, c1, 2 @ VTCR
- ldr r2, =VTCR_MASK
- bic r1, r1, r2
- bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits
- orr r1, r0, r1
- orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
- mcr p15, 4, r1, c2, c1, 2 @ VTCR
-
@ Use the same memory attributes for hyp. accesses as the kernel
@ (copy MAIRx ro HMAIRx).
mrc p15, 0, r0, c10, c2, 0
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 900ef6dd8f72..b1bd316f14c0 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -17,211 +17,14 @@
*/
#include <linux/linkage.h>
-#include <linux/const.h>
-#include <asm/unified.h>
-#include <asm/page.h>
-#include <asm/ptrace.h>
-#include <asm/asm-offsets.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-#include <asm/vfpmacros.h>
-#include "interrupts_head.S"
.text
-__kvm_hyp_code_start:
- .globl __kvm_hyp_code_start
-
-/********************************************************************
- * Flush per-VMID TLBs
- *
- * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
- *
- * We rely on the hardware to broadcast the TLB invalidation to all CPUs
- * inside the inner-shareable domain (which is the case for all v7
- * implementations). If we come across a non-IS SMP implementation, we'll
- * have to use an IPI based mechanism. Until then, we stick to the simple
- * hardware assisted version.
- *
- * As v7 does not support flushing per IPA, just nuke the whole TLB
- * instead, ignoring the ipa value.
- */
-ENTRY(__kvm_tlb_flush_vmid_ipa)
- push {r2, r3}
-
- dsb ishst
- add r0, r0, #KVM_VTTBR
- ldrd r2, r3, [r0]
- mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
- isb
- mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
- dsb ish
- isb
- mov r2, #0
- mov r3, #0
- mcrr p15, 6, r2, r3, c2 @ Back to VMID #0
- isb @ Not necessary if followed by eret
-
- pop {r2, r3}
- bx lr
-ENDPROC(__kvm_tlb_flush_vmid_ipa)
-
-/**
- * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
- *
- * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
- * parameter
- */
-
-ENTRY(__kvm_tlb_flush_vmid)
- b __kvm_tlb_flush_vmid_ipa
-ENDPROC(__kvm_tlb_flush_vmid)
-
-/********************************************************************
- * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
- * domain, for all VMIDs
- *
- * void __kvm_flush_vm_context(void);
- */
-ENTRY(__kvm_flush_vm_context)
- mov r0, #0 @ rn parameter for c15 flushes is SBZ
-
- /* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */
- mcr p15, 4, r0, c8, c3, 4
- /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
- mcr p15, 0, r0, c7, c1, 0
- dsb ish
- isb @ Not necessary if followed by eret
-
- bx lr
-ENDPROC(__kvm_flush_vm_context)
-
-
-/********************************************************************
- * Hypervisor world-switch code
- *
- *
- * int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
- */
-ENTRY(__kvm_vcpu_run)
- @ Save the vcpu pointer
- mcr p15, 4, vcpu, c13, c0, 2 @ HTPIDR
-
- save_host_regs
-
- restore_vgic_state
- restore_timer_state
-
- @ Store hardware CP15 state and load guest state
- read_cp15_state store_to_vcpu = 0
- write_cp15_state read_from_vcpu = 1
-
- @ If the host kernel has not been configured with VFPv3 support,
- @ then it is safer if we deny guests from using it as well.
-#ifdef CONFIG_VFPv3
- @ Set FPEXC_EN so the guest doesn't trap floating point instructions
- VFPFMRX r2, FPEXC @ VMRS
- push {r2}
- orr r2, r2, #FPEXC_EN
- VFPFMXR FPEXC, r2 @ VMSR
-#endif
-
- @ Configure Hyp-role
- configure_hyp_role vmentry
-
- @ Trap coprocessor CRx accesses
- set_hstr vmentry
- set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
- set_hdcr vmentry
-
- @ Write configured ID register into MIDR alias
- ldr r1, [vcpu, #VCPU_MIDR]
- mcr p15, 4, r1, c0, c0, 0
-
- @ Write guest view of MPIDR into VMPIDR
- ldr r1, [vcpu, #CP15_OFFSET(c0_MPIDR)]
- mcr p15, 4, r1, c0, c0, 5
-
- @ Set up guest memory translation
- ldr r1, [vcpu, #VCPU_KVM]
- add r1, r1, #KVM_VTTBR
- ldrd r2, r3, [r1]
- mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
-
- @ We're all done, just restore the GPRs and go to the guest
- restore_guest_regs
- clrex @ Clear exclusive monitor
- eret
-
-__kvm_vcpu_return:
- /*
- * return convention:
- * guest r0, r1, r2 saved on the stack
- * r0: vcpu pointer
- * r1: exception code
- */
- save_guest_regs
-
- @ Set VMID == 0
- mov r2, #0
- mov r3, #0
- mcrr p15, 6, r2, r3, c2 @ Write VTTBR
-
- @ Don't trap coprocessor accesses for host kernel
- set_hstr vmexit
- set_hdcr vmexit
- set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore
-
-#ifdef CONFIG_VFPv3
- @ Switch VFP/NEON hardware state to the host's
- add r7, vcpu, #VCPU_VFP_GUEST
- store_vfp_state r7
- add r7, vcpu, #VCPU_VFP_HOST
- ldr r7, [r7]
- restore_vfp_state r7
-
-after_vfp_restore:
- @ Restore FPEXC_EN which we clobbered on entry
- pop {r2}
- VFPFMXR FPEXC, r2
-#else
-after_vfp_restore:
-#endif
-
- @ Reset Hyp-role
- configure_hyp_role vmexit
-
- @ Let host read hardware MIDR
- mrc p15, 0, r2, c0, c0, 0
- mcr p15, 4, r2, c0, c0, 0
-
- @ Back to hardware MPIDR
- mrc p15, 0, r2, c0, c0, 5
- mcr p15, 4, r2, c0, c0, 5
-
- @ Store guest CP15 state and restore host state
- read_cp15_state store_to_vcpu = 1
- write_cp15_state read_from_vcpu = 0
-
- save_timer_state
- save_vgic_state
-
- restore_host_regs
- clrex @ Clear exclusive monitor
-#ifndef CONFIG_CPU_ENDIAN_BE8
- mov r0, r1 @ Return the return code
- mov r1, #0 @ Clear upper bits in return value
-#else
- @ r1 already has return code
- mov r0, #0 @ Clear upper bits in return value
-#endif /* CONFIG_CPU_ENDIAN_BE8 */
- bx lr @ return to IOCTL
-
/********************************************************************
* Call function in Hyp mode
*
*
- * u64 kvm_call_hyp(void *hypfn, ...);
+ * unsigned long kvm_call_hyp(void *hypfn, ...);
*
* This is not really a variadic function in the classic C-way and care must
* be taken when calling this to ensure parameters are passed in registers
@@ -232,7 +35,7 @@ after_vfp_restore:
* passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the
* function pointer can be passed). The function being called must be mapped
* in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
- * passed in r0 and r1.
+ * passed in r0 (strictly 32bit).
*
* A function pointer with a value of 0xffffffff has a special meaning,
* and is used to implement __hyp_get_vectors in the same way as in
@@ -246,281 +49,4 @@ after_vfp_restore:
ENTRY(kvm_call_hyp)
hvc #0
bx lr
-
-/********************************************************************
- * Hypervisor exception vector and handlers
- *
- *
- * The KVM/ARM Hypervisor ABI is defined as follows:
- *
- * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
- * instruction is issued since all traps are disabled when running the host
- * kernel as per the Hyp-mode initialization at boot time.
- *
- * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
- * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
- * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
- * instructions are called from within Hyp-mode.
- *
- * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
- * Switching to Hyp mode is done through a simple HVC #0 instruction. The
- * exception vector code will check that the HVC comes from VMID==0 and if
- * so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
- * - r0 contains a pointer to a HYP function
- * - r1, r2, and r3 contain arguments to the above function.
- * - The HYP function will be called with its arguments in r0, r1 and r2.
- * On HYP function return, we return directly to SVC.
- *
- * Note that the above is used to execute code in Hyp-mode from a host-kernel
- * point of view, and is a different concept from performing a world-switch and
- * executing guest code SVC mode (with a VMID != 0).
- */
-
-/* Handle undef, svc, pabt, or dabt by crashing with a user notice */
-.macro bad_exception exception_code, panic_str
- push {r0-r2}
- mrrc p15, 6, r0, r1, c2 @ Read VTTBR
- lsr r1, r1, #16
- ands r1, r1, #0xff
- beq 99f
-
- load_vcpu @ Load VCPU pointer
- .if \exception_code == ARM_EXCEPTION_DATA_ABORT
- mrc p15, 4, r2, c5, c2, 0 @ HSR
- mrc p15, 4, r1, c6, c0, 0 @ HDFAR
- str r2, [vcpu, #VCPU_HSR]
- str r1, [vcpu, #VCPU_HxFAR]
- .endif
- .if \exception_code == ARM_EXCEPTION_PREF_ABORT
- mrc p15, 4, r2, c5, c2, 0 @ HSR
- mrc p15, 4, r1, c6, c0, 2 @ HIFAR
- str r2, [vcpu, #VCPU_HSR]
- str r1, [vcpu, #VCPU_HxFAR]
- .endif
- mov r1, #\exception_code
- b __kvm_vcpu_return
-
- @ We were in the host already. Let's craft a panic-ing return to SVC.
-99: mrs r2, cpsr
- bic r2, r2, #MODE_MASK
- orr r2, r2, #SVC_MODE
-THUMB( orr r2, r2, #PSR_T_BIT )
- msr spsr_cxsf, r2
- mrs r1, ELR_hyp
- ldr r2, =panic
- msr ELR_hyp, r2
- ldr r0, =\panic_str
- clrex @ Clear exclusive monitor
- eret
-.endm
-
- .text
-
- .align 5
-__kvm_hyp_vector:
- .globl __kvm_hyp_vector
-
- @ Hyp-mode exception vector
- W(b) hyp_reset
- W(b) hyp_undef
- W(b) hyp_svc
- W(b) hyp_pabt
- W(b) hyp_dabt
- W(b) hyp_hvc
- W(b) hyp_irq
- W(b) hyp_fiq
-
- .align
-hyp_reset:
- b hyp_reset
-
- .align
-hyp_undef:
- bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str
-
- .align
-hyp_svc:
- bad_exception ARM_EXCEPTION_HVC, svc_die_str
-
- .align
-hyp_pabt:
- bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str
-
- .align
-hyp_dabt:
- bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str
-
- .align
-hyp_hvc:
- /*
- * Getting here is either becuase of a trap from a guest or from calling
- * HVC from the host kernel, which means "switch to Hyp mode".
- */
- push {r0, r1, r2}
-
- @ Check syndrome register
- mrc p15, 4, r1, c5, c2, 0 @ HSR
- lsr r0, r1, #HSR_EC_SHIFT
- cmp r0, #HSR_EC_HVC
- bne guest_trap @ Not HVC instr.
-
- /*
- * Let's check if the HVC came from VMID 0 and allow simple
- * switch to Hyp mode
- */
- mrrc p15, 6, r0, r2, c2
- lsr r2, r2, #16
- and r2, r2, #0xff
- cmp r2, #0
- bne guest_trap @ Guest called HVC
-
- /*
- * Getting here means host called HVC, we shift parameters and branch
- * to Hyp function.
- */
- pop {r0, r1, r2}
-
- /* Check for __hyp_get_vectors */
- cmp r0, #-1
- mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
- beq 1f
-
- push {lr}
- mrs lr, SPSR
- push {lr}
-
- mov lr, r0
- mov r0, r1
- mov r1, r2
- mov r2, r3
-
-THUMB( orr lr, #1)
- blx lr @ Call the HYP function
-
- pop {lr}
- msr SPSR_csxf, lr
- pop {lr}
-1: eret
-
-guest_trap:
- load_vcpu @ Load VCPU pointer to r0
- str r1, [vcpu, #VCPU_HSR]
-
- @ Check if we need the fault information
- lsr r1, r1, #HSR_EC_SHIFT
-#ifdef CONFIG_VFPv3
- cmp r1, #HSR_EC_CP_0_13
- beq switch_to_guest_vfp
-#endif
- cmp r1, #HSR_EC_IABT
- mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
- beq 2f
- cmp r1, #HSR_EC_DABT
- bne 1f
- mrc p15, 4, r2, c6, c0, 0 @ HDFAR
-
-2: str r2, [vcpu, #VCPU_HxFAR]
-
- /*
- * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
- *
- * Abort on the stage 2 translation for a memory access from a
- * Non-secure PL1 or PL0 mode:
- *
- * For any Access flag fault or Translation fault, and also for any
- * Permission fault on the stage 2 translation of a memory access
- * made as part of a translation table walk for a stage 1 translation,
- * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
- * is UNKNOWN.
- */
-
- /* Check for permission fault, and S1PTW */
- mrc p15, 4, r1, c5, c2, 0 @ HSR
- and r0, r1, #HSR_FSC_TYPE
- cmp r0, #FSC_PERM
- tsteq r1, #(1 << 7) @ S1PTW
- mrcne p15, 4, r2, c6, c0, 4 @ HPFAR
- bne 3f
-
- /* Preserve PAR */
- mrrc p15, 0, r0, r1, c7 @ PAR
- push {r0, r1}
-
- /* Resolve IPA using the xFAR */
- mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR
- isb
- mrrc p15, 0, r0, r1, c7 @ PAR
- tst r0, #1
- bne 4f @ Failed translation
- ubfx r2, r0, #12, #20
- lsl r2, r2, #4
- orr r2, r2, r1, lsl #24
-
- /* Restore PAR */
- pop {r0, r1}
- mcrr p15, 0, r0, r1, c7 @ PAR
-
-3: load_vcpu @ Load VCPU pointer to r0
- str r2, [r0, #VCPU_HPFAR]
-
-1: mov r1, #ARM_EXCEPTION_HVC
- b __kvm_vcpu_return
-
-4: pop {r0, r1} @ Failed translation, return to guest
- mcrr p15, 0, r0, r1, c7 @ PAR
- clrex
- pop {r0, r1, r2}
- eret
-
-/*
- * If VFPv3 support is not available, then we will not switch the VFP
- * registers; however cp10 and cp11 accesses will still trap and fallback
- * to the regular coprocessor emulation code, which currently will
- * inject an undefined exception to the guest.
- */
-#ifdef CONFIG_VFPv3
-switch_to_guest_vfp:
- push {r3-r7}
-
- @ NEON/VFP used. Turn on VFP access.
- set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11))
-
- @ Switch VFP/NEON hardware state to the guest's
- add r7, r0, #VCPU_VFP_HOST
- ldr r7, [r7]
- store_vfp_state r7
- add r7, r0, #VCPU_VFP_GUEST
- restore_vfp_state r7
-
- pop {r3-r7}
- pop {r0-r2}
- clrex
- eret
-#endif
-
- .align
-hyp_irq:
- push {r0, r1, r2}
- mov r1, #ARM_EXCEPTION_IRQ
- load_vcpu @ Load VCPU pointer to r0
- b __kvm_vcpu_return
-
- .align
-hyp_fiq:
- b hyp_fiq
-
- .ltorg
-
-__kvm_hyp_code_end:
- .globl __kvm_hyp_code_end
-
- .section ".rodata"
-
-und_die_str:
- .ascii "unexpected undefined exception in Hyp mode at: %#08x\n"
-pabt_die_str:
- .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n"
-dabt_die_str:
- .ascii "unexpected data abort in Hyp mode at: %#08x\n"
-svc_die_str:
- .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n"
+ENDPROC(kvm_call_hyp)
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
deleted file mode 100644
index 51a59504bef4..000000000000
--- a/arch/arm/kvm/interrupts_head.S
+++ /dev/null
@@ -1,648 +0,0 @@
-#include <linux/irqchip/arm-gic.h>
-#include <asm/assembler.h>
-
-#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
-#define VCPU_USR_SP (VCPU_USR_REG(13))
-#define VCPU_USR_LR (VCPU_USR_REG(14))
-#define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4))
-
-/*
- * Many of these macros need to access the VCPU structure, which is always
- * held in r0. These macros should never clobber r1, as it is used to hold the
- * exception code on the return path (except of course the macro that switches
- * all the registers before the final jump to the VM).
- */
-vcpu .req r0 @ vcpu pointer always in r0
-
-/* Clobbers {r2-r6} */
-.macro store_vfp_state vfp_base
- @ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions
- VFPFMRX r2, FPEXC
- @ Make sure VFP is enabled so we can touch the registers.
- orr r6, r2, #FPEXC_EN
- VFPFMXR FPEXC, r6
-
- VFPFMRX r3, FPSCR
- tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
- beq 1f
- @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are upredictable, so
- @ we only need to save them if FPEXC_EX is set.
- VFPFMRX r4, FPINST
- tst r2, #FPEXC_FP2V
- VFPFMRX r5, FPINST2, ne @ vmrsne
- bic r6, r2, #FPEXC_EX @ FPEXC_EX disable
- VFPFMXR FPEXC, r6
-1:
- VFPFSTMIA \vfp_base, r6 @ Save VFP registers
- stm \vfp_base, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2
-.endm
-
-/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */
-.macro restore_vfp_state vfp_base
- VFPFLDMIA \vfp_base, r6 @ Load VFP registers
- ldm \vfp_base, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2
-
- VFPFMXR FPSCR, r3
- tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
- beq 1f
- VFPFMXR FPINST, r4
- tst r2, #FPEXC_FP2V
- VFPFMXR FPINST2, r5, ne
-1:
- VFPFMXR FPEXC, r2 @ FPEXC (last, in case !EN)
-.endm
-
-/* These are simply for the macros to work - value don't have meaning */
-.equ usr, 0
-.equ svc, 1
-.equ abt, 2
-.equ und, 3
-.equ irq, 4
-.equ fiq, 5
-
-.macro push_host_regs_mode mode
- mrs r2, SP_\mode
- mrs r3, LR_\mode
- mrs r4, SPSR_\mode
- push {r2, r3, r4}
-.endm
-
-/*
- * Store all host persistent registers on the stack.
- * Clobbers all registers, in all modes, except r0 and r1.
- */
-.macro save_host_regs
- /* Hyp regs. Only ELR_hyp (SPSR_hyp already saved) */
- mrs r2, ELR_hyp
- push {r2}
-
- /* usr regs */
- push {r4-r12} @ r0-r3 are always clobbered
- mrs r2, SP_usr
- mov r3, lr
- push {r2, r3}
-
- push_host_regs_mode svc
- push_host_regs_mode abt
- push_host_regs_mode und
- push_host_regs_mode irq
-
- /* fiq regs */
- mrs r2, r8_fiq
- mrs r3, r9_fiq
- mrs r4, r10_fiq
- mrs r5, r11_fiq
- mrs r6, r12_fiq
- mrs r7, SP_fiq
- mrs r8, LR_fiq
- mrs r9, SPSR_fiq
- push {r2-r9}
-.endm
-
-.macro pop_host_regs_mode mode
- pop {r2, r3, r4}
- msr SP_\mode, r2
- msr LR_\mode, r3
- msr SPSR_\mode, r4
-.endm
-
-/*
- * Restore all host registers from the stack.
- * Clobbers all registers, in all modes, except r0 and r1.
- */
-.macro restore_host_regs
- pop {r2-r9}
- msr r8_fiq, r2
- msr r9_fiq, r3
- msr r10_fiq, r4
- msr r11_fiq, r5
- msr r12_fiq, r6
- msr SP_fiq, r7
- msr LR_fiq, r8
- msr SPSR_fiq, r9
-
- pop_host_regs_mode irq
- pop_host_regs_mode und
- pop_host_regs_mode abt
- pop_host_regs_mode svc
-
- pop {r2, r3}
- msr SP_usr, r2
- mov lr, r3
- pop {r4-r12}
-
- pop {r2}
- msr ELR_hyp, r2
-.endm
-
-/*
- * Restore SP, LR and SPSR for a given mode. offset is the offset of
- * this mode's registers from the VCPU base.
- *
- * Assumes vcpu pointer in vcpu reg
- *
- * Clobbers r1, r2, r3, r4.
- */
-.macro restore_guest_regs_mode mode, offset
- add r1, vcpu, \offset
- ldm r1, {r2, r3, r4}
- msr SP_\mode, r2
- msr LR_\mode, r3
- msr SPSR_\mode, r4
-.endm
-
-/*
- * Restore all guest registers from the vcpu struct.
- *
- * Assumes vcpu pointer in vcpu reg
- *
- * Clobbers *all* registers.
- */
-.macro restore_guest_regs
- restore_guest_regs_mode svc, #VCPU_SVC_REGS
- restore_guest_regs_mode abt, #VCPU_ABT_REGS
- restore_guest_regs_mode und, #VCPU_UND_REGS
- restore_guest_regs_mode irq, #VCPU_IRQ_REGS
-
- add r1, vcpu, #VCPU_FIQ_REGS
- ldm r1, {r2-r9}
- msr r8_fiq, r2
- msr r9_fiq, r3
- msr r10_fiq, r4
- msr r11_fiq, r5
- msr r12_fiq, r6
- msr SP_fiq, r7
- msr LR_fiq, r8
- msr SPSR_fiq, r9
-
- @ Load return state
- ldr r2, [vcpu, #VCPU_PC]
- ldr r3, [vcpu, #VCPU_CPSR]
- msr ELR_hyp, r2
- msr SPSR_cxsf, r3
-
- @ Load user registers
- ldr r2, [vcpu, #VCPU_USR_SP]
- ldr r3, [vcpu, #VCPU_USR_LR]
- msr SP_usr, r2
- mov lr, r3
- add vcpu, vcpu, #(VCPU_USR_REGS)
- ldm vcpu, {r0-r12}
-.endm
-
-/*
- * Save SP, LR and SPSR for a given mode. offset is the offset of
- * this mode's registers from the VCPU base.
- *
- * Assumes vcpu pointer in vcpu reg
- *
- * Clobbers r2, r3, r4, r5.
- */
-.macro save_guest_regs_mode mode, offset
- add r2, vcpu, \offset
- mrs r3, SP_\mode
- mrs r4, LR_\mode
- mrs r5, SPSR_\mode
- stm r2, {r3, r4, r5}
-.endm
-
-/*
- * Save all guest registers to the vcpu struct
- * Expects guest's r0, r1, r2 on the stack.
- *
- * Assumes vcpu pointer in vcpu reg
- *
- * Clobbers r2, r3, r4, r5.
- */
-.macro save_guest_regs
- @ Store usr registers
- add r2, vcpu, #VCPU_USR_REG(3)
- stm r2, {r3-r12}
- add r2, vcpu, #VCPU_USR_REG(0)
- pop {r3, r4, r5} @ r0, r1, r2
- stm r2, {r3, r4, r5}
- mrs r2, SP_usr
- mov r3, lr
- str r2, [vcpu, #VCPU_USR_SP]
- str r3, [vcpu, #VCPU_USR_LR]
-
- @ Store return state
- mrs r2, ELR_hyp
- mrs r3, spsr
- str r2, [vcpu, #VCPU_PC]
- str r3, [vcpu, #VCPU_CPSR]
-
- @ Store other guest registers
- save_guest_regs_mode svc, #VCPU_SVC_REGS
- save_guest_regs_mode abt, #VCPU_ABT_REGS
- save_guest_regs_mode und, #VCPU_UND_REGS
- save_guest_regs_mode irq, #VCPU_IRQ_REGS
-.endm
-
-/* Reads cp15 registers from hardware and stores them in memory
- * @store_to_vcpu: If 0, registers are written in-order to the stack,
- * otherwise to the VCPU struct pointed to by vcpup
- *
- * Assumes vcpu pointer in vcpu reg
- *
- * Clobbers r2 - r12
- */
-.macro read_cp15_state store_to_vcpu
- mrc p15, 0, r2, c1, c0, 0 @ SCTLR
- mrc p15, 0, r3, c1, c0, 2 @ CPACR
- mrc p15, 0, r4, c2, c0, 2 @ TTBCR
- mrc p15, 0, r5, c3, c0, 0 @ DACR
- mrrc p15, 0, r6, r7, c2 @ TTBR 0
- mrrc p15, 1, r8, r9, c2 @ TTBR 1
- mrc p15, 0, r10, c10, c2, 0 @ PRRR
- mrc p15, 0, r11, c10, c2, 1 @ NMRR
- mrc p15, 2, r12, c0, c0, 0 @ CSSELR
-
- .if \store_to_vcpu == 0
- push {r2-r12} @ Push CP15 registers
- .else
- str r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
- str r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
- str r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
- str r5, [vcpu, #CP15_OFFSET(c3_DACR)]
- add r2, vcpu, #CP15_OFFSET(c2_TTBR0)
- strd r6, r7, [r2]
- add r2, vcpu, #CP15_OFFSET(c2_TTBR1)
- strd r8, r9, [r2]
- str r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
- str r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
- str r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
- .endif
-
- mrc p15, 0, r2, c13, c0, 1 @ CID
- mrc p15, 0, r3, c13, c0, 2 @ TID_URW
- mrc p15, 0, r4, c13, c0, 3 @ TID_URO
- mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV
- mrc p15, 0, r6, c5, c0, 0 @ DFSR
- mrc p15, 0, r7, c5, c0, 1 @ IFSR
- mrc p15, 0, r8, c5, c1, 0 @ ADFSR
- mrc p15, 0, r9, c5, c1, 1 @ AIFSR
- mrc p15, 0, r10, c6, c0, 0 @ DFAR
- mrc p15, 0, r11, c6, c0, 2 @ IFAR
- mrc p15, 0, r12, c12, c0, 0 @ VBAR
-
- .if \store_to_vcpu == 0
- push {r2-r12} @ Push CP15 registers
- .else
- str r2, [vcpu, #CP15_OFFSET(c13_CID)]
- str r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
- str r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
- str r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
- str r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
- str r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
- str r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
- str r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
- str r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
- str r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
- str r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
- .endif
-
- mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL
- mrrc p15, 0, r4, r5, c7 @ PAR
- mrc p15, 0, r6, c10, c3, 0 @ AMAIR0
- mrc p15, 0, r7, c10, c3, 1 @ AMAIR1
-
- .if \store_to_vcpu == 0
- push {r2,r4-r7}
- .else
- str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
- add r12, vcpu, #CP15_OFFSET(c7_PAR)
- strd r4, r5, [r12]
- str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
- str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
- .endif
-.endm
-
-/*
- * Reads cp15 registers from memory and writes them to hardware
- * @read_from_vcpu: If 0, registers are read in-order from the stack,
- * otherwise from the VCPU struct pointed to by vcpup
- *
- * Assumes vcpu pointer in vcpu reg
- */
-.macro write_cp15_state read_from_vcpu
- .if \read_from_vcpu == 0
- pop {r2,r4-r7}
- .else
- ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
- add r12, vcpu, #CP15_OFFSET(c7_PAR)
- ldrd r4, r5, [r12]
- ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
- ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
- .endif
-
- mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL
- mcrr p15, 0, r4, r5, c7 @ PAR
- mcr p15, 0, r6, c10, c3, 0 @ AMAIR0
- mcr p15, 0, r7, c10, c3, 1 @ AMAIR1
-
- .if \read_from_vcpu == 0
- pop {r2-r12}
- .else
- ldr r2, [vcpu, #CP15_OFFSET(c13_CID)]
- ldr r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
- ldr r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
- ldr r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
- ldr r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
- ldr r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
- ldr r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
- ldr r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
- ldr r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
- ldr r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
- ldr r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
- .endif
-
- mcr p15, 0, r2, c13, c0, 1 @ CID
- mcr p15, 0, r3, c13, c0, 2 @ TID_URW
- mcr p15, 0, r4, c13, c0, 3 @ TID_URO
- mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV
- mcr p15, 0, r6, c5, c0, 0 @ DFSR
- mcr p15, 0, r7, c5, c0, 1 @ IFSR
- mcr p15, 0, r8, c5, c1, 0 @ ADFSR
- mcr p15, 0, r9, c5, c1, 1 @ AIFSR
- mcr p15, 0, r10, c6, c0, 0 @ DFAR
- mcr p15, 0, r11, c6, c0, 2 @ IFAR
- mcr p15, 0, r12, c12, c0, 0 @ VBAR
-
- .if \read_from_vcpu == 0
- pop {r2-r12}
- .else
- ldr r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
- ldr r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
- ldr r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
- ldr r5, [vcpu, #CP15_OFFSET(c3_DACR)]
- add r12, vcpu, #CP15_OFFSET(c2_TTBR0)
- ldrd r6, r7, [r12]
- add r12, vcpu, #CP15_OFFSET(c2_TTBR1)
- ldrd r8, r9, [r12]
- ldr r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
- ldr r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
- ldr r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
- .endif
-
- mcr p15, 0, r2, c1, c0, 0 @ SCTLR
- mcr p15, 0, r3, c1, c0, 2 @ CPACR
- mcr p15, 0, r4, c2, c0, 2 @ TTBCR
- mcr p15, 0, r5, c3, c0, 0 @ DACR
- mcrr p15, 0, r6, r7, c2 @ TTBR 0
- mcrr p15, 1, r8, r9, c2 @ TTBR 1
- mcr p15, 0, r10, c10, c2, 0 @ PRRR
- mcr p15, 0, r11, c10, c2, 1 @ NMRR
- mcr p15, 2, r12, c0, c0, 0 @ CSSELR
-.endm
-
-/*
- * Save the VGIC CPU state into memory
- *
- * Assumes vcpu pointer in vcpu reg
- */
-.macro save_vgic_state
- /* Get VGIC VCTRL base into r2 */
- ldr r2, [vcpu, #VCPU_KVM]
- ldr r2, [r2, #KVM_VGIC_VCTRL]
- cmp r2, #0
- beq 2f
-
- /* Compute the address of struct vgic_cpu */
- add r11, vcpu, #VCPU_VGIC_CPU
-
- /* Save all interesting registers */
- ldr r4, [r2, #GICH_VMCR]
- ldr r5, [r2, #GICH_MISR]
- ldr r6, [r2, #GICH_EISR0]
- ldr r7, [r2, #GICH_EISR1]
- ldr r8, [r2, #GICH_ELRSR0]
- ldr r9, [r2, #GICH_ELRSR1]
- ldr r10, [r2, #GICH_APR]
-ARM_BE8(rev r4, r4 )
-ARM_BE8(rev r5, r5 )
-ARM_BE8(rev r6, r6 )
-ARM_BE8(rev r7, r7 )
-ARM_BE8(rev r8, r8 )
-ARM_BE8(rev r9, r9 )
-ARM_BE8(rev r10, r10 )
-
- str r4, [r11, #VGIC_V2_CPU_VMCR]
- str r5, [r11, #VGIC_V2_CPU_MISR]
-#ifdef CONFIG_CPU_ENDIAN_BE8
- str r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
- str r7, [r11, #VGIC_V2_CPU_EISR]
- str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
- str r9, [r11, #VGIC_V2_CPU_ELRSR]
-#else
- str r6, [r11, #VGIC_V2_CPU_EISR]
- str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
- str r8, [r11, #VGIC_V2_CPU_ELRSR]
- str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
-#endif
- str r10, [r11, #VGIC_V2_CPU_APR]
-
- /* Clear GICH_HCR */
- mov r5, #0
- str r5, [r2, #GICH_HCR]
-
- /* Save list registers */
- add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_V2_CPU_LR
- ldr r4, [r11, #VGIC_CPU_NR_LR]
-1: ldr r6, [r2], #4
-ARM_BE8(rev r6, r6 )
- str r6, [r3], #4
- subs r4, r4, #1
- bne 1b
-2:
-.endm
-
-/*
- * Restore the VGIC CPU state from memory
- *
- * Assumes vcpu pointer in vcpu reg
- */
-.macro restore_vgic_state
- /* Get VGIC VCTRL base into r2 */
- ldr r2, [vcpu, #VCPU_KVM]
- ldr r2, [r2, #KVM_VGIC_VCTRL]
- cmp r2, #0
- beq 2f
-
- /* Compute the address of struct vgic_cpu */
- add r11, vcpu, #VCPU_VGIC_CPU
-
- /* We only restore a minimal set of registers */
- ldr r3, [r11, #VGIC_V2_CPU_HCR]
- ldr r4, [r11, #VGIC_V2_CPU_VMCR]
- ldr r8, [r11, #VGIC_V2_CPU_APR]
-ARM_BE8(rev r3, r3 )
-ARM_BE8(rev r4, r4 )
-ARM_BE8(rev r8, r8 )
-
- str r3, [r2, #GICH_HCR]
- str r4, [r2, #GICH_VMCR]
- str r8, [r2, #GICH_APR]
-
- /* Restore list registers */
- add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_V2_CPU_LR
- ldr r4, [r11, #VGIC_CPU_NR_LR]
-1: ldr r6, [r3], #4
-ARM_BE8(rev r6, r6 )
- str r6, [r2], #4
- subs r4, r4, #1
- bne 1b
-2:
-.endm
-
-#define CNTHCTL_PL1PCTEN (1 << 0)
-#define CNTHCTL_PL1PCEN (1 << 1)
-
-/*
- * Save the timer state onto the VCPU and allow physical timer/counter access
- * for the host.
- *
- * Assumes vcpu pointer in vcpu reg
- * Clobbers r2-r5
- */
-.macro save_timer_state
- ldr r4, [vcpu, #VCPU_KVM]
- ldr r2, [r4, #KVM_TIMER_ENABLED]
- cmp r2, #0
- beq 1f
-
- mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL
- str r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
-
- isb
-
- mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
- ldr r4, =VCPU_TIMER_CNTV_CVAL
- add r5, vcpu, r4
- strd r2, r3, [r5]
-
- @ Ensure host CNTVCT == CNTPCT
- mov r2, #0
- mcrr p15, 4, r2, r2, c14 @ CNTVOFF
-
-1:
- mov r2, #0 @ Clear ENABLE
- mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
-
- @ Allow physical timer/counter access for the host
- mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
- orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
- mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
-.endm
-
-/*
- * Load the timer state from the VCPU and deny physical timer/counter access
- * for the host.
- *
- * Assumes vcpu pointer in vcpu reg
- * Clobbers r2-r5
- */
-.macro restore_timer_state
- @ Disallow physical timer access for the guest
- @ Physical counter access is allowed
- mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
- orr r2, r2, #CNTHCTL_PL1PCTEN
- bic r2, r2, #CNTHCTL_PL1PCEN
- mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
-
- ldr r4, [vcpu, #VCPU_KVM]
- ldr r2, [r4, #KVM_TIMER_ENABLED]
- cmp r2, #0
- beq 1f
-
- ldr r2, [r4, #KVM_TIMER_CNTVOFF]
- ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
- mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF
-
- ldr r4, =VCPU_TIMER_CNTV_CVAL
- add r5, vcpu, r4
- ldrd r2, r3, [r5]
- mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
- isb
-
- ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
- and r2, r2, #3
- mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
-1:
-.endm
-
-.equ vmentry, 0
-.equ vmexit, 1
-
-/* Configures the HSTR (Hyp System Trap Register) on entry/return
- * (hardware reset value is 0) */
-.macro set_hstr operation
- mrc p15, 4, r2, c1, c1, 3
- ldr r3, =HSTR_T(15)
- .if \operation == vmentry
- orr r2, r2, r3 @ Trap CR{15}
- .else
- bic r2, r2, r3 @ Don't trap any CRx accesses
- .endif
- mcr p15, 4, r2, c1, c1, 3
-.endm
-
-/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
- * (hardware reset value is 0). Keep previous value in r2.
- * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if
- * VFP wasn't already enabled (always executed on vmtrap).
- * If a label is specified with vmexit, it is branched to if VFP wasn't
- * enabled.
- */
-.macro set_hcptr operation, mask, label = none
- mrc p15, 4, r2, c1, c1, 2
- ldr r3, =\mask
- .if \operation == vmentry
- orr r3, r2, r3 @ Trap coproc-accesses defined in mask
- .else
- bic r3, r2, r3 @ Don't trap defined coproc-accesses
- .endif
- mcr p15, 4, r3, c1, c1, 2
- .if \operation != vmentry
- .if \operation == vmexit
- tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
- beq 1f
- .endif
- isb
- .if \label != none
- b \label
- .endif
-1:
- .endif
-.endm
-
-/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
- * (hardware reset value is 0) */
-.macro set_hdcr operation
- mrc p15, 4, r2, c1, c1, 1
- ldr r3, =(HDCR_TPM|HDCR_TPMCR)
- .if \operation == vmentry
- orr r2, r2, r3 @ Trap some perfmon accesses
- .else
- bic r2, r2, r3 @ Don't trap any perfmon accesses
- .endif
- mcr p15, 4, r2, c1, c1, 1
-.endm
-
-/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
-.macro configure_hyp_role operation
- .if \operation == vmentry
- ldr r2, [vcpu, #VCPU_HCR]
- ldr r3, [vcpu, #VCPU_IRQ_LINES]
- orr r2, r2, r3
- .else
- mov r2, #0
- .endif
- mcr p15, 4, r2, c1, c1, 0 @ HCR
-.endm
-
-.macro load_vcpu
- mrc p15, 4, vcpu, c13, c0, 2 @ HTPIDR
-.endm
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index aba61fd3697a..58dbd5c439df 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -28,6 +28,7 @@
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
+#include <asm/virt.h>
#include "trace.h"
@@ -598,6 +599,9 @@ int create_hyp_mappings(void *from, void *to)
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);
+ if (is_kernel_in_hyp_mode())
+ return 0;
+
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@@ -630,6 +634,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);
+ if (is_kernel_in_hyp_mode())
+ return 0;
+
/* Check for a valid kernel IO mapping */
if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
return -EINVAL;
@@ -1431,6 +1438,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
/*
+ * Check for a cache maintenance operation. Since we
+ * ended-up here, we know it is outside of any memory
+ * slot. But we can't find out if that is for a device,
+ * or if the guest is just being stupid. The only thing
+ * we know for sure is that this range cannot be cached.
+ *
+ * So let's assume that the guest is just being
+ * cautious, and skip the instruction.
+ */
+ if (kvm_vcpu_dabt_is_cm(vcpu)) {
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ ret = 1;
+ goto out_unlock;
+ }
+
+ /*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the
* faulting VA. This is always 12 bits, irrespective
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index a9b3b905e661..c2b131527a64 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
{
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
- wait_queue_head_t *wq;
+ struct swait_queue_head *wq;
unsigned long cpu_id;
unsigned long context_id;
phys_addr_t target_pc;
@@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
smp_mb(); /* Make sure the above is visible */
wq = kvm_arch_vcpu_wq(vcpu);
- wake_up_interruptible(wq);
+ swake_up(wq);
return PSCI_RET_SUCCESS;
}
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index eeb85858d6bb..0048b5a62a50 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -71,7 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
/* Reset core registers */
- memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
+ memcpy(&vcpu->arch.ctxt.gp_regs, reset_regs, sizeof(vcpu->arch.ctxt.gp_regs));
/* Reset CP15 registers */
kvm_reset_coprocs(vcpu);
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index de1316bf643a..62ebac51bab9 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -115,13 +115,14 @@ static void mityomapl138_cpufreq_init(const char *partnum)
static void mityomapl138_cpufreq_init(const char *partnum) { }
#endif
-static void read_factory_config(struct memory_accessor *a, void *context)
+static void read_factory_config(struct nvmem_device *nvmem, void *context)
{
int ret;
const char *partnum = NULL;
struct davinci_soc_info *soc_info = &davinci_soc_info;
- ret = a->read(a, (char *)&factory_config, 0, sizeof(factory_config));
+ ret = nvmem_device_read(nvmem, 0, sizeof(factory_config),
+ &factory_config);
if (ret != sizeof(struct factory_config)) {
pr_warn("Read Factory Config Failed: %d\n", ret);
goto bad_config;
diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index a794f6d9d444..f55ef2ef2f92 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -28,13 +28,13 @@ EXPORT_SYMBOL(davinci_soc_info);
void __iomem *davinci_intc_base;
int davinci_intc_type;
-void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context)
+void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context)
{
char *mac_addr = davinci_soc_info.emac_pdata->mac_addr;
off_t offset = (off_t)context;
/* Read MAC addr from EEPROM */
- if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN)
+ if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN)
pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
}
diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c
index 2478d9f4d92d..469a76ea0459 100644
--- a/arch/arm/mach-gemini/gpio.c
+++ b/arch/arm/mach-gemini/gpio.c
@@ -17,7 +17,7 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/irq.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <mach/hardware.h>
#include <mach/irqs.h>
@@ -227,5 +227,5 @@ void __init gemini_gpio_init(void)
(void *)i);
}
- BUG_ON(gpiochip_add(&gemini_gpio_chip));
+ BUG_ON(gpiochip_add_data(&gemini_gpio_chip, NULL));
}
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index cfc696b972f3..fd8720532471 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -374,8 +374,13 @@ static struct pu_domain imx6q_pu_domain = {
.name = "PU",
.power_off = imx6q_pm_pu_power_off,
.power_on = imx6q_pm_pu_power_on,
- .power_off_latency_ns = 25000,
- .power_on_latency_ns = 2000000,
+ .states = {
+ [0] = {
+ .power_off_latency_ns = 25000,
+ .power_on_latency_ns = 2000000,
+ },
+ },
+ .state_count = 1,
},
};
diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c
index eb1c3477c48a..f510c43981d3 100644
--- a/arch/arm/mach-imx/mach-mx27ads.c
+++ b/arch/arm/mach-imx/mach-mx27ads.c
@@ -13,6 +13,8 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
+#include <linux/gpio/driver.h>
+/* Needed for gpio_to_irq() */
#include <linux/gpio.h>
#include <linux/platform_device.h>
#include <linux/mtd/mtd.h>
@@ -243,7 +245,7 @@ static void __init mx27ads_regulator_init(void)
vchip->ngpio = 1;
vchip->direction_output = vgpio_dir_out;
vchip->set = vgpio_set;
- gpiochip_add(vchip);
+ gpiochip_add_data(vchip, NULL);
platform_device_register_data(NULL, "reg-fixed-voltage",
PLATFORM_DEVID_AUTO,
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 1cb6f2f02880..26874f608ca9 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -27,7 +27,7 @@
#include <linux/clockchips.h>
#include <linux/io.h>
#include <linux/export.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/sched_clock.h>
@@ -461,7 +461,7 @@ void __init ixp4xx_sys_init(void)
platform_add_devices(ixp4xx_devices, ARRAY_SIZE(ixp4xx_devices));
- gpiochip_add(&ixp4xx_gpio_chip);
+ gpiochip_add_data(&ixp4xx_gpio_chip, NULL);
if (cpu_is_ixp46x()) {
int region;
diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h
index c5bae9c035d5..b7ddd27419c2 100644
--- a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h
+++ b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h
@@ -395,204 +395,6 @@
#define CRP_AD_CBE_BESL 20
#define CRP_AD_CBE_WRITE 0x00010000
-
-/*
- * USB Device Controller
- *
- * These are used by the USB gadget driver, so they don't follow the
- * IXP4XX_ naming convetions.
- *
- */
-# define IXP4XX_USB_REG(x) (*((volatile u32 *)(x)))
-
-/* UDC Undocumented - Reserved1 */
-#define UDC_RES1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0004)
-/* UDC Undocumented - Reserved2 */
-#define UDC_RES2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0008)
-/* UDC Undocumented - Reserved3 */
-#define UDC_RES3 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x000C)
-/* UDC Control Register */
-#define UDCCR IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0000)
-/* UDC Endpoint 0 Control/Status Register */
-#define UDCCS0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0010)
-/* UDC Endpoint 1 (IN) Control/Status Register */
-#define UDCCS1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0014)
-/* UDC Endpoint 2 (OUT) Control/Status Register */
-#define UDCCS2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0018)
-/* UDC Endpoint 3 (IN) Control/Status Register */
-#define UDCCS3 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x001C)
-/* UDC Endpoint 4 (OUT) Control/Status Register */
-#define UDCCS4 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0020)
-/* UDC Endpoint 5 (Interrupt) Control/Status Register */
-#define UDCCS5 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0024)
-/* UDC Endpoint 6 (IN) Control/Status Register */
-#define UDCCS6 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0028)
-/* UDC Endpoint 7 (OUT) Control/Status Register */
-#define UDCCS7 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x002C)
-/* UDC Endpoint 8 (IN) Control/Status Register */
-#define UDCCS8 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0030)
-/* UDC Endpoint 9 (OUT) Control/Status Register */
-#define UDCCS9 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0034)
-/* UDC Endpoint 10 (Interrupt) Control/Status Register */
-#define UDCCS10 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0038)
-/* UDC Endpoint 11 (IN) Control/Status Register */
-#define UDCCS11 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x003C)
-/* UDC Endpoint 12 (OUT) Control/Status Register */
-#define UDCCS12 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0040)
-/* UDC Endpoint 13 (IN) Control/Status Register */
-#define UDCCS13 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0044)
-/* UDC Endpoint 14 (OUT) Control/Status Register */
-#define UDCCS14 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0048)
-/* UDC Endpoint 15 (Interrupt) Control/Status Register */
-#define UDCCS15 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x004C)
-/* UDC Frame Number Register High */
-#define UFNRH IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0060)
-/* UDC Frame Number Register Low */
-#define UFNRL IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0064)
-/* UDC Byte Count Reg 2 */
-#define UBCR2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0068)
-/* UDC Byte Count Reg 4 */
-#define UBCR4 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x006c)
-/* UDC Byte Count Reg 7 */
-#define UBCR7 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0070)
-/* UDC Byte Count Reg 9 */
-#define UBCR9 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0074)
-/* UDC Byte Count Reg 12 */
-#define UBCR12 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0078)
-/* UDC Byte Count Reg 14 */
-#define UBCR14 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x007c)
-/* UDC Endpoint 0 Data Register */
-#define UDDR0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0080)
-/* UDC Endpoint 1 Data Register */
-#define UDDR1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0100)
-/* UDC Endpoint 2 Data Register */
-#define UDDR2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0180)
-/* UDC Endpoint 3 Data Register */
-#define UDDR3 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0200)
-/* UDC Endpoint 4 Data Register */
-#define UDDR4 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0400)
-/* UDC Endpoint 5 Data Register */
-#define UDDR5 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x00A0)
-/* UDC Endpoint 6 Data Register */
-#define UDDR6 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0600)
-/* UDC Endpoint 7 Data Register */
-#define UDDR7 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0680)
-/* UDC Endpoint 8 Data Register */
-#define UDDR8 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0700)
-/* UDC Endpoint 9 Data Register */
-#define UDDR9 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0900)
-/* UDC Endpoint 10 Data Register */
-#define UDDR10 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x00C0)
-/* UDC Endpoint 11 Data Register */
-#define UDDR11 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0B00)
-/* UDC Endpoint 12 Data Register */
-#define UDDR12 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0B80)
-/* UDC Endpoint 13 Data Register */
-#define UDDR13 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0C00)
-/* UDC Endpoint 14 Data Register */
-#define UDDR14 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0E00)
-/* UDC Endpoint 15 Data Register */
-#define UDDR15 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x00E0)
-/* UDC Interrupt Control Register 0 */
-#define UICR0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0050)
-/* UDC Interrupt Control Register 1 */
-#define UICR1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0054)
-/* UDC Status Interrupt Register 0 */
-#define USIR0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0058)
-/* UDC Status Interrupt Register 1 */
-#define USIR1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x005C)
-
-#define UDCCR_UDE (1 << 0) /* UDC enable */
-#define UDCCR_UDA (1 << 1) /* UDC active */
-#define UDCCR_RSM (1 << 2) /* Device resume */
-#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */
-#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */
-#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */
-#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */
-#define UDCCR_REM (1 << 7) /* Reset interrupt mask */
-
-#define UDCCS0_OPR (1 << 0) /* OUT packet ready */
-#define UDCCS0_IPR (1 << 1) /* IN packet ready */
-#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */
-#define UDCCS0_SST (1 << 4) /* Sent stall */
-#define UDCCS0_FST (1 << 5) /* Force stall */
-#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */
-#define UDCCS0_SA (1 << 7) /* Setup active */
-
-#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */
-#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */
-#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */
-#define UDCCS_BI_SST (1 << 4) /* Sent stall */
-#define UDCCS_BI_FST (1 << 5) /* Force stall */
-#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */
-
-#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */
-#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */
-#define UDCCS_BO_DME (1 << 3) /* DMA enable */
-#define UDCCS_BO_SST (1 << 4) /* Sent stall */
-#define UDCCS_BO_FST (1 << 5) /* Force stall */
-#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */
-#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */
-
-#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */
-#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */
-#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */
-#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */
-
-#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */
-#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */
-#define UDCCS_IO_ROF (1 << 3) /* Receive overflow */
-#define UDCCS_IO_DME (1 << 3) /* DMA enable */
-#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */
-#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */
-
-#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */
-#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */
-#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */
-#define UDCCS_INT_SST (1 << 4) /* Sent stall */
-#define UDCCS_INT_FST (1 << 5) /* Force stall */
-#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */
-
-#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */
-#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */
-#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */
-#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */
-#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */
-#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */
-#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */
-#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */
-
-#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */
-#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */
-#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */
-#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */
-#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */
-#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */
-#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */
-#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */
-
-#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */
-#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */
-#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */
-#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */
-#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */
-#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */
-#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */
-#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */
-
-#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */
-#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */
-#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */
-#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */
-#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */
-#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */
-#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */
-#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */
-
#define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */
/* "fuse" bits of IXP_EXP_CFG2 */
diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 77d6b1bab278..ee06fabdf60e 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -86,8 +86,8 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb)
{
dma_addr_t dma;
- fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
- PANEL_SIZE, &dma, GFP_KERNEL);
+ fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, PANEL_SIZE, &dma,
+ GFP_KERNEL);
if (!fb->fb.screen_base) {
printk(KERN_ERR "CLCD: unable to map framebuffer\n");
return -ENOMEM;
@@ -116,15 +116,14 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb)
static int lpc32xx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
{
- return dma_mmap_writecombine(&fb->dev->dev, vma,
- fb->fb.screen_base, fb->fb.fix.smem_start,
- fb->fb.fix.smem_len);
+ return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+ fb->fb.fix.smem_start, fb->fb.fix.smem_len);
}
static void lpc32xx_clcd_remove(struct clcd_fb *fb)
{
- dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
- fb->fb.screen_base, fb->fb.fix.smem_start);
+ dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+ fb->fb.fix.smem_start);
}
/*
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 64e3d2ce9a07..b003e3afd693 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU
depends on ARCH_MULTI_V7 || ARCH_MULTI_V5
select ARCH_SUPPORTS_BIG_ENDIAN
select CLKSRC_MMIO
- select GENERIC_IRQ_CHIP
select PINCTRL
select PLAT_ORION
select SOC_BUS
@@ -29,6 +28,7 @@ config MACH_ARMADA_370
bool "Marvell Armada 370 boards"
depends on ARCH_MULTI_V7
select ARMADA_370_CLK
+ select ARMADA_370_XP_IRQ
select CPU_PJ4B
select MACH_MVEBU_V7
select PINCTRL_ARMADA_370
@@ -39,6 +39,7 @@ config MACH_ARMADA_370
config MACH_ARMADA_375
bool "Marvell Armada 375 boards"
depends on ARCH_MULTI_V7
+ select ARMADA_370_XP_IRQ
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
@@ -58,6 +59,7 @@ config MACH_ARMADA_38X
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
+ select ARMADA_370_XP_IRQ
select ARMADA_38X_CLK
select HAVE_ARM_SCU
select HAVE_ARM_TWD if SMP
@@ -72,6 +74,7 @@ config MACH_ARMADA_39X
bool "Marvell Armada 39x boards"
depends on ARCH_MULTI_V7
select ARM_GIC
+ select ARMADA_370_XP_IRQ
select ARMADA_39X_CLK
select CACHE_L2X0
select HAVE_ARM_SCU
@@ -86,6 +89,7 @@ config MACH_ARMADA_39X
config MACH_ARMADA_XP
bool "Marvell Armada XP boards"
depends on ARCH_MULTI_V7
+ select ARMADA_370_XP_IRQ
select ARMADA_XP_CLK
select CPU_PJ4B
select MACH_MVEBU_V7
diff --git a/arch/arm/mach-netx/fb.c b/arch/arm/mach-netx/fb.c
index d122ee6ab991..8814ee5e98fd 100644
--- a/arch/arm/mach-netx/fb.c
+++ b/arch/arm/mach-netx/fb.c
@@ -42,8 +42,8 @@ int netx_clcd_setup(struct clcd_fb *fb)
fb->panel = netx_panel;
- fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, 1024*1024,
- &dma, GFP_KERNEL);
+ fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma,
+ GFP_KERNEL);
if (!fb->fb.screen_base) {
printk(KERN_ERR "CLCD: unable to map framebuffer\n");
return -ENOMEM;
@@ -57,16 +57,14 @@ int netx_clcd_setup(struct clcd_fb *fb)
int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
{
- return dma_mmap_writecombine(&fb->dev->dev, vma,
- fb->fb.screen_base,
- fb->fb.fix.smem_start,
- fb->fb.fix.smem_len);
+ return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+ fb->fb.fix.smem_start, fb->fb.fix.smem_len);
}
void netx_clcd_remove(struct clcd_fb *fb)
{
- dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
- fb->fb.screen_base, fb->fb.fix.smem_start);
+ dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+ fb->fb.fix.smem_start);
}
static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL);
diff --git a/arch/arm/mach-nspire/clcd.c b/arch/arm/mach-nspire/clcd.c
index abea12617b17..ea0e5b2ca1cd 100644
--- a/arch/arm/mach-nspire/clcd.c
+++ b/arch/arm/mach-nspire/clcd.c
@@ -90,8 +90,8 @@ int nspire_clcd_setup(struct clcd_fb *fb)
panel_size = ((panel->mode.xres * panel->mode.yres) * panel->bpp) / 8;
panel_size = ALIGN(panel_size, PAGE_SIZE);
- fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
- panel_size, &dma, GFP_KERNEL);
+ fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, panel_size, &dma,
+ GFP_KERNEL);
if (!fb->fb.screen_base) {
pr_err("CLCD: unable to map framebuffer\n");
@@ -107,13 +107,12 @@ int nspire_clcd_setup(struct clcd_fb *fb)
int nspire_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
{
- return dma_mmap_writecombine(&fb->dev->dev, vma,
- fb->fb.screen_base, fb->fb.fix.smem_start,
- fb->fb.fix.smem_len);
+ return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+ fb->fb.fix.smem_start, fb->fb.fix.smem_len);
}
void nspire_clcd_remove(struct clcd_fb *fb)
{
- dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
- fb->fb.screen_base, fb->fb.fix.smem_start);
+ dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+ fb->fb.fix.smem_start);
}
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index e9f65fec55c0..b6d62e4cdfdd 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -2200,6 +2200,11 @@ static int _enable(struct omap_hwmod *oh)
*/
static int _idle(struct omap_hwmod *oh)
{
+ if (oh->flags & HWMOD_NO_IDLE) {
+ oh->_int_flags |= _HWMOD_SKIP_ENABLE;
+ return 0;
+ }
+
pr_debug("omap_hwmod: %s: idling\n", oh->name);
if (oh->_state != _HWMOD_STATE_ENABLED) {
@@ -2504,6 +2509,8 @@ static int __init _init(struct omap_hwmod *oh, void *data)
oh->flags |= HWMOD_INIT_NO_RESET;
if (of_find_property(np, "ti,no-idle-on-init", NULL))
oh->flags |= HWMOD_INIT_NO_IDLE;
+ if (of_find_property(np, "ti,no-idle", NULL))
+ oh->flags |= HWMOD_NO_IDLE;
}
oh->_state = _HWMOD_STATE_INITIALIZED;
@@ -2630,7 +2637,7 @@ static void __init _setup_postsetup(struct omap_hwmod *oh)
* XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data -
* it should be set by the core code as a runtime flag during startup
*/
- if ((oh->flags & HWMOD_INIT_NO_IDLE) &&
+ if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) &&
(postsetup_state == _HWMOD_STATE_IDLE)) {
oh->_int_flags |= _HWMOD_SKIP_ENABLE;
postsetup_state = _HWMOD_STATE_ENABLED;
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 76bce11c85a4..7c7a31169475 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -525,6 +525,8 @@ struct omap_hwmod_omap4_prcm {
* or idled.
* HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to
* operate and they need to be handled at the same time as the main_clk.
+ * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain
+ * IPs like CPSW on DRA7, where clocks to this module cannot be disabled.
*/
#define HWMOD_SWSUP_SIDLE (1 << 0)
#define HWMOD_SWSUP_MSTANDBY (1 << 1)
@@ -541,6 +543,7 @@ struct omap_hwmod_omap4_prcm {
#define HWMOD_SWSUP_SIDLE_ACT (1 << 12)
#define HWMOD_RECONFIG_IO_CHAIN (1 << 13)
#define HWMOD_OPT_CLKS_NEEDED (1 << 14)
+#define HWMOD_NO_IDLE (1 << 15)
/*
* omap_hwmod._int_flags definitions
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index f164c6b32ce2..8e072de89fed 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -252,7 +252,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata,
info = omap_serial_default_info;
oh = uart->oh;
- name = DRIVER_NAME;
+ name = OMAP_SERIAL_DRIVER_NAME;
omap_up.dma_enabled = info->dma_enabled;
omap_up.uartclk = OMAP24XX_BASE_BAUD * 16;
diff --git a/arch/arm/mach-pxa/include/mach/pxa25x-udc.h b/arch/arm/mach-pxa/include/mach/pxa25x-udc.h
index 1b80a4805a60..e69de29bb2d1 100644
--- a/arch/arm/mach-pxa/include/mach/pxa25x-udc.h
+++ b/arch/arm/mach-pxa/include/mach/pxa25x-udc.h
@@ -1,163 +0,0 @@
-#ifndef _ASM_ARCH_PXA25X_UDC_H
-#define _ASM_ARCH_PXA25X_UDC_H
-
-#ifdef _ASM_ARCH_PXA27X_UDC_H
-#error "You can't include both PXA25x and PXA27x UDC support"
-#endif
-
-#define UDC_RES1 __REG(0x40600004) /* UDC Undocumented - Reserved1 */
-#define UDC_RES2 __REG(0x40600008) /* UDC Undocumented - Reserved2 */
-#define UDC_RES3 __REG(0x4060000C) /* UDC Undocumented - Reserved3 */
-
-#define UDCCR __REG(0x40600000) /* UDC Control Register */
-#define UDCCR_UDE (1 << 0) /* UDC enable */
-#define UDCCR_UDA (1 << 1) /* UDC active */
-#define UDCCR_RSM (1 << 2) /* Device resume */
-#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */
-#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */
-#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */
-#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */
-#define UDCCR_REM (1 << 7) /* Reset interrupt mask */
-
-#define UDCCS0 __REG(0x40600010) /* UDC Endpoint 0 Control/Status Register */
-#define UDCCS0_OPR (1 << 0) /* OUT packet ready */
-#define UDCCS0_IPR (1 << 1) /* IN packet ready */
-#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */
-#define UDCCS0_SST (1 << 4) /* Sent stall */
-#define UDCCS0_FST (1 << 5) /* Force stall */
-#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */
-#define UDCCS0_SA (1 << 7) /* Setup active */
-
-/* Bulk IN - Endpoint 1,6,11 */
-#define UDCCS1 __REG(0x40600014) /* UDC Endpoint 1 (IN) Control/Status Register */
-#define UDCCS6 __REG(0x40600028) /* UDC Endpoint 6 (IN) Control/Status Register */
-#define UDCCS11 __REG(0x4060003C) /* UDC Endpoint 11 (IN) Control/Status Register */
-
-#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */
-#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */
-#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */
-#define UDCCS_BI_SST (1 << 4) /* Sent stall */
-#define UDCCS_BI_FST (1 << 5) /* Force stall */
-#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */
-
-/* Bulk OUT - Endpoint 2,7,12 */
-#define UDCCS2 __REG(0x40600018) /* UDC Endpoint 2 (OUT) Control/Status Register */
-#define UDCCS7 __REG(0x4060002C) /* UDC Endpoint 7 (OUT) Control/Status Register */
-#define UDCCS12 __REG(0x40600040) /* UDC Endpoint 12 (OUT) Control/Status Register */
-
-#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */
-#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */
-#define UDCCS_BO_DME (1 << 3) /* DMA enable */
-#define UDCCS_BO_SST (1 << 4) /* Sent stall */
-#define UDCCS_BO_FST (1 << 5) /* Force stall */
-#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */
-#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */
-
-/* Isochronous IN - Endpoint 3,8,13 */
-#define UDCCS3 __REG(0x4060001C) /* UDC Endpoint 3 (IN) Control/Status Register */
-#define UDCCS8 __REG(0x40600030) /* UDC Endpoint 8 (IN) Control/Status Register */
-#define UDCCS13 __REG(0x40600044) /* UDC Endpoint 13 (IN) Control/Status Register */
-
-#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */
-#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */
-#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */
-#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */
-
-/* Isochronous OUT - Endpoint 4,9,14 */
-#define UDCCS4 __REG(0x40600020) /* UDC Endpoint 4 (OUT) Control/Status Register */
-#define UDCCS9 __REG(0x40600034) /* UDC Endpoint 9 (OUT) Control/Status Register */
-#define UDCCS14 __REG(0x40600048) /* UDC Endpoint 14 (OUT) Control/Status Register */
-
-#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */
-#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */
-#define UDCCS_IO_ROF (1 << 2) /* Receive overflow */
-#define UDCCS_IO_DME (1 << 3) /* DMA enable */
-#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */
-#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */
-
-/* Interrupt IN - Endpoint 5,10,15 */
-#define UDCCS5 __REG(0x40600024) /* UDC Endpoint 5 (Interrupt) Control/Status Register */
-#define UDCCS10 __REG(0x40600038) /* UDC Endpoint 10 (Interrupt) Control/Status Register */
-#define UDCCS15 __REG(0x4060004C) /* UDC Endpoint 15 (Interrupt) Control/Status Register */
-
-#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */
-#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */
-#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */
-#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */
-#define UDCCS_INT_SST (1 << 4) /* Sent stall */
-#define UDCCS_INT_FST (1 << 5) /* Force stall */
-#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */
-
-#define UFNRH __REG(0x40600060) /* UDC Frame Number Register High */
-#define UFNRL __REG(0x40600064) /* UDC Frame Number Register Low */
-#define UBCR2 __REG(0x40600068) /* UDC Byte Count Reg 2 */
-#define UBCR4 __REG(0x4060006c) /* UDC Byte Count Reg 4 */
-#define UBCR7 __REG(0x40600070) /* UDC Byte Count Reg 7 */
-#define UBCR9 __REG(0x40600074) /* UDC Byte Count Reg 9 */
-#define UBCR12 __REG(0x40600078) /* UDC Byte Count Reg 12 */
-#define UBCR14 __REG(0x4060007c) /* UDC Byte Count Reg 14 */
-#define UDDR0 __REG(0x40600080) /* UDC Endpoint 0 Data Register */
-#define UDDR1 __REG(0x40600100) /* UDC Endpoint 1 Data Register */
-#define UDDR2 __REG(0x40600180) /* UDC Endpoint 2 Data Register */
-#define UDDR3 __REG(0x40600200) /* UDC Endpoint 3 Data Register */
-#define UDDR4 __REG(0x40600400) /* UDC Endpoint 4 Data Register */
-#define UDDR5 __REG(0x406000A0) /* UDC Endpoint 5 Data Register */
-#define UDDR6 __REG(0x40600600) /* UDC Endpoint 6 Data Register */
-#define UDDR7 __REG(0x40600680) /* UDC Endpoint 7 Data Register */
-#define UDDR8 __REG(0x40600700) /* UDC Endpoint 8 Data Register */
-#define UDDR9 __REG(0x40600900) /* UDC Endpoint 9 Data Register */
-#define UDDR10 __REG(0x406000C0) /* UDC Endpoint 10 Data Register */
-#define UDDR11 __REG(0x40600B00) /* UDC Endpoint 11 Data Register */
-#define UDDR12 __REG(0x40600B80) /* UDC Endpoint 12 Data Register */
-#define UDDR13 __REG(0x40600C00) /* UDC Endpoint 13 Data Register */
-#define UDDR14 __REG(0x40600E00) /* UDC Endpoint 14 Data Register */
-#define UDDR15 __REG(0x406000E0) /* UDC Endpoint 15 Data Register */
-
-#define UICR0 __REG(0x40600050) /* UDC Interrupt Control Register 0 */
-
-#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */
-#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */
-#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */
-#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */
-#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */
-#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */
-#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */
-#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */
-
-#define UICR1 __REG(0x40600054) /* UDC Interrupt Control Register 1 */
-
-#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */
-#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */
-#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */
-#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */
-#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */
-#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */
-#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */
-#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */
-
-#define USIR0 __REG(0x40600058) /* UDC Status Interrupt Register 0 */
-
-#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */
-#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */
-#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */
-#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */
-#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */
-#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */
-#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */
-#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */
-
-#define USIR1 __REG(0x4060005C) /* UDC Status Interrupt Register 1 */
-
-#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */
-#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */
-#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */
-#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */
-#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */
-#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */
-#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */
-#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */
-
-#endif
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index 8347d87a713d..5a941bd3dbed 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -18,12 +18,13 @@
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/property.h>
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/smsc911x.h>
#include <linux/input.h>
-#include <linux/rotary_encoder.h>
#include <linux/gpio_keys.h>
#include <linux/input/eeti_ts.h>
#include <linux/leds.h>
@@ -366,22 +367,31 @@ static struct pxaohci_platform_data raumfeld_ohci_info = {
* Rotary encoder input device
*/
-static struct rotary_encoder_platform_data raumfeld_rotary_encoder_info = {
- .steps = 24,
- .axis = REL_X,
- .relative_axis = 1,
- .gpio_a = GPIO_VOLENC_A,
- .gpio_b = GPIO_VOLENC_B,
- .inverted_a = 1,
- .inverted_b = 0,
+static struct gpiod_lookup_table raumfeld_rotary_gpios_table = {
+ .dev_id = "rotary-encoder.0",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-0",
+ GPIO_VOLENC_A, NULL, 0, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("gpio-0",
+ GPIO_VOLENC_B, NULL, 1, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
+static struct property_entry raumfeld_rotary_properties[] = {
+ PROPERTY_ENTRY_INTEGER("rotary-encoder,steps-per-period", u32, 24),
+ PROPERTY_ENTRY_INTEGER("linux,axis", u32, REL_X),
+ PROPERTY_ENTRY_INTEGER("rotary-encoder,relative_axis", u32, 1),
+ { },
+};
+
+static struct property_set raumfeld_rotary_property_set = {
+ .properties = raumfeld_rotary_properties,
};
static struct platform_device rotary_encoder_device = {
.name = "rotary-encoder",
.id = 0,
- .dev = {
- .platform_data = &raumfeld_rotary_encoder_info,
- }
};
/**
@@ -1051,7 +1061,12 @@ static void __init __maybe_unused raumfeld_controller_init(void)
int ret;
pxa3xx_mfp_config(ARRAY_AND_SIZE(raumfeld_controller_pin_config));
+
+ gpiod_add_lookup_table(&raumfeld_rotary_gpios_table);
+ device_add_property_set(&rotary_encoder_device.dev,
+ &raumfeld_rotary_property_set);
platform_device_register(&rotary_encoder_device);
+
spi_register_board_info(ARRAY_AND_SIZE(controller_spi_devices));
i2c_register_board_info(0, &raumfeld_controller_i2c_board_info, 1);
@@ -1086,6 +1101,10 @@ static void __init __maybe_unused raumfeld_speaker_init(void)
i2c_register_board_info(0, &raumfeld_connector_i2c_board_info, 1);
platform_device_register(&smc91x_device);
+
+ gpiod_add_lookup_table(&raumfeld_rotary_gpios_table);
+ device_add_property_set(&rotary_encoder_device.dev,
+ &raumfeld_rotary_property_set);
platform_device_register(&rotary_encoder_device);
raumfeld_audio_init();
diff --git a/arch/arm/mach-s3c24xx/mach-h1940.c b/arch/arm/mach-s3c24xx/mach-h1940.c
index 9f54300df4b3..7ed78619217c 100644
--- a/arch/arm/mach-s3c24xx/mach-h1940.c
+++ b/arch/arm/mach-s3c24xx/mach-h1940.c
@@ -664,7 +664,7 @@ static void __init h1940_map_io(void)
/* Add latch gpio chip, set latch initial value */
h1940_latch_control(0, 0);
- WARN_ON(gpiochip_add(&h1940_latch_gpiochip));
+ WARN_ON(gpiochip_add_data(&h1940_latch_gpiochip, NULL));
}
static void __init h1940_init_time(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index d8965c682d2f..bb3ca9c763de 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -15,7 +15,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/io.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <mach/hardware.h>
#include <asm/setup.h>
@@ -369,7 +369,7 @@ static int __init simpad_init(void)
cs3_gpio.get = cs3_gpio_get;
cs3_gpio.direction_input = cs3_gpio_direction_input;
cs3_gpio.direction_output = cs3_gpio_direction_output;
- ret = gpiochip_add(&cs3_gpio);
+ ret = gpiochip_add_data(&cs3_gpio, NULL);
if (ret)
printk(KERN_WARNING "simpad: Unable to register cs3 GPIO device");
diff --git a/arch/arm/mach-socfpga/Makefile b/arch/arm/mach-socfpga/Makefile
index b8f9e238e4ab..ed15db19e561 100644
--- a/arch/arm/mach-socfpga/Makefile
+++ b/arch/arm/mach-socfpga/Makefile
@@ -5,3 +5,5 @@
obj-y := socfpga.o
obj-$(CONFIG_SMP) += headsmp.o platsmp.o
obj-$(CONFIG_SOCFPGA_SUSPEND) += pm.o self-refresh.o
+obj-$(CONFIG_EDAC_ALTERA_L2C) += l2_cache.o
+obj-$(CONFIG_EDAC_ALTERA_OCRAM) += ocram.o
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index 5bc6ea87cdf7..575195be6687 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -36,6 +36,8 @@
extern void socfpga_init_clocks(void);
extern void socfpga_sysmgr_init(void);
+void socfpga_init_l2_ecc(void);
+void socfpga_init_ocram_ecc(void);
extern void __iomem *sys_manager_base_addr;
extern void __iomem *rst_manager_base_addr;
diff --git a/arch/arm/mach-socfpga/l2_cache.c b/arch/arm/mach-socfpga/l2_cache.c
new file mode 100644
index 000000000000..e3907ab58d05
--- /dev/null
+++ b/arch/arm/mach-socfpga/l2_cache.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright Altera Corporation (C) 2016. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/io.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+
+void socfpga_init_l2_ecc(void)
+{
+ struct device_node *np;
+ void __iomem *mapped_l2_edac_addr;
+
+ np = of_find_compatible_node(NULL, NULL, "altr,socfpga-l2-ecc");
+ if (!np) {
+ pr_err("Unable to find socfpga-l2-ecc in dtb\n");
+ return;
+ }
+
+ mapped_l2_edac_addr = of_iomap(np, 0);
+ of_node_put(np);
+ if (!mapped_l2_edac_addr) {
+ pr_err("Unable to find L2 ECC mapping in dtb\n");
+ return;
+ }
+
+ /* Enable ECC */
+ writel(0x01, mapped_l2_edac_addr);
+ iounmap(mapped_l2_edac_addr);
+}
diff --git a/arch/arm/mach-socfpga/ocram.c b/arch/arm/mach-socfpga/ocram.c
new file mode 100644
index 000000000000..60ec643ac2be
--- /dev/null
+++ b/arch/arm/mach-socfpga/ocram.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright Altera Corporation (C) 2016. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/io.h>
+#include <linux/genalloc.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#define ALTR_OCRAM_CLEAR_ECC 0x00000018
+#define ALTR_OCRAM_ECC_EN 0x00000019
+
+void socfpga_init_ocram_ecc(void)
+{
+ struct device_node *np;
+ void __iomem *mapped_ocr_edac_addr;
+
+ /* Find the OCRAM EDAC device tree node */
+ np = of_find_compatible_node(NULL, NULL, "altr,socfpga-ocram-ecc");
+ if (!np) {
+ pr_err("Unable to find socfpga-ocram-ecc\n");
+ return;
+ }
+
+ mapped_ocr_edac_addr = of_iomap(np, 0);
+ of_node_put(np);
+ if (!mapped_ocr_edac_addr) {
+ pr_err("Unable to map OCRAM ecc regs.\n");
+ return;
+ }
+
+ /* Clear any pending OCRAM ECC interrupts, then enable ECC */
+ writel(ALTR_OCRAM_CLEAR_ECC, mapped_ocr_edac_addr);
+ writel(ALTR_OCRAM_ECC_EN, mapped_ocr_edac_addr);
+
+ iounmap(mapped_ocr_edac_addr);
+}
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index a1c0efaa8794..7e0aad2ec3d1 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -59,6 +59,11 @@ static void __init socfpga_init_irq(void)
{
irqchip_init();
socfpga_sysmgr_init();
+ if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C))
+ socfpga_init_l2_ecc();
+
+ if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM))
+ socfpga_init_ocram_ecc();
}
static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd)
diff --git a/arch/arm/mach-w90x900/gpio.c b/arch/arm/mach-w90x900/gpio.c
index ba05aec7ea4b..55d1a00dbd28 100644
--- a/arch/arm/mach-w90x900/gpio.c
+++ b/arch/arm/mach-w90x900/gpio.c
@@ -20,7 +20,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/io.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <mach/hardware.h>
@@ -30,7 +30,6 @@
#define GPIO_IN (0x0C)
#define GROUPINERV (0x10)
#define GPIO_GPIO(Nb) (0x00000001 << (Nb))
-#define to_nuc900_gpio_chip(c) container_of(c, struct nuc900_gpio_chip, chip)
#define NUC900_GPIO_CHIP(name, base_gpio, nr_gpio) \
{ \
@@ -53,7 +52,7 @@ struct nuc900_gpio_chip {
static int nuc900_gpio_get(struct gpio_chip *chip, unsigned offset)
{
- struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip);
+ struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip);
void __iomem *pio = nuc900_gpio->regbase + GPIO_IN;
unsigned int regval;
@@ -65,7 +64,7 @@ static int nuc900_gpio_get(struct gpio_chip *chip, unsigned offset)
static void nuc900_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
{
- struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip);
+ struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip);
void __iomem *pio = nuc900_gpio->regbase + GPIO_OUT;
unsigned int regval;
unsigned long flags;
@@ -86,7 +85,7 @@ static void nuc900_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
static int nuc900_dir_input(struct gpio_chip *chip, unsigned offset)
{
- struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip);
+ struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip);
void __iomem *pio = nuc900_gpio->regbase + GPIO_DIR;
unsigned int regval;
unsigned long flags;
@@ -104,7 +103,7 @@ static int nuc900_dir_input(struct gpio_chip *chip, unsigned offset)
static int nuc900_dir_output(struct gpio_chip *chip, unsigned offset, int val)
{
- struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip);
+ struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip);
void __iomem *outreg = nuc900_gpio->regbase + GPIO_OUT;
void __iomem *pio = nuc900_gpio->regbase + GPIO_DIR;
unsigned int regval;
@@ -149,6 +148,6 @@ void __init nuc900_init_gpio(int nr_group)
gpio_chip = &nuc900_gpio[i];
spin_lock_init(&gpio_chip->gpio_lock);
gpio_chip->regbase = GPIO_BASE + i * GROUPINERV;
- gpiochip_add(&gpio_chip->chip);
+ gpiochip_add_data(&gpio_chip->chip, gpio_chip);
}
}
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index daafcf121ce0..ad5841856007 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -346,7 +346,7 @@ retry:
up_read(&mm->mmap_sem);
/*
- * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
+ * Handle the "normal" case first - VM_FAULT_MAJOR
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
return 0;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 434d76f0b363..88fbe0d23ca6 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -732,7 +732,7 @@ static void *__init late_alloc(unsigned long sz)
return ptr;
}
-static pte_t * __init pte_alloc(pmd_t *pmd, unsigned long addr,
+static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr,
unsigned long prot,
void *(*alloc)(unsigned long sz))
{
@@ -747,7 +747,7 @@ static pte_t * __init pte_alloc(pmd_t *pmd, unsigned long addr,
static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr,
unsigned long prot)
{
- return pte_alloc(pmd, addr, prot, early_alloc);
+ return arm_pte_alloc(pmd, addr, prot, early_alloc);
}
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
@@ -756,7 +756,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
void *(*alloc)(unsigned long sz),
bool ng)
{
- pte_t *pte = pte_alloc(pmd, addr, type->prot_l1, alloc);
+ pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc);
do {
set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)),
ng ? PTE_EXT_NG : 0);
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index e683db1b90a3..b8d477321730 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -80,7 +80,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
if (!new_pmd)
goto no_pmd;
- new_pte = pte_alloc_map(mm, NULL, new_pmd, 0);
+ new_pte = pte_alloc_map(mm, new_pmd, 0);
if (!new_pte)
goto no_pte;
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 7bd22d8e5b11..f74069386c13 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -154,8 +154,7 @@ err_out:
*/
static int orion_gpio_request(struct gpio_chip *chip, unsigned pin)
{
- struct orion_gpio_chip *ochip =
- container_of(chip, struct orion_gpio_chip, chip);
+ struct orion_gpio_chip *ochip = gpiochip_get_data(chip);
if (orion_gpio_is_valid(ochip, pin, GPIO_INPUT_OK) ||
orion_gpio_is_valid(ochip, pin, GPIO_OUTPUT_OK))
@@ -166,8 +165,7 @@ static int orion_gpio_request(struct gpio_chip *chip, unsigned pin)
static int orion_gpio_direction_input(struct gpio_chip *chip, unsigned pin)
{
- struct orion_gpio_chip *ochip =
- container_of(chip, struct orion_gpio_chip, chip);
+ struct orion_gpio_chip *ochip = gpiochip_get_data(chip);
unsigned long flags;
if (!orion_gpio_is_valid(ochip, pin, GPIO_INPUT_OK))
@@ -182,8 +180,7 @@ static int orion_gpio_direction_input(struct gpio_chip *chip, unsigned pin)
static int orion_gpio_get(struct gpio_chip *chip, unsigned pin)
{
- struct orion_gpio_chip *ochip =
- container_of(chip, struct orion_gpio_chip, chip);
+ struct orion_gpio_chip *ochip = gpiochip_get_data(chip);
int val;
if (readl(GPIO_IO_CONF(ochip)) & (1 << pin)) {
@@ -198,8 +195,7 @@ static int orion_gpio_get(struct gpio_chip *chip, unsigned pin)
static int
orion_gpio_direction_output(struct gpio_chip *chip, unsigned pin, int value)
{
- struct orion_gpio_chip *ochip =
- container_of(chip, struct orion_gpio_chip, chip);
+ struct orion_gpio_chip *ochip = gpiochip_get_data(chip);
unsigned long flags;
if (!orion_gpio_is_valid(ochip, pin, GPIO_OUTPUT_OK))
@@ -216,8 +212,7 @@ orion_gpio_direction_output(struct gpio_chip *chip, unsigned pin, int value)
static void orion_gpio_set(struct gpio_chip *chip, unsigned pin, int value)
{
- struct orion_gpio_chip *ochip =
- container_of(chip, struct orion_gpio_chip, chip);
+ struct orion_gpio_chip *ochip = gpiochip_get_data(chip);
unsigned long flags;
spin_lock_irqsave(&ochip->lock, flags);
@@ -227,8 +222,7 @@ static void orion_gpio_set(struct gpio_chip *chip, unsigned pin, int value)
static int orion_gpio_to_irq(struct gpio_chip *chip, unsigned pin)
{
- struct orion_gpio_chip *ochip =
- container_of(chip, struct orion_gpio_chip, chip);
+ struct orion_gpio_chip *ochip = gpiochip_get_data(chip);
return irq_create_mapping(ochip->domain,
ochip->secondary_irq_base + pin);
@@ -445,8 +439,8 @@ static void gpio_irq_handler(struct irq_desc *desc)
static void orion_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
{
- struct orion_gpio_chip *ochip =
- container_of(chip, struct orion_gpio_chip, chip);
+
+ struct orion_gpio_chip *ochip = gpiochip_get_data(chip);
u32 out, io_conf, blink, in_pol, data_in, cause, edg_msk, lvl_msk;
int i;
@@ -567,7 +561,7 @@ void __init orion_gpio_init(struct device_node *np,
ochip->mask_offset = mask_offset;
ochip->secondary_irq_base = secondary_irq_base;
- gpiochip_add(&ochip->chip);
+ gpiochip_add_data(&ochip->chip, ochip);
/*
* Mask and clear GPIO interrupts.
diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c
index 04aff2c31b46..70f2f699bed3 100644
--- a/arch/arm/plat-samsung/pm-check.c
+++ b/arch/arm/plat-samsung/pm-check.c
@@ -53,8 +53,8 @@ static void s3c_pm_run_res(struct resource *ptr, run_fn_t fn, u32 *arg)
if (ptr->child != NULL)
s3c_pm_run_res(ptr->child, fn, arg);
- if ((ptr->flags & IORESOURCE_MEM) &&
- strcmp(ptr->name, "System RAM") == 0) {
+ if ((ptr->flags & IORESOURCE_SYSTEM_RAM)
+ == IORESOURCE_SYSTEM_RAM) {
S3C_PMDBG("Found system RAM at %08lx..%08lx\n",
(unsigned long)ptr->start,
(unsigned long)ptr->end);
diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S
index b2b97e3e7bab..a62a7b64f49c 100644
--- a/arch/arm/vdso/vdso.S
+++ b/arch/arm/vdso/vdso.S
@@ -23,9 +23,8 @@
#include <linux/const.h>
#include <asm/page.h>
- __PAGE_ALIGNED_DATA
-
.globl vdso_start, vdso_end
+ .section .data..ro_after_init
.balign PAGE_SIZE
vdso_start:
.incbin "arch/arm/vdso/vdso.so"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cc62289a63e..4f436220384f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -14,6 +14,7 @@ config ARM64
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
select ARM_GIC
@@ -49,6 +50,7 @@ config ARM64
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
+ select HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KGDB
@@ -235,8 +237,6 @@ config PCI_SYSCALL
def_bool PCI
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
endmenu
@@ -393,6 +393,7 @@ config ARM64_ERRATUM_843419
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
depends on MODULES
default y
+ select ARM64_MODULE_CMODEL_LARGE
help
This option builds kernel modules using the large memory model in
order to avoid the use of the ADRP instruction, which can cause
@@ -432,6 +433,17 @@ config CAVIUM_ERRATUM_23154
If unsure, say Y.
+config CAVIUM_ERRATUM_27456
+ bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption"
+ default y
+ help
+ On ThunderX T88 pass 1.x through 2.1 parts, broadcast TLBI
+ instructions may cause the icache to become corrupted if it
+ contains data for a non-current ASID. The fix is to
+ invalidate the icache when changing the mm context.
+
+ If unsure, say Y.
+
endmenu
@@ -537,6 +549,9 @@ config HOTPLUG_CPU
source kernel/Kconfig.preempt
source kernel/Kconfig.hz
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ def_bool y
+
config ARCH_HAS_HOLES_MEMORYMODEL
def_bool y if SPARSEMEM
@@ -750,12 +765,112 @@ config ARM64_LSE_ATOMICS
not support these instructions and requires the kernel to be
built with binutils >= 2.25.
+config ARM64_VHE
+ bool "Enable support for Virtualization Host Extensions (VHE)"
+ default y
+ help
+ Virtualization Host Extensions (VHE) allow the kernel to run
+ directly at EL2 (instead of EL1) on processors that support
+ it. This leads to better performance for KVM, as they reduce
+ the cost of the world switch.
+
+ Selecting this option allows the VHE feature to be detected
+ at runtime, and does not affect processors that do not
+ implement this feature.
+
+endmenu
+
+menu "ARMv8.2 architectural features"
+
+config ARM64_UAO
+ bool "Enable support for User Access Override (UAO)"
+ default y
+ help
+ User Access Override (UAO; part of the ARMv8.2 Extensions)
+ causes the 'unprivileged' variant of the load/store instructions to
+ be overriden to be privileged.
+
+ This option changes get_user() and friends to use the 'unprivileged'
+ variant of the load/store instructions. This ensures that user-space
+ really did have access to the supplied memory. When addr_limit is
+ set to kernel memory the UAO bit will be set, allowing privileged
+ access to kernel memory.
+
+ Choosing this option will cause copy_to_user() et al to use user-space
+ memory permissions.
+
+ The feature is detected at runtime, the kernel will use the
+ regular load/store instructions if the cpu does not implement the
+ feature.
+
endmenu
+config ARM64_MODULE_CMODEL_LARGE
+ bool
+
+config ARM64_MODULE_PLTS
+ bool
+ select ARM64_MODULE_CMODEL_LARGE
+ select HAVE_MOD_ARCH_SPECIFIC
+
+config RELOCATABLE
+ bool
+ help
+ This builds the kernel as a Position Independent Executable (PIE),
+ which retains all relocation metadata required to relocate the
+ kernel binary at runtime to a different virtual address than the
+ address it was linked at.
+ Since AArch64 uses the RELA relocation format, this requires a
+ relocation pass at runtime even if the kernel is loaded at the
+ same address it was linked at.
+
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+ select ARM64_MODULE_PLTS
+ select RELOCATABLE
+ help
+ Randomizes the virtual address at which the kernel image is
+ loaded, as a security feature that deters exploit attempts
+ relying on knowledge of the location of kernel internals.
+
+ It is the bootloader's job to provide entropy, by passing a
+ random u64 value in /chosen/kaslr-seed at kernel entry.
+
+ When booting via the UEFI stub, it will invoke the firmware's
+ EFI_RNG_PROTOCOL implementation (if available) to supply entropy
+ to the kernel proper. In addition, it will randomise the physical
+ location of the kernel Image as well.
+
+ If unsure, say N.
+
+config RANDOMIZE_MODULE_REGION_FULL
+ bool "Randomize the module region independently from the core kernel"
+ depends on RANDOMIZE_BASE
+ default y
+ help
+ Randomizes the location of the module region without considering the
+ location of the core kernel. This way, it is impossible for modules
+ to leak information about the location of core kernel data structures
+ but it does imply that function calls between modules and the core
+ kernel will need to be resolved via veneers in the module PLT.
+
+ When this option is not set, the module region will be randomized over
+ a limited range that contains the [_stext, _etext] interval of the
+ core kernel, so branch relocations are always in range.
+
endmenu
menu "Boot options"
+config ARM64_ACPI_PARKING_PROTOCOL
+ bool "Enable support for the ARM64 ACPI parking protocol"
+ depends on ACPI
+ help
+ Enable support for the ARM64 ACPI parking protocol. If disabled
+ the kernel will not allow booting through the ARM64 ACPI parking
+ protocol even if the corresponding data is present in the ACPI
+ MADT table.
+
config CMDLINE
string "Default kernel command string"
default ""
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index e13c4bf84d9e..7e76845a0434 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -50,13 +50,13 @@ config DEBUG_SET_MODULE_RONX
config DEBUG_RODATA
bool "Make kernel text and rodata read-only"
+ default y
help
If this is set, kernel text and rodata will be made read-only. This
is to help catch accidental or malicious attempts to change the
- kernel's executable code. Additionally splits rodata from kernel
- text so it can be made explicitly non-executable.
+ kernel's executable code.
- If in doubt, say Y
+ If in doubt, say Y
config DEBUG_ALIGN_RODATA
depends on DEBUG_RODATA && ARM64_4K_PAGES
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b5e3f6d42b88..354d75402ace 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
GZFLAGS :=-9
+ifneq ($(CONFIG_RELOCATABLE),)
+LDFLAGS_vmlinux += -pie
+endif
+
KBUILD_DEFCONFIG := defconfig
# Check for binutils support for specific extensions
@@ -43,10 +47,14 @@ endif
CHECKFLAGS += -D__aarch64__
-ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
KBUILD_CFLAGS_MODULE += -mcmodel=large
endif
+ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
+endif
+
# Default value
head-y := arch/arm64/kernel/head.o
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index cafb2c2715fb..b7d7109e7304 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -493,6 +493,11 @@
reg = <0x0 0x1054a000 0x0 0x20>;
};
+ rb: rb@7e000000 {
+ compatible = "apm,xgene-rb", "syscon";
+ reg = <0x0 0x7e000000 0x0 0x10>;
+ };
+
edac@78800000 {
compatible = "apm,xgene-edac";
#address-cells = <2>;
@@ -502,6 +507,7 @@
regmap-mcba = <&mcba>;
regmap-mcbb = <&mcbb>;
regmap-efuse = <&efuse>;
+ regmap-rb = <&rb>;
reg = <0x0 0x78800000 0x0 0x100>;
interrupts = <0x0 0x20 0x4>,
<0x0 0x21 0x4>,
diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
index e8bb46027bed..6e28e41d7e3e 100644
--- a/arch/arm64/boot/dts/nvidia/tegra132.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
@@ -313,7 +313,7 @@
/*
* There are two serial driver i.e. 8250 based simple serial
* driver and APB DMA based serial driver for higher baudrate
- * and performace. To enable the 8250 based driver, the compatible
+ * and performance. To enable the 8250 based driver, the compatible
* is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
* the APB DMA based serial driver, the comptible is
* "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
index bc23f4dea002..23b0630602cf 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
@@ -345,7 +345,7 @@
/*
* There are two serial driver i.e. 8250 based simple serial
* driver and APB DMA based serial driver for higher baudrate
- * and performace. To enable the 8250 based driver, the compatible
+ * and performance. To enable the 8250 based driver, the compatible
* is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
* the APB DMA based serial driver, the comptible is
* "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 7a3d22a46faf..5c888049d061 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -15,6 +15,7 @@
#include <crypto/algapi.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
+#include <crypto/xts.h>
#include "aes-ce-setkey.h"
@@ -85,6 +86,10 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
+ ret = xts_check_key(tfm, in_key, key_len);
+ if (ret)
+ return ret;
+
ret = aes_expandkey(&ctx->key1, in_key, key_len / 2);
if (!ret)
ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2],
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 70fd9ffb58cf..cff532a6744e 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,5 +1,3 @@
-
-
generic-y += bug.h
generic-y += bugs.h
generic-y += checksum.h
@@ -31,7 +29,6 @@ generic-y += msgbuf.h
generic-y += msi.h
generic-y += mutex.h
generic-y += pci.h
-generic-y += pci-bridge.h
generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index caafd63b8092..aee323b13802 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -87,9 +87,26 @@ void __init acpi_init_cpus(void);
static inline void acpi_init_cpus(void) { }
#endif /* CONFIG_ACPI */
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+bool acpi_parking_protocol_valid(int cpu);
+void __init
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor);
+#else
+static inline bool acpi_parking_protocol_valid(int cpu) { return false; }
+static inline void
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
+{}
+#endif
+
static inline const char *acpi_get_enable_method(int cpu)
{
- return acpi_psci_present() ? "psci" : NULL;
+ if (acpi_psci_present())
+ return "psci";
+
+ if (acpi_parking_protocol_valid(cpu))
+ return "parking-protocol";
+
+ return NULL;
}
#ifdef CONFIG_ACPI_APEI
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index e4962f04201e..beccbdefa106 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -1,6 +1,8 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
+#include <asm/cpufeature.h>
+
#ifndef __ASSEMBLY__
#include <linux/init.h>
@@ -63,6 +65,8 @@ void apply_alternatives(void *start, size_t length);
#else
+#include <asm/assembler.h>
+
.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
.word \orig_offset - .
.word \alt_offset - .
@@ -136,6 +140,65 @@ void apply_alternatives(void *start, size_t length);
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+ .macro uao_ldp l, reg1, reg2, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: ldp \reg1, \reg2, [\addr], \post_inc;
+8889: nop;
+ nop;
+ alternative_else
+ ldtr \reg1, [\addr];
+ ldtr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro uao_stp l, reg1, reg2, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: stp \reg1, \reg2, [\addr], \post_inc;
+8889: nop;
+ nop;
+ alternative_else
+ sttr \reg1, [\addr];
+ sttr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: \inst \reg, [\addr], \post_inc;
+ nop;
+ alternative_else
+ \alt_inst \reg, [\addr];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ .endm
+#else
+ .macro uao_ldp l, reg1, reg2, addr, post_inc
+ USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+ .endm
+ .macro uao_stp l, reg1, reg2, addr, post_inc
+ USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+ .endm
+ .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+ USER(\l, \inst \reg, [\addr], \post_inc)
+ .endm
+#endif
+
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index bb7b72734c24..70f7b9e04598 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -94,12 +94,19 @@
dmb \opt
.endm
+/*
+ * Emit an entry into the exception table
+ */
+ .macro _asm_extable, from, to
+ .pushsection __ex_table, "a"
+ .align 3
+ .long (\from - .), (\to - .)
+ .popsection
+ .endm
+
#define USER(l, x...) \
9999: x; \
- .section __ex_table,"a"; \
- .align 3; \
- .quad 9999b,l; \
- .previous
+ _asm_extable 9999b, l
/*
* Register aliases.
@@ -215,4 +222,15 @@ lr .req x30 // link register
.size __pi_##x, . - x; \
ENDPROC(x)
+ /*
+ * Emit a 64-bit absolute little endian symbol reference in a way that
+ * ensures that it will be resolved at build time, even when building a
+ * PIE binary. This requires cooperation from the linker script, which
+ * must emit the lo32/hi32 halves individually.
+ */
+ .macro le64sym, sym
+ .long \sym\()_lo32
+ .long \sym\()_hi32
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 197e06afbf71..39c1d340fec5 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v)
" stclr %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_or(int i, atomic_t *v)
@@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v)
" stset %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_xor(int i, atomic_t *v)
@@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v)
" steor %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_add(int i, atomic_t *v)
@@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v)
" stadd %w[i], %[v]\n")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
@@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return w0; \
}
@@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v)
" stclr %w[i], %[v]")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic_sub(int i, atomic_t *v)
@@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v)
" stadd %w[i], %[v]")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
@@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS , ##cl); \
\
return w0; \
}
@@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v)
" stclr %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_or(long i, atomic64_t *v)
@@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v)
" stset %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_xor(long i, atomic64_t *v)
@@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v)
" steor %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_add(long i, atomic64_t *v)
@@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v)
" stadd %[i], %[v]\n")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
@@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
@@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
" stclr %[i], %[v]")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
static inline void atomic64_sub(long i, atomic64_t *v)
@@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
" stadd %[i], %[v]")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
- : "x30");
+ : __LL_SC_CLOBBERS);
}
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
@@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
@@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
"2:")
: [ret] "+&r" (x0), [v] "+Q" (v->counter)
:
- : "x30", "cc", "memory");
+ : __LL_SC_CLOBBERS, "cc", "memory");
return x0;
}
@@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
" mov %" #w "[ret], " #w "30") \
: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \
: [old] "r" (x1), [new] "r" (x2) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
@@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \
[v] "+Q" (*(unsigned long *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
- : "x30" , ##cl); \
+ : __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
}
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
index 81151b67b26b..ebf2481889c3 100644
--- a/arch/arm64/include/asm/boot.h
+++ b/arch/arm64/include/asm/boot.h
@@ -11,4 +11,10 @@
#define MIN_FDT_ALIGN 8
#define MAX_FDT_SIZE SZ_2M
+/*
+ * arm64 requires the kernel image to placed
+ * TEXT_OFFSET bytes beyond a 2 MB aligned base
+ */
+#define MIN_KIMG_ALIGN SZ_2M
+
#endif
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
new file mode 100644
index 000000000000..ed693c5bcec0
--- /dev/null
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BRK_IMM_H
+#define __ASM_BRK_IMM_H
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ * 0x800: kernel-mode BUG() and WARN() traps
+ */
+#define FAULT_BRK_IMM 0x100
+#define KGDB_DYN_DBG_BRK_IMM 0x400
+#define KGDB_COMPILED_DBG_BRK_IMM 0x401
+#define BUG_BRK_IMM 0x800
+
+#endif
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 4a748ce9ba1a..561190d15881 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,7 +18,7 @@
#ifndef _ARCH_ARM64_ASM_BUG_H
#define _ARCH_ARM64_ASM_BUG_H
-#include <asm/debug-monitors.h>
+#include <asm/brk-imm.h>
#ifdef CONFIG_GENERIC_BUG
#define HAVE_ARCH_BUG
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7fc294c3bc5b..22dda613f9c9 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
#endif
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index b5e9cee4b5f8..13a6103130cd 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -36,6 +36,7 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64isar1;
u64 reg_id_aa64mmfr0;
u64 reg_id_aa64mmfr1;
+ u64 reg_id_aa64mmfr2;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8f271b83f910..b9b649422fca 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -30,8 +30,13 @@
#define ARM64_HAS_LSE_ATOMICS 5
#define ARM64_WORKAROUND_CAVIUM_23154 6
#define ARM64_WORKAROUND_834220 7
+#define ARM64_HAS_NO_HW_PREFETCH 8
+#define ARM64_HAS_UAO 9
+#define ARM64_ALT_PAN_NOT_UAO 10
+#define ARM64_HAS_VIRT_HOST_EXTN 11
+#define ARM64_WORKAROUND_CAVIUM_27456 12
-#define ARM64_NCAPS 8
+#define ARM64_NCAPS 13
#ifndef __ASSEMBLY__
@@ -85,9 +90,10 @@ struct arm64_cpu_capabilities {
struct { /* Feature register checking */
u32 sys_reg;
- int field_pos;
- int min_field_value;
- int hwcap_type;
+ u8 field_pos;
+ u8 min_field_value;
+ u8 hwcap_type;
+ bool sign;
unsigned long hwcap;
};
};
@@ -117,15 +123,15 @@ static inline void cpus_set_cap(unsigned int num)
}
static inline int __attribute_const__
-cpuid_feature_extract_field_width(u64 features, int field, int width)
+cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
{
return (s64)(features << (64 - width - field)) >> (64 - width);
}
static inline int __attribute_const__
-cpuid_feature_extract_field(u64 features, int field)
+cpuid_feature_extract_signed_field(u64 features, int field)
{
- return cpuid_feature_extract_field_width(features, field, 4);
+ return cpuid_feature_extract_signed_field_width(features, field, 4);
}
static inline unsigned int __attribute_const__
@@ -145,17 +151,23 @@ static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
}
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field, bool sign)
+{
+ return (sign) ?
+ cpuid_feature_extract_signed_field(features, field) :
+ cpuid_feature_extract_unsigned_field(features, field);
+}
+
static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
{
- return ftrp->sign ?
- cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) :
- cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width);
+ return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign);
}
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
- return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
- cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+ return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
+ cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
}
void __init setup_cpu_features(void);
@@ -164,13 +176,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info);
void check_local_cpu_errata(void);
-#ifdef CONFIG_HOTPLUG_CPU
void verify_local_cpu_capabilities(void);
-#else
-static inline void verify_local_cpu_capabilities(void)
-{
-}
-#endif
u64 read_system_reg(u32 id);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 1a5949364ed0..f2309a25d14c 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -32,12 +32,6 @@
#define MPIDR_AFFINITY_LEVEL(mpidr, level) \
((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
-#define read_cpuid(reg) ({ \
- u64 __val; \
- asm("mrs %0, " #reg : "=r" (__val)); \
- __val; \
-})
-
#define MIDR_REVISION_MASK 0xf
#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK)
#define MIDR_PARTNUM_SHIFT 4
@@ -57,11 +51,22 @@
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
-#define MIDR_CPU_PART(imp, partnum) \
+#define MIDR_CPU_MODEL(imp, partnum) \
(((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
+#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+ MIDR_ARCHITECTURE_MASK)
+
+#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \
+({ \
+ u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \
+ u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \
+ \
+ _model == (model) && rv >= (rv_min) && rv <= (rv_max); \
+ })
+
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_IMP_CAVIUM 0x43
@@ -75,8 +80,20 @@
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
+#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+
#ifndef __ASSEMBLY__
+#include <asm/sysreg.h>
+
+#define read_cpuid(reg) ({ \
+ u64 __val; \
+ asm("mrs_s %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \
+ __val; \
+})
+
/*
* The CPU ID never changes at run time, so we might as well tell the
* compiler that it's constant. Use this function to read the CPU ID
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 279c85b5ec09..2fcb9b7c876c 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -20,6 +20,7 @@
#include <linux/errno.h>
#include <linux/types.h>
+#include <asm/brk-imm.h>
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/ptrace.h>
@@ -47,19 +48,6 @@
#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE
/*
- * #imm16 values used for BRK instruction generation
- * Allowed values for kgbd are 0x400 - 0x7ff
- * 0x100: for triggering a fault on purpose (reserved)
- * 0x400: for dynamic BRK instruction
- * 0x401: for compile time BRK instruction
- * 0x800: kernel-mode BUG() and WARN() traps
- */
-#define FAULT_BRK_IMM 0x100
-#define KGDB_DYN_DBG_BRK_IMM 0x400
-#define KGDB_COMPILED_DBG_BRK_IMM 0x401
-#define BUG_BRK_IMM 0x800
-
-/*
* BRK instruction encoding
* The #imm16 value should be placed at bits[20:5] within BRK ins
*/
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index faad6df49e5b..24ed037f09fd 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -24,15 +24,6 @@
#include <asm/ptrace.h>
#include <asm/user.h>
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
-#define ELF_CORE_COPY_REGS(dest, regs) \
- *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
-
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-typedef struct user_fpsimd_state elf_fpregset_t;
-
/*
* AArch64 static relocation types.
*/
@@ -86,6 +77,8 @@ typedef struct user_fpsimd_state elf_fpregset_t;
#define R_AARCH64_MOVW_PREL_G2_NC 292
#define R_AARCH64_MOVW_PREL_G3 293
+#define R_AARCH64_RELATIVE 1027
+
/*
* These are used to set parameters in the core dumps.
*/
@@ -127,6 +120,17 @@ typedef struct user_fpsimd_state elf_fpregset_t;
*/
#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
+#ifndef __ASSEMBLY__
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
+#define ELF_CORE_COPY_REGS(dest, regs) \
+ *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_fpsimd_state elf_fpregset_t;
+
/*
* When the program starts, a1 contains a pointer to a function to be
* registered with atexit, as per the SVR4 ABI. A value of 0 means we have no
@@ -186,4 +190,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
#endif /* CONFIG_COMPAT */
+#endif /* !__ASSEMBLY__ */
+
#endif
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 309704544d22..caf86be815ba 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -20,6 +20,7 @@
#include <linux/sizes.h>
#include <asm/boot.h>
#include <asm/page.h>
+#include <asm/pgtable-prot.h>
/*
* Here we define all the compile-time 'special' virtual
@@ -62,6 +63,16 @@ enum fixed_addresses {
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
+ /*
+ * Used for kernel page table creation, so unmapped memory may be used
+ * for tables.
+ */
+ FIX_PTE,
+ FIX_PMD,
+ FIX_PUD,
+ FIX_PGD,
+
__end_of_fixed_addresses
};
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 3c60f37e48ab..caa955f10e19 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -48,7 +48,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
* See kernel/trace/trace_syscalls.c
*
* x86 code says:
- * If the user realy wants these, then they should use the
+ * If the user really wants these, then they should use the
* raw syscall tracepoints with filtering.
*/
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 5f3ab8c1db55..f2585cdd32c2 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -42,10 +42,8 @@
"4: mov %w0, %w5\n" \
" b 3b\n" \
" .popsection\n" \
-" .pushsection __ex_table,\"a\"\n" \
-" .align 3\n" \
-" .quad 1b, 4b, 2b, 4b\n" \
-" .popsection\n" \
+ _ASM_EXTABLE(1b, 4b) \
+ _ASM_EXTABLE(2b, 4b) \
ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
@@ -134,10 +132,8 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
"4: mov %w0, %w6\n"
" b 3b\n"
" .popsection\n"
-" .pushsection __ex_table,\"a\"\n"
-" .align 3\n"
-" .quad 1b, 4b, 2b, 4b\n"
-" .popsection\n"
+ _ASM_EXTABLE(1b, 4b)
+ _ASM_EXTABLE(2b, 4b)
ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index a57601f9d17c..8740297dac77 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 5
+#define NR_IPI 6
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 9732908bfc8a..115ea2a64520 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -18,6 +18,7 @@
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/virt.h>
#ifdef __KERNEL__
@@ -35,10 +36,21 @@ struct arch_hw_breakpoint {
struct arch_hw_breakpoint_ctrl ctrl;
};
+/* Privilege Levels */
+#define AARCH64_BREAKPOINT_EL1 1
+#define AARCH64_BREAKPOINT_EL0 2
+
+#define DBG_HMC_HYP (1 << 13)
+
static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
{
- return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
+ u32 val = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
ctrl.enabled;
+
+ if (is_kernel_in_hyp_mode() && ctrl.privilege == AARCH64_BREAKPOINT_EL1)
+ val |= DBG_HMC_HYP;
+
+ return val;
}
static inline void decode_ctrl_reg(u32 reg,
@@ -61,10 +73,6 @@ static inline void decode_ctrl_reg(u32 reg,
#define ARM_BREAKPOINT_STORE 2
#define AARCH64_ESR_ACCESS_MASK (1 << 6)
-/* Privilege Levels */
-#define AARCH64_BREAKPOINT_EL1 1
-#define AARCH64_BREAKPOINT_EL0 2
-
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
#define ARM_BREAKPOINT_LEN_2 0x3
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 2774fa384c47..71ad0f93eb71 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -7,13 +7,14 @@
#include <linux/linkage.h>
#include <asm/memory.h>
+#include <asm/pgtable-types.h>
/*
* KASAN_SHADOW_START: beginning of the kernel virtual addresses.
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
*/
#define KASAN_SHADOW_START (VA_START)
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
/*
* This value is used to map an address to the corresponding shadow
@@ -28,10 +29,12 @@
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
void kasan_init(void);
+void kasan_copy_shadow(pgd_t *pgdir);
asmlinkage void kasan_early_init(void);
#else
static inline void kasan_init(void) { }
+static inline void kasan_copy_shadow(pgd_t *pgdir) { }
#endif
#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index a459714ee29e..5c6375d8528b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -79,5 +79,17 @@
#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
#endif
+/*
+ * To make optimal use of block mappings when laying out the linear
+ * mapping, round down the base of physical memory to a size that can
+ * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
+ * (64k granule), or a multiple that can be mapped using contiguous bits
+ * in the page tables: 32 * PMD_SIZE (16k granule)
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define ARM64_MEMSTART_ALIGN SZ_512M
+#else
+#define ARM64_MEMSTART_ALIGN SZ_1G
+#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index d201d4b396d1..0e391dbfc420 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,7 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
+#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
#define HCR_RW_SHIFT 31
@@ -61,7 +62,7 @@
/*
* The bits we set in HCR:
- * RW: 64bit by default, can be overriden for 32bit VMs
+ * RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
* TVM: Trap VM ops (until M+C set in SCTLR_EL1)
@@ -81,7 +82,7 @@
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
-
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* Hyp System Control Register (SCTLR_EL2) bits */
#define SCTLR_EL2_EE (1 << 25)
@@ -216,4 +217,7 @@
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+#define CPACR_EL1_FPEN (3 << 20)
+#define CPACR_EL1_TTA (1 << 28)
+
#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 52b777b7d407..226f49d69ea9 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -26,6 +26,8 @@
#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+#define kvm_ksym_ref(sym) phys_to_virt((u64)&sym - kimage_voffset)
+
#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
@@ -35,9 +37,6 @@ extern char __kvm_hyp_init_end[];
extern char __kvm_hyp_vector[];
-#define __kvm_hyp_code_start __hyp_text_start
-#define __kvm_hyp_code_end __hyp_text_end
-
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
@@ -45,9 +44,12 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
+extern void __init_stage2_translation(void);
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 779a5872a2c5..40bc1681b6d5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -29,6 +29,7 @@
#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
+#include <asm/virt.h>
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
@@ -43,6 +44,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+ if (is_kernel_in_hyp_mode())
+ vcpu->arch.hcr_el2 |= HCR_E2H;
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
}
@@ -189,6 +192,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
}
+static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
+}
+
static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 689d4c95e12f..227ed475dbd3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,7 +25,9 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
+#include <asm/kvm_perf_event.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -36,10 +38,11 @@
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
+#include <kvm/arm_pmu.h>
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
-#define KVM_VCPU_MAX_FEATURES 3
+#define KVM_VCPU_MAX_FEATURES 4
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -99,8 +102,8 @@ enum vcpu_sysreg {
TTBR1_EL1, /* Translation Table Base Register 1 */
TCR_EL1, /* Translation Control Register */
ESR_EL1, /* Exception Syndrome Register */
- AFSR0_EL1, /* Auxilary Fault Status Register 0 */
- AFSR1_EL1, /* Auxilary Fault Status Register 1 */
+ AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
+ AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
FAR_EL1, /* Fault Address Register */
MAIR_EL1, /* Memory Attribute Indirection Register */
VBAR_EL1, /* Vector Base Address Register */
@@ -114,6 +117,21 @@ enum vcpu_sysreg {
MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
+ /* Performance Monitors Registers */
+ PMCR_EL0, /* Control Register */
+ PMSELR_EL0, /* Event Counter Selection Register */
+ PMEVCNTR0_EL0, /* Event Counter Register (0-30) */
+ PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
+ PMCCNTR_EL0, /* Cycle Counter Register */
+ PMEVTYPER0_EL0, /* Event Type Register (0-30) */
+ PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
+ PMCCFILTR_EL0, /* Cycle Count Filter Register */
+ PMCNTENSET_EL0, /* Count Enable Set Register */
+ PMINTENSET_EL1, /* Interrupt Enable Set Register */
+ PMOVSSET_EL0, /* Overflow Flag Status Set Register */
+ PMSWINC_EL0, /* Software Increment Register */
+ PMUSERENR_EL0, /* User Enable Register */
+
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -211,6 +229,7 @@ struct kvm_vcpu_arch {
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
+ struct kvm_pmu pmu;
/*
* Anything that is not used directly from assembly code goes
@@ -307,7 +326,9 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
-u64 kvm_call_hyp(void *hypfn, ...);
+u64 __kvm_call_hyp(void *hypfn, ...);
+#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+
void force_vm_exit(const cpumask_t *mask);
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
@@ -328,8 +349,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
* Call initialization code, and switch to the full blown
* HYP code.
*/
- kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
- hyp_stack_ptr, vector_ptr);
+ __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+ hyp_stack_ptr, vector_ptr);
}
static inline void kvm_arch_hardware_disable(void) {}
@@ -342,5 +363,18 @@ void kvm_arm_init_debug(void);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+
+/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */
+
+static inline void __cpu_init_stage2(void)
+{
+ kvm_call_hyp(__init_stage2_translation);
+}
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
new file mode 100644
index 000000000000..a46b019ebcf5
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HYP_H__
+#define __ARM64_KVM_HYP_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_perf_event.h>
+#include <asm/sysreg.h>
+
+#define __hyp_text __section(.hyp.text) notrace
+
+static inline unsigned long __kern_hyp_va(unsigned long v)
+{
+ asm volatile(ALTERNATIVE("and %0, %0, %1",
+ "nop",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK));
+ return v;
+}
+
+#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v)))
+
+static inline unsigned long __hyp_kern_va(unsigned long v)
+{
+ u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET;
+ asm volatile(ALTERNATIVE("add %0, %0, %1",
+ "nop",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "+r" (v) : "r" (offset));
+ return v;
+}
+
+#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v)))
+
+#define read_sysreg_elx(r,nvh,vh) \
+ ({ \
+ u64 reg; \
+ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
+ "mrs_s %0, " __stringify(r##vh),\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : "=r" (reg)); \
+ reg; \
+ })
+
+#define write_sysreg_elx(v,r,nvh,vh) \
+ do { \
+ u64 __val = (u64)(v); \
+ asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
+ "msr_s " __stringify(r##vh) ", %x0",\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : : "rZ" (__val)); \
+ } while (0)
+
+/*
+ * Unified accessors for registers that have a different encoding
+ * between VHE and non-VHE. They must be specified without their "ELx"
+ * encoding.
+ */
+#define read_sysreg_el2(r) \
+ ({ \
+ u64 reg; \
+ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\
+ "mrs %0, " __stringify(r##_EL1),\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : "=r" (reg)); \
+ reg; \
+ })
+
+#define write_sysreg_el2(v,r) \
+ do { \
+ u64 __val = (u64)(v); \
+ asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\
+ "msr " __stringify(r##_EL1) ", %x0",\
+ ARM64_HAS_VIRT_HOST_EXTN) \
+ : : "rZ" (__val)); \
+ } while (0)
+
+#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
+#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
+#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
+#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12)
+
+/* The VHE specific system registers and their encoding */
+#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
+#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
+#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
+#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
+#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
+#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
+#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
+#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
+#define far_EL12 sys_reg(3, 5, 6, 0, 0)
+#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
+#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
+#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
+#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
+#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
+#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
+#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
+#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
+#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
+#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
+#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
+#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
+#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
+
+/**
+ * hyp_alternate_select - Generates patchable code sequences that are
+ * used to switch between two implementations of a function, depending
+ * on the availability of a feature.
+ *
+ * @fname: a symbol name that will be defined as a function returning a
+ * function pointer whose type will match @orig and @alt
+ * @orig: A pointer to the default function, as returned by @fname when
+ * @cond doesn't hold
+ * @alt: A pointer to the alternate function, as returned by @fname
+ * when @cond holds
+ * @cond: a CPU feature (as described in asm/cpufeature.h)
+ */
+#define hyp_alternate_select(fname, orig, alt, cond) \
+typeof(orig) * __hyp_text fname(void) \
+{ \
+ typeof(alt) *val = orig; \
+ asm volatile(ALTERNATIVE("nop \n", \
+ "mov %0, %1 \n", \
+ cond) \
+ : "+r" (val) : "r" (alt)); \
+ return val; \
+}
+
+void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
+void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+
+void __timer_save_state(struct kvm_vcpu *vcpu);
+void __timer_restore_state(struct kvm_vcpu *vcpu);
+
+void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
+void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg32_save_state(struct kvm_vcpu *vcpu);
+void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
+
+void __debug_save_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt);
+void __debug_restore_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt);
+void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
+void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+
+void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
+void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
+bool __fpsimd_enabled(void);
+
+u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
+void __noreturn __hyp_do_panic(unsigned long, ...);
+
+#endif /* __ARM64_KVM_HYP_H__ */
+
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 736433912a1e..22732a5e3119 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -23,13 +23,16 @@
#include <asm/cpufeature.h>
/*
- * As we only have the TTBR0_EL2 register, we cannot express
+ * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
* "negative" addresses. This makes it impossible to directly share
* mappings with the kernel.
*
* Instead, give the HYP mode its own VA region at a fixed offset from
* the kernel by just masking the top bits (which are all ones for a
* kernel address).
+ *
+ * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these
+ * macros (the entire kernel runs at EL2).
*/
#define HYP_PAGE_OFFSET_SHIFT VA_BITS
#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
@@ -56,12 +59,19 @@
#ifdef __ASSEMBLY__
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+
/*
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*/
.macro kern_hyp_va reg
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
and \reg, \reg, #HYP_PAGE_OFFSET_MASK
+alternative_else
+ nop
+alternative_endif
.endm
#else
@@ -307,7 +317,7 @@ static inline unsigned int kvm_get_vmid_bits(void)
{
int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1);
- return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
+ return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/kvm_perf_event.h b/arch/arm64/include/asm/kvm_perf_event.h
new file mode 100644
index 000000000000..c18fdebb8f66
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_perf_event.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_KVM_PERF_EVENT_H
+#define __ASM_KVM_PERF_EVENT_H
+
+#define ARMV8_PMU_MAX_COUNTERS 32
+#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/*
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
+#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
+#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
+#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
+#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+/* Determines which bit of PMCCNTR_EL0 generates an overflow */
+#define ARMV8_PMU_PMCR_LC (1 << 6)
+#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
+#define ARMV8_PMU_PMCR_N_MASK 0x1f
+#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
+
+/*
+ * PMOVSR: counters overflow flag status reg
+ */
+#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
+#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define ARMV8_PMU_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
+#define ARMV8_PMU_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
+
+#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
+
+/*
+ * Event filters for PMUv3
+ */
+#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31)
+#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30)
+#define ARMV8_PMU_INCLUDE_EL2 (1 << 27)
+
+/*
+ * PMUSERENR: user enable reg
+ */
+#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
+#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
+#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
+#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
+#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
+
+#endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 3de42d68611d..23acc00be32d 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -26,6 +26,7 @@ __asm__(".arch_extension lse");
/* Macro for constructing calls to out-of-line ll/sc atomics */
#define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+#define __LL_SC_CLOBBERS "x16", "x17", "x30"
/* In-line patching at runtime */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 853953cd1f08..12f8a00fb3f1 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -24,6 +24,7 @@
#include <linux/compiler.h>
#include <linux/const.h>
#include <linux/types.h>
+#include <asm/bug.h>
#include <asm/sizes.h>
/*
@@ -45,15 +46,15 @@
* VA_START - the first kernel virtual address.
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 128MB of the kernel text.
*/
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define VA_START (UL(0xffffffffffffffff) << VA_BITS)
#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define MODULES_END (PAGE_OFFSET)
-#define MODULES_VADDR (MODULES_END - SZ_64M)
-#define PCI_IO_END (MODULES_VADDR - SZ_2M)
+#define KIMAGE_VADDR (MODULES_END)
+#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
+#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VSIZE (SZ_128M)
+#define PCI_IO_END (PAGE_OFFSET - SZ_2M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
#define TASK_SIZE_64 (UL(1) << VA_BITS)
@@ -71,12 +72,27 @@
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
/*
+ * The size of the KASAN shadow region. This should be 1/8th of the
+ * size of the entire kernel virtual address space.
+ */
+#ifdef CONFIG_KASAN
+#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3))
+#else
+#define KASAN_SHADOW_SIZE (0)
+#endif
+
+/*
* Physical vs virtual RAM address space conversion. These are
* private definitions which should NOT be used outside memory.h
* files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
*/
-#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
-#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __virt_to_phys(x) ({ \
+ phys_addr_t __x = (phys_addr_t)(x); \
+ __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \
+ (__x - kimage_voffset); })
+
+#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
+#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
/*
* Convert a page to/from a physical address
@@ -100,19 +116,40 @@
#define MT_S2_NORMAL 0xf
#define MT_S2_DEVICE_nGnRE 0x1
+#ifdef CONFIG_ARM64_4K_PAGES
+#define IOREMAP_MAX_ORDER (PUD_SHIFT)
+#else
+#define IOREMAP_MAX_ORDER (PMD_SHIFT)
+#endif
+
+#ifdef CONFIG_BLK_DEV_INITRD
+#define __early_init_dt_declare_initrd(__start, __end) \
+ do { \
+ initrd_start = (__start); \
+ initrd_end = (__end); \
+ } while (0)
+#endif
+
#ifndef __ASSEMBLY__
-extern phys_addr_t memstart_addr;
+#include <linux/bitops.h>
+#include <linux/mmdebug.h>
+
+extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET ({ memstart_addr; })
+#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
+
+/* the virtual base of the kernel image (minus TEXT_OFFSET) */
+extern u64 kimage_vaddr;
+
+/* the offset between the kernel virtual and physical mappings */
+extern u64 kimage_voffset;
/*
- * The maximum physical address that the linear direct mapping
- * of system RAM can cover. (PAGE_OFFSET can be interpreted as
- * a 2's complement signed quantity and negated to derive the
- * maximum size of the linear mapping.)
+ * Allow all memory at the discovery stage. We will clip it later.
*/
-#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; })
+#define MIN_MEMBLOCK_ADDR 0
+#define MAX_MEMBLOCK_ADDR U64_MAX
/*
* PFNs are used to describe any physical page; this means
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 24165784b803..b1892a0dbcb0 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -27,6 +27,7 @@
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
#ifdef CONFIG_PID_IN_CONTEXTIDR
static inline void contextidr_thread_switch(struct task_struct *next)
@@ -48,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
*/
static inline void cpu_set_reserved_ttbr0(void)
{
- unsigned long ttbr = page_to_phys(empty_zero_page);
+ unsigned long ttbr = virt_to_phys(empty_zero_page);
asm(
" msr ttbr0_el1, %0 // set TTBR0\n"
@@ -73,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void)
/*
* Set TCR.T0SZ to its default value (based on VA_BITS)
*/
-static inline void cpu_set_default_tcr_t0sz(void)
+static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
unsigned long tcr;
@@ -86,7 +87,62 @@ static inline void cpu_set_default_tcr_t0sz(void)
" msr tcr_el1, %0 ;"
" isb"
: "=&r" (tcr)
- : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+ : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS))
+#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz)
+
+/*
+ * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
+ *
+ * The idmap lives in the same VA range as userspace, but uses global entries
+ * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from
+ * speculative TLB fetches, we must temporarily install the reserved page
+ * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ.
+ *
+ * If current is a not a user task, the mm covers the TTBR1_EL1 page tables,
+ * which should not be installed in TTBR0_EL1. In this case we can leave the
+ * reserved page tables in place.
+ */
+static inline void cpu_uninstall_idmap(void)
+{
+ struct mm_struct *mm = current->active_mm;
+
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ cpu_set_default_tcr_t0sz();
+
+ if (mm != &init_mm)
+ cpu_switch_mm(mm->pgd, mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ cpu_set_idmap_tcr_t0sz();
+
+ cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
+/*
+ * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
+ * avoiding the possibility of conflicting TLB entries being allocated.
+ */
+static inline void cpu_replace_ttbr1(pgd_t *pgd)
+{
+ typedef void (ttbr_replace_func)(phys_addr_t);
+ extern ttbr_replace_func idmap_cpu_replace_ttbr1;
+ ttbr_replace_func *replace_phys;
+
+ phys_addr_t pgd_phys = virt_to_phys(pgd);
+
+ replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+
+ cpu_install_idmap();
+ replace_phys(pgd_phys);
+ cpu_uninstall_idmap();
}
/*
@@ -147,4 +203,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
#define deactivate_mm(tsk,mm) do { } while (0)
#define activate_mm(prev,next) switch_mm(prev, next, NULL)
+void verify_cpu_asid_bits(void);
+
#endif
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index e80e232b730e..e12af6754634 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -20,4 +20,21 @@
#define MODULE_ARCH_VERMAGIC "aarch64"
+#ifdef CONFIG_ARM64_MODULE_PLTS
+struct mod_arch_specific {
+ struct elf64_shdr *plt;
+ int plt_num_entries;
+ int plt_max_entries;
+};
+#endif
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+ Elf64_Sym *sym);
+
+#ifdef CONFIG_RANDOMIZE_BASE
+extern u64 module_alloc_base;
+#else
+#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
+#endif
+
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index b008a72f8bc0..b9a7ba9ca44c 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -7,8 +7,6 @@
#include <linux/dma-mapping.h>
#include <asm/io.h>
-#include <asm-generic/pci-bridge.h>
-#include <asm-generic/pci-dma-compat.h>
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index c15053902942..ff98585d085a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_page((unsigned long)pmd);
}
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
{
- set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+ set_pud(pud, __pud(pmd | prot));
}
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+}
+#else
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+{
+ BUILD_BUG();
+}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#if CONFIG_PGTABLE_LEVELS > 3
@@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
free_page((unsigned long)pud);
}
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
{
- set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
+ set_pgd(pgdp, __pgd(pud | prot));
}
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+}
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+{
+ BUILD_BUG();
+}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
new file mode 100644
index 000000000000..29fcb33ab401
--- /dev/null
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PGTABLE_PROT_H
+#define __ASM_PGTABLE_PROT_H
+
+#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+#include <linux/const.h>
+
+/*
+ * Software defined PTE bits definition.
+ */
+#define PTE_VALID (_AT(pteval_t, 1) << 0)
+#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
+#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
+#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
+#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
+
+#ifndef __ASSEMBLY__
+
+#include <asm/pgtable-types.h>
+
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
+
+#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
+#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
+
+#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index f50608674580..989fef16d461 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -21,43 +21,31 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
-
-/*
- * Software defined PTE bits definition.
- */
-#define PTE_VALID (_AT(pteval_t, 1) << 0)
-#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
-#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
-#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
-#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
+#include <asm/pgtable-prot.h>
/*
* VMALLOC and SPARSEMEM_VMEMMAP ranges.
*
* VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
* (rounded up to PUD_SIZE).
- * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_START: beginning of the kernel vmalloc space
* VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
* fixed mappings and modules
*/
-#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE)
-
-#ifndef CONFIG_KASAN
-#define VMALLOC_START (VA_START)
-#else
-#include <asm/kasan.h>
-#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K)
-#endif
+#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
+#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define VMEMMAP_START (VMALLOC_END + SZ_64K)
-#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
+#define vmemmap ((struct page *)VMEMMAP_START - \
+ SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
#define FIRST_USER_ADDRESS 0UL
#ifndef __ASSEMBLY__
+#include <asm/fixmap.h>
#include <linux/mmdebug.h>
extern void __pte_error(const char *file, int line, unsigned long val);
@@ -65,65 +53,12 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pud_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-
-#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
-
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-
-#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-
-#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
-
-#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
-#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-
-#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
-
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
*/
-extern struct page *empty_zero_page;
-#define ZERO_PAGE(vaddr) (empty_zero_page)
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
@@ -135,16 +70,6 @@ extern struct page *empty_zero_page;
#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
-/* Find an entry in the third-level page table. */
-#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
-#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr))
-
-#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
-
/*
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
@@ -278,7 +203,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- if (pte_valid(pte)) {
+ if (pte_present(pte)) {
if (pte_sw_dirty(pte) && pte_write(pte))
pte_val(pte) &= ~PTE_RDONLY;
else
@@ -411,7 +336,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
-#ifdef CONFIG_ARM64_64K_PAGES
+#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
#define pud_sect(pud) (0)
#define pud_table(pud) (1)
#else
@@ -433,13 +358,31 @@ static inline void pmd_clear(pmd_t *pmdp)
set_pmd(pmdp, __pmd(0));
}
-static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
{
- return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
+ return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
}
+/* Find an entry in the third-level page table. */
+#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr))))
+
+#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
+#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+
+#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
+#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
+#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
+
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+/* use ONLY for statically allocated translation tables */
+#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
+
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -466,21 +409,37 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
-static inline pmd_t *pud_page_vaddr(pud_t pud)
+static inline phys_addr_t pud_page_paddr(pud_t pud)
{
- return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+ return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
}
/* Find an entry in the second-level page table. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
-{
- return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
-}
+#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
+
+#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
+#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
#define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
+/* use ONLY for statically allocated translation tables */
+#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
+
+#else
+
+#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+
+/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
+#define pmd_set_fixmap(addr) NULL
+#define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)pudp)
+#define pmd_clear_fixmap()
+
+#define pmd_offset_kimg(dir,addr) ((pmd_t *)dir)
+
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#if CONFIG_PGTABLE_LEVELS > 3
@@ -502,21 +461,37 @@ static inline void pgd_clear(pgd_t *pgdp)
set_pgd(pgdp, __pgd(0));
}
-static inline pud_t *pgd_page_vaddr(pgd_t pgd)
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
- return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
+ return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
}
/* Find an entry in the frst-level page table. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
-{
- return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
-}
+#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
+
+#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
+#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr))
+#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
+/* use ONLY for statically allocated translation tables */
+#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+
+#else
+
+#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
+
+/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
+#define pud_set_fixmap(addr) NULL
+#define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp)
+#define pud_clear_fixmap()
+
+#define pud_offset_kimg(dir,addr) ((pud_t *)dir)
+
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
@@ -524,11 +499,16 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
/* to find an entry in a page-table-directory */
#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr))
+#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
+
+#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr)))
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
+#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
+#define pgd_clear_fixmap() clear_fixmap(FIX_PGD)
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
@@ -648,6 +628,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
* bits 0-1: present (must be zero)
* bits 2-7: swap type
* bits 8-57: swap offset
+ * bit 58: PTE_PROT_NONE (must be zero)
*/
#define __SWP_TYPE_SHIFT 2
#define __SWP_TYPE_BITS 6
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 4acb7ca94fcd..cef1cf398356 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -29,8 +29,10 @@
#include <linux/string.h>
+#include <asm/alternative.h>
#include <asm/fpsimd.h>
#include <asm/hw_breakpoint.h>
+#include <asm/lse.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/types.h>
@@ -177,9 +179,11 @@ static inline void prefetchw(const void *ptr)
}
#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *x)
+static inline void spin_lock_prefetch(const void *ptr)
{
- prefetchw(x);
+ asm volatile(ARM64_LSE_ATOMIC_INSN(
+ "prfm pstl1strm, %a0",
+ "nop") : : "p" (ptr));
}
#define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -187,5 +191,6 @@ static inline void spin_lock_prefetch(const void *x)
#endif
void cpu_enable_pan(void *__unused);
+void cpu_enable_uao(void *__unused);
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index e9e5467e0bf4..a307eb6e7fa8 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -58,6 +58,7 @@
#define COMPAT_PSR_Z_BIT 0x40000000
#define COMPAT_PSR_N_BIT 0x80000000
#define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */
+#define COMPAT_PSR_GE_MASK 0x000f0000
#ifdef CONFIG_CPU_BIG_ENDIAN
#define COMPAT_PSR_ENDSTATE COMPAT_PSR_E_BIT
@@ -151,35 +152,9 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->regs[0];
}
-/*
- * Are the current registers suitable for user mode? (used to maintain
- * security in signal handlers)
- */
-static inline int valid_user_regs(struct user_pt_regs *regs)
-{
- if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) {
- regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT);
-
- /* The T bit is reserved for AArch64 */
- if (!(regs->pstate & PSR_MODE32_BIT))
- regs->pstate &= ~COMPAT_PSR_T_BIT;
-
- return 1;
- }
-
- /*
- * Force PSR to something logical...
- */
- regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \
- COMPAT_PSR_T_BIT | PSR_MODE32_BIT;
-
- if (!(regs->pstate & PSR_MODE32_BIT)) {
- regs->pstate &= ~COMPAT_PSR_T_BIT;
- regs->pstate |= PSR_MODE_EL0t;
- }
-
- return 0;
-}
+/* We must avoid circular header include via sched.h */
+struct task_struct;
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task);
#define instruction_pointer(regs) ((unsigned long)(regs)->pc)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index d9c3d6a6100a..817a067ba058 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -16,6 +16,19 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+/* Values for secondary_data.status */
+
+#define CPU_MMU_OFF (-1)
+#define CPU_BOOT_SUCCESS (0)
+/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
+#define CPU_KILL_ME (1)
+/* The cpu couldn't die gracefully and is looping in the kernel */
+#define CPU_STUCK_IN_KERNEL (2)
+/* Fatal system error detected by secondary CPU, crash the system */
+#define CPU_PANIC_KERNEL (3)
+
+#ifndef __ASSEMBLY__
+
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
@@ -54,19 +67,52 @@ asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
+ * @stack - sp for the secondary CPU
+ * @status - Result passed back from the secondary CPU to
+ * indicate failure.
*/
struct secondary_data {
void *stack;
+ long status;
};
+
extern struct secondary_data secondary_data;
+extern long __early_cpu_boot_status;
extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+#else
+static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+ BUILD_BUG();
+}
+#endif
+
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void cpu_die(void);
+extern void cpu_die_early(void);
+
+static inline void cpu_park_loop(void)
+{
+ for (;;) {
+ wfe();
+ wfi();
+ }
+}
+
+static inline void update_cpu_boot_status(int val)
+{
+ WRITE_ONCE(secondary_data.status, val);
+ /* Ensure the visibility of the status update */
+ dsb(ishst);
+}
+
+#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4aeebec3d882..1a78d6e2a78b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -72,15 +72,19 @@
#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
+#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
+#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
(!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
+ (!!x)<<8 | 0x1f)
/* SCTLR_EL1 */
#define SCTLR_EL1_CP15BEN (0x1 << 5)
@@ -137,6 +141,9 @@
#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
#define ID_AA64MMFR1_HADBS_SHIFT 0
+/* id_aa64mmfr2 */
+#define ID_AA64MMFR2_UAO_SHIFT 4
+
/* id_aa64dfr0 */
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
#define ID_AA64DFR0_WRPS_SHIFT 20
@@ -196,16 +203,16 @@
#ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
- .equ __reg_num_x\num, \num
+ .equ .L__reg_num_x\num, \num
.endr
- .equ __reg_num_xzr, 31
+ .equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
- .inst 0xd5200000|(\sreg)|(__reg_num_\rt)
+ .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt)
.endm
.macro msr_s, sreg, rt
- .inst 0xd5000000|(\sreg)|(__reg_num_\rt)
+ .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt)
.endm
#else
@@ -214,16 +221,16 @@
asm(
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
-" .equ __reg_num_x\\num, \\num\n"
+" .equ .L__reg_num_x\\num, \\num\n"
" .endr\n"
-" .equ __reg_num_xzr, 31\n"
+" .equ .L__reg_num_xzr, 31\n"
"\n"
" .macro mrs_s, rt, sreg\n"
-" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n"
+" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n"
" .endm\n"
"\n"
" .macro msr_s, sreg, rt\n"
-" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n"
+" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n"
" .endm\n"
);
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index b2ede967fe7d..0685d74572af 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -36,11 +36,11 @@
#define VERIFY_WRITE 1
/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -50,9 +50,11 @@
struct exception_table_entry
{
- unsigned long insn, fixup;
+ int insn, fixup;
};
+#define ARCH_HAS_RELATIVE_EXTABLE
+
extern int fixup_exception(struct pt_regs *regs);
#define KERNEL_DS (-1UL)
@@ -64,6 +66,16 @@ extern int fixup_exception(struct pt_regs *regs);
static inline void set_fs(mm_segment_t fs)
{
current_thread_info()->addr_limit = fs;
+
+ /*
+ * Enable/disable UAO so that copy_to_user() etc can access
+ * kernel memory with the unprivileged instructions.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+ else
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
+ CONFIG_ARM64_UAO));
}
#define segment_eq(a, b) ((a) == (b))
@@ -105,6 +117,12 @@ static inline void set_fs(mm_segment_t fs)
#define access_ok(type, addr, size) __range_ok(addr, size)
#define user_addr_max get_fs
+#define _ASM_EXTABLE(from, to) \
+ " .pushsection __ex_table, \"a\"\n" \
+ " .align 3\n" \
+ " .long (" #from " - .), (" #to " - .)\n" \
+ " .popsection\n"
+
/*
* The "__xxx" versions of the user access functions do not verify the address
* space - it must have been done previously with a separate "access_ok()"
@@ -113,9 +131,10 @@ static inline void set_fs(mm_segment_t fs)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_user_asm(instr, reg, x, addr, err) \
+#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
asm volatile( \
- "1: " instr " " reg "1, [%2]\n" \
+ "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
+ alt_instr " " reg "1, [%2]\n", feature) \
"2:\n" \
" .section .fixup, \"ax\"\n" \
" .align 2\n" \
@@ -123,10 +142,7 @@ static inline void set_fs(mm_segment_t fs)
" mov %1, #0\n" \
" b 2b\n" \
" .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 1b, 3b\n" \
- " .previous" \
+ _ASM_EXTABLE(1b, 3b) \
: "+r" (err), "=&r" (x) \
: "r" (addr), "i" (-EFAULT))
@@ -134,26 +150,30 @@ static inline void set_fs(mm_segment_t fs)
do { \
unsigned long __gu_val; \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
switch (sizeof(*(ptr))) { \
case 1: \
- __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 2: \
- __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 4: \
- __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 8: \
- __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \
+ __get_user_asm("ldr", "ldtr", "%", __gu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
default: \
BUILD_BUG(); \
} \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
} while (0)
@@ -181,19 +201,17 @@ do { \
((x) = 0, -EFAULT); \
})
-#define __put_user_asm(instr, reg, x, addr, err) \
+#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
asm volatile( \
- "1: " instr " " reg "1, [%2]\n" \
+ "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
+ alt_instr " " reg "1, [%2]\n", feature) \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
"3: mov %w0, %3\n" \
" b 2b\n" \
" .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 1b, 3b\n" \
- " .previous" \
+ _ASM_EXTABLE(1b, 3b) \
: "+r" (err) \
: "r" (x), "r" (addr), "i" (-EFAULT))
@@ -201,25 +219,29 @@ do { \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
switch (sizeof(*(ptr))) { \
case 1: \
- __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \
+ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 2: \
- __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \
+ __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 4: \
- __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \
+ __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
case 8: \
- __put_user_asm("str", "%", __pu_val, (ptr), (err)); \
+ __put_user_asm("str", "sttr", "%", __pu_val, (ptr), \
+ (err), ARM64_HAS_UAO); \
break; \
default: \
BUILD_BUG(); \
} \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
CONFIG_ARM64_PAN)); \
} while (0)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df5252dd7..9f22dd607958 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -23,6 +23,8 @@
#ifndef __ASSEMBLY__
+#include <asm/ptrace.h>
+
/*
* __boot_cpu_mode records what mode CPUs were booted in.
* A correctly-implemented bootloader must start all CPUs in the same mode:
@@ -50,6 +52,14 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
+static inline bool is_kernel_in_hyp_mode(void)
+{
+ u64 el;
+
+ asm("mrs %0, CurrentEL" : "=r" (el));
+ return el == CurrentEL_EL2;
+}
+
/* The section containing the hypervisor text */
extern char __hyp_text_start[];
extern char __hyp_text_end[];
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index aab5bf09e9d9..2b79b8a89457 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -16,6 +16,8 @@
#ifndef __ASM_WORD_AT_A_TIME_H
#define __ASM_WORD_AT_A_TIME_H
+#include <asm/uaccess.h>
+
#ifndef __AARCH64EB__
#include <linux/kernel.h>
@@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
#endif
" b 2b\n"
" .popsection\n"
- " .pushsection __ex_table,\"a\"\n"
- " .align 3\n"
- " .quad 1b, 3b\n"
- " .popsection"
+ _ASM_EXTABLE(1b, 3b)
: "=&r" (ret), "=&r" (offset)
: "r" (addr), "Q" (*(unsigned long *)addr));
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 361c8a8ef55f..a739287ef6a3 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -28,5 +28,7 @@
#define HWCAP_SHA2 (1 << 6)
#define HWCAP_CRC32 (1 << 7)
#define HWCAP_ATOMICS (1 << 8)
+#define HWCAP_FPHP (1 << 9)
+#define HWCAP_ASIMDHP (1 << 10)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 2d4ca4bb0dd3..f209ea151dca 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -94,6 +94,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
+#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */
struct kvm_vcpu_init {
__u32 target;
@@ -204,6 +205,11 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
+/* Device Control API on vcpu fd */
+#define KVM_ARM_VCPU_PMU_V3_CTRL 0
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
#define KVM_ARM_IRQ_TYPE_MASK 0xff
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 208db3df135a..b5c3933ed441 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -45,6 +45,7 @@
#define PSR_A_BIT 0x00000100
#define PSR_D_BIT 0x00000200
#define PSR_PAN_BIT 0x00400000
+#define PSR_UAO_BIT 0x00800000
#define PSR_Q_BIT 0x08000000
#define PSR_V_BIT 0x10000000
#define PSR_C_BIT 0x20000000
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 83cd7e68e83b..3793003e16a2 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
../../arm/kernel/opcodes.o
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
+arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@@ -41,7 +42,9 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
+arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
+arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
new file mode 100644
index 000000000000..a32b4011d711
--- /dev/null
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -0,0 +1,141 @@
+/*
+ * ARM64 ACPI Parking Protocol implementation
+ *
+ * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ * Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/acpi.h>
+#include <linux/types.h>
+
+#include <asm/cpu_ops.h>
+
+struct parking_protocol_mailbox {
+ __le32 cpu_id;
+ __le32 reserved;
+ __le64 entry_point;
+};
+
+struct cpu_mailbox_entry {
+ struct parking_protocol_mailbox __iomem *mailbox;
+ phys_addr_t mailbox_addr;
+ u8 version;
+ u8 gic_cpu_id;
+};
+
+static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS];
+
+void __init acpi_set_mailbox_entry(int cpu,
+ struct acpi_madt_generic_interrupt *p)
+{
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+ cpu_entry->mailbox_addr = p->parked_address;
+ cpu_entry->version = p->parking_version;
+ cpu_entry->gic_cpu_id = p->cpu_interface_number;
+}
+
+bool acpi_parking_protocol_valid(int cpu)
+{
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+ return cpu_entry->mailbox_addr && cpu_entry->version;
+}
+
+static int acpi_parking_protocol_cpu_init(unsigned int cpu)
+{
+ pr_debug("%s: ACPI parked addr=%llx\n", __func__,
+ cpu_mailbox_entries[cpu].mailbox_addr);
+
+ return 0;
+}
+
+static int acpi_parking_protocol_cpu_prepare(unsigned int cpu)
+{
+ return 0;
+}
+
+static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
+{
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+ struct parking_protocol_mailbox __iomem *mailbox;
+ __le32 cpu_id;
+
+ /*
+ * Map mailbox memory with attribute device nGnRE (ie ioremap -
+ * this deviates from the parking protocol specifications since
+ * the mailboxes are required to be mapped nGnRnE; the attribute
+ * discrepancy is harmless insofar as the protocol specification
+ * is concerned).
+ * If the mailbox is mistakenly allocated in the linear mapping
+ * by FW ioremap will fail since the mapping will be prevented
+ * by the kernel (it clashes with the linear mapping attributes
+ * specifications).
+ */
+ mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox));
+ if (!mailbox)
+ return -EIO;
+
+ cpu_id = readl_relaxed(&mailbox->cpu_id);
+ /*
+ * Check if firmware has set-up the mailbox entry properly
+ * before kickstarting the respective cpu.
+ */
+ if (cpu_id != ~0U) {
+ iounmap(mailbox);
+ return -ENXIO;
+ }
+
+ /*
+ * stash the mailbox address mapping to use it for further FW
+ * checks in the postboot method
+ */
+ cpu_entry->mailbox = mailbox;
+
+ /*
+ * We write the entry point and cpu id as LE regardless of the
+ * native endianness of the kernel. Therefore, any boot-loaders
+ * that read this address need to convert this address to the
+ * Boot-Loader's endianness before jumping.
+ */
+ writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+ writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
+
+ arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+
+ return 0;
+}
+
+static void acpi_parking_protocol_cpu_postboot(void)
+{
+ int cpu = smp_processor_id();
+ struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+ struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox;
+ __le64 entry_point;
+
+ entry_point = readl_relaxed(&mailbox->entry_point);
+ /*
+ * Check if firmware has cleared the entry_point as expected
+ * by the protocol specification.
+ */
+ WARN_ON(entry_point);
+}
+
+const struct cpu_operations acpi_parking_protocol_ops = {
+ .name = "parking-protocol",
+ .cpu_init = acpi_parking_protocol_cpu_init,
+ .cpu_prepare = acpi_parking_protocol_cpu_prepare,
+ .cpu_boot = acpi_parking_protocol_cpu_boot,
+ .cpu_postboot = acpi_parking_protocol_cpu_postboot
+};
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 3e01207917b1..c37202c0c838 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
"4: mov %w0, %w5\n" \
" b 3b\n" \
" .popsection" \
- " .pushsection __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 0b, 4b\n" \
- " .quad 1b, 4b\n" \
- " .popsection\n" \
+ _ASM_EXTABLE(0b, 4b) \
+ _ASM_EXTABLE(1b, 4b) \
ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) \
: "=&r" (res), "+r" (data), "=&r" (temp) \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index fffa4ac6c25a..3ae6b310ac9b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -104,15 +104,14 @@ int main(void)
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
+ DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
+ BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
- DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
- DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
- DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
#endif
#ifdef CONFIG_CPU_PM
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index feb6b4efa641..06afd04e02c0 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -21,24 +21,12 @@
#include <asm/cputype.h>
#include <asm/cpufeature.h>
-#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
-#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
-#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
-
-#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
- MIDR_ARCHITECTURE_MASK)
-
static bool __maybe_unused
is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
{
- u32 midr = read_cpuid_id();
-
- if ((midr & CPU_MODEL_MASK) != entry->midr_model)
- return false;
-
- midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
-
- return (midr >= entry->midr_range_min && midr <= entry->midr_range_max);
+ return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
+ entry->midr_range_min,
+ entry->midr_range_max);
}
#define MIDR_RANGE(model, min, max) \
@@ -100,6 +88,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01),
},
#endif
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+ {
+ /* Cavium ThunderX, T88 pass 1.x - 2.1 */
+ .desc = "Cavium erratum 27456",
+ .capability = ARM64_WORKAROUND_CAVIUM_27456,
+ MIDR_RANGE(MIDR_THUNDERX, 0x00,
+ (1 << MIDR_VARIANT_SHIFT) | 1),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index b6bd7d447768..c7cfb8fe06f9 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -25,19 +25,30 @@
#include <asm/smp_plat.h>
extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations acpi_parking_protocol_ops;
extern const struct cpu_operations cpu_psci_ops;
const struct cpu_operations *cpu_ops[NR_CPUS];
-static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
&cpu_psci_ops,
NULL,
};
+static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+ &acpi_parking_protocol_ops,
+#endif
+ &cpu_psci_ops,
+ NULL,
+};
+
static const struct cpu_operations * __init cpu_get_ops(const char *name)
{
- const struct cpu_operations **ops = supported_cpu_ops;
+ const struct cpu_operations **ops;
+
+ ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
while (*ops) {
if (!strcmp(name, (*ops)->name))
@@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu)
}
} else {
enable_method = acpi_get_enable_method(cpu);
- if (!enable_method)
- pr_err("Unsupported ACPI enable-method\n");
+ if (!enable_method) {
+ /*
+ * In ACPI systems the boot CPU does not require
+ * checking the enable method since for some
+ * boot protocol (ie parking protocol) it need not
+ * be initialized. Don't warn spuriously.
+ */
+ if (cpu != 0)
+ pr_err("Unsupported ACPI enable-method\n");
+ }
}
return enable_method;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5c90aa490a2b..943f5140e0f3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -24,8 +24,10 @@
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
+#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
+#include <asm/virt.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -54,19 +56,23 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
.safe_val = SAFE_VAL, \
}
-/* Define a feature with signed values */
+/* Define a feature with unsigned values */
#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
- __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
-
-/* Define a feature with unsigned value */
-#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
__ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+/* Define a feature with a signed value */
+#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+ __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+
#define ARM64_FTR_END \
{ \
.width = 0, \
}
+/* meta feature for alternatives */
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+
static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
@@ -84,8 +90,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
/* Linux doesn't care about the EL3 */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
@@ -96,8 +102,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
/* Linux shouldn't care about secure memory */
@@ -108,7 +114,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
*/
- U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -123,29 +129,34 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
ARM64_FTR_END,
};
+static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static struct arm64_ftr_bits ftr_ctr[] = {
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine
*/
- U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_mmfr0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
ARM64_FTR_END,
@@ -153,12 +164,12 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = {
static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
- U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
ARM64_FTR_END,
};
@@ -204,6 +215,18 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = {
ARM64_FTR_END,
};
+static struct arm64_ftr_bits ftr_id_dfr0[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+ S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ ARM64_FTR_END,
+};
+
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
@@ -249,7 +272,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
/* Op1 = 0, CRn = 0, CRm = 1 */
ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
- ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0),
ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
@@ -284,6 +307,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+ ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
/* Op1 = 3, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
@@ -408,6 +432,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
@@ -517,6 +542,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
+ info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
/*
* EL3 is not our concern.
@@ -592,7 +619,7 @@ u64 read_system_reg(u32 id)
static bool
feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
{
- int val = cpuid_feature_extract_field(reg, entry->field_pos);
+ int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
return val >= entry->min_field_value;
}
@@ -621,6 +648,23 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
return has_sre;
}
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
+{
+ u32 midr = read_cpuid_id();
+ u32 rv_min, rv_max;
+
+ /* Cavium ThunderX pass 1.x and 2.x */
+ rv_min = 0;
+ rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
+
+ return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+}
+
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
+{
+ return is_kernel_in_hyp_mode();
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -628,6 +672,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_useable_gicv3_cpuif,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_GIC_SHIFT,
+ .sign = FTR_UNSIGNED,
.min_field_value = 1,
},
#ifdef CONFIG_ARM64_PAN
@@ -637,6 +682,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .sign = FTR_UNSIGNED,
.min_field_value = 1,
.enable = cpu_enable_pan,
},
@@ -648,38 +694,69 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+ .sign = FTR_UNSIGNED,
.min_field_value = 2,
},
#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+ {
+ .desc = "Software prefetching using PRFM",
+ .capability = ARM64_HAS_NO_HW_PREFETCH,
+ .matches = has_no_hw_prefetch,
+ },
+#ifdef CONFIG_ARM64_UAO
+ {
+ .desc = "User Access Override",
+ .capability = ARM64_HAS_UAO,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .field_pos = ID_AA64MMFR2_UAO_SHIFT,
+ .min_field_value = 1,
+ .enable = cpu_enable_uao,
+ },
+#endif /* CONFIG_ARM64_UAO */
+#ifdef CONFIG_ARM64_PAN
+ {
+ .capability = ARM64_ALT_PAN_NOT_UAO,
+ .matches = cpufeature_pan_not_uao,
+ },
+#endif /* CONFIG_ARM64_PAN */
+ {
+ .desc = "Virtualization Host Extensions",
+ .capability = ARM64_HAS_VIRT_HOST_EXTN,
+ .matches = runs_at_el2,
+ },
{},
};
-#define HWCAP_CAP(reg, field, min_value, type, cap) \
+#define HWCAP_CAP(reg, field, s, min_value, type, cap) \
{ \
.desc = #cap, \
.matches = has_cpuid_feature, \
.sys_reg = reg, \
.field_pos = field, \
+ .sign = s, \
.min_field_value = min_value, \
.hwcap_type = type, \
.hwcap = cap, \
}
static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
#ifdef CONFIG_COMPAT
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
#endif
{},
};
@@ -734,7 +811,7 @@ static void __init setup_cpu_hwcaps(void)
int i;
const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
- for (i = 0; hwcaps[i].desc; i++)
+ for (i = 0; hwcaps[i].matches; i++)
if (hwcaps[i].matches(&hwcaps[i]))
cap_set_hwcap(&hwcaps[i]);
}
@@ -744,11 +821,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
{
int i;
- for (i = 0; caps[i].desc; i++) {
+ for (i = 0; caps[i].matches; i++) {
if (!caps[i].matches(&caps[i]))
continue;
- if (!cpus_have_cap(caps[i].capability))
+ if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
pr_info("%s %s\n", info, caps[i].desc);
cpus_set_cap(caps[i].capability);
}
@@ -763,13 +840,11 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
{
int i;
- for (i = 0; caps[i].desc; i++)
+ for (i = 0; caps[i].matches; i++)
if (caps[i].enable && cpus_have_cap(caps[i].capability))
on_each_cpu(caps[i].enable, NULL, true);
}
-#ifdef CONFIG_HOTPLUG_CPU
-
/*
* Flag to indicate if we have computed the system wide
* capabilities based on the boot time active CPUs. This
@@ -791,35 +866,36 @@ static inline void set_sys_caps_initialised(void)
static u64 __raw_read_system_reg(u32 sys_id)
{
switch (sys_id) {
- case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1);
- case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1);
- case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1);
- case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1);
- case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1);
- case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1);
- case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1);
- case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1);
- case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1);
- case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1);
- case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1);
- case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
- case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
- case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1);
- case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1);
- case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1);
-
- case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1);
- case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1);
- case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1);
- case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1);
-
- case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0);
- case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0);
- case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0);
+ case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1);
+ case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1);
+ case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1);
+ case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1);
+ case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1);
+ case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1);
+ case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1);
+ case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1);
+ case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1);
+ case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1);
+ case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1);
+ case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1);
+ case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1);
+ case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1);
+ case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1);
+ case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1);
+
+ case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1);
+ case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1);
+ case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1);
+ case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1);
+ case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1);
+
+ case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0);
+ case SYS_CTR_EL0: return read_cpuid(CTR_EL0);
+ case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0);
default:
BUG();
return 0;
@@ -827,25 +903,12 @@ static u64 __raw_read_system_reg(u32 sys_id)
}
/*
- * Park the CPU which doesn't have the capability as advertised
- * by the system.
+ * Check for CPU features that are used in early boot
+ * based on the Boot CPU value.
*/
-static void fail_incapable_cpu(char *cap_type,
- const struct arm64_cpu_capabilities *cap)
+static void check_early_cpu_features(void)
{
- int cpu = smp_processor_id();
-
- pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
- /* Mark this CPU absent */
- set_cpu_present(cpu, 0);
-
- /* Check if we can park ourselves */
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
- asm(
- "1: wfe\n"
- " wfi\n"
- " b 1b");
+ verify_cpu_asid_bits();
}
/*
@@ -861,6 +924,8 @@ void verify_local_cpu_capabilities(void)
int i;
const struct arm64_cpu_capabilities *caps;
+ check_early_cpu_features();
+
/*
* If we haven't computed the system capabilities, there is nothing
* to verify.
@@ -869,35 +934,33 @@ void verify_local_cpu_capabilities(void)
return;
caps = arm64_features;
- for (i = 0; caps[i].desc; i++) {
+ for (i = 0; caps[i].matches; i++) {
if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
continue;
/*
* If the new CPU misses an advertised feature, we cannot proceed
* further, park the cpu.
*/
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
- fail_incapable_cpu("arm64_features", &caps[i]);
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+ pr_crit("CPU%d: missing feature: %s\n",
+ smp_processor_id(), caps[i].desc);
+ cpu_die_early();
+ }
if (caps[i].enable)
caps[i].enable(NULL);
}
- for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+ for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
if (!cpus_have_hwcap(&caps[i]))
continue;
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
- fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+ if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+ pr_crit("CPU%d: missing HWCAP: %s\n",
+ smp_processor_id(), caps[i].desc);
+ cpu_die_early();
+ }
}
}
-#else /* !CONFIG_HOTPLUG_CPU */
-
-static inline void set_sys_caps_initialised(void)
-{
-}
-
-#endif /* CONFIG_HOTPLUG_CPU */
-
static void __init setup_feature_capabilities(void)
{
update_cpu_capabilities(arm64_features, "detected feature:");
@@ -928,3 +991,9 @@ void __init setup_cpu_features(void)
pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
L1_CACHE_BYTES, cls);
}
+
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+{
+ return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 212ae6361d8b..84c8684431c7 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -59,6 +59,8 @@ static const char *const hwcap_str[] = {
"sha2",
"crc32",
"atomics",
+ "fphp",
+ "asimdhp",
NULL
};
@@ -210,6 +212,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c536c9e307b9..c45f2968bc8c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -34,7 +34,7 @@
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
{
- return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+ return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_DEBUGVER_SHIFT);
}
@@ -186,20 +186,21 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
/* EL1 Single Step Handler hooks */
static LIST_HEAD(step_hook);
-static DEFINE_RWLOCK(step_hook_lock);
+static DEFINE_SPINLOCK(step_hook_lock);
void register_step_hook(struct step_hook *hook)
{
- write_lock(&step_hook_lock);
- list_add(&hook->node, &step_hook);
- write_unlock(&step_hook_lock);
+ spin_lock(&step_hook_lock);
+ list_add_rcu(&hook->node, &step_hook);
+ spin_unlock(&step_hook_lock);
}
void unregister_step_hook(struct step_hook *hook)
{
- write_lock(&step_hook_lock);
- list_del(&hook->node);
- write_unlock(&step_hook_lock);
+ spin_lock(&step_hook_lock);
+ list_del_rcu(&hook->node);
+ spin_unlock(&step_hook_lock);
+ synchronize_rcu();
}
/*
@@ -213,15 +214,15 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
struct step_hook *hook;
int retval = DBG_HOOK_ERROR;
- read_lock(&step_hook_lock);
+ rcu_read_lock();
- list_for_each_entry(hook, &step_hook, node) {
+ list_for_each_entry_rcu(hook, &step_hook, node) {
retval = hook->fn(regs, esr);
if (retval == DBG_HOOK_HANDLED)
break;
}
- read_unlock(&step_hook_lock);
+ rcu_read_unlock();
return retval;
}
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index a773db92908b..cae3112f7791 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -35,6 +35,7 @@ ENTRY(entry)
* for image_addr variable passed to efi_entry().
*/
stp x29, x30, [sp, #-32]!
+ mov x29, sp
/*
* Call efi_entry to do the real work.
@@ -61,7 +62,7 @@ ENTRY(entry)
*/
mov x20, x0 // DTB address
ldr x0, [sp, #16] // relocated _text address
- ldr x21, =stext_offset
+ movz x21, #:abs_g0:stext_offset
add x21, x0, x21
/*
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index acc1afd5c749..975b274ee7b5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -45,7 +45,7 @@
* been used to perform kernel mode NEON in the meantime.
*
* For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
- * the id of the current CPU everytime the state is loaded onto a CPU. For (b),
+ * the id of the current CPU every time the state is loaded onto a CPU. For (b),
* we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
* address of the userland FPSIMD state of the task that was loaded onto the CPU
* the most recently, or NULL if kernel mode NEON has been performed after that.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 917d98108b3f..6ebd204da16a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,11 +29,14 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
+#include <asm/elf.h>
#include <asm/kernel-pgtable.h>
+#include <asm/kvm_arm.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
@@ -67,12 +70,11 @@
* in the entry routines.
*/
__HEAD
-
+_head:
/*
* DO NOT MODIFY. Image header expected by Linux boot-loaders.
*/
#ifdef CONFIG_EFI
-efi_head:
/*
* This add instruction has no meaningful effect except that
* its opcode forms the magic "MZ" signature required by UEFI.
@@ -83,9 +85,9 @@ efi_head:
b stext // branch to kernel start, magic
.long 0 // reserved
#endif
- .quad _kernel_offset_le // Image load offset from start of RAM, little-endian
- .quad _kernel_size_le // Effective size of kernel image, little-endian
- .quad _kernel_flags_le // Informative flags, little-endian
+ le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian
+ le64sym _kernel_size_le // Effective size of kernel image, little-endian
+ le64sym _kernel_flags_le // Informative flags, little-endian
.quad 0 // reserved
.quad 0 // reserved
.quad 0 // reserved
@@ -94,14 +96,14 @@ efi_head:
.byte 0x4d
.byte 0x64
#ifdef CONFIG_EFI
- .long pe_header - efi_head // Offset to the PE header.
+ .long pe_header - _head // Offset to the PE header.
#else
.word 0 // reserved
#endif
#ifdef CONFIG_EFI
.globl __efistub_stext_offset
- .set __efistub_stext_offset, stext - efi_head
+ .set __efistub_stext_offset, stext - _head
.align 3
pe_header:
.ascii "PE"
@@ -124,7 +126,7 @@ optional_header:
.long _end - stext // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
- .long __efistub_entry - efi_head // AddressOfEntryPoint
+ .long __efistub_entry - _head // AddressOfEntryPoint
.long __efistub_stext_offset // BaseOfCode
extra_header_fields:
@@ -139,7 +141,7 @@ extra_header_fields:
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
- .long _end - efi_head // SizeOfImage
+ .long _end - _head // SizeOfImage
// Everything before the kernel image is considered part of the header
.long __efistub_stext_offset // SizeOfHeaders
@@ -210,6 +212,7 @@ section_table:
ENTRY(stext)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
+ mov x23, xzr // KASLR offset, defaults to 0
adrp x24, __PHYS_OFFSET
bl set_cpu_boot_mode_flag
bl __create_page_tables // x25=TTBR0, x26=TTBR1
@@ -219,11 +222,13 @@ ENTRY(stext)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
- ldr x27, =__mmap_switched // address to jump to after
+ ldr x27, 0f // address to jump to after
// MMU has been enabled
adr_l lr, __enable_mmu // return (PIC) address
b __cpu_setup // initialise processor
ENDPROC(stext)
+ .align 3
+0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
@@ -311,7 +316,7 @@ ENDPROC(preserve_boot_args)
__create_page_tables:
adrp x25, idmap_pg_dir
adrp x26, swapper_pg_dir
- mov x27, lr
+ mov x28, lr
/*
* Invalidate the idmap and swapper page tables to avoid potential
@@ -389,9 +394,11 @@ __create_page_tables:
* Map the kernel image (starting with PHYS_OFFSET).
*/
mov x0, x26 // swapper_pg_dir
- mov x5, #PAGE_OFFSET
+ ldr x5, =KIMAGE_VADDR
+ add x5, x5, x23 // add KASLR displacement
create_pgd_entry x0, x5, x3, x6
- ldr x6, =KERNEL_END // __va(KERNEL_END)
+ ldr w6, kernel_img_size
+ add x6, x6, x5
mov x3, x24 // phys offset
create_block_map x0, x7, x3, x5, x6
@@ -405,9 +412,11 @@ __create_page_tables:
dmb sy
bl __inval_cache_range
- mov lr, x27
- ret
+ ret x28
ENDPROC(__create_page_tables)
+
+kernel_img_size:
+ .long _end - (_head - TEXT_OFFSET)
.ltorg
/*
@@ -415,23 +424,81 @@ ENDPROC(__create_page_tables)
*/
.set initial_sp, init_thread_union + THREAD_START_SP
__mmap_switched:
+ mov x28, lr // preserve LR
+ adr_l x8, vectors // load VBAR_EL1 with virtual
+ msr vbar_el1, x8 // vector table address
+ isb
+
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
adr_l x2, __bss_stop
sub x2, x2, x0
bl __pi_memset
+ dsb ishst // Make zero page visible to PTW
+
+#ifdef CONFIG_RELOCATABLE
+
+ /*
+ * Iterate over each entry in the relocation table, and apply the
+ * relocations in place.
+ */
+ adr_l x8, __dynsym_start // start of symbol table
+ adr_l x9, __reloc_start // start of reloc table
+ adr_l x10, __reloc_end // end of reloc table
+
+0: cmp x9, x10
+ b.hs 2f
+ ldp x11, x12, [x9], #24
+ ldr x13, [x9, #-8]
+ cmp w12, #R_AARCH64_RELATIVE
+ b.ne 1f
+ add x13, x13, x23 // relocate
+ str x13, [x11, x23]
+ b 0b
+
+1: cmp w12, #R_AARCH64_ABS64
+ b.ne 0b
+ add x12, x12, x12, lsl #1 // symtab offset: 24x top word
+ add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word
+ ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx
+ ldr x15, [x12, #8] // Elf64_Sym::st_value
+ cmp w14, #-0xf // SHN_ABS (0xfff1) ?
+ add x14, x15, x23 // relocate
+ csel x15, x14, x15, ne
+ add x15, x13, x15
+ str x15, [x11, x23]
+ b 0b
+
+2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr
+ dc cvac, x8 // value visible to secondaries
+ dsb sy // with MMU off
+#endif
adr_l sp, initial_sp, x4
mov x4, sp
and x4, x4, #~(THREAD_SIZE - 1)
msr sp_el0, x4 // Save thread_info
str_l x21, __fdt_pointer, x5 // Save FDT pointer
- str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
+
+ ldr_l x4, kimage_vaddr // Save the offset between
+ sub x4, x4, x24 // the kernel virtual and
+ str_l x4, kimage_voffset, x5 // physical mappings
+
mov x29, #0
#ifdef CONFIG_KASAN
bl kasan_early_init
#endif
+#ifdef CONFIG_RANDOMIZE_BASE
+ cbnz x23, 0f // already running randomized?
+ mov x0, x21 // pass FDT address in x0
+ bl kaslr_early_init // parse FDT for KASLR options
+ cbz x0, 0f // KASLR disabled? just proceed
+ mov x23, x0 // record KASLR offset
+ ret x28 // we must enable KASLR, return
+ // to __enable_mmu()
+0:
+#endif
b start_kernel
ENDPROC(__mmap_switched)
@@ -440,6 +507,10 @@ ENDPROC(__mmap_switched)
* hotplug and needs to have the same protections as the text region
*/
.section ".text","ax"
+
+ENTRY(kimage_vaddr)
+ .quad _text - TEXT_OFFSET
+
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
@@ -464,9 +535,27 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
isb
ret
+2:
+#ifdef CONFIG_ARM64_VHE
+ /*
+ * Check for VHE being present. For the rest of the EL2 setup,
+ * x2 being non-zero indicates that we do have VHE, and that the
+ * kernel is intended to run at EL2.
+ */
+ mrs x2, id_aa64mmfr1_el1
+ ubfx x2, x2, #8, #4
+#else
+ mov x2, xzr
+#endif
+
/* Hyp configuration. */
-2: mov x0, #(1 << 31) // 64-bit EL1
+ mov x0, #HCR_RW // 64-bit EL1
+ cbz x2, set_hcr
+ orr x0, x0, #HCR_TGE // Enable Host Extensions
+ orr x0, x0, #HCR_E2H
+set_hcr:
msr hcr_el2, x0
+ isb
/* Generic timers. */
mrs x0, cnthctl_el2
@@ -526,6 +615,13 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
/* Stage-2 translation */
msr vttbr_el2, xzr
+ cbz x2, install_el2_stub
+
+ mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ isb
+ ret
+
+install_el2_stub:
/* Hypervisor stub */
adrp x0, __hyp_stub_vectors
add x0, x0, #:lo12:__hyp_stub_vectors
@@ -605,13 +701,20 @@ ENTRY(secondary_startup)
adrp x26, swapper_pg_dir
bl __cpu_setup // initialise processor
- ldr x21, =secondary_data
- ldr x27, =__secondary_switched // address to jump to after enabling the MMU
+ ldr x8, kimage_vaddr
+ ldr w9, 0f
+ sub x27, x8, w9, sxtw // address to jump to after enabling the MMU
b __enable_mmu
ENDPROC(secondary_startup)
+0: .long (_text - TEXT_OFFSET) - __secondary_switched
ENTRY(__secondary_switched)
- ldr x0, [x21] // get secondary_data.stack
+ adr_l x5, vectors
+ msr vbar_el1, x5
+ isb
+
+ adr_l x0, secondary_data
+ ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
mov sp, x0
and x0, x0, #~(THREAD_SIZE - 1)
msr sp_el0, x0 // save thread_info
@@ -620,6 +723,29 @@ ENTRY(__secondary_switched)
ENDPROC(__secondary_switched)
/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ *
+ * update_early_cpu_boot_status tmp, status
+ * - Corrupts tmp1, tmp2
+ * - Writes 'status' to __early_cpu_boot_status and makes sure
+ * it is committed to memory.
+ */
+
+ .macro update_early_cpu_boot_status status, tmp1, tmp2
+ mov \tmp2, #\status
+ str_l \tmp2, __early_cpu_boot_status, \tmp1
+ dmb sy
+ dc ivac, \tmp1 // Invalidate potentially stale cache line
+ .endm
+
+ .pushsection .data..cacheline_aligned
+ .align L1_CACHE_SHIFT
+ENTRY(__early_cpu_boot_status)
+ .long 0
+ .popsection
+
+/*
* Enable the MMU.
*
* x0 = SCTLR_EL1 value for turning on the MMU.
@@ -632,12 +758,12 @@ ENDPROC(__secondary_switched)
*/
.section ".idmap.text", "ax"
__enable_mmu:
+ mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
- ldr x5, =vectors
- msr vbar_el1, x5
+ update_early_cpu_boot_status 0, x1, x2
msr ttbr0_el1, x25 // load TTBR0
msr ttbr1_el1, x26 // load TTBR1
isb
@@ -651,10 +777,33 @@ __enable_mmu:
ic iallu
dsb nsh
isb
+#ifdef CONFIG_RANDOMIZE_BASE
+ mov x19, x0 // preserve new SCTLR_EL1 value
+ blr x27
+
+ /*
+ * If we return here, we have a KASLR displacement in x23 which we need
+ * to take into account by discarding the current kernel mapping and
+ * creating a new one.
+ */
+ msr sctlr_el1, x18 // disable the MMU
+ isb
+ bl __create_page_tables // recreate kernel mapping
+
+ msr sctlr_el1, x19 // re-enable the MMU
+ isb
+ ic ialluis // flush instructions fetched
+ isb // via old mapping
+ add x27, x27, x23 // relocated __mmap_switched
+#endif
br x27
ENDPROC(__enable_mmu)
__no_granule_support:
+ /* Indicate that this CPU can't boot and is stuck in the kernel */
+ update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
+1:
wfe
- b __no_granule_support
+ wfi
+ b 1b
ENDPROC(__no_granule_support)
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 352f7abd91c9..5e360ce88f10 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -26,31 +26,40 @@
* There aren't any ELF relocations we can use to endian-swap values known only
* at link time (e.g. the subtraction of two symbol addresses), so we must get
* the linker to endian-swap certain values before emitting them.
+ *
+ * Note that, in order for this to work when building the ELF64 PIE executable
+ * (for KASLR), these values should not be referenced via R_AARCH64_ABS64
+ * relocations, since these are fixed up at runtime rather than at build time
+ * when PIE is in effect. So we need to split them up in 32-bit high and low
+ * words.
*/
#ifdef CONFIG_CPU_BIG_ENDIAN
-#define DATA_LE64(data) \
- ((((data) & 0x00000000000000ff) << 56) | \
- (((data) & 0x000000000000ff00) << 40) | \
- (((data) & 0x0000000000ff0000) << 24) | \
- (((data) & 0x00000000ff000000) << 8) | \
- (((data) & 0x000000ff00000000) >> 8) | \
- (((data) & 0x0000ff0000000000) >> 24) | \
- (((data) & 0x00ff000000000000) >> 40) | \
- (((data) & 0xff00000000000000) >> 56))
+#define DATA_LE32(data) \
+ ((((data) & 0x000000ff) << 24) | \
+ (((data) & 0x0000ff00) << 8) | \
+ (((data) & 0x00ff0000) >> 8) | \
+ (((data) & 0xff000000) >> 24))
#else
-#define DATA_LE64(data) ((data) & 0xffffffffffffffff)
+#define DATA_LE32(data) ((data) & 0xffffffff)
#endif
+#define DEFINE_IMAGE_LE64(sym, data) \
+ sym##_lo32 = DATA_LE32((data) & 0xffffffff); \
+ sym##_hi32 = DATA_LE32((data) >> 32)
+
#ifdef CONFIG_CPU_BIG_ENDIAN
-#define __HEAD_FLAG_BE 1
+#define __HEAD_FLAG_BE 1
#else
-#define __HEAD_FLAG_BE 0
+#define __HEAD_FLAG_BE 0
#endif
-#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+
+#define __HEAD_FLAG_PHYS_BASE 1
-#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
- (__HEAD_FLAG_PAGE_SIZE << 1))
+#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
+ (__HEAD_FLAG_PAGE_SIZE << 1) | \
+ (__HEAD_FLAG_PHYS_BASE << 3))
/*
* These will output as part of the Image header, which should be little-endian
@@ -58,9 +67,9 @@
* endian swapped in head.S, all are done here for consistency.
*/
#define HEAD_SYMBOLS \
- _kernel_size_le = DATA_LE64(_end - _text); \
- _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \
- _kernel_flags_le = DATA_LE64(__HEAD_FLAGS);
+ DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \
+ DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \
+ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
#ifdef CONFIG_EFI
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
new file mode 100644
index 000000000000..582983920054
--- /dev/null
+++ b/arch/arm64/kernel/kaslr.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+
+u64 __read_mostly module_alloc_base;
+u16 __initdata memstart_offset_seed;
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ u64 *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+static __init const u8 *get_cmdline(void *fdt)
+{
+ static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ int node;
+ const u8 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ goto out;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ goto out;
+ return prop;
+ }
+out:
+ return default_cmdline;
+}
+
+extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size,
+ pgprot_t prot);
+
+/*
+ * This routine will be executed with the kernel mapped at its default virtual
+ * address, and if it returns successfully, the kernel will be remapped, and
+ * start_kernel() will be executed from a randomized virtual offset. The
+ * relocation will result in all absolute references (e.g., static variables
+ * containing function pointers) to be reinitialized, and zero-initialized
+ * .bss variables will be reset to 0.
+ */
+u64 __init kaslr_early_init(u64 dt_phys)
+{
+ void *fdt;
+ u64 seed, offset, mask, module_range;
+ const u8 *cmdline, *str;
+ int size;
+
+ /*
+ * Set a reasonable default for module_alloc_base in case
+ * we end up running with module randomization disabled.
+ */
+ module_alloc_base = (u64)_etext - MODULES_VSIZE;
+
+ /*
+ * Try to map the FDT early. If this fails, we simply bail,
+ * and proceed with KASLR disabled. We will make another
+ * attempt at mapping the FDT in setup_machine()
+ */
+ early_fixmap_init();
+ fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
+ if (!fdt)
+ return 0;
+
+ /*
+ * Retrieve (and wipe) the seed from the FDT
+ */
+ seed = get_kaslr_seed(fdt);
+ if (!seed)
+ return 0;
+
+ /*
+ * Check if 'nokaslr' appears on the command line, and
+ * return 0 if that is the case.
+ */
+ cmdline = get_cmdline(fdt);
+ str = strstr(cmdline, "nokaslr");
+ if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+ return 0;
+
+ /*
+ * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+ * kernel image offset from the seed. Let's place the kernel in the
+ * lower half of the VMALLOC area (VA_BITS - 2).
+ * Even if we could randomize at page granularity for 16k and 64k pages,
+ * let's always round to 2 MB so we don't interfere with the ability to
+ * map using contiguous PTEs
+ */
+ mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
+ offset = seed & mask;
+
+ /* use the top 16 bits to randomize the linear region */
+ memstart_offset_seed = seed >> 48;
+
+ /*
+ * The kernel Image should not extend across a 1GB/32MB/512MB alignment
+ * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
+ * happens, increase the KASLR offset by the size of the kernel image.
+ */
+ if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
+ (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+ offset = (offset + (u64)(_end - _text)) & mask;
+
+ if (IS_ENABLED(CONFIG_KASAN))
+ /*
+ * KASAN does not expect the module region to intersect the
+ * vmalloc region, since shadow memory is allocated for each
+ * module at load time, whereas the vmalloc region is shadowed
+ * by KASAN zero pages. So keep modules out of the vmalloc
+ * region if KASAN is enabled.
+ */
+ return offset;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ /*
+ * Randomize the module region independently from the core
+ * kernel. This prevents modules from leaking any information
+ * about the address of the kernel itself, but results in
+ * branches between modules and the core kernel that are
+ * resolved via PLTs. (Branches between modules will be
+ * resolved normally.)
+ */
+ module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE;
+ module_alloc_base = VMALLOC_START;
+ } else {
+ /*
+ * Randomize the module region by setting module_alloc_base to
+ * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+ * _stext) . This guarantees that the resulting region still
+ * covers [_stext, _etext], and that all relative branches can
+ * be resolved without veneers.
+ */
+ module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+ module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
+ }
+
+ /* use the lower 21 bits to randomize the base of the module region */
+ module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
+ module_alloc_base &= PAGE_MASK;
+
+ return offset;
+}
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index bcac81e600b9..b67531a13136 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -292,8 +292,8 @@ static struct notifier_block kgdb_notifier = {
};
/*
- * kgdb_arch_init - Perform any architecture specific initalization.
- * This function will handle the initalization of any architecture
+ * kgdb_arch_init - Perform any architecture specific initialization.
+ * This function will handle the initialization of any architecture
* specific callbacks.
*/
int kgdb_arch_init(void)
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
new file mode 100644
index 000000000000..1ce90d8450ae
--- /dev/null
+++ b/arch/arm64/kernel/module-plts.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+
+struct plt_entry {
+ /*
+ * A program that conforms to the AArch64 Procedure Call Standard
+ * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+ * IP1 (x17) may be inserted at any branch instruction that is
+ * exposed to a relocation that supports long branches. Since that
+ * is exactly what we are dealing with here, we are free to use x16
+ * as a scratch register in the PLT veneers.
+ */
+ __le32 mov0; /* movn x16, #0x.... */
+ __le32 mov1; /* movk x16, #0x...., lsl #16 */
+ __le32 mov2; /* movk x16, #0x...., lsl #32 */
+ __le32 br; /* br x16 */
+};
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+ Elf64_Sym *sym)
+{
+ struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
+ int i = mod->arch.plt_num_entries;
+ u64 val = sym->st_value + rela->r_addend;
+
+ /*
+ * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
+ * which are listed in the ELF symtab section, but without a type
+ * or a size.
+ * So, similar to how the module loader uses the Elf64_Sym::st_value
+ * field to store the resolved addresses of undefined symbols, let's
+ * borrow the Elf64_Sym::st_size field (whose value is never used by
+ * the module loader, even for symbols that are defined) to record
+ * the address of a symbol's associated PLT entry as we emit it for a
+ * zero addend relocation (which is the only kind we have to deal with
+ * in practice). This allows us to find duplicates without having to
+ * go through the table every time.
+ */
+ if (rela->r_addend == 0 && sym->st_size != 0) {
+ BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
+ return sym->st_size;
+ }
+
+ mod->arch.plt_num_entries++;
+ BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
+
+ /*
+ * MOVK/MOVN/MOVZ opcode:
+ * +--------+------------+--------+-----------+-------------+---------+
+ * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+ * +--------+------------+--------+-----------+-------------+---------+
+ *
+ * Rd := 0x10 (x16)
+ * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+ * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+ * sf := 1 (64-bit variant)
+ */
+ plt[i] = (struct plt_entry){
+ cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
+ cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+ cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+ cpu_to_le32(0xd61f0200)
+ };
+
+ if (rela->r_addend == 0)
+ sym->st_size = (u64)&plt[i];
+
+ return (u64)&plt[i];
+}
+
+#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+ const Elf64_Rela *x = a, *y = b;
+ int i;
+
+ /* sort by type, symbol index and addend */
+ i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+ if (i == 0)
+ i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+ if (i == 0)
+ i = cmp_3way(x->r_addend, y->r_addend);
+ return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+ /*
+ * Entries are sorted by type, symbol index and addend. That means
+ * that, if a duplicate entry exists, it must be in the preceding
+ * slot.
+ */
+ return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+ unsigned int ret = 0;
+ Elf64_Sym *s;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ /*
+ * We only have to consider branch targets that resolve
+ * to undefined symbols. This is not simply a heuristic,
+ * it is a fundamental limitation, since the PLT itself
+ * is part of the module, and needs to be within 128 MB
+ * as well, so modules can never grow beyond that limit.
+ */
+ s = syms + ELF64_R_SYM(rela[i].r_info);
+ if (s->st_shndx != SHN_UNDEF)
+ break;
+
+ /*
+ * Jump relocations with non-zero addends against
+ * undefined symbols are supported by the ELF spec, but
+ * do not occur in practice (e.g., 'jump n bytes past
+ * the entry point of undefined function symbol f').
+ * So we need to support them, but there is no need to
+ * take them into consideration when trying to optimize
+ * this code. So let's only check for duplicates when
+ * the addend is zero: this allows us to record the PLT
+ * entry address in the symbol table itself, rather than
+ * having to search the list for duplicates each time we
+ * emit one.
+ */
+ if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
+ ret++;
+ break;
+ }
+ }
+ return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ unsigned long plt_max_entries = 0;
+ Elf64_Sym *syms = NULL;
+ int i;
+
+ /*
+ * Find the empty .plt section so we can expand it to store the PLT
+ * entries. Record the symtab address as well.
+ */
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
+ mod->arch.plt = sechdrs + i;
+ else if (sechdrs[i].sh_type == SHT_SYMTAB)
+ syms = (Elf64_Sym *)sechdrs[i].sh_addr;
+ }
+
+ if (!mod->arch.plt) {
+ pr_err("%s: module PLT section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+ if (!syms) {
+ pr_err("%s: module symtab section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+ int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+ Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
+
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+
+ /* ignore relocations that operate on non-exec sections */
+ if (!(dstsec->sh_flags & SHF_EXECINSTR))
+ continue;
+
+ /* sort by type, symbol index and addend */
+ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+ plt_max_entries += count_plts(syms, rels, numrels);
+ }
+
+ mod->arch.plt->sh_type = SHT_NOBITS;
+ mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
+ mod->arch.plt_num_entries = 0;
+ mod->arch.plt_max_entries = plt_max_entries;
+ return 0;
+}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 93e970231ca9..7f316982ce00 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -34,10 +34,26 @@ void *module_alloc(unsigned long size)
{
void *p;
- p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+ module_alloc_base + MODULES_VSIZE,
GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
NUMA_NO_NODE, __builtin_return_address(0));
+ if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+ !IS_ENABLED(CONFIG_KASAN))
+ /*
+ * KASAN can only deal with module allocations being served
+ * from the reserved module region, since the remainder of
+ * the vmalloc region is already backed by zero shadow pages,
+ * and punching holes into it is non-trivial. Since the module
+ * region is not randomized when KASAN is enabled, it is even
+ * less likely that the module region gets exhausted, so we
+ * can simply omit this fallback in that case.
+ */
+ p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START,
+ VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+ NUMA_NO_NODE, __builtin_return_address(0));
+
if (p && (kasan_module_alloc(p, size) < 0)) {
vfree(p);
return NULL;
@@ -361,6 +377,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
AARCH64_INSN_IMM_26);
+
+ if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+ ovf == -ERANGE) {
+ val = module_emit_plt_entry(me, &rel[i], sym);
+ ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
+ 26, AARCH64_INSN_IMM_26);
+ }
break;
default:
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
new file mode 100644
index 000000000000..8949f6c6f729
--- /dev/null
+++ b/arch/arm64/kernel/module.lds
@@ -0,0 +1,3 @@
+SECTIONS {
+ .plt (NOLOAD) : { BYTE(0) }
+}
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index b3d098bd34aa..c72de668e1d4 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -19,8 +19,6 @@
#include <linux/of_platform.h>
#include <linux/slab.h>
-#include <asm/pci-bridge.h>
-
/*
* Called after each bus is probed, but before its children are examined
*/
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f7ab14c4d5df..1b52269ffa87 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -20,6 +20,7 @@
*/
#include <asm/irq_regs.h>
+#include <asm/virt.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
@@ -691,9 +692,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
if (attr->exclude_idle)
return -EPERM;
+ if (is_kernel_in_hyp_mode() &&
+ attr->exclude_kernel != attr->exclude_hv)
+ return -EINVAL;
if (attr->exclude_user)
config_base |= ARMV8_EXCLUDE_EL0;
- if (attr->exclude_kernel)
+ if (!is_kernel_in_hyp_mode() && attr->exclude_kernel)
config_base |= ARMV8_EXCLUDE_EL1;
if (!attr->exclude_hv)
config_base |= ARMV8_INCLUDE_EL2;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 88d742ba19d5..80624829db61 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -46,6 +46,7 @@
#include <linux/notifier.h>
#include <trace/events/power.h>
+#include <asm/alternative.h>
#include <asm/compat.h>
#include <asm/cacheflush.h>
#include <asm/fpsimd.h>
@@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
} else {
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h;
+ if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+ cpus_have_cap(ARM64_HAS_UAO))
+ childregs->pstate |= PSR_UAO_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
}
@@ -308,6 +312,17 @@ static void tls_thread_switch(struct task_struct *next)
: : "r" (tpidr), "r" (tpidrro));
}
+/* Restore the UAO state depending on next's addr_limit */
+static void uao_thread_switch(struct task_struct *next)
+{
+ if (IS_ENABLED(CONFIG_ARM64_UAO)) {
+ if (task_thread_info(next)->addr_limit == KERNEL_DS)
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+ else
+ asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
+ }
+}
+
/*
* Thread switching.
*/
@@ -320,6 +335,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
+ uao_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index ff7f13239515..3f6cd5c5234f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -500,7 +500,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- if (!valid_user_regs(&newregs))
+ if (!valid_user_regs(&newregs, target))
return -EINVAL;
task_pt_regs(target)->user_regs = newregs;
@@ -770,7 +770,7 @@ static int compat_gpr_set(struct task_struct *target,
}
- if (valid_user_regs(&newregs.user_regs))
+ if (valid_user_regs(&newregs.user_regs, target))
*task_pt_regs(target) = newregs;
else
ret = -EINVAL;
@@ -1272,3 +1272,79 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs)
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
}
+
+/*
+ * Bits which are always architecturally RES0 per ARM DDI 0487A.h
+ * Userspace cannot use these until they have an architectural meaning.
+ * We also reserve IL for the kernel; SS is handled dynamically.
+ */
+#define SPSR_EL1_AARCH64_RES0_BITS \
+ (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \
+ GENMASK_ULL(5, 5))
+#define SPSR_EL1_AARCH32_RES0_BITS \
+ (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20))
+
+static int valid_compat_regs(struct user_pt_regs *regs)
+{
+ regs->pstate &= ~SPSR_EL1_AARCH32_RES0_BITS;
+
+ if (!system_supports_mixed_endian_el0()) {
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ regs->pstate |= COMPAT_PSR_E_BIT;
+ else
+ regs->pstate &= ~COMPAT_PSR_E_BIT;
+ }
+
+ if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) &&
+ (regs->pstate & COMPAT_PSR_A_BIT) == 0 &&
+ (regs->pstate & COMPAT_PSR_I_BIT) == 0 &&
+ (regs->pstate & COMPAT_PSR_F_BIT) == 0) {
+ return 1;
+ }
+
+ /*
+ * Force PSR to a valid 32-bit EL0t, preserving the same bits as
+ * arch/arm.
+ */
+ regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT |
+ COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT |
+ COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK |
+ COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT |
+ COMPAT_PSR_T_BIT;
+ regs->pstate |= PSR_MODE32_BIT;
+
+ return 0;
+}
+
+static int valid_native_regs(struct user_pt_regs *regs)
+{
+ regs->pstate &= ~SPSR_EL1_AARCH64_RES0_BITS;
+
+ if (user_mode(regs) && !(regs->pstate & PSR_MODE32_BIT) &&
+ (regs->pstate & PSR_D_BIT) == 0 &&
+ (regs->pstate & PSR_A_BIT) == 0 &&
+ (regs->pstate & PSR_I_BIT) == 0 &&
+ (regs->pstate & PSR_F_BIT) == 0) {
+ return 1;
+ }
+
+ /* Force PSR to a valid 64-bit EL0t */
+ regs->pstate &= PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT;
+
+ return 0;
+}
+
+/*
+ * Are the current registers suitable for user mode? (used to maintain
+ * security in signal handlers)
+ */
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task)
+{
+ if (!test_tsk_thread_flag(task, TIF_SINGLESTEP))
+ regs->pstate &= ~DBG_SPSR_SS;
+
+ if (is_compat_thread(task_thread_info(task)))
+ return valid_compat_regs(regs);
+ else
+ return valid_native_regs(regs);
+}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479147db..9dc67769b6a4 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -62,6 +62,7 @@
#include <asm/memblock.h>
#include <asm/efi.h>
#include <asm/xen/hypervisor.h>
+#include <asm/mmu_context.h>
phys_addr_t __fdt_pointer __initdata;
@@ -73,13 +74,13 @@ static struct resource mem_res[] = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
},
{
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
}
};
@@ -210,7 +211,7 @@ static void __init request_standard_resources(void)
res->name = "System RAM";
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
@@ -313,6 +314,12 @@ void __init setup_arch(char **cmdline_p)
*/
local_async_enable();
+ /*
+ * TTBR0 is only used for the identity mapping at this stage. Make it
+ * point to zero page to avoid speculatively fetching new entries.
+ */
+ cpu_uninstall_idmap();
+
efi_init();
arm64_memblock_init();
@@ -381,3 +388,32 @@ static int __init topology_init(void)
return 0;
}
subsys_initcall(topology_init);
+
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+ void *p)
+{
+ u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
+ pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
+ kaslr_offset, KIMAGE_VADDR);
+ } else {
+ pr_emerg("Kernel Offset: disabled\n");
+ }
+ return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+ return 0;
+}
+__initcall(register_kernel_offset_dumper);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e18c48cb6db1..a8eafdbc7cb8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -115,7 +115,7 @@ static int restore_sigframe(struct pt_regs *regs,
*/
regs->syscallno = ~0UL;
- err |= !valid_user_regs(&regs->user_regs);
+ err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0) {
struct fpsimd_context *fpsimd_ctx =
@@ -307,7 +307,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Check that the resulting registers are actually sane.
*/
- ret |= !valid_user_regs(&regs->user_regs);
+ ret |= !valid_user_regs(&regs->user_regs, current);
/*
* Fast forward the stepping logic so we step into the signal
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 71ef6dc89ae5..b7063de792f7 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -166,7 +166,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
#ifdef BUS_MCEERR_AO
/*
* Other callers might not initialize the si_lsb field,
- * so check explicitely for the right codes here.
+ * so check explicitly for the right codes here.
*/
if (from->si_signo == SIGBUS &&
(from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
@@ -356,7 +356,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
*/
regs->syscallno = ~0UL;
- err |= !valid_user_regs(&regs->user_regs);
+ err |= !valid_user_regs(&regs->user_regs, current);
aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
if (err == 0)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index e33fe33876ab..fd10eb663868 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -145,6 +145,10 @@ ENTRY(cpu_resume_mmu)
ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:
+#ifdef CONFIG_KASAN
+ mov x0, sp
+ bl kasan_unpoison_remaining_stack
+#endif
mov x0, #0 // return zero on success
ldp x19, x20, [sp, #16]
ldp x21, x22, [sp, #32]
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e..b2d5f4ee9a1c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -63,6 +63,8 @@
* where to place its SVC stack
*/
struct secondary_data secondary_data;
+/* Number of CPUs which aren't online, but looping in kernel text. */
+int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
@@ -70,8 +72,19 @@ enum ipi_msg_type {
IPI_CPU_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
+ IPI_WAKEUP
};
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_kill(unsigned int cpu);
+#else
+static inline int op_cpu_kill(unsigned int cpu)
+{
+ return -ENOSYS;
+}
+#endif
+
+
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
@@ -89,12 +102,14 @@ static DECLARE_COMPLETION(cpu_running);
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
+ long status;
/*
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+ update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
/*
@@ -118,6 +133,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
secondary_data.stack = NULL;
+ status = READ_ONCE(secondary_data.status);
+ if (ret && status) {
+
+ if (status == CPU_MMU_OFF)
+ status = READ_ONCE(__early_cpu_boot_status);
+
+ switch (status) {
+ default:
+ pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+ cpu, status);
+ break;
+ case CPU_KILL_ME:
+ if (!op_cpu_kill(cpu)) {
+ pr_crit("CPU%u: died during early boot\n", cpu);
+ break;
+ }
+ /* Fall through */
+ pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+ case CPU_STUCK_IN_KERNEL:
+ pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_PANIC_KERNEL:
+ panic("CPU%u detected unsupported configuration\n", cpu);
+ }
+ }
return ret;
}
@@ -149,9 +190,7 @@ asmlinkage void secondary_start_kernel(void)
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
*/
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- cpu_set_default_tcr_t0sz();
+ cpu_uninstall_idmap();
preempt_disable();
trace_hardirqs_off();
@@ -185,6 +224,9 @@ asmlinkage void secondary_start_kernel(void)
*/
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
+ update_cpu_boot_status(CPU_BOOT_SUCCESS);
+ /* Make sure the status update is visible before we complete */
+ smp_wmb();
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -195,7 +237,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -313,6 +355,30 @@ void cpu_die(void)
}
#endif
+/*
+ * Kill the calling secondary CPU, early in bringup before it is turned
+ * online.
+ */
+void cpu_die_early(void)
+{
+ int cpu = smp_processor_id();
+
+ pr_crit("CPU%d: will not boot\n", cpu);
+
+ /* Mark this CPU absent */
+ set_cpu_present(cpu, 0);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ update_cpu_boot_status(CPU_KILL_ME);
+ /* Check if we can park ourselves */
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+ cpu_ops[cpu]->cpu_die(cpu);
+#endif
+ update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
+
+ cpu_park_loop();
+}
+
static void __init hyp_mode_check(void)
{
if (is_hyp_mode_available())
@@ -445,6 +511,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
/* map the logical cpu id to cpu MPIDR */
cpu_logical_map(cpu_count) = hwid;
+ /*
+ * Set-up the ACPI parking protocol cpu entries
+ * while initializing the cpu_logical_map to
+ * avoid parsing MADT entries multiple times for
+ * nothing (ie a valid cpu_logical_map entry should
+ * contain a valid parking protocol data set to
+ * initialize the cpu if the parking protocol is
+ * the only available enable method).
+ */
+ acpi_set_mailbox_entry(cpu_count, processor);
+
cpu_count++;
}
@@ -627,6 +704,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
S(IPI_IRQ_WORK, "IRQ work interrupts"),
+ S(IPI_WAKEUP, "CPU wake-up interrupts"),
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
@@ -670,6 +748,13 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+ smp_cross_call(mask, IPI_WAKEUP);
+}
+#endif
+
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
@@ -747,6 +832,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
#endif
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+ case IPI_WAKEUP:
+ WARN_ONCE(!acpi_parking_protocol_valid(cpu),
+ "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
+ cpu);
+ break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 1095aa483a1c..66055392f445 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
*/
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
- struct mm_struct *mm = current->active_mm;
int ret;
unsigned long flags;
@@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
ret = __cpu_suspend_enter(arg, fn);
if (ret == 0) {
/*
- * We are resuming from reset with TTBR0_EL1 set to the
- * idmap to enable the MMU; set the TTBR0 to the reserved
- * page tables to prevent speculative TLB allocations, flush
- * the local tlb and set the default tcr_el1.t0sz so that
- * the TTBR0 address space set-up is properly restored.
- * If the current active_mm != &init_mm we entered cpu_suspend
- * with mappings in TTBR0 that must be restored, so we switch
- * them back to complete the address space configuration
- * restoration before returning.
+ * We are resuming from reset with the idmap active in TTBR0_EL1.
+ * We must uninstall the idmap and restore the expected MMU
+ * state before we can possibly return to userspace.
*/
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- cpu_set_default_tcr_t0sz();
-
- if (mm != &init_mm)
- cpu_switch_mm(mm->pgd, mm);
+ cpu_uninstall_idmap();
/*
* Restore per-cpu offset before any kernel
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
index 60c1db54b41a..82379a70ef03 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -21,9 +21,8 @@
#include <linux/const.h>
#include <asm/page.h>
- __PAGE_ALIGNED_DATA
-
.globl vdso_start, vdso_end
+ .section .rodata
.balign PAGE_SIZE
vdso_start:
.incbin "arch/arm64/kernel/vdso/vdso.so"
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e3928f578891..4c56e7a0621b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -87,15 +87,16 @@ SECTIONS
EXIT_CALL
*(.discard)
*(.discard.*)
+ *(.interp .dynamic)
}
- . = PAGE_OFFSET + TEXT_OFFSET;
+ . = KIMAGE_VADDR + TEXT_OFFSET;
.head.text : {
_text = .;
HEAD_TEXT
}
- ALIGN_DEBUG_RO
+ ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
@@ -113,13 +114,13 @@ SECTIONS
*(.got) /* Global offset table */
}
- RO_DATA(PAGE_SIZE)
- EXCEPTION_TABLE(8)
+ ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ RO_DATA(PAGE_SIZE) /* everything from this point to */
+ EXCEPTION_TABLE(8) /* _etext will be marked RO NX */
NOTES
- ALIGN_DEBUG_RO
- _etext = .; /* End of text and rodata section */
ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ _etext = .; /* End of text and rodata section */
__init_begin = .;
INIT_TEXT_SECTION(8)
@@ -150,6 +151,21 @@ SECTIONS
.altinstr_replacement : {
*(.altinstr_replacement)
}
+ .rela : ALIGN(8) {
+ __reloc_start = .;
+ *(.rela .rela*)
+ __reloc_end = .;
+ }
+ .dynsym : ALIGN(8) {
+ __dynsym_start = .;
+ *(.dynsym)
+ }
+ .dynstr : {
+ *(.dynstr)
+ }
+ .hash : {
+ *(.hash)
+ }
. = ALIGN(PAGE_SIZE);
__init_end = .;
@@ -187,4 +203,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
-ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a5272c07d1cb..de7450df7629 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
select KVM_ARM_VGIC_V3
+ select KVM_ARM_PMU if HW_PERF_EVENTS
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
@@ -48,6 +49,12 @@ config KVM_ARM_HOST
---help---
Provides host support for ARM processors.
+config KVM_ARM_PMU
+ bool
+ ---help---
+ Adds support for a virtual Performance Monitoring Unit (PMU) in
+ virtual machines.
+
source drivers/vhost/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index caee9ee8e12a..122cff482ac4 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 9e54ad7c240a..32fad75bb9ff 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -380,3 +380,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
}
return 0;
}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_PMU_V3_CTRL:
+ ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index d073b5a216f7..7d8747c6427c 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -87,26 +87,13 @@ __do_hyp_init:
#endif
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
- * TCR_EL2 and VTCR_EL2.
+ * TCR_EL2.
*/
mrs x5, ID_AA64MMFR0_EL1
bfi x4, x5, #16, #3
msr tcr_el2, x4
- ldr x4, =VTCR_EL2_FLAGS
- bfi x4, x5, #16, #3
- /*
- * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
- * VTCR_EL2.
- */
- mrs x5, ID_AA64MMFR1_EL1
- ubfx x5, x5, #5, #1
- lsl x5, x5, #VTCR_EL2_VS
- orr x4, x4, x5
-
- msr vtcr_el2, x4
-
mrs x4, mair_el1
msr mair_el2, x4
isb
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 0ccdcbbef3c2..48f19a37b3df 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -17,10 +17,12 @@
#include <linux/linkage.h>
+#include <asm/alternative.h>
#include <asm/assembler.h>
+#include <asm/cpufeature.h>
/*
- * u64 kvm_call_hyp(void *hypfn, ...);
+ * u64 __kvm_call_hyp(void *hypfn, ...);
*
* This is not really a variadic function in the classic C-way and care must
* be taken when calling this to ensure parameters are passed in registers
@@ -37,7 +39,12 @@
* used to implement __hyp_get_vectors in the same way as in
* arch/arm64/kernel/hyp_stub.S.
*/
-ENTRY(kvm_call_hyp)
+ENTRY(__kvm_call_hyp)
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
hvc #0
ret
-ENDPROC(kvm_call_hyp)
+alternative_else
+ b __vhe_hyp_call
+ nop
+alternative_endif
+ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 826032bc3945..b6a8fc5ad1af 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -2,9 +2,12 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o
+KVM=../../../../virt/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
@@ -12,3 +15,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index c9c1e97501a9..33342a776ec7 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -18,10 +18,9 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
+#include <asm/debug-monitors.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
+#include <asm/kvm_hyp.h>
#define read_debug(r,n) read_sysreg(r##n##_el1)
#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fd0fbe9b7e6a..ce9e5e5f28cf 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -130,9 +130,15 @@ ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
stp x4, lr, [sp, #-16]!
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x2, cptr_el2
bic x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
+alternative_else
+ mrs x2, cpacr_el1
+ orr x2, x2, #CPACR_EL1_FPEN
+ msr cpacr_el1, x2
+alternative_endif
isb
mrs x3, tpidr_el2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 93e8d983c0bd..3488894397ff 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -19,7 +19,6 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
@@ -38,10 +37,42 @@
ldp x0, x1, [sp], #16
.endm
+.macro do_el2_call
+ /*
+ * Shuffle the parameters before calling the function
+ * pointed to in x0. Assumes parameters in x[1,2,3].
+ */
+ sub sp, sp, #16
+ str lr, [sp]
+ mov lr, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ blr lr
+ ldr lr, [sp]
+ add sp, sp, #16
+.endm
+
+ENTRY(__vhe_hyp_call)
+ do_el2_call
+ /*
+ * We used to rely on having an exception return to get
+ * an implicit isb. In the E2H case, we don't have it anymore.
+ * rather than changing all the leaf functions, just do it here
+ * before returning to the rest of the kernel.
+ */
+ isb
+ ret
+ENDPROC(__vhe_hyp_call)
+
el1_sync: // Guest trapped into EL2
save_x0_to_x3
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x1, esr_el2
+alternative_else
+ mrs x1, esr_el1
+alternative_endif
lsr x2, x1, #ESR_ELx_EC_SHIFT
cmp x2, #ESR_ELx_EC_HVC64
@@ -58,19 +89,13 @@ el1_sync: // Guest trapped into EL2
mrs x0, vbar_el2
b 2f
-1: stp lr, xzr, [sp, #-16]!
-
+1:
/*
- * Compute the function address in EL2, and shuffle the parameters.
+ * Perform the EL2 call
*/
kern_hyp_va x0
- mov lr, x0
- mov x0, x1
- mov x1, x2
- mov x2, x3
- blr lr
+ do_el2_call
- ldp lr, xzr, [sp], #16
2: eret
el1_trap:
@@ -83,72 +108,10 @@ el1_trap:
cmp x2, #ESR_ELx_EC_FP_ASIMD
b.eq __fpsimd_guest_restore
- cmp x2, #ESR_ELx_EC_DABT_LOW
- mov x0, #ESR_ELx_EC_IABT_LOW
- ccmp x2, x0, #4, ne
- b.ne 1f // Not an abort we care about
-
- /* This is an abort. Check for permission fault */
-alternative_if_not ARM64_WORKAROUND_834220
- and x2, x1, #ESR_ELx_FSC_TYPE
- cmp x2, #FSC_PERM
- b.ne 1f // Not a permission fault
-alternative_else
- nop // Use the permission fault path to
- nop // check for a valid S1 translation,
- nop // regardless of the ESR value.
-alternative_endif
-
- /*
- * Check for Stage-1 page table walk, which is guaranteed
- * to give a valid HPFAR_EL2.
- */
- tbnz x1, #7, 1f // S1PTW is set
-
- /* Preserve PAR_EL1 */
- mrs x3, par_el1
- stp x3, xzr, [sp, #-16]!
-
- /*
- * Permission fault, HPFAR_EL2 is invalid.
- * Resolve the IPA the hard way using the guest VA.
- * Stage-1 translation already validated the memory access rights.
- * As such, we can use the EL1 translation regime, and don't have
- * to distinguish between EL0 and EL1 access.
- */
- mrs x2, far_el2
- at s1e1r, x2
- isb
-
- /* Read result */
- mrs x3, par_el1
- ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack
- msr par_el1, x0
- tbnz x3, #0, 3f // Bail out if we failed the translation
- ubfx x3, x3, #12, #36 // Extract IPA
- lsl x3, x3, #4 // and present it like HPFAR
- b 2f
-
-1: mrs x3, hpfar_el2
- mrs x2, far_el2
-
-2: mrs x0, tpidr_el2
- str w1, [x0, #VCPU_ESR_EL2]
- str x2, [x0, #VCPU_FAR_EL2]
- str x3, [x0, #VCPU_HPFAR_EL2]
-
+ mrs x0, tpidr_el2
mov x1, #ARM_EXCEPTION_TRAP
b __guest_exit
- /*
- * Translation failed. Just return to the guest and
- * let it fault again. Another CPU is probably playing
- * behind our back.
- */
-3: restore_x0_to_x3
-
- eret
-
el1_irq:
save_x0_to_x3
mrs x0, tpidr_el2
diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h
deleted file mode 100644
index fb275178b6af..000000000000
--- a/arch/arm64/kvm/hyp/hyp.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_KVM_HYP_H__
-#define __ARM64_KVM_HYP_H__
-
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
-#include <asm/sysreg.h>
-
-#define __hyp_text __section(.hyp.text) notrace
-
-#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK)
-#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \
- + PAGE_OFFSET)
-
-/**
- * hyp_alternate_select - Generates patchable code sequences that are
- * used to switch between two implementations of a function, depending
- * on the availability of a feature.
- *
- * @fname: a symbol name that will be defined as a function returning a
- * function pointer whose type will match @orig and @alt
- * @orig: A pointer to the default function, as returned by @fname when
- * @cond doesn't hold
- * @alt: A pointer to the alternate function, as returned by @fname
- * when @cond holds
- * @cond: a CPU feature (as described in asm/cpufeature.h)
- */
-#define hyp_alternate_select(fname, orig, alt, cond) \
-typeof(orig) * __hyp_text fname(void) \
-{ \
- typeof(alt) *val = orig; \
- asm volatile(ALTERNATIVE("nop \n", \
- "mov %0, %1 \n", \
- cond) \
- : "+r" (val) : "r" (alt)); \
- return val; \
-}
-
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
-
-void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
-
-void __timer_save_state(struct kvm_vcpu *vcpu);
-void __timer_restore_state(struct kvm_vcpu *vcpu);
-
-void __sysreg_save_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
-void __sysreg32_save_state(struct kvm_vcpu *vcpu);
-void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
-
-void __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
-void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
-
-void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
-void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-static inline bool __fpsimd_enabled(void)
-{
- return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
-}
-
-u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
-void __noreturn __hyp_do_panic(unsigned long, ...);
-
-#endif /* __ARM64_KVM_HYP_H__ */
-
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
new file mode 100644
index 000000000000..bfc54fd82797
--- /dev/null
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/types.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+
+void __hyp_text __init_stage2_translation(void)
+{
+ u64 val = VTCR_EL2_FLAGS;
+ u64 tmp;
+
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
+ * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
+ * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
+ */
+ val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16;
+
+ /*
+ * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
+ * bit in VTCR_EL2.
+ */
+ tmp = (read_sysreg(id_aa64mmfr1_el1) >> 4) & 0xf;
+ val |= (tmp == 2) ? VTCR_EL2_VS : 0;
+
+ write_sysreg(val, vtcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index f0e7bdfae134..437cfad5e3d8 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -15,7 +15,53 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "hyp.h"
+#include <linux/types.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+
+static bool __hyp_text __fpsimd_enabled_nvhe(void)
+{
+ return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
+}
+
+static bool __hyp_text __fpsimd_enabled_vhe(void)
+{
+ return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
+}
+
+static hyp_alternate_select(__fpsimd_is_enabled,
+ __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+bool __hyp_text __fpsimd_enabled(void)
+{
+ return __fpsimd_is_enabled()();
+}
+
+static void __hyp_text __activate_traps_vhe(void)
+{
+ u64 val;
+
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~CPACR_EL1_FPEN;
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(__kvm_hyp_vector, vbar_el1);
+}
+
+static void __hyp_text __activate_traps_nvhe(void)
+{
+ u64 val;
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+ write_sysreg(val, cptr_el2);
+}
+
+static hyp_alternate_select(__activate_traps_arch,
+ __activate_traps_nvhe, __activate_traps_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
{
@@ -36,20 +82,37 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(val, hcr_el2);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
+ /* Make sure we trap PMU access from EL0 to EL2 */
+ write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+ __activate_traps_arch()();
+}
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
- write_sysreg(val, cptr_el2);
+static void __hyp_text __deactivate_traps_vhe(void)
+{
+ extern char vectors[]; /* kernel exception vectors */
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
+ write_sysreg(vectors, vbar_el1);
}
-static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+static void __hyp_text __deactivate_traps_nvhe(void)
{
write_sysreg(HCR_RW, hcr_el2);
+ write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+}
+
+static hyp_alternate_select(__deactivate_traps_arch,
+ __deactivate_traps_nvhe, __deactivate_traps_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ __deactivate_traps_arch()();
write_sysreg(0, hstr_el2);
write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
- write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+ write_sysreg(0, pmuserenr_el0);
}
static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
@@ -89,6 +152,86 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
__vgic_call_restore_state()(vcpu);
}
+static bool __hyp_text __true_value(void)
+{
+ return true;
+}
+
+static bool __hyp_text __false_value(void)
+{
+ return false;
+}
+
+static hyp_alternate_select(__check_arm_834220,
+ __false_value, __true_value,
+ ARM64_WORKAROUND_834220);
+
+static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg(par_el1);
+ asm volatile("at s1e1r, %0" : : "r" (far));
+ isb();
+
+ tmp = read_sysreg(par_el1);
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & 1))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
+ return true;
+}
+
+static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u64 esr = read_sysreg_el2(esr);
+ u8 ec = esr >> ESR_ELx_EC_SHIFT;
+ u64 hpfar, far;
+
+ vcpu->arch.fault.esr_el2 = esr;
+
+ if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+ return true;
+
+ far = read_sysreg_el2(far);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ vcpu->arch.fault.far_el2 = far;
+ vcpu->arch.fault.hpfar_el2 = hpfar;
+ return true;
+}
+
static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
@@ -102,7 +245,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
guest_ctxt = &vcpu->arch.ctxt;
- __sysreg_save_state(host_ctxt);
+ __sysreg_save_host_state(host_ctxt);
__debug_cond_save_host_state(vcpu);
__activate_traps(vcpu);
@@ -116,16 +259,20 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
* to Cortex-A57 erratum #852523.
*/
__sysreg32_restore_state(vcpu);
- __sysreg_restore_state(guest_ctxt);
+ __sysreg_restore_guest_state(guest_ctxt);
__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
/* Jump in the fire! */
+again:
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
+ if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+ goto again;
+
fp_enabled = __fpsimd_enabled();
- __sysreg_save_state(guest_ctxt);
+ __sysreg_save_guest_state(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_save_state(vcpu);
__vgic_save_state(vcpu);
@@ -133,7 +280,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
- __sysreg_restore_state(host_ctxt);
+ __sysreg_restore_host_state(host_ctxt);
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
@@ -150,11 +297,34 @@ __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-void __hyp_text __noreturn __hyp_panic(void)
+static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
{
unsigned long str_va = (unsigned long)__hyp_panic_string;
- u64 spsr = read_sysreg(spsr_el2);
- u64 elr = read_sysreg(elr_el2);
+
+ __hyp_do_panic(hyp_kern_va(str_va),
+ spsr, elr,
+ read_sysreg(esr_el2), read_sysreg_el2(far),
+ read_sysreg(hpfar_el2), par,
+ (void *)read_sysreg(tpidr_el2));
+}
+
+static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
+{
+ panic(__hyp_panic_string,
+ spsr, elr,
+ read_sysreg_el2(esr), read_sysreg_el2(far),
+ read_sysreg(hpfar_el2), par,
+ (void *)read_sysreg(tpidr_el2));
+}
+
+static hyp_alternate_select(__hyp_call_panic,
+ __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __noreturn __hyp_panic(void)
+{
+ u64 spsr = read_sysreg_el2(spsr);
+ u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
@@ -165,15 +335,11 @@ void __hyp_text __noreturn __hyp_panic(void)
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
- __sysreg_restore_state(host_ctxt);
+ __sysreg_restore_host_state(host_ctxt);
}
/* Call panic for real */
- __hyp_do_panic(hyp_kern_va(str_va),
- spsr, elr,
- read_sysreg(esr_el2), read_sysreg(far_el2),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ __hyp_call_panic()(spsr, elr, par);
unreachable();
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 425630980229..0f7c40eb3f53 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,75 +19,122 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
+#include <asm/kvm_hyp.h>
-#include "hyp.h"
+/* Yes, this does nothing, on purpose */
+static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
-/* ctxt is already in the HYP VA space */
-void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+/*
+ * Non-VHE: Both host and guest must save everything.
+ *
+ * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and
+ * guest must save everything.
+ */
+
+static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
- ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
- ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1);
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
- ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1);
- ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1);
- ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1);
- ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1);
- ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1);
- ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1);
- ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1);
- ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1);
- ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1);
- ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1);
- ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
- ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1);
- ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1);
+ ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
+}
+
+static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
+ ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
+ ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
+ ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
+ ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
+ ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
+ ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr);
+ ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr);
+ ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0);
+ ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1);
+ ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far);
+ ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair);
+ ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar);
+ ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr);
+ ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
+ ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
- ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
- ctxt->gp_regs.regs.pc = read_sysreg(elr_el2);
- ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
- ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1);
- ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1);
+ ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
+ ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+}
+
+static hyp_alternate_select(__sysreg_call_save_host_state,
+ __sysreg_save_state, __sysreg_do_nothing,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_call_save_host_state()(ctxt);
+ __sysreg_save_common_state(ctxt);
+}
+
+void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_state(ctxt);
+ __sysreg_save_common_state(ctxt);
}
-void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
- write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1);
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
- write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1);
- write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1);
- write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1);
- write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1);
- write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1);
- write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1);
- write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1);
- write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1);
- write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1);
- write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1);
- write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
- write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1);
- write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1);
- write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
- write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
-
- write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
- write_sysreg(ctxt->gp_regs.regs.pc, elr_el2);
- write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2);
- write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
- write_sysreg(ctxt->gp_regs.elr_el1, elr_el1);
- write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1);
+ write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
+}
+
+static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
+ write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
+ write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
+ write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
+ write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr);
+ write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr);
+ write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0);
+ write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1);
+ write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far);
+ write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair);
+ write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar);
+ write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
+ write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
+ write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
+ write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
+ write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+
+ write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
+ write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
+ write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+}
+
+static hyp_alternate_select(__sysreg_call_restore_host_state,
+ __sysreg_restore_state, __sysreg_do_nothing,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_call_restore_host_state()(ctxt);
+ __sysreg_restore_common_state(ctxt);
+}
+
+void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
}
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c
deleted file mode 100644
index 1051e5d7320f..000000000000
--- a/arch/arm64/kvm/hyp/timer-sr.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <clocksource/arm_arch_timer.h>
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- u64 val;
-
- if (kvm->arch.timer.enabled) {
- timer->cntv_ctl = read_sysreg(cntv_ctl_el0);
- timer->cntv_cval = read_sysreg(cntv_cval_el0);
- }
-
- /* Disable the virtual timer */
- write_sysreg(0, cntv_ctl_el0);
-
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
-
- /* Clear cntvoff for the host */
- write_sysreg(0, cntvoff_el2);
-}
-
-void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- u64 val;
-
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
-
- if (kvm->arch.timer.enabled) {
- write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
- write_sysreg(timer->cntv_cval, cntv_cval_el0);
- isb();
- write_sysreg(timer->cntv_ctl, cntv_ctl_el0);
- }
-}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 2a7e0d838698..be8177cdd3bf 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -15,7 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "hyp.h"
+#include <asm/kvm_hyp.h>
static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c
deleted file mode 100644
index e71761238cfc..000000000000
--- a/arch/arm64/kvm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/compiler.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- u32 eisr0, eisr1, elrsr0, elrsr1;
- int i, nr_lr;
-
- if (!base)
- return;
-
- nr_lr = vcpu->arch.vgic_cpu.nr_lr;
- cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
- cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
- eisr0 = readl_relaxed(base + GICH_EISR0);
- elrsr0 = readl_relaxed(base + GICH_ELRSR0);
- if (unlikely(nr_lr > 32)) {
- eisr1 = readl_relaxed(base + GICH_EISR1);
- elrsr1 = readl_relaxed(base + GICH_ELRSR1);
- } else {
- eisr1 = elrsr1 = 0;
- }
-#ifdef CONFIG_CPU_BIG_ENDIAN
- cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
- cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
- cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
- cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
- cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
-
- writel_relaxed(0, base + GICH_HCR);
-
- for (i = 0; i < nr_lr; i++)
- cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- int i, nr_lr;
-
- if (!base)
- return;
-
- writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
- writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
- writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
-
- nr_lr = vcpu->arch.vgic_cpu.nr_lr;
- for (i = 0; i < nr_lr; i++)
- writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
-}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 5dd2a26444ec..fff7cd42b3a3 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -19,9 +19,7 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
-
-#include "hyp.h"
+#include <asm/kvm_hyp.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1)
@@ -39,12 +37,133 @@
asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
} while (0)
-/* vcpu is already in the HYP VA space */
+static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
+{
+ switch (lr & 0xf) {
+ case 0:
+ return read_gicreg(ICH_LR0_EL2);
+ case 1:
+ return read_gicreg(ICH_LR1_EL2);
+ case 2:
+ return read_gicreg(ICH_LR2_EL2);
+ case 3:
+ return read_gicreg(ICH_LR3_EL2);
+ case 4:
+ return read_gicreg(ICH_LR4_EL2);
+ case 5:
+ return read_gicreg(ICH_LR5_EL2);
+ case 6:
+ return read_gicreg(ICH_LR6_EL2);
+ case 7:
+ return read_gicreg(ICH_LR7_EL2);
+ case 8:
+ return read_gicreg(ICH_LR8_EL2);
+ case 9:
+ return read_gicreg(ICH_LR9_EL2);
+ case 10:
+ return read_gicreg(ICH_LR10_EL2);
+ case 11:
+ return read_gicreg(ICH_LR11_EL2);
+ case 12:
+ return read_gicreg(ICH_LR12_EL2);
+ case 13:
+ return read_gicreg(ICH_LR13_EL2);
+ case 14:
+ return read_gicreg(ICH_LR14_EL2);
+ case 15:
+ return read_gicreg(ICH_LR15_EL2);
+ }
+
+ unreachable();
+}
+
+static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
+{
+ switch (lr & 0xf) {
+ case 0:
+ write_gicreg(val, ICH_LR0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_LR1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_LR2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_LR3_EL2);
+ break;
+ case 4:
+ write_gicreg(val, ICH_LR4_EL2);
+ break;
+ case 5:
+ write_gicreg(val, ICH_LR5_EL2);
+ break;
+ case 6:
+ write_gicreg(val, ICH_LR6_EL2);
+ break;
+ case 7:
+ write_gicreg(val, ICH_LR7_EL2);
+ break;
+ case 8:
+ write_gicreg(val, ICH_LR8_EL2);
+ break;
+ case 9:
+ write_gicreg(val, ICH_LR9_EL2);
+ break;
+ case 10:
+ write_gicreg(val, ICH_LR10_EL2);
+ break;
+ case 11:
+ write_gicreg(val, ICH_LR11_EL2);
+ break;
+ case 12:
+ write_gicreg(val, ICH_LR12_EL2);
+ break;
+ case 13:
+ write_gicreg(val, ICH_LR13_EL2);
+ break;
+ case 14:
+ write_gicreg(val, ICH_LR14_EL2);
+ break;
+ case 15:
+ write_gicreg(val, ICH_LR15_EL2);
+ break;
+ }
+}
+
+static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ int i;
+ bool expect_mi;
+
+ expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE);
+
+ for (i = 0; i < nr_lr; i++) {
+ if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
+ continue;
+
+ expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) &&
+ (cpu_if->vgic_lr[i] & ICH_LR_EOI));
+ }
+
+ if (expect_mi) {
+ cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
+
+ if (cpu_if->vgic_misr & ICH_MISR_EOI)
+ cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
+ else
+ cpu_if->vgic_eisr = 0;
+ } else {
+ cpu_if->vgic_misr = 0;
+ cpu_if->vgic_eisr = 0;
+ }
+}
+
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
- u32 max_lr_idx, nr_pri_bits;
/*
* Make sure stores to the GIC via the memory mapped interface
@@ -53,68 +172,66 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
dsb(st);
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
- cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
- cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
- cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
- write_gicreg(0, ICH_HCR_EL2);
- val = read_gicreg(ICH_VTR_EL2);
- max_lr_idx = vtr_to_max_lr_idx(val);
- nr_pri_bits = vtr_to_nr_pri_bits(val);
+ if (vcpu->arch.vgic_cpu.live_lrs) {
+ int i;
+ u32 max_lr_idx, nr_pri_bits;
- switch (max_lr_idx) {
- case 15:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
- case 14:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
- case 13:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
- case 12:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
- case 11:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
- case 10:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
- case 9:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
- case 8:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
- case 7:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
- case 6:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
- case 5:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
- case 4:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
- case 3:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
- case 2:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
- case 1:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
- case 0:
- cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
- }
+ cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
- switch (nr_pri_bits) {
- case 7:
- cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
- cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
- case 6:
- cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
- default:
- cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
- }
+ write_gicreg(0, ICH_HCR_EL2);
+ val = read_gicreg(ICH_VTR_EL2);
+ max_lr_idx = vtr_to_max_lr_idx(val);
+ nr_pri_bits = vtr_to_nr_pri_bits(val);
- switch (nr_pri_bits) {
- case 7:
- cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
- cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
- case 6:
- cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
- default:
- cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+ save_maint_int_state(vcpu, max_lr_idx + 1);
+
+ for (i = 0; i <= max_lr_idx; i++) {
+ if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
+ continue;
+
+ if (cpu_if->vgic_elrsr & (1 << i)) {
+ cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
+ continue;
+ }
+
+ cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
+ __gic_v3_set_lr(0, i);
+ }
+
+ switch (nr_pri_bits) {
+ case 7:
+ cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
+ cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
+ case 6:
+ cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
+ default:
+ cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
+ }
+
+ switch (nr_pri_bits) {
+ case 7:
+ cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
+ cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
+ case 6:
+ cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
+ default:
+ cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+ }
+
+ vcpu->arch.vgic_cpu.live_lrs = 0;
+ } else {
+ cpu_if->vgic_misr = 0;
+ cpu_if->vgic_eisr = 0;
+ cpu_if->vgic_elrsr = 0xffff;
+ cpu_if->vgic_ap0r[0] = 0;
+ cpu_if->vgic_ap0r[1] = 0;
+ cpu_if->vgic_ap0r[2] = 0;
+ cpu_if->vgic_ap0r[3] = 0;
+ cpu_if->vgic_ap1r[0] = 0;
+ cpu_if->vgic_ap1r[1] = 0;
+ cpu_if->vgic_ap1r[2] = 0;
+ cpu_if->vgic_ap1r[3] = 0;
}
val = read_gicreg(ICC_SRE_EL2);
@@ -128,6 +245,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
u32 max_lr_idx, nr_pri_bits;
+ u16 live_lrs = 0;
+ int i;
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -140,66 +259,46 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
isb();
- write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
- write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
-
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
- switch (nr_pri_bits) {
- case 7:
- write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
- write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
- default:
- write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+ for (i = 0; i <= max_lr_idx; i++) {
+ if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
+ live_lrs |= (1 << i);
}
- switch (nr_pri_bits) {
- case 7:
- write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
- write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
- default:
- write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
- }
+ write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
- switch (max_lr_idx) {
- case 15:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
- case 14:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
- case 13:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
- case 12:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
- case 11:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
- case 10:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
- case 9:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
- case 8:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
- case 7:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
- case 5:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
- case 4:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
- case 3:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
- case 2:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
- case 1:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
- case 0:
- write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
+ if (live_lrs) {
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+
+ switch (nr_pri_bits) {
+ case 7:
+ write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
+ write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
+ case 6:
+ write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
+ default:
+ write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+ }
+
+ switch (nr_pri_bits) {
+ case 7:
+ write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+ write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+ case 6:
+ write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+ default:
+ write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+ }
+
+ for (i = 0; i <= max_lr_idx; i++) {
+ if (!(live_lrs & (1 << i)))
+ continue;
+
+ __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ }
}
/*
@@ -209,6 +308,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
*/
isb();
dsb(sy);
+ vcpu->arch.vgic_cpu.live_lrs = live_lrs;
/*
* Prevent the guest from touching the GIC system registers if
@@ -220,6 +320,15 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
}
}
+void __hyp_text __vgic_v3_init_lrs(void)
+{
+ int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
+ int i;
+
+ for (i = 0; i <= max_lr_idx; i++)
+ __gic_v3_set_lr(0, i);
+}
+
static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void)
{
return read_gicreg(ICH_VTR_EL2);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f34745cb3d23..9677bf069bcc 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -77,7 +77,11 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
case KVM_CAP_GUEST_DEBUG_HW_WPS:
r = get_num_wrps();
break;
+ case KVM_CAP_ARM_PMU_V3:
+ r = kvm_arm_support_pmu_v3();
+ break;
case KVM_CAP_SET_GUEST_DEBUG:
+ case KVM_CAP_VCPU_ATTRIBUTES:
r = 1;
break;
default:
@@ -120,6 +124,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
+ /* Reset PMU */
+ kvm_pmu_vcpu_reset(vcpu);
+
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2e90371cfb37..7bbe3ff02602 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -20,6 +20,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bsearch.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
@@ -34,6 +35,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>
+#include <asm/perf_event.h>
#include <trace/events/kvm.h>
@@ -439,6 +441,344 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
}
+static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 pmcr, val;
+
+ asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
+ /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN
+ * except PMCR.E resetting to zero.
+ */
+ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
+ | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+ vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+}
+
+static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu));
+}
+
+static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN))
+ || vcpu_mode_priv(vcpu));
+}
+
+static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN))
+ || vcpu_mode_priv(vcpu));
+}
+
+static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu)
+{
+ u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+
+ return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN))
+ || vcpu_mode_priv(vcpu));
+}
+
+static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 val;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ /* Only update writeable bits of PMCR */
+ val = vcpu_sys_reg(vcpu, PMCR_EL0);
+ val &= ~ARMV8_PMU_PMCR_MASK;
+ val |= p->regval & ARMV8_PMU_PMCR_MASK;
+ vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ kvm_pmu_handle_pmcr(vcpu, val);
+ } else {
+ /* PMCR.P & PMCR.C are RAZ */
+ val = vcpu_sys_reg(vcpu, PMCR_EL0)
+ & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
+ p->regval = val;
+ }
+
+ return true;
+}
+
+static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write)
+ vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ else
+ /* return PMSELR.SEL field */
+ p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ & ARMV8_PMU_COUNTER_MASK;
+
+ return true;
+}
+
+static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 pmceid;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ BUG_ON(p->is_write);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (!(p->Op2 & 1))
+ asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));
+ else
+ asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
+
+ p->regval = pmceid;
+
+ return true;
+}
+
+static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
+{
+ u64 pmcr, val;
+
+ pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
+ val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX)
+ return false;
+
+ return true;
+}
+
+static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 idx;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (r->CRn == 9 && r->CRm == 13) {
+ if (r->Op2 == 2) {
+ /* PMXEVCNTR_EL0 */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ & ARMV8_PMU_COUNTER_MASK;
+ } else if (r->Op2 == 0) {
+ /* PMCCNTR_EL0 */
+ if (pmu_access_cycle_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ARMV8_PMU_CYCLE_IDX;
+ } else {
+ BUG();
+ }
+ } else if (r->CRn == 14 && (r->CRm & 12) == 8) {
+ /* PMEVCNTRn_EL0 */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+ } else {
+ BUG();
+ }
+
+ if (!pmu_counter_idx_valid(vcpu, idx))
+ return false;
+
+ if (p->is_write) {
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ kvm_pmu_set_counter_value(vcpu, idx, p->regval);
+ } else {
+ p->regval = kvm_pmu_get_counter_value(vcpu, idx);
+ }
+
+ return true;
+}
+
+static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 idx, reg;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
+ /* PMXEVTYPER_EL0 */
+ idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ reg = PMEVTYPER0_EL0 + idx;
+ } else if (r->CRn == 14 && (r->CRm & 12) == 12) {
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ reg = PMCCFILTR_EL0;
+ else
+ /* PMEVTYPERn_EL0 */
+ reg = PMEVTYPER0_EL0 + idx;
+ } else {
+ BUG();
+ }
+
+ if (!pmu_counter_idx_valid(vcpu, idx))
+ return false;
+
+ if (p->is_write) {
+ kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
+ vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ }
+
+ return true;
+}
+
+static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 val, mask;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ mask = kvm_pmu_valid_counter_mask(vcpu);
+ if (p->is_write) {
+ val = p->regval & mask;
+ if (r->Op2 & 0x1) {
+ /* accessing PMCNTENSET_EL0 */
+ vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ kvm_pmu_enable_counter(vcpu, val);
+ } else {
+ /* accessing PMCNTENCLR_EL0 */
+ vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ kvm_pmu_disable_counter(vcpu, val);
+ }
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (!vcpu_mode_priv(vcpu))
+ return false;
+
+ if (p->is_write) {
+ u64 val = p->regval & mask;
+
+ if (r->Op2 & 0x1)
+ /* accessing PMINTENSET_EL1 */
+ vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ else
+ /* accessing PMINTENCLR_EL1 */
+ vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_access_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ if (r->CRm & 0x2)
+ /* accessing PMOVSSET_EL0 */
+ kvm_pmu_overflow_set(vcpu, p->regval & mask);
+ else
+ /* accessing PMOVSCLR_EL0 */
+ vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ }
+
+ return true;
+}
+
+static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 mask;
+
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (pmu_write_swinc_el0_disabled(vcpu))
+ return false;
+
+ if (p->is_write) {
+ mask = kvm_pmu_valid_counter_mask(vcpu);
+ kvm_pmu_software_increment(vcpu, p->regval & mask);
+ return true;
+ }
+
+ return false;
+}
+
+static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!kvm_arm_pmu_v3_ready(vcpu))
+ return trap_raz_wi(vcpu, p, r);
+
+ if (p->is_write) {
+ if (!vcpu_mode_priv(vcpu))
+ return false;
+
+ vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
+ & ARMV8_PMU_USERENR_MASK;
+ } else {
+ p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+ & ARMV8_PMU_USERENR_MASK;
+ }
+
+ return true;
+}
+
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
/* DBGBVRn_EL1 */ \
@@ -454,6 +794,20 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
+/* Macro to expand the PMEVCNTRn_EL0 register */
+#define PMU_PMEVCNTR_EL0(n) \
+ /* PMEVCNTRn_EL0 */ \
+ { Op0(0b11), Op1(0b011), CRn(0b1110), \
+ CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), }
+
+/* Macro to expand the PMEVTYPERn_EL0 register */
+#define PMU_PMEVTYPER_EL0(n) \
+ /* PMEVTYPERn_EL0 */ \
+ { Op0(0b11), Op1(0b011), CRn(0b1110), \
+ CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -583,10 +937,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMINTENSET_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
- trap_raz_wi },
+ access_pminten, reset_unknown, PMINTENSET_EL1 },
/* PMINTENCLR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
- trap_raz_wi },
+ access_pminten, NULL, PMINTENSET_EL1 },
/* MAIR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@@ -623,43 +977,46 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMCR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
- trap_raz_wi },
+ access_pmcr, reset_pmcr, },
/* PMCNTENSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
- trap_raz_wi },
+ access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
/* PMCNTENCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
- trap_raz_wi },
+ access_pmcnten, NULL, PMCNTENSET_EL0 },
/* PMOVSCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
- trap_raz_wi },
+ access_pmovs, NULL, PMOVSSET_EL0 },
/* PMSWINC_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
- trap_raz_wi },
+ access_pmswinc, reset_unknown, PMSWINC_EL0 },
/* PMSELR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
- trap_raz_wi },
+ access_pmselr, reset_unknown, PMSELR_EL0 },
/* PMCEID0_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
- trap_raz_wi },
+ access_pmceid },
/* PMCEID1_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
- trap_raz_wi },
+ access_pmceid },
/* PMCCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
- trap_raz_wi },
+ access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
/* PMXEVTYPER_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
- trap_raz_wi },
+ access_pmu_evtyper },
/* PMXEVCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
- trap_raz_wi },
- /* PMUSERENR_EL0 */
+ access_pmu_evcntr },
+ /* PMUSERENR_EL0
+ * This register resets as unknown in 64bit mode while it resets as zero
+ * in 32bit mode. Here we choose to reset it as zero for consistency.
+ */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
- trap_raz_wi },
+ access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
/* PMOVSSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
- trap_raz_wi },
+ access_pmovs, reset_unknown, PMOVSSET_EL0 },
/* TPIDR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@@ -668,6 +1025,77 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
NULL, reset_unknown, TPIDRRO_EL0 },
+ /* PMEVCNTRn_EL0 */
+ PMU_PMEVCNTR_EL0(0),
+ PMU_PMEVCNTR_EL0(1),
+ PMU_PMEVCNTR_EL0(2),
+ PMU_PMEVCNTR_EL0(3),
+ PMU_PMEVCNTR_EL0(4),
+ PMU_PMEVCNTR_EL0(5),
+ PMU_PMEVCNTR_EL0(6),
+ PMU_PMEVCNTR_EL0(7),
+ PMU_PMEVCNTR_EL0(8),
+ PMU_PMEVCNTR_EL0(9),
+ PMU_PMEVCNTR_EL0(10),
+ PMU_PMEVCNTR_EL0(11),
+ PMU_PMEVCNTR_EL0(12),
+ PMU_PMEVCNTR_EL0(13),
+ PMU_PMEVCNTR_EL0(14),
+ PMU_PMEVCNTR_EL0(15),
+ PMU_PMEVCNTR_EL0(16),
+ PMU_PMEVCNTR_EL0(17),
+ PMU_PMEVCNTR_EL0(18),
+ PMU_PMEVCNTR_EL0(19),
+ PMU_PMEVCNTR_EL0(20),
+ PMU_PMEVCNTR_EL0(21),
+ PMU_PMEVCNTR_EL0(22),
+ PMU_PMEVCNTR_EL0(23),
+ PMU_PMEVCNTR_EL0(24),
+ PMU_PMEVCNTR_EL0(25),
+ PMU_PMEVCNTR_EL0(26),
+ PMU_PMEVCNTR_EL0(27),
+ PMU_PMEVCNTR_EL0(28),
+ PMU_PMEVCNTR_EL0(29),
+ PMU_PMEVCNTR_EL0(30),
+ /* PMEVTYPERn_EL0 */
+ PMU_PMEVTYPER_EL0(0),
+ PMU_PMEVTYPER_EL0(1),
+ PMU_PMEVTYPER_EL0(2),
+ PMU_PMEVTYPER_EL0(3),
+ PMU_PMEVTYPER_EL0(4),
+ PMU_PMEVTYPER_EL0(5),
+ PMU_PMEVTYPER_EL0(6),
+ PMU_PMEVTYPER_EL0(7),
+ PMU_PMEVTYPER_EL0(8),
+ PMU_PMEVTYPER_EL0(9),
+ PMU_PMEVTYPER_EL0(10),
+ PMU_PMEVTYPER_EL0(11),
+ PMU_PMEVTYPER_EL0(12),
+ PMU_PMEVTYPER_EL0(13),
+ PMU_PMEVTYPER_EL0(14),
+ PMU_PMEVTYPER_EL0(15),
+ PMU_PMEVTYPER_EL0(16),
+ PMU_PMEVTYPER_EL0(17),
+ PMU_PMEVTYPER_EL0(18),
+ PMU_PMEVTYPER_EL0(19),
+ PMU_PMEVTYPER_EL0(20),
+ PMU_PMEVTYPER_EL0(21),
+ PMU_PMEVTYPER_EL0(22),
+ PMU_PMEVTYPER_EL0(23),
+ PMU_PMEVTYPER_EL0(24),
+ PMU_PMEVTYPER_EL0(25),
+ PMU_PMEVTYPER_EL0(26),
+ PMU_PMEVTYPER_EL0(27),
+ PMU_PMEVTYPER_EL0(28),
+ PMU_PMEVTYPER_EL0(29),
+ PMU_PMEVTYPER_EL0(30),
+ /* PMCCFILTR_EL0
+ * This register resets as unknown in 64bit mode while it resets as zero
+ * in 32bit mode. Here we choose to reset it as zero for consistency.
+ */
+ { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111),
+ access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
+
/* DACR32_EL2 */
{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
NULL, reset_unknown, DACR32_EL2 },
@@ -688,7 +1116,7 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
} else {
u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
- u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
+ u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);
p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
(((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
@@ -857,6 +1285,20 @@ static const struct sys_reg_desc cp14_64_regs[] = {
{ Op1( 0), CRm( 2), .access = trap_raz_wi },
};
+/* Macro to expand the PMEVCNTRn register */
+#define PMU_PMEVCNTR(n) \
+ /* PMEVCNTRn */ \
+ { Op1(0), CRn(0b1110), \
+ CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evcntr }
+
+/* Macro to expand the PMEVTYPERn register */
+#define PMU_PMEVTYPER(n) \
+ /* PMEVTYPERn */ \
+ { Op1(0), CRn(0b1110), \
+ CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_evtyper }
+
/*
* Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
* depending on the way they are accessed (as a 32bit or a 64bit
@@ -885,19 +1327,21 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
/* PMU */
- { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
- { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
@@ -908,10 +1352,78 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+
+ /* PMEVCNTRn */
+ PMU_PMEVCNTR(0),
+ PMU_PMEVCNTR(1),
+ PMU_PMEVCNTR(2),
+ PMU_PMEVCNTR(3),
+ PMU_PMEVCNTR(4),
+ PMU_PMEVCNTR(5),
+ PMU_PMEVCNTR(6),
+ PMU_PMEVCNTR(7),
+ PMU_PMEVCNTR(8),
+ PMU_PMEVCNTR(9),
+ PMU_PMEVCNTR(10),
+ PMU_PMEVCNTR(11),
+ PMU_PMEVCNTR(12),
+ PMU_PMEVCNTR(13),
+ PMU_PMEVCNTR(14),
+ PMU_PMEVCNTR(15),
+ PMU_PMEVCNTR(16),
+ PMU_PMEVCNTR(17),
+ PMU_PMEVCNTR(18),
+ PMU_PMEVCNTR(19),
+ PMU_PMEVCNTR(20),
+ PMU_PMEVCNTR(21),
+ PMU_PMEVCNTR(22),
+ PMU_PMEVCNTR(23),
+ PMU_PMEVCNTR(24),
+ PMU_PMEVCNTR(25),
+ PMU_PMEVCNTR(26),
+ PMU_PMEVCNTR(27),
+ PMU_PMEVCNTR(28),
+ PMU_PMEVCNTR(29),
+ PMU_PMEVCNTR(30),
+ /* PMEVTYPERn */
+ PMU_PMEVTYPER(0),
+ PMU_PMEVTYPER(1),
+ PMU_PMEVTYPER(2),
+ PMU_PMEVTYPER(3),
+ PMU_PMEVTYPER(4),
+ PMU_PMEVTYPER(5),
+ PMU_PMEVTYPER(6),
+ PMU_PMEVTYPER(7),
+ PMU_PMEVTYPER(8),
+ PMU_PMEVTYPER(9),
+ PMU_PMEVTYPER(10),
+ PMU_PMEVTYPER(11),
+ PMU_PMEVTYPER(12),
+ PMU_PMEVTYPER(13),
+ PMU_PMEVTYPER(14),
+ PMU_PMEVTYPER(15),
+ PMU_PMEVTYPER(16),
+ PMU_PMEVTYPER(17),
+ PMU_PMEVTYPER(18),
+ PMU_PMEVTYPER(19),
+ PMU_PMEVTYPER(20),
+ PMU_PMEVTYPER(21),
+ PMU_PMEVTYPER(22),
+ PMU_PMEVTYPER(23),
+ PMU_PMEVTYPER(24),
+ PMU_PMEVTYPER(25),
+ PMU_PMEVTYPER(26),
+ PMU_PMEVTYPER(27),
+ PMU_PMEVTYPER(28),
+ PMU_PMEVTYPER(29),
+ PMU_PMEVTYPER(30),
+ /* PMCCFILTR */
+ { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
};
static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
};
@@ -942,29 +1454,32 @@ static const struct sys_reg_desc *get_target_table(unsigned target,
}
}
+#define reg_to_match_value(x) \
+ ({ \
+ unsigned long val; \
+ val = (x)->Op0 << 14; \
+ val |= (x)->Op1 << 11; \
+ val |= (x)->CRn << 7; \
+ val |= (x)->CRm << 3; \
+ val |= (x)->Op2; \
+ val; \
+ })
+
+static int match_sys_reg(const void *key, const void *elt)
+{
+ const unsigned long pval = (unsigned long)key;
+ const struct sys_reg_desc *r = elt;
+
+ return pval - reg_to_match_value(r);
+}
+
static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
const struct sys_reg_desc table[],
unsigned int num)
{
- unsigned int i;
-
- for (i = 0; i < num; i++) {
- const struct sys_reg_desc *r = &table[i];
-
- if (params->Op0 != r->Op0)
- continue;
- if (params->Op1 != r->Op1)
- continue;
- if (params->CRn != r->CRn)
- continue;
- if (params->CRm != r->CRm)
- continue;
- if (params->Op2 != r->Op2)
- continue;
+ unsigned long pval = reg_to_match_value(params);
- return r;
- }
- return NULL;
+ return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 1a811ecf71da..c86b7909ef31 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -4,15 +4,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o
-# Tell the compiler to treat all general purpose registers as
-# callee-saved, which allows for efficient runtime patching of the bl
-# instruction in the caller with an atomic instruction when supported by
-# the CPU. Result and argument registers are handled correctly, based on
-# the function prototype.
+# Tell the compiler to treat all general purpose registers (with the
+# exception of the IP registers, which are already handled by the caller
+# in case of a PLT) as callee-saved, which allows for efficient runtime
+# patching of the bl instruction in the caller with an atomic instruction
+# when supported by the CPU. Result and argument registers are handled
+# correctly, based on the function prototype.
lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \
-ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \
-ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \
-fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
-fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
- -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
+ -fcall-saved-x18
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index a9723c71c52b..5d1cad3ce6d6 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -33,28 +33,28 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
1:
-USER(9f, str xzr, [x0], #8 )
+uao_user_alternative 9f, str, sttr, xzr, x0, 8
subs x1, x1, #8
b.pl 1b
2: adds x1, x1, #4
b.mi 3f
-USER(9f, str wzr, [x0], #4 )
+uao_user_alternative 9f, str, sttr, wzr, x0, 4
sub x1, x1, #4
3: adds x1, x1, #2
b.mi 4f
-USER(9f, strh wzr, [x0], #2 )
+uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
sub x1, x1, #2
4: adds x1, x1, #1
b.mi 5f
-USER(9f, strb wzr, [x0] )
+uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
ret
ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4699cd74f87e..17e8306dca29 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -34,7 +34,7 @@
*/
.macro ldrb1 ptr, regB, val
- USER(9998f, ldrb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
.endm
.macro strb1 ptr, regB, val
@@ -42,7 +42,7 @@
.endm
.macro ldrh1 ptr, regB, val
- USER(9998f, ldrh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
.endm
.macro strh1 ptr, regB, val
@@ -50,7 +50,7 @@
.endm
.macro ldr1 ptr, regB, val
- USER(9998f, ldr \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
.endm
.macro str1 ptr, regB, val
@@ -58,7 +58,7 @@
.endm
.macro ldp1 ptr, regB, regC, val
- USER(9998f, ldp \ptr, \regB, [\regC], \val)
+ uao_ldp 9998f, \ptr, \regB, \regC, \val
.endm
.macro stp1 ptr, regB, regC, val
@@ -67,11 +67,11 @@
end .req x5
ENTRY(__copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x0, #0 // Nothing to copy
ret
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 81c8fc93c100..f7292dd08c84 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -35,44 +35,44 @@
* x0 - bytes not copied
*/
.macro ldrb1 ptr, regB, val
- USER(9998f, ldrb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
.endm
.macro strb1 ptr, regB, val
- USER(9998f, strb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
.endm
.macro ldrh1 ptr, regB, val
- USER(9998f, ldrh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
.endm
.macro strh1 ptr, regB, val
- USER(9998f, strh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
.endm
.macro ldr1 ptr, regB, val
- USER(9998f, ldr \ptr, [\regB], \val)
+ uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
.endm
.macro str1 ptr, regB, val
- USER(9998f, str \ptr, [\regB], \val)
+ uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
.endm
.macro ldp1 ptr, regB, regC, val
- USER(9998f, ldp \ptr, \regB, [\regC], \val)
+ uao_ldp 9998f, \ptr, \regB, \regC, \val
.endm
.macro stp1 ptr, regB, regC, val
- USER(9998f, stp \ptr, \regB, [\regC], \val)
+ uao_stp 9998f, \ptr, \regB, \regC, \val
.endm
end .req x5
ENTRY(__copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x0, #0
ret
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 512b9a7b980e..4c1e700840b6 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,8 @@
#include <linux/const.h>
#include <asm/assembler.h>
#include <asm/page.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
/*
* Copy a page from src to dest (both are page aligned)
@@ -27,20 +29,65 @@
* x1 - src
*/
ENTRY(copy_page)
- /* Assume cache line size is 64 bytes. */
- prfm pldl1strm, [x1, #64]
-1: ldp x2, x3, [x1]
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+ nop
+ nop
+alternative_else
+ # Prefetch two cache lines ahead.
+ prfm pldl1strm, [x1, #128]
+ prfm pldl1strm, [x1, #256]
+alternative_endif
+
+ ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
ldp x6, x7, [x1, #32]
ldp x8, x9, [x1, #48]
- add x1, x1, #64
- prfm pldl1strm, [x1, #64]
+ ldp x10, x11, [x1, #64]
+ ldp x12, x13, [x1, #80]
+ ldp x14, x15, [x1, #96]
+ ldp x16, x17, [x1, #112]
+
+ mov x18, #(PAGE_SIZE - 128)
+ add x1, x1, #128
+1:
+ subs x18, x18, #128
+
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+ nop
+alternative_else
+ prfm pldl1strm, [x1, #384]
+alternative_endif
+
stnp x2, x3, [x0]
+ ldp x2, x3, [x1]
stnp x4, x5, [x0, #16]
+ ldp x4, x5, [x1, #16]
stnp x6, x7, [x0, #32]
+ ldp x6, x7, [x1, #32]
stnp x8, x9, [x0, #48]
- add x0, x0, #64
- tst x1, #(PAGE_SIZE - 1)
- b.ne 1b
+ ldp x8, x9, [x1, #48]
+ stnp x10, x11, [x0, #64]
+ ldp x10, x11, [x1, #64]
+ stnp x12, x13, [x0, #80]
+ ldp x12, x13, [x1, #80]
+ stnp x14, x15, [x0, #96]
+ ldp x14, x15, [x1, #96]
+ stnp x16, x17, [x0, #112]
+ ldp x16, x17, [x1, #112]
+
+ add x0, x0, #128
+ add x1, x1, #128
+
+ b.gt 1b
+
+ stnp x2, x3, [x0]
+ stnp x4, x5, [x0, #16]
+ stnp x6, x7, [x0, #32]
+ stnp x8, x9, [x0, #48]
+ stnp x10, x11, [x0, #64]
+ stnp x12, x13, [x0, #80]
+ stnp x14, x15, [x0, #96]
+ stnp x16, x17, [x0, #112]
+
ret
ENDPROC(copy_page)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7512bbbc07ac..21faae60f988 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -37,7 +37,7 @@
.endm
.macro strb1 ptr, regB, val
- USER(9998f, strb \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
.endm
.macro ldrh1 ptr, regB, val
@@ -45,7 +45,7 @@
.endm
.macro strh1 ptr, regB, val
- USER(9998f, strh \ptr, [\regB], \val)
+ uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
.endm
.macro ldr1 ptr, regB, val
@@ -53,7 +53,7 @@
.endm
.macro str1 ptr, regB, val
- USER(9998f, str \ptr, [\regB], \val)
+ uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
.endm
.macro ldp1 ptr, regB, regC, val
@@ -61,16 +61,16 @@
.endm
.macro stp1 ptr, regB, regC, val
- USER(9998f, stp \ptr, \regB, [\regC], \val)
+ uao_stp 9998f, \ptr, \regB, \regC, \val
.endm
end .req x5
ENTRY(__copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
CONFIG_ARM64_PAN)
mov x0, #0
ret
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index ffbdec00327d..2a4e239bd17a 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -211,7 +211,7 @@ CPU_LE( lsr tmp2, tmp2, tmp1 )
.Lunequal_proc:
cbz diff, .Lremain8
-/*There is differnence occured in the latest comparison.*/
+/* There is difference occurred in the latest comparison. */
.Lnot_limit:
/*
* For little endian,reverse the low significant equal bits into MSB,then
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e87f53ff5f58..c90c3c5f46af 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -24,6 +24,7 @@
#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
+#include <asm/smp.h>
#include <asm/tlbflush.h>
static u32 asid_bits;
@@ -40,6 +41,45 @@ static cpumask_t tlb_flush_pending;
#define ASID_FIRST_VERSION (1UL << asid_bits)
#define NUM_USER_ASIDS ASID_FIRST_VERSION
+/* Get the ASIDBits supported by the current CPU */
+static u32 get_cpu_asid_bits(void)
+{
+ u32 asid;
+ int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1),
+ ID_AA64MMFR0_ASID_SHIFT);
+
+ switch (fld) {
+ default:
+ pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n",
+ smp_processor_id(), fld);
+ /* Fallthrough */
+ case 0:
+ asid = 8;
+ break;
+ case 2:
+ asid = 16;
+ }
+
+ return asid;
+}
+
+/* Check if the current cpu's ASIDBits is compatible with asid_bits */
+void verify_cpu_asid_bits(void)
+{
+ u32 asid = get_cpu_asid_bits();
+
+ if (asid < asid_bits) {
+ /*
+ * We cannot decrease the ASID size at runtime, so panic if we support
+ * fewer ASID bits than the boot CPU.
+ */
+ pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
+ smp_processor_id(), asid, asid_bits);
+ update_cpu_boot_status(CPU_PANIC_KERNEL);
+ cpu_park_loop();
+ }
+}
+
static void flush_context(unsigned int cpu)
{
int i;
@@ -187,19 +227,7 @@ switch_mm_fastpath:
static int asids_init(void)
{
- int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
-
- switch (fld) {
- default:
- pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
- /* Fallthrough */
- case 0:
- asid_bits = 8;
- break;
- case 2:
- asid_bits = 16;
- }
-
+ asid_bits = get_cpu_asid_bits();
/* If we end up with more CPUs than ASIDs, expect things to crash */
WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
atomic64_set(&asid_generation, ASID_FIRST_VERSION);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0adbebbc2803..f9271cb2f5e3 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -27,15 +27,15 @@
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
-#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS)
-
struct addr_marker {
unsigned long start_address;
const char *name;
};
enum address_markers_idx {
- VMALLOC_START_NR = 0,
+ MODULES_START_NR = 0,
+ MODULES_END_NR,
+ VMALLOC_START_NR,
VMALLOC_END_NR,
#ifdef CONFIG_SPARSEMEM_VMEMMAP
VMEMMAP_START_NR,
@@ -45,12 +45,12 @@ enum address_markers_idx {
FIXADDR_END_NR,
PCI_START_NR,
PCI_END_NR,
- MODULES_START_NR,
- MODULES_END_NR,
KERNEL_SPACE_NR,
};
static struct addr_marker address_markers[] = {
+ { MODULES_VADDR, "Modules start" },
+ { MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() Area" },
{ VMALLOC_END, "vmalloc() End" },
#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -61,9 +61,7 @@ static struct addr_marker address_markers[] = {
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
- { MODULES_VADDR, "Modules start" },
- { MODULES_END, "Modules end" },
- { PAGE_OFFSET, "Kernel Mapping" },
+ { PAGE_OFFSET, "Linear Mapping" },
{ -1, NULL },
};
@@ -90,6 +88,11 @@ struct prot_bits {
static const struct prot_bits pte_bits[] = {
{
+ .mask = PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "F",
+ }, {
.mask = PTE_USER,
.val = PTE_USER,
.set = "USR",
@@ -316,7 +319,7 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = address_markers,
};
- walk_pgd(&st, &init_mm, LOWEST_ADDR);
+ walk_pgd(&st, &init_mm, VA_START);
note_page(&st, 0, 0, 0);
return 0;
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 79444279ba8c..81acd4706878 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
fixup = search_exception_tables(instruction_pointer(regs));
if (fixup)
- regs->pc = fixup->fixup;
+ regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
return fixup != NULL;
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index abe2a9542b3a..95df28bc875f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -192,6 +192,14 @@ out:
return fault;
}
+static inline int permission_fault(unsigned int esr)
+{
+ unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
+ unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+ return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+}
+
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -225,12 +233,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- /*
- * PAN bit set implies the fault happened in kernel space, but not
- * in the arch's user access functions.
- */
- if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
- goto no_context;
+ if (permission_fault(esr) && (addr < USER_DS)) {
+ if (get_fs() == KERNEL_DS)
+ die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+
+ if (!search_exception_tables(regs->pc))
+ die("Accessing user space memory outside uaccess.h routines", regs, esr);
+ }
/*
* As per x86, we may deadlock here. However, since the kernel only
@@ -295,7 +304,7 @@ retry:
up_read(&mm->mmap_sem);
/*
- * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
+ * Handle the "normal" case first - VM_FAULT_MAJOR
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
VM_FAULT_BADACCESS))))
@@ -568,3 +577,16 @@ void cpu_enable_pan(void *__unused)
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
}
#endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(), devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+void cpu_enable_uao(void *__unused)
+{
+ asm(SET_PSTATE_UAO(1));
+}
+#endif /* CONFIG_ARM64_UAO */
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 82d607c3614e..589fd28e1fb5 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -124,7 +124,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
* will be no pte_unmap() to correspond with this
* pte_alloc_map().
*/
- pte = pte_alloc_map(mm, NULL, pmd, addr);
+ pte = pte_alloc_map(mm, pmd, addr);
} else if (sz == PMD_SIZE) {
if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
pud_none(*pud))
@@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt)
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else if (ps == (PAGE_SIZE * CONT_PTES)) {
- hugetlb_add_hstate(CONT_PTE_SHIFT);
- } else if (ps == (PMD_SIZE * CONT_PMDS)) {
- hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
} else {
pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
return 0;
@@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt)
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
- if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
- hugetlb_add_hstate(CONT_PMD_SHIFT);
- return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7802f216a67a..61a38eaf0895 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -35,7 +35,10 @@
#include <linux/efi.h>
#include <linux/swiotlb.h>
+#include <asm/boot.h>
#include <asm/fixmap.h>
+#include <asm/kasan.h>
+#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -45,7 +48,13 @@
#include "mm.h"
-phys_addr_t memstart_addr __read_mostly = 0;
+/*
+ * We need to be able to catch inadvertent references to memstart_addr
+ * that occur (potentially in generic code) before arm64_memblock_init()
+ * executes, which assigns it its actual value. So use a default value
+ * that cannot be mistaken for a real physical address.
+ */
+s64 memstart_addr __read_mostly = -1;
phys_addr_t arm64_dma_phys_limit __read_mostly;
#ifdef CONFIG_BLK_DEV_INITRD
@@ -58,8 +67,8 @@ static int __init early_initrd(char *p)
if (*endp == ',') {
size = memparse(endp + 1, NULL);
- initrd_start = (unsigned long)__va(start);
- initrd_end = (unsigned long)__va(start + size);
+ initrd_start = start;
+ initrd_end = start + size;
}
return 0;
}
@@ -159,7 +168,57 @@ early_param("mem", early_mem);
void __init arm64_memblock_init(void)
{
- memblock_enforce_memory_limit(memory_limit);
+ const s64 linear_region_size = -(s64)PAGE_OFFSET;
+
+ /*
+ * Ensure that the linear region takes up exactly half of the kernel
+ * virtual address space. This way, we can distinguish a linear address
+ * from a kernel/module/vmalloc address by testing a single bit.
+ */
+ BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1));
+
+ /*
+ * Select a suitable value for the base of physical memory.
+ */
+ memstart_addr = round_down(memblock_start_of_DRAM(),
+ ARM64_MEMSTART_ALIGN);
+
+ /*
+ * Remove the memory that we will not be able to cover with the
+ * linear mapping. Take care not to clip the kernel which may be
+ * high in memory.
+ */
+ memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
+ ULLONG_MAX);
+ if (memblock_end_of_DRAM() > linear_region_size)
+ memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+
+ /*
+ * Apply the memory limit if it was set. Since the kernel may be loaded
+ * high up in memory, add back the kernel region that must be accessible
+ * via the linear mapping.
+ */
+ if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+ memblock_enforce_memory_limit(memory_limit);
+ memblock_add(__pa(_text), (u64)(_end - _text));
+ }
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ extern u16 memstart_offset_seed;
+ u64 range = linear_region_size -
+ (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+
+ /*
+ * If the size of the linear region exceeds, by a sufficient
+ * margin, the size of the region that the available physical
+ * memory spans, randomize the linear region as well.
+ */
+ if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+ range = range / ARM64_MEMSTART_ALIGN + 1;
+ memstart_addr -= ARM64_MEMSTART_ALIGN *
+ ((range * memstart_offset_seed) >> 16);
+ }
+ }
/*
* Register the kernel text, kernel data, initrd, and initial
@@ -167,8 +226,13 @@ void __init arm64_memblock_init(void)
*/
memblock_reserve(__pa(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
- if (initrd_start)
- memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
+ if (initrd_start) {
+ memblock_reserve(initrd_start, initrd_end - initrd_start);
+
+ /* the generic initrd code expects virtual addresses */
+ initrd_start = __phys_to_virt(initrd_start);
+ initrd_end = __phys_to_virt(initrd_end);
+ }
#endif
early_init_fdt_scan_reserved_mem();
@@ -302,35 +366,38 @@ void __init mem_init(void)
#ifdef CONFIG_KASAN
" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n"
#endif
+ " modules : 0x%16lx - 0x%16lx (%6ld MB)\n"
" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
+ " .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
+ " .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n"
+ " .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
+ " .data : 0x%p" " - 0x%p" " (%6ld KB)\n"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
" 0x%16lx - 0x%16lx (%6ld MB actual)\n"
#endif
" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n"
" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n"
- " modules : 0x%16lx - 0x%16lx (%6ld MB)\n"
- " memory : 0x%16lx - 0x%16lx (%6ld MB)\n"
- " .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
- " .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
- " .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
+ " memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
#ifdef CONFIG_KASAN
MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
#endif
+ MLM(MODULES_VADDR, MODULES_END),
MLG(VMALLOC_START, VMALLOC_END),
+ MLK_ROUNDUP(_text, __start_rodata),
+ MLK_ROUNDUP(__start_rodata, _etext),
+ MLK_ROUNDUP(__init_begin, __init_end),
+ MLK_ROUNDUP(_sdata, _edata),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
MLG(VMEMMAP_START,
VMEMMAP_START + VMEMMAP_SIZE),
- MLM((unsigned long)virt_to_page(PAGE_OFFSET),
+ MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
(unsigned long)virt_to_page(high_memory)),
#endif
MLK(FIXADDR_START, FIXADDR_TOP),
MLM(PCI_IO_START, PCI_IO_END),
- MLM(MODULES_VADDR, MODULES_END),
- MLM(PAGE_OFFSET, (unsigned long)high_memory),
- MLK_ROUNDUP(__init_begin, __init_end),
- MLK_ROUNDUP(_text, _etext),
- MLK_ROUNDUP(_sdata, _edata));
+ MLM(__phys_to_virt(memblock_start_of_DRAM()),
+ (unsigned long)high_memory));
#undef MLK
#undef MLM
@@ -343,8 +410,6 @@ void __init mem_init(void)
#ifdef CONFIG_COMPAT
BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64);
#endif
- BUILD_BUG_ON(TASK_SIZE_64 > MODULES_VADDR);
- BUG_ON(TASK_SIZE_64 > MODULES_VADDR);
if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
extern int sysctl_overcommit_memory;
@@ -358,8 +423,8 @@ void __init mem_init(void)
void free_initmem(void)
{
- fixup_init();
free_initmem_default(0);
+ fixup_init();
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -380,3 +445,28 @@ static int __init keepinitrd_setup(char *__unused)
__setup("keepinitrd", keepinitrd_setup);
#endif
+
+/*
+ * Dump out memory limit information on panic.
+ */
+static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
+{
+ if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+ pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
+ } else {
+ pr_emerg("Memory Limit: none\n");
+ }
+ return 0;
+}
+
+static struct notifier_block mem_limit_notifier = {
+ .notifier_call = dump_mem_limit,
+};
+
+static int __init register_mem_limit_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &mem_limit_notifier);
+ return 0;
+}
+__initcall(register_mem_limit_dumper);
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cab7a5be40aa..757009daa9ed 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -16,9 +16,12 @@
#include <linux/memblock.h>
#include <linux/start_kernel.h>
+#include <asm/mmu_context.h>
+#include <asm/kernel-pgtable.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
+#include <asm/sections.h>
#include <asm/tlbflush.h>
static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
@@ -32,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
if (pmd_none(*pmd))
pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
- pte = pte_offset_kernel(pmd, addr);
+ pte = pte_offset_kimg(pmd, addr);
do {
next = addr + PAGE_SIZE;
set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
@@ -50,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud,
if (pud_none(*pud))
pud_populate(&init_mm, pud, kasan_zero_pmd);
- pmd = pmd_offset(pud, addr);
+ pmd = pmd_offset_kimg(pud, addr);
do {
next = pmd_addr_end(addr, end);
kasan_early_pte_populate(pmd, addr, next);
@@ -67,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,
if (pgd_none(*pgd))
pgd_populate(&init_mm, pgd, kasan_zero_pud);
- pud = pud_offset(pgd, addr);
+ pud = pud_offset_kimg(pgd, addr);
do {
next = pud_addr_end(addr, end);
kasan_early_pmd_populate(pud, addr, next);
@@ -96,6 +99,21 @@ asmlinkage void __init kasan_early_init(void)
kasan_map_early_shadow();
}
+/*
+ * Copy the current shadow region into a new pgdir.
+ */
+void __init kasan_copy_shadow(pgd_t *pgdir)
+{
+ pgd_t *pgd, *pgd_new, *pgd_end;
+
+ pgd = pgd_offset_k(KASAN_SHADOW_START);
+ pgd_end = pgd_offset_k(KASAN_SHADOW_END);
+ pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+ do {
+ set_pgd(pgd_new, *pgd);
+ } while (pgd++, pgd_new++, pgd != pgd_end);
+}
+
static void __init clear_pgds(unsigned long start,
unsigned long end)
{
@@ -108,20 +126,19 @@ static void __init clear_pgds(unsigned long start,
set_pgd(pgd_offset_k(start), __pgd(0));
}
-static void __init cpu_set_ttbr1(unsigned long ttbr1)
-{
- asm(
- " msr ttbr1_el1, %0\n"
- " isb"
- :
- : "r" (ttbr1));
-}
-
void __init kasan_init(void)
{
+ u64 kimg_shadow_start, kimg_shadow_end;
+ u64 mod_shadow_start, mod_shadow_end;
struct memblock_region *reg;
int i;
+ kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
+ kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
+
+ mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
+ mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
+
/*
* We are going to perform proper setup of shadow memory.
* At first we should unmap early shadow (clear_pgds() call bellow).
@@ -130,13 +147,33 @@ void __init kasan_init(void)
* setup will be finished.
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
- cpu_set_ttbr1(__pa(tmp_pg_dir));
- flush_tlb_all();
+ dsb(ishst);
+ cpu_replace_ttbr1(tmp_pg_dir);
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
+ pfn_to_nid(virt_to_pfn(_text)));
+
+ /*
+ * vmemmap_populate() has populated the shadow region that covers the
+ * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
+ * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
+ * kasan_populate_zero_shadow() from replacing the page table entries
+ * (PMD or PTE) at the edges of the shadow region for the kernel
+ * image.
+ */
+ kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE);
+ kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
- kasan_mem_to_shadow((void *)MODULES_VADDR));
+ (void *)mod_shadow_start);
+ kasan_populate_zero_shadow((void *)kimg_shadow_end,
+ kasan_mem_to_shadow((void *)PAGE_OFFSET));
+
+ if (kimg_shadow_start > mod_shadow_end)
+ kasan_populate_zero_shadow((void *)mod_shadow_end,
+ (void *)kimg_shadow_start);
for_each_memblock(memory, reg) {
void *start = (void *)__phys_to_virt(reg->base);
@@ -165,8 +202,7 @@ void __init kasan_init(void)
pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
memset(kasan_zero_page, 0, PAGE_SIZE);
- cpu_set_ttbr1(__pa(swapper_pg_dir));
- flush_tlb_all();
+ cpu_replace_ttbr1(swapper_pg_dir);
/* At this point kasan is fully initialized. Enable error messages */
init_task.kasan_depth = 0;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 58faeaa7fbdc..d2d8b8c2e17f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -30,8 +30,10 @@
#include <linux/slab.h>
#include <linux/stop_machine.h>
+#include <asm/barrier.h>
#include <asm/cputype.h>
#include <asm/fixmap.h>
+#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -44,13 +46,20 @@
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 kimage_voffset __read_mostly;
+EXPORT_SYMBOL(kimage_voffset);
+
/*
* Empty_zero_page is a special page that is used for zero-initialized data
* and COW.
*/
-struct page *empty_zero_page;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -62,16 +71,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
}
EXPORT_SYMBOL(phys_mem_access_prot);
-static void __init *early_alloc(unsigned long sz)
+static phys_addr_t __init early_pgtable_alloc(void)
{
phys_addr_t phys;
void *ptr;
- phys = memblock_alloc(sz, sz);
+ phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
BUG_ON(!phys);
- ptr = __va(phys);
- memset(ptr, 0, sz);
- return ptr;
+
+ /*
+ * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+ * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+ * any level of table.
+ */
+ ptr = pte_set_fixmap(phys);
+
+ memset(ptr, 0, PAGE_SIZE);
+
+ /*
+ * Implicit barriers also ensure the zeroed page is visible to the page
+ * table walker
+ */
+ pte_clear_fixmap();
+
+ return phys;
}
/*
@@ -95,24 +118,30 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
pte_t *pte;
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
- pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+ phys_addr_t pte_phys;
+ BUG_ON(!pgtable_alloc);
+ pte_phys = pgtable_alloc();
+ pte = pte_set_fixmap(pte_phys);
if (pmd_sect(*pmd))
split_pmd(pmd, pte);
- __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+ __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
flush_tlb_all();
+ pte_clear_fixmap();
}
BUG_ON(pmd_bad(*pmd));
- pte = pte_offset_kernel(pmd, addr);
+ pte = pte_set_fixmap_offset(pmd, addr);
do {
set_pte(pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
+
+ pte_clear_fixmap();
}
static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -127,10 +156,29 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
} while (pmd++, i++, i < PTRS_PER_PMD);
}
-static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end,
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+
+ /*
+ * If debug_page_alloc is enabled we must map the linear map
+ * using pages. However, other mappings created by
+ * create_mapping_noalloc must use sections in some cases. Allow
+ * sections to be used in those cases, where no pgtable_alloc
+ * function is provided.
+ */
+ return !pgtable_alloc || !debug_pagealloc_enabled();
+}
+#else
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+ return true;
+}
+#endif
+
+static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
pmd_t *pmd;
unsigned long next;
@@ -139,7 +187,10 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
* Check for initial section mappings in the pgd/pud and remove them.
*/
if (pud_none(*pud) || pud_sect(*pud)) {
- pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+ phys_addr_t pmd_phys;
+ BUG_ON(!pgtable_alloc);
+ pmd_phys = pgtable_alloc();
+ pmd = pmd_set_fixmap(pmd_phys);
if (pud_sect(*pud)) {
/*
* need to have the 1G of mappings continue to be
@@ -147,16 +198,18 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
*/
split_pud(pud, pmd);
}
- pud_populate(mm, pud, pmd);
+ __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
flush_tlb_all();
+ pmd_clear_fixmap();
}
BUG_ON(pud_bad(*pud));
- pmd = pmd_offset(pud, addr);
+ pmd = pmd_set_fixmap_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
/* try section mapping first */
- if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+ if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+ block_mappings_allowed(pgtable_alloc)) {
pmd_t old_pmd =*pmd;
set_pmd(pmd, __pmd(phys |
pgprot_val(mk_sect_prot(prot))));
@@ -167,17 +220,19 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
if (!pmd_none(old_pmd)) {
flush_tlb_all();
if (pmd_table(old_pmd)) {
- phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+ phys_addr_t table = pmd_page_paddr(old_pmd);
if (!WARN_ON_ONCE(slab_is_available()))
memblock_free(table, PAGE_SIZE);
}
}
} else {
alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
- prot, alloc);
+ prot, pgtable_alloc);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
+
+ pmd_clear_fixmap();
}
static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -192,28 +247,30 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
return true;
}
-static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end,
+static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
pud_t *pud;
unsigned long next;
if (pgd_none(*pgd)) {
- pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
- pgd_populate(mm, pgd, pud);
+ phys_addr_t pud_phys;
+ BUG_ON(!pgtable_alloc);
+ pud_phys = pgtable_alloc();
+ __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
}
BUG_ON(pgd_bad(*pgd));
- pud = pud_offset(pgd, addr);
+ pud = pud_set_fixmap_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
/*
* For 4K granule only, attempt to put down a 1GB block
*/
- if (use_1G_block(addr, next, phys)) {
+ if (use_1G_block(addr, next, phys) &&
+ block_mappings_allowed(pgtable_alloc)) {
pud_t old_pud = *pud;
set_pud(pud, __pud(phys |
pgprot_val(mk_sect_prot(prot))));
@@ -228,26 +285,28 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
if (!pud_none(old_pud)) {
flush_tlb_all();
if (pud_table(old_pud)) {
- phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+ phys_addr_t table = pud_page_paddr(old_pud);
if (!WARN_ON_ONCE(slab_is_available()))
memblock_free(table, PAGE_SIZE);
}
}
} else {
- alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
+ alloc_init_pmd(pud, addr, next, phys, prot,
+ pgtable_alloc);
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
+
+ pud_clear_fixmap();
}
/*
* Create the page directory entries and any necessary page tables for the
* mapping specified by 'md'.
*/
-static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
- phys_addr_t phys, unsigned long virt,
+static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot,
- void *(*alloc)(unsigned long size))
+ phys_addr_t (*pgtable_alloc)(void))
{
unsigned long addr, length, end, next;
@@ -265,22 +324,35 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
+ alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
-static void *late_alloc(unsigned long size)
+static phys_addr_t late_pgtable_alloc(void)
{
- void *ptr;
-
- BUG_ON(size > PAGE_SIZE);
- ptr = (void *)__get_free_page(PGALLOC_GFP);
+ void *ptr = (void *)__get_free_page(PGALLOC_GFP);
BUG_ON(!ptr);
- return ptr;
+
+ /* Ensure the zeroed page is visible to the page table walker */
+ dsb(ishst);
+ return __pa(ptr);
}
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot,
+ phys_addr_t (*alloc)(void))
+{
+ init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
+}
+
+/*
+ * This function can only be used to modify existing table entries,
+ * without allocating new levels of table. Note that this permits the
+ * creation of new section or page entries.
+ */
+static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot)
{
if (virt < VMALLOC_START) {
@@ -288,16 +360,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
- __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
- size, prot, early_alloc);
+ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+ NULL);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot)
{
- __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
- late_alloc);
+ __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
+ late_pgtable_alloc);
}
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -309,69 +381,57 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
return;
}
- return __create_mapping(&init_mm, pgd_offset_k(virt),
- phys, virt, size, prot, late_alloc);
+ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+ late_pgtable_alloc);
}
-#ifdef CONFIG_DEBUG_RODATA
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
{
+ unsigned long kernel_start = __pa(_stext);
+ unsigned long kernel_end = __pa(_etext);
+
/*
- * Set up the executable regions using the existing section mappings
- * for now. This will get more fine grained later once all memory
- * is mapped
+ * Take care not to create a writable alias for the
+ * read-only text and rodata sections of the kernel image.
*/
- unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
- unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
-
- if (end < kernel_x_start) {
- create_mapping(start, __phys_to_virt(start),
- end - start, PAGE_KERNEL);
- } else if (start >= kernel_x_end) {
- create_mapping(start, __phys_to_virt(start),
- end - start, PAGE_KERNEL);
- } else {
- if (start < kernel_x_start)
- create_mapping(start, __phys_to_virt(start),
- kernel_x_start - start,
- PAGE_KERNEL);
- create_mapping(kernel_x_start,
- __phys_to_virt(kernel_x_start),
- kernel_x_end - kernel_x_start,
- PAGE_KERNEL_EXEC);
- if (kernel_x_end < end)
- create_mapping(kernel_x_end,
- __phys_to_virt(kernel_x_end),
- end - kernel_x_end,
- PAGE_KERNEL);
+
+ /* No overlap with the kernel text */
+ if (end < kernel_start || start >= kernel_end) {
+ __create_pgd_mapping(pgd, start, __phys_to_virt(start),
+ end - start, PAGE_KERNEL,
+ early_pgtable_alloc);
+ return;
}
+ /*
+ * This block overlaps the kernel text mapping.
+ * Map the portion(s) which don't overlap.
+ */
+ if (start < kernel_start)
+ __create_pgd_mapping(pgd, start,
+ __phys_to_virt(start),
+ kernel_start - start, PAGE_KERNEL,
+ early_pgtable_alloc);
+ if (kernel_end < end)
+ __create_pgd_mapping(pgd, kernel_end,
+ __phys_to_virt(kernel_end),
+ end - kernel_end, PAGE_KERNEL,
+ early_pgtable_alloc);
+
+ /*
+ * Map the linear alias of the [_stext, _etext) interval as
+ * read-only/non-executable. This makes the contents of the
+ * region accessible to subsystems such as hibernate, but
+ * protects it from inadvertent modification or execution.
+ */
+ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
+ kernel_end - kernel_start, PAGE_KERNEL_RO,
+ early_pgtable_alloc);
}
-#else
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
-{
- create_mapping(start, __phys_to_virt(start), end - start,
- PAGE_KERNEL_EXEC);
-}
-#endif
-static void __init map_mem(void)
+static void __init map_mem(pgd_t *pgd)
{
struct memblock_region *reg;
- phys_addr_t limit;
-
- /*
- * Temporarily limit the memblock range. We need to do this as
- * create_mapping requires puds, pmds and ptes to be allocated from
- * memory addressable from the initial direct kernel mapping.
- *
- * The initial direct kernel mapping, located at swapper_pg_dir, gives
- * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
- * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
- * per Documentation/arm64/booting.txt).
- */
- limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
- memblock_set_current_limit(limit);
/* map all the memory banks */
for_each_memblock(memory, reg) {
@@ -383,69 +443,94 @@ static void __init map_mem(void)
if (memblock_is_nomap(reg))
continue;
- if (ARM64_SWAPPER_USES_SECTION_MAPS) {
- /*
- * For the first memory bank align the start address and
- * current memblock limit to prevent create_mapping() from
- * allocating pte page tables from unmapped memory. With
- * the section maps, if the first block doesn't end on section
- * size boundary, create_mapping() will try to allocate a pte
- * page, which may be returned from an unmapped area.
- * When section maps are not used, the pte page table for the
- * current limit is already present in swapper_pg_dir.
- */
- if (start < limit)
- start = ALIGN(start, SECTION_SIZE);
- if (end < limit) {
- limit = end & SECTION_MASK;
- memblock_set_current_limit(limit);
- }
- }
- __map_memblock(start, end);
+ __map_memblock(pgd, start, end);
}
-
- /* Limit no longer required. */
- memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
-static void __init fixup_executable(void)
+void mark_rodata_ro(void)
{
-#ifdef CONFIG_DEBUG_RODATA
- /* now that we are actually fully mapped, make the start/end more fine grained */
- if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
- unsigned long aligned_start = round_down(__pa(_stext),
- SWAPPER_BLOCK_SIZE);
+ unsigned long section_size;
- create_mapping(aligned_start, __phys_to_virt(aligned_start),
- __pa(_stext) - aligned_start,
- PAGE_KERNEL);
- }
+ section_size = (unsigned long)__start_rodata - (unsigned long)_stext;
+ create_mapping_late(__pa(_stext), (unsigned long)_stext,
+ section_size, PAGE_KERNEL_ROX);
+ /*
+ * mark .rodata as read only. Use _etext rather than __end_rodata to
+ * cover NOTES and EXCEPTION_TABLE.
+ */
+ section_size = (unsigned long)_etext - (unsigned long)__start_rodata;
+ create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+ section_size, PAGE_KERNEL_RO);
+}
- if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
- unsigned long aligned_end = round_up(__pa(__init_end),
- SWAPPER_BLOCK_SIZE);
- create_mapping(__pa(__init_end), (unsigned long)__init_end,
- aligned_end - __pa(__init_end),
- PAGE_KERNEL);
- }
-#endif
+void fixup_init(void)
+{
+ /*
+ * Unmap the __init region but leave the VM area in place. This
+ * prevents the region from being reused for kernel modules, which
+ * is not supported by kallsyms.
+ */
+ unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
}
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void)
+static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
+ pgprot_t prot, struct vm_struct *vma)
{
- create_mapping_late(__pa(_stext), (unsigned long)_stext,
- (unsigned long)_etext - (unsigned long)_stext,
- PAGE_KERNEL_ROX);
+ phys_addr_t pa_start = __pa(va_start);
+ unsigned long size = va_end - va_start;
+
+ BUG_ON(!PAGE_ALIGNED(pa_start));
+ BUG_ON(!PAGE_ALIGNED(size));
+
+ __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+ early_pgtable_alloc);
+
+ vma->addr = va_start;
+ vma->phys_addr = pa_start;
+ vma->size = size;
+ vma->flags = VM_MAP;
+ vma->caller = __builtin_return_address(0);
+ vm_area_add_early(vma);
}
-#endif
-void fixup_init(void)
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgd)
{
- create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
- (unsigned long)__init_end - (unsigned long)__init_begin,
- PAGE_KERNEL);
+ static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
+
+ map_kernel_chunk(pgd, _stext, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
+ map_kernel_chunk(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+ map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+ &vmlinux_init);
+ map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+
+ if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+ /*
+ * The fixmap falls in a separate pgd to the kernel, and doesn't
+ * live in the carveout for the swapper_pg_dir. We can simply
+ * re-use the existing dir for the fixmap.
+ */
+ set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
+ *pgd_offset_k(FIXADDR_START));
+ } else if (CONFIG_PGTABLE_LEVELS > 3) {
+ /*
+ * The fixmap shares its top level pgd entry with the kernel
+ * mapping. This can really only occur when we are running
+ * with 16k/4 levels, so we can simply reuse the pud level
+ * entry instead.
+ */
+ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+ set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
+ __pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+ pud_clear_fixmap();
+ } else {
+ BUG();
+ }
+
+ kasan_copy_shadow(pgd);
}
/*
@@ -454,28 +539,35 @@ void fixup_init(void)
*/
void __init paging_init(void)
{
- void *zero_page;
+ phys_addr_t pgd_phys = early_pgtable_alloc();
+ pgd_t *pgd = pgd_set_fixmap(pgd_phys);
- map_mem();
- fixup_executable();
+ map_kernel(pgd);
+ map_mem(pgd);
- /* allocate the zero page. */
- zero_page = early_alloc(PAGE_SIZE);
-
- bootmem_init();
-
- empty_zero_page = virt_to_page(zero_page);
+ /*
+ * We want to reuse the original swapper_pg_dir so we don't have to
+ * communicate the new address to non-coherent secondaries in
+ * secondary_entry, and so cpu_switch_mm can generate the address with
+ * adrp+add rather than a load from some global variable.
+ *
+ * To do this we need to go via a temporary pgd.
+ */
+ cpu_replace_ttbr1(__va(pgd_phys));
+ memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+ cpu_replace_ttbr1(swapper_pg_dir);
- /* Ensure the zero page is visible to the page table walker */
- dsb(ishst);
+ pgd_clear_fixmap();
+ memblock_free(pgd_phys, PAGE_SIZE);
/*
- * TTBR0 is only used for the identity mapping at this stage. Make it
- * point to zero page to avoid speculatively fetching new entries.
+ * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
+ * allocated with it.
*/
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- cpu_set_default_tcr_t0sz();
+ memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+ SWAPPER_DIR_SIZE - PAGE_SIZE);
+
+ bootmem_init();
}
/*
@@ -562,21 +654,13 @@ void vmemmap_free(unsigned long start, unsigned long end)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
static inline pud_t * fixmap_pud(unsigned long addr)
{
pgd_t *pgd = pgd_offset_k(addr);
BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
- return pud_offset(pgd, addr);
+ return pud_offset_kimg(pgd, addr);
}
static inline pmd_t * fixmap_pmd(unsigned long addr)
@@ -585,16 +669,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
BUG_ON(pud_none(*pud) || pud_bad(*pud));
- return pmd_offset(pud, addr);
+ return pmd_offset_kimg(pud, addr);
}
static inline pte_t * fixmap_pte(unsigned long addr)
{
- pmd_t *pmd = fixmap_pmd(addr);
-
- BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
- return pte_offset_kernel(pmd, addr);
+ return &bm_pte[pte_index(addr)];
}
void __init early_fixmap_init(void)
@@ -605,15 +685,26 @@ void __init early_fixmap_init(void)
unsigned long addr = FIXADDR_START;
pgd = pgd_offset_k(addr);
- pgd_populate(&init_mm, pgd, bm_pud);
- pud = pud_offset(pgd, addr);
+ if (CONFIG_PGTABLE_LEVELS > 3 &&
+ !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
+ /*
+ * We only end up here if the kernel mapping and the fixmap
+ * share the top level pgd entry, which should only happen on
+ * 16k/4 levels configurations.
+ */
+ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+ pud = pud_offset_kimg(pgd, addr);
+ } else {
+ pgd_populate(&init_mm, pgd, bm_pud);
+ pud = fixmap_pud(addr);
+ }
pud_populate(&init_mm, pud, bm_pmd);
- pmd = pmd_offset(pud, addr);
+ pmd = fixmap_pmd(addr);
pmd_populate_kernel(&init_mm, pmd, bm_pte);
/*
* The boot-ioremap range spans multiple pmds, for which
- * we are not preparted:
+ * we are not prepared:
*/
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
@@ -652,11 +743,10 @@ void __set_fixmap(enum fixed_addresses idx,
}
}
-void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
- pgprot_t prot = PAGE_KERNEL_RO;
- int size, offset;
+ int offset;
void *dt_virt;
/*
@@ -673,7 +763,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
/*
* Make sure that the FDT region can be mapped without the need to
* allocate additional translation table pages, so that it is safe
- * to call create_mapping() this early.
+ * to call create_mapping_noalloc() this early.
*
* On 64k pages, the FDT will be mapped using PTEs, so we need to
* be in the same PMD as the rest of the fixmap.
@@ -689,21 +779,73 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
dt_virt = (void *)dt_virt_base + offset;
/* map the first chunk so we can read the size from the header */
- create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
- SWAPPER_BLOCK_SIZE, prot);
+ create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
+ dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
if (fdt_check_header(dt_virt) != 0)
return NULL;
- size = fdt_totalsize(dt_virt);
- if (size > MAX_FDT_SIZE)
+ *size = fdt_totalsize(dt_virt);
+ if (*size > MAX_FDT_SIZE)
return NULL;
- if (offset + size > SWAPPER_BLOCK_SIZE)
- create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
- round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
+ if (offset + *size > SWAPPER_BLOCK_SIZE)
+ create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
- memblock_reserve(dt_phys, size);
+ return dt_virt;
+}
+
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+{
+ void *dt_virt;
+ int size;
+
+ dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
+ if (!dt_virt)
+ return NULL;
+ memblock_reserve(dt_phys, size);
return dt_virt;
}
+
+int __init arch_ioremap_pud_supported(void)
+{
+ /* only 4k granule supports level 1 block mappings */
+ return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+ return 1;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+{
+ BUG_ON(phys & ~PUD_MASK);
+ set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+{
+ BUG_ON(phys & ~PMD_MASK);
+ set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+ if (!pud_sect(*pud))
+ return 0;
+ pud_clear(pud);
+ return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+ if (!pmd_sect(*pmd))
+ return 0;
+ pmd_clear(pmd);
+ return 1;
+}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0795c3a36d8f..ca6d268e3313 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -37,14 +37,31 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
return 0;
}
+/*
+ * This function assumes that the range is mapped with PAGE_SIZE pages.
+ */
+static int __change_memory_common(unsigned long start, unsigned long size,
+ pgprot_t set_mask, pgprot_t clear_mask)
+{
+ struct page_change_data data;
+ int ret;
+
+ data.set_mask = set_mask;
+ data.clear_mask = clear_mask;
+
+ ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+ &data);
+
+ flush_tlb_kernel_range(start, start + size);
+ return ret;
+}
+
static int change_memory_common(unsigned long addr, int numpages,
pgprot_t set_mask, pgprot_t clear_mask)
{
unsigned long start = addr;
unsigned long size = PAGE_SIZE*numpages;
unsigned long end = start + size;
- int ret;
- struct page_change_data data;
struct vm_struct *area;
if (!PAGE_ALIGNED(addr)) {
@@ -75,14 +92,7 @@ static int change_memory_common(unsigned long addr, int numpages,
if (!numpages)
return 0;
- data.set_mask = set_mask;
- data.clear_mask = clear_mask;
-
- ret = apply_to_page_range(&init_mm, start, size, change_page_range,
- &data);
-
- flush_tlb_kernel_range(start, end);
- return ret;
+ return __change_memory_common(start, size, set_mask, clear_mask);
}
int set_memory_ro(unsigned long addr, int numpages)
@@ -114,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages)
__pgprot(PTE_PXN));
}
EXPORT_SYMBOL_GPL(set_memory_x);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ unsigned long addr = (unsigned long) page_address(page);
+
+ if (enable)
+ __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(PTE_VALID),
+ __pgprot(0));
+ else
+ __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(0),
+ __pgprot(PTE_VALID));
+}
+#endif
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c164d2cb35c0..543f5198005a 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,6 +25,8 @@
#include <asm/hwcap.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
#include "proc-macros.S"
@@ -137,9 +139,47 @@ ENTRY(cpu_do_switch_mm)
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
+alternative_if_not ARM64_WORKAROUND_CAVIUM_27456
ret
+ nop
+ nop
+ nop
+alternative_else
+ ic iallu
+ dsb nsh
+ isb
+ ret
+alternative_endif
ENDPROC(cpu_do_switch_mm)
+ .pushsection ".idmap.text", "ax"
+/*
+ * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
+ *
+ * This is the low-level counterpart to cpu_replace_ttbr1, and should not be
+ * called by anything else. It can only be executed from a TTBR0 mapping.
+ */
+ENTRY(idmap_cpu_replace_ttbr1)
+ mrs x2, daif
+ msr daifset, #0xf
+
+ adrp x1, empty_zero_page
+ msr ttbr1_el1, x1
+ isb
+
+ tlbi vmalle1
+ dsb nsh
+ isb
+
+ msr ttbr1_el1, x0
+ isb
+
+ msr daif, x2
+
+ ret
+ENDPROC(idmap_cpu_replace_ttbr1)
+ .popsection
+
/*
* __cpu_setup
*
diff --git a/arch/avr32/boards/merisc/setup.c b/arch/avr32/boards/merisc/setup.c
index 83d896cc2aed..718a6d7eb808 100644
--- a/arch/avr32/boards/merisc/setup.c
+++ b/arch/avr32/boards/merisc/setup.c
@@ -27,7 +27,6 @@
#include <asm/io.h>
#include <asm/setup.h>
-#include <asm/gpio.h>
#include <mach/at32ap700x.h>
#include <mach/board.h>
diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h
index 366bbeaeb405..572739b4c4b4 100644
--- a/arch/avr32/include/asm/cmpxchg.h
+++ b/arch/avr32/include/asm/cmpxchg.h
@@ -57,7 +57,7 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
" brne 1b\n"
"2:\n"
: [ret] "=&r"(ret), [m] "=m"(*m)
- : "m"(m), [old] "ir"(old), [new] "r"(new)
+ : "m"(m), [old] "Ks21r"(old), [new] "r"(new)
: "memory", "cc");
return ret;
}
diff --git a/arch/avr32/include/asm/pci.h b/arch/avr32/include/asm/pci.h
index a32a02372017..0f5f134b896a 100644
--- a/arch/avr32/include/asm/pci.h
+++ b/arch/avr32/include/asm/pci.h
@@ -5,6 +5,4 @@
#define PCI_DMA_BUS_IS_PHYS (1)
-#include <asm-generic/pci-dma-compat.h>
-
#endif /* __ASM_AVR32_PCI_H__ */
diff --git a/arch/avr32/include/uapi/asm/unistd.h b/arch/avr32/include/uapi/asm/unistd.h
index b60132bb27ea..60c0f3afc1f9 100644
--- a/arch/avr32/include/uapi/asm/unistd.h
+++ b/arch/avr32/include/uapi/asm/unistd.h
@@ -337,5 +337,6 @@
#define __NR_userfaultfd 322
#define __NR_membarrier 323
#define __NR_mlock2 324
+#define __NR_copy_file_range 325
#endif /* _UAPI__ASM_AVR32_UNISTD_H */
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 209ae5ad3495..e6928896da2a 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -49,13 +49,13 @@ static struct resource __initdata kernel_data = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM,
+ .flags = IORESOURCE_SYSTEM_RAM,
};
static struct resource __initdata kernel_code = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM,
+ .flags = IORESOURCE_SYSTEM_RAM,
.sibling = &kernel_data,
};
@@ -134,7 +134,7 @@ add_physical_memory(resource_size_t start, resource_size_t end)
new->start = start;
new->end = end;
new->name = "System RAM";
- new->flags = IORESOURCE_MEM;
+ new->flags = IORESOURCE_SYSTEM_RAM;
*pprev = new;
}
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
index f9c68fab0e2f..cb3991552f14 100644
--- a/arch/avr32/kernel/syscall-stubs.S
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -124,3 +124,12 @@ __sys_process_vm_writev:
call sys_process_vm_writev
sub sp, -4
popm pc
+
+ .global __sys_copy_file_range
+ .type __sys_copy_file_range,@function
+__sys_copy_file_range:
+ pushm lr
+ st.w --sp, ARG6
+ call sys_copy_file_range
+ sub sp, -4
+ popm pc
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index 1915a443b491..64d71a781fa8 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -338,4 +338,5 @@ sys_call_table:
.long sys_userfaultfd
.long sys_membarrier
.long sys_mlock2
+ .long __sys_copy_file_range /* 325 */
.long sys_ni_syscall /* r8 is saturated at nr_syscalls */
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 5020057ac7a2..83c2a0021b56 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -14,8 +14,8 @@
#include <linux/fs.h>
#include <linux/platform_device.h>
#include <linux/irq.h>
+#include <linux/gpio.h>
-#include <asm/gpio.h>
#include <asm/io.h>
#include <mach/portmux.h>
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index af76634f8d98..a63c12259e77 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -1233,8 +1233,6 @@ source "drivers/pci/Kconfig"
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
endmenu
menu "Executable file formats"
diff --git a/arch/blackfin/include/asm/pci.h b/arch/blackfin/include/asm/pci.h
index 14efc0db1ade..11ea1cb35036 100644
--- a/arch/blackfin/include/asm/pci.h
+++ b/arch/blackfin/include/asm/pci.h
@@ -4,7 +4,6 @@
#define _ASM_BFIN_PCI_H
#include <linux/scatterlist.h>
-#include <asm-generic/pci-dma-compat.h>
#include <asm-generic/pci.h>
#define PCIBIOS_MIN_IO 0x00001000
diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h
index b88a1558b0b9..c1ee3d6533fb 100644
--- a/arch/blackfin/include/asm/pgtable.h
+++ b/arch/blackfin/include/asm/pgtable.h
@@ -97,6 +97,8 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
unsigned long);
#define HAVE_ARCH_FB_UNMAPPED_AREA
+#define pgprot_writecombine pgprot_noncached
+
#include <asm-generic/pgtable.h>
#endif /* _BLACKFIN_PGTABLE_H */
diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c
index a017359c1826..c5d31287de01 100644
--- a/arch/blackfin/kernel/bfin_gpio.c
+++ b/arch/blackfin/kernel/bfin_gpio.c
@@ -11,6 +11,8 @@
#include <linux/err.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/gpio/driver.h>
+/* FIXME: consumer API required for gpio_set_value() etc, get rid of this */
#include <linux/gpio.h>
#include <linux/irq.h>
@@ -1159,7 +1161,7 @@ static int bfin_gpiolib_direction_output(struct gpio_chip *chip, unsigned gpio,
static int bfin_gpiolib_get_value(struct gpio_chip *chip, unsigned gpio)
{
- return bfin_gpio_get_value(gpio);
+ return !!bfin_gpio_get_value(gpio);
}
static void bfin_gpiolib_set_value(struct gpio_chip *chip, unsigned gpio, int value)
@@ -1197,7 +1199,7 @@ static struct gpio_chip bfin_chip = {
static int __init bfin_gpiolib_setup(void)
{
- return gpiochip_add(&bfin_chip);
+ return gpiochip_add_data(&bfin_chip, NULL);
}
arch_initcall(bfin_gpiolib_setup);
#endif
diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c
index 86b1cd3a0309..e272bca93c64 100644
--- a/arch/blackfin/kernel/debug-mmrs.c
+++ b/arch/blackfin/kernel/debug-mmrs.c
@@ -11,9 +11,9 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/i2c/bfin_twi.h>
+#include <linux/gpio.h>
#include <asm/blackfin.h>
-#include <asm/gpio.h>
#include <asm/gptimers.h>
#include <asm/bfin_can.h>
#include <asm/bfin_dma.h>
diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c
index a3a572352769..80bcfd1d023e 100644
--- a/arch/blackfin/mach-bf527/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf527/boards/ezbrd.c
@@ -279,7 +279,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = {
#endif
#if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879)
-#include <linux/spi/ad7879.h>
+#include <linux/platform_data/ad7879.h>
static const struct ad7879_platform_data bfin_ad7879_ts_info = {
.model = 7879, /* Model = AD7879 */
.x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index d4219e8e5ab8..571edfd2ecf3 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -477,7 +477,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = {
#endif
#if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879)
-#include <linux/spi/ad7879.h>
+#include <linux/platform_data/ad7879.h>
static const struct ad7879_platform_data bfin_ad7879_ts_info = {
.model = 7879, /* Model = AD7879 */
.x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */
diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c
index a0f5856a5ff8..c1acce4c2e45 100644
--- a/arch/blackfin/mach-bf527/boards/tll6527m.c
+++ b/arch/blackfin/mach-bf527/boards/tll6527m.c
@@ -29,7 +29,7 @@
#include <asm/dpmc.h>
#if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879)
-#include <linux/spi/ad7879.h>
+#include <linux/platform_data/ad7879.h>
#define LCD_BACKLIGHT_GPIO 0x40
/* TLL6527M uses TLL7UIQ35 / ADI LCD EZ Extender. AD7879 AUX GPIO is used for
* LCD Backlight Enable
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index c181543a399a..eaec7b4832a2 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -776,7 +776,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = {
#endif
#if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879)
-#include <linux/spi/ad7879.h>
+#include <linux/platform_data/ad7879.h>
static const struct ad7879_platform_data bfin_ad7879_ts_info = {
.model = 7879, /* Model = AD7879 */
.x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */
diff --git a/arch/blackfin/mach-bf538/boards/ezkit.c b/arch/blackfin/mach-bf538/boards/ezkit.c
index ae2fcbb00119..1b6a52ad8a0e 100644
--- a/arch/blackfin/mach-bf538/boards/ezkit.c
+++ b/arch/blackfin/mach-bf538/boards/ezkit.c
@@ -15,9 +15,9 @@
#include <linux/spi/flash.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
+#include <linux/gpio.h>
#include <asm/bfin5xx_spi.h>
#include <asm/dma.h>
-#include <asm/gpio.h>
#include <asm/nand.h>
#include <asm/portmux.h>
#include <asm/dpmc.h>
@@ -521,7 +521,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
#endif /* CONFIG_SPI_BFIN5XX */
#if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879)
-#include <linux/spi/ad7879.h>
+#include <linux/platform_data/ad7879.h>
static const struct ad7879_platform_data bfin_ad7879_ts_info = {
.model = 7879, /* Model = AD7879 */
.x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */
diff --git a/arch/blackfin/mach-bf538/ext-gpio.c b/arch/blackfin/mach-bf538/ext-gpio.c
index 471a9b184d5b..48c100228f2d 100644
--- a/arch/blackfin/mach-bf538/ext-gpio.c
+++ b/arch/blackfin/mach-bf538/ext-gpio.c
@@ -8,8 +8,8 @@
#include <linux/module.h>
#include <linux/err.h>
+#include <linux/gpio.h>
#include <asm/blackfin.h>
-#include <asm/gpio.h>
#include <asm/portmux.h>
#define DEFINE_REG(reg, off) \
@@ -116,9 +116,9 @@ static struct gpio_chip bf538_porte_chip = {
static int __init bf538_extgpio_setup(void)
{
- return gpiochip_add(&bf538_portc_chip) |
- gpiochip_add(&bf538_portd_chip) |
- gpiochip_add(&bf538_porte_chip);
+ return gpiochip_add_data(&bf538_portc_chip, NULL) |
+ gpiochip_add_data(&bf538_portd_chip, NULL) |
+ gpiochip_add_data(&bf538_porte_chip, NULL);
}
arch_initcall(bf538_extgpio_setup);
diff --git a/arch/blackfin/mach-bf548/boards/cm_bf548.c b/arch/blackfin/mach-bf548/boards/cm_bf548.c
index 6d5ffdead067..120c9941c242 100644
--- a/arch/blackfin/mach-bf548/boards/cm_bf548.c
+++ b/arch/blackfin/mach-bf548/boards/cm_bf548.c
@@ -17,9 +17,9 @@
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/usb/musb.h>
+#include <linux/gpio.h>
#include <asm/bfin5xx_spi.h>
#include <asm/dma.h>
-#include <asm/gpio.h>
#include <asm/nand.h>
#include <asm/portmux.h>
#include <asm/bfin_sdh.h>
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index 4204b9842532..3cdd4835a9f7 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -20,9 +20,9 @@
#include <linux/pinctrl/machine.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/platform_data/pinctrl-adi2.h>
+#include <linux/gpio.h>
#include <asm/bfin5xx_spi.h>
#include <asm/dma.h>
-#include <asm/gpio.h>
#include <asm/nand.h>
#include <asm/dpmc.h>
#include <asm/bfin_sport.h>
diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c
index c7928d8ebb82..aad5d7416886 100644
--- a/arch/blackfin/mach-bf609/boards/ezkit.c
+++ b/arch/blackfin/mach-bf609/boards/ezkit.c
@@ -21,8 +21,8 @@
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/platform_data/pinctrl-adi2.h>
#include <linux/spi/adi_spi3.h>
+#include <linux/gpio.h>
#include <asm/dma.h>
-#include <asm/gpio.h>
#include <asm/nand.h>
#include <asm/dpmc.h>
#include <asm/portmux.h>
diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index e8d4d748d0fd..4986b4fbcee9 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -17,13 +17,13 @@
#include <linux/irq.h>
#include <linux/sched.h>
#include <linux/syscore_ops.h>
+#include <linux/gpio.h>
#include <asm/delay.h>
#ifdef CONFIG_IPIPE
#include <linux/ipipe.h>
#endif
#include <asm/traps.h>
#include <asm/blackfin.h>
-#include <asm/gpio.h>
#include <asm/irq_handler.h>
#include <asm/dpmc.h>
#include <asm/traps.h>
diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c
index a66d979ec651..5ece38a5b758 100644
--- a/arch/blackfin/mach-common/pm.c
+++ b/arch/blackfin/mach-common/pm.c
@@ -15,9 +15,9 @@
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/delay.h>
+#include <linux/gpio.h>
#include <asm/cplb.h>
-#include <asm/gpio.h>
#include <asm/dma.h>
#include <asm/dpmc.h>
#include <asm/pm.h>
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 0030e21cfceb..23c4ef5f8bdc 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -333,7 +333,7 @@ void secondary_start_kernel(void)
/* We are done with local CPU inits, unblock the boot CPU. */
set_cpu_online(cpu, true);
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_prepare_boot_cpu(void)
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 79049d432d3c..5aa8ea8bad2d 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -36,6 +36,7 @@ config GENERIC_HWEIGHT
config GENERIC_BUG
def_bool y
+ depends on BUG
config C6X_BIG_KERNEL
bool "Build a big kernel"
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
index 72e17f7ebd6f..786e36e2f61d 100644
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c
@@ -281,8 +281,6 @@ notrace void __init machine_init(unsigned long dt_ptr)
*/
set_ist(_vectors_start);
- lockdep_init();
-
/*
* dtb is passed in from bootloader.
* fdt is linked in blob.
diff --git a/arch/cris/include/asm/pci.h b/arch/cris/include/asm/pci.h
index c15b4b4baafa..b1b289df04c7 100644
--- a/arch/cris/include/asm/pci.h
+++ b/arch/cris/include/asm/pci.h
@@ -48,9 +48,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
#endif /* __KERNEL__ */
-/* implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
/* generic pci stuff */
#include <asm-generic/pci.h>
diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index e43d22c58ad5..809cfc6707ab 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -15,7 +15,6 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
-#include <asm-generic/pci-dma-compat.h>
#include <asm-generic/pci.h>
struct pci_dev;
@@ -32,12 +31,6 @@ extern void consistent_sync_page(struct page *page, unsigned long offset,
size_t size, int direction);
#endif
-extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
- dma_addr_t *dma_handle);
-
-extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
/* Return the index of the PCI controller for device PDEV. */
#define pci_controller_num(PDEV) (0)
diff --git a/arch/frv/include/asm/serial.h b/arch/frv/include/asm/serial.h
index dbb825998689..bce0d0d07e60 100644
--- a/arch/frv/include/asm/serial.h
+++ b/arch/frv/include/asm/serial.h
@@ -13,6 +13,6 @@
*/
#define BASE_BAUD 0
-#define STD_COM_FLAGS ASYNC_BOOT_AUTOCONF
+#define STD_COM_FLAGS UPF_BOOT_AUTOCONF
#define SERIAL_PORT_DFNS
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index ff759f26b96a..983bae7d2665 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,7 +180,7 @@ void start_secondary(void)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index fb0515eb639b..b534ebab36ea 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -574,12 +574,8 @@ config PCI_DOMAINS
config PCI_SYSCALL
def_bool PCI
-source "drivers/pci/pcie/Kconfig"
-
source "drivers/pci/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
source "drivers/pcmcia/Kconfig"
endmenu
diff --git a/arch/ia64/include/asm/gpio.h b/arch/ia64/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/ia64/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index a865d2a04f75..5de673ac9cb1 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -433,6 +433,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo
return ioremap(phys_addr, size);
}
#define ioremap_cache ioremap_cache
+#define ioremap_uc ioremap_nocache
/*
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 07039d168f37..c0835b0dc722 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -50,8 +50,6 @@ struct pci_dev;
extern unsigned long ia64_max_iommu_merge_mask;
#define PCI_DMA_BUS_IS_PHYS (ia64_max_iommu_merge_mask == ~0UL)
-#include <asm-generic/pci-dma-compat.h>
-
#define HAVE_PCI_MMAP
extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine);
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 3027e7516d85..ce112472bdd6 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -3,7 +3,7 @@
*
* Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
* Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2005 Christoph Lameter <clameter@sgi.com>
+ * Copyright (C) 2005 Christoph Lameter <cl@linux.com>
*
* Based on asm-i386/rwsem.h and other architecture implementation.
*
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index caae3f4e4341..300dac3702f1 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1178,7 +1178,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
efi_memory_desc_t *md;
u64 efi_desc_size;
char *name;
- unsigned long flags;
+ unsigned long flags, desc;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
@@ -1193,6 +1193,8 @@ efi_initialize_iomem_resources(struct resource *code_resource,
continue;
flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ desc = IORES_DESC_NONE;
+
switch (md->type) {
case EFI_MEMORY_MAPPED_IO:
@@ -1207,14 +1209,17 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->attribute & EFI_MEMORY_WP) {
name = "System ROM";
flags |= IORESOURCE_READONLY;
- } else if (md->attribute == EFI_MEMORY_UC)
+ } else if (md->attribute == EFI_MEMORY_UC) {
name = "Uncached RAM";
- else
+ } else {
name = "System RAM";
+ flags |= IORESOURCE_SYSRAM;
+ }
break;
case EFI_ACPI_MEMORY_NVS:
name = "ACPI Non-volatile Storage";
+ desc = IORES_DESC_ACPI_NV_STORAGE;
break;
case EFI_UNUSABLE_MEMORY:
@@ -1224,6 +1229,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
case EFI_PERSISTENT_MEMORY:
name = "Persistent Memory";
+ desc = IORES_DESC_PERSISTENT_MEMORY;
break;
case EFI_RESERVED_TYPE:
@@ -1246,6 +1252,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
res->start = md->phys_addr;
res->end = md->phys_addr + efi_md_size(md) - 1;
res->flags = flags;
+ res->desc = desc;
if (insert_resource(&iomem_resource, res) < 0)
kfree(res);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4f118b0d3091..2029a38a72ae 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -80,17 +80,17 @@ unsigned long vga_console_membase;
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
unsigned long ia64_max_cacheline_size;
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0e76fad27975..74fe317477e6 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -454,7 +454,7 @@ start_secondary (void *unused)
preempt_disable();
smp_callin();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 622772b7fb6c..e7ae6088350a 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1336,8 +1336,11 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
* Don't call tty_write_message() if we're in the kernel; we might
* be holding locks...
*/
- if (user_mode(regs))
- tty_write_message(current->signal->tty, buf);
+ if (user_mode(regs)) {
+ struct tty_struct *tty = get_current_tty();
+ tty_write_message(tty, buf);
+ tty_kref_put(tty);
+ }
buf[len-1] = '\0'; /* drop '\r' */
/* watch for command names containing %s */
printk(KERN_WARNING "%s", buf);
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index f50d4b3f501a..85de86d36fdf 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -38,7 +38,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
if (pud) {
pmd = pmd_alloc(mm, pud, taddr);
if (pmd)
- pte = pte_alloc_map(mm, NULL, pmd, taddr);
+ pte = pte_alloc_map(mm, pmd, taddr);
}
return pte;
}
diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c
index fc505d58f078..41caa99add51 100644
--- a/arch/ia64/pci/fixup.c
+++ b/arch/ia64/pci/fixup.c
@@ -17,14 +17,14 @@
*
* The standard boot ROM sequence for an x86 machine uses the BIOS
* to select an initial video card for boot display. This boot video
- * card will have it's BIOS copied to C0000 in system RAM.
+ * card will have its BIOS copied to 0xC0000 in system RAM.
* IORESOURCE_ROM_SHADOW is used to associate the boot video
* card with this copy. On laptops this copy has to be used since
* the main ROM may be compressed or combined with another image.
* See pci_map_rom() for use of this flag. Before marking the device
* with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set
- * by either arch cde or vga-arbitration, if so only apply the fixup to this
- * already determined primary video card.
+ * by either arch code or vga-arbitration; if so only apply the fixup to this
+ * already-determined primary video card.
*/
static void pci_fixup_video(struct pci_dev *pdev)
@@ -32,6 +32,7 @@ static void pci_fixup_video(struct pci_dev *pdev)
struct pci_dev *bridge;
struct pci_bus *bus;
u16 config;
+ struct resource *res;
if ((strcmp(ia64_platform_name, "dig") != 0)
&& (strcmp(ia64_platform_name, "hpzx1") != 0))
@@ -61,8 +62,18 @@ static void pci_fixup_video(struct pci_dev *pdev)
if (!vga_default_device() || pdev == vga_default_device()) {
pci_read_config_word(pdev, PCI_COMMAND, &config);
if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
- pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
- dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n");
+ res = &pdev->resource[PCI_ROM_RESOURCE];
+
+ pci_disable_rom(pdev);
+ if (res->parent)
+ release_resource(res);
+
+ res->start = 0xC0000;
+ res->end = res->start + 0x20000 - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
+ IORESOURCE_PCI_FIXED;
+ dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n",
+ res);
}
}
}
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
index 0640739cc20c..231234c8d113 100644
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -429,7 +429,8 @@ sn_acpi_slot_fixup(struct pci_dev *dev)
void __iomem *addr;
struct pcidev_info *pcidev_info = NULL;
struct sn_irq_info *sn_irq_info = NULL;
- size_t image_size, size;
+ struct resource *res;
+ size_t size;
if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) {
panic("%s: Failure obtaining pcidev_info for %s\n",
@@ -443,17 +444,20 @@ sn_acpi_slot_fixup(struct pci_dev *dev)
* of the shadowed copy, and the actual length of the ROM image.
*/
size = pci_resource_len(dev, PCI_ROM_RESOURCE);
- addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE],
- size);
- image_size = pci_get_rom_size(dev, addr, size);
- dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr;
- dev->resource[PCI_ROM_RESOURCE].end =
- (unsigned long) addr + image_size - 1;
- dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY;
+
+ res = &dev->resource[PCI_ROM_RESOURCE];
+
+ pci_disable_rom(dev);
+ if (res->parent)
+ release_resource(res);
+
+ res->start = pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE];
+ res->end = res->start + size - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
+ IORESOURCE_PCI_FIXED;
}
sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
}
-
EXPORT_SYMBOL(sn_acpi_slot_fixup);
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 1be65eb074ec..c15a41e2d1f2 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -150,7 +150,8 @@ void
sn_io_slot_fixup(struct pci_dev *dev)
{
int idx;
- unsigned long addr, end, size, start;
+ struct resource *res;
+ unsigned long addr, size;
struct pcidev_info *pcidev_info;
struct sn_irq_info *sn_irq_info;
int status;
@@ -175,55 +176,41 @@ sn_io_slot_fixup(struct pci_dev *dev)
/* Copy over PIO Mapped Addresses */
for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
-
- if (!pcidev_info->pdi_pio_mapped_addr[idx]) {
+ if (!pcidev_info->pdi_pio_mapped_addr[idx])
continue;
- }
- start = dev->resource[idx].start;
- end = dev->resource[idx].end;
- size = end - start;
- if (size == 0) {
+ res = &dev->resource[idx];
+
+ size = res->end - res->start;
+ if (size == 0)
continue;
- }
- addr = pcidev_info->pdi_pio_mapped_addr[idx];
- addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
- dev->resource[idx].start = addr;
- dev->resource[idx].end = addr + size;
+
+ res->start = pcidev_info->pdi_pio_mapped_addr[idx];
+ res->end = addr + size;
/*
* if it's already in the device structure, remove it before
* inserting
*/
- if (dev->resource[idx].parent && dev->resource[idx].parent->child)
- release_resource(&dev->resource[idx]);
+ if (res->parent && res->parent->child)
+ release_resource(res);
- if (dev->resource[idx].flags & IORESOURCE_IO)
- insert_resource(&ioport_resource, &dev->resource[idx]);
+ if (res->flags & IORESOURCE_IO)
+ insert_resource(&ioport_resource, res);
else
- insert_resource(&iomem_resource, &dev->resource[idx]);
+ insert_resource(&iomem_resource, res);
/*
- * If ROM, set the actual ROM image size, and mark as
- * shadowed in PROM.
+ * If ROM, mark as shadowed in PROM.
*/
if (idx == PCI_ROM_RESOURCE) {
- size_t image_size;
- void __iomem *rom;
-
- rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE),
- size + 1);
- image_size = pci_get_rom_size(dev, rom, size + 1);
- dev->resource[PCI_ROM_RESOURCE].end =
- dev->resource[PCI_ROM_RESOURCE].start +
- image_size - 1;
- dev->resource[PCI_ROM_RESOURCE].flags |=
- IORESOURCE_ROM_BIOS_COPY;
+ pci_disable_rom(dev);
+ res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
+ IORESOURCE_PCI_FIXED;
}
}
sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
}
-
EXPORT_SYMBOL(sn_io_slot_fixup);
/*
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 2841c0a3fd3b..c82b29253991 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -387,8 +387,6 @@ config ISA
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
endmenu
diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index a5ecef7188ba..136c69f1fb8a 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -70,14 +70,14 @@ static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
unsigned long memory_start;
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index a468467542f4..f98d2f6519d6 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -432,7 +432,7 @@ int __init start_secondary(void *unused)
*/
local_flush_tlb_all();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 0d4146f644dc..11fa717d93b1 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -59,21 +59,24 @@ void free_initrd_mem(unsigned long, unsigned long);
void __init zone_sizes_init(void)
{
unsigned long zones_size[MAX_NR_ZONES] = {0, };
- unsigned long max_dma;
- unsigned long low;
unsigned long start_pfn;
#ifdef CONFIG_MMU
- start_pfn = START_PFN(0);
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
- low = MAX_LOW_PFN(0);
-
- if (low < max_dma){
- zones_size[ZONE_DMA] = low - start_pfn;
- zones_size[ZONE_NORMAL] = 0;
- } else {
- zones_size[ZONE_DMA] = low - start_pfn;
- zones_size[ZONE_NORMAL] = low - max_dma;
+ {
+ unsigned long low;
+ unsigned long max_dma;
+
+ start_pfn = START_PFN(0);
+ max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ low = MAX_LOW_PFN(0);
+
+ if (low < max_dma) {
+ zones_size[ZONE_DMA] = low - start_pfn;
+ zones_size[ZONE_NORMAL] = 0;
+ } else {
+ zones_size[ZONE_DMA] = low - start_pfn;
+ zones_size[ZONE_NORMAL] = low - max_dma;
+ }
}
#else
zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
diff --git a/arch/m68k/68360/Makefile b/arch/m68k/68360/Makefile
deleted file mode 100644
index 591ce42df3de..000000000000
--- a/arch/m68k/68360/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# Makefile for 68360 machines.
-#
-model-y := ram
-model-$(CONFIG_ROMKERNEL) := rom
-
-obj-y := config.o commproc.o entry.o ints.o
-
-extra-y := head.o
-
-$(obj)/head.o: $(obj)/head-$(model-y).o
- ln -sf head-$(model-y).o $(obj)/head.o
diff --git a/arch/m68k/68360/commproc.c b/arch/m68k/68360/commproc.c
deleted file mode 100644
index 14d7f35cd37b..000000000000
--- a/arch/m68k/68360/commproc.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * General Purpose functions for the global management of the
- * Communication Processor Module.
- *
- * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com>
- * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
- *
- * In addition to the individual control of the communication
- * channels, there are a few functions that globally affect the
- * communication processor.
- *
- * Buffer descriptors must be allocated from the dual ported memory
- * space. The allocator for that is here. When the communication
- * process is reset, we reclaim the memory available. There is
- * currently no deallocator for this memory.
- * The amount of space available is platform dependent. On the
- * MBX, the EPPC software loads additional microcode into the
- * communication processor, and uses some of the DP ram for this
- * purpose. Current, the first 512 bytes and the last 256 bytes of
- * memory are used. Right now I am conservative and only use the
- * memory that can never be used for microcode. If there are
- * applications that require more DP ram, we can expand the boundaries
- * but then we have to be careful of any downloaded microcode.
- *
- */
-
-/*
- * Michael Leslie <mleslie@lineo.com>
- * adapted Dan Malek's ppc8xx drivers to M68360
- *
- */
-
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <asm/irq.h>
-#include <asm/m68360.h>
-#include <asm/commproc.h>
-
-/* #include <asm/page.h> */
-/* #include <asm/pgtable.h> */
-extern void *_quicc_base;
-extern unsigned int system_clock;
-
-
-static uint dp_alloc_base; /* Starting offset in DP ram */
-static uint dp_alloc_top; /* Max offset + 1 */
-
-#if 0
-static void *host_buffer; /* One page of host buffer */
-static void *host_end; /* end + 1 */
-#endif
-
-/* struct cpm360_t *cpmp; */ /* Pointer to comm processor space */
-
-QUICC *pquicc;
-/* QUICC *quicc_dpram; */ /* mleslie - temporary; use extern pquicc elsewhere instead */
-
-
-/* CPM interrupt vector functions. */
-struct cpm_action {
- irq_handler_t handler;
- void *dev_id;
-};
-static struct cpm_action cpm_vecs[CPMVEC_NR];
-static void cpm_interrupt(int irq, void * dev, struct pt_regs * regs);
-static void cpm_error_interrupt(void *);
-
-/* prototypes: */
-void cpm_install_handler(int vec, irq_handler_t handler, void *dev_id);
-void m360_cpm_reset(void);
-
-
-
-
-void __init m360_cpm_reset()
-{
-/* pte_t *pte; */
-
- pquicc = (struct quicc *)(_quicc_base); /* initialized in crt0_rXm.S */
-
- /* Perform a CPM reset. */
- pquicc->cp_cr = (SOFTWARE_RESET | CMD_FLAG);
-
- /* Wait for CPM to become ready (should be 2 clocks). */
- while (pquicc->cp_cr & CMD_FLAG);
-
- /* On the recommendation of the 68360 manual, p. 7-60
- * - Set sdma interrupt service mask to 7
- * - Set sdma arbitration ID to 4
- */
- pquicc->sdma_sdcr = 0x0740;
-
-
- /* Claim the DP memory for our use.
- */
- dp_alloc_base = CPM_DATAONLY_BASE;
- dp_alloc_top = dp_alloc_base + CPM_DATAONLY_SIZE;
-
-
- /* Set the host page for allocation.
- */
- /* host_buffer = host_page_addr; */
- /* host_end = host_page_addr + PAGE_SIZE; */
-
- /* pte = find_pte(&init_mm, host_page_addr); */
- /* pte_val(*pte) |= _PAGE_NO_CACHE; */
- /* flush_tlb_page(current->mm->mmap, host_buffer); */
-
- /* Tell everyone where the comm processor resides.
- */
-/* cpmp = (cpm360_t *)commproc; */
-}
-
-
-/* This is called during init_IRQ. We used to do it above, but this
- * was too early since init_IRQ was not yet called.
- */
-void
-cpm_interrupt_init(void)
-{
- /* Initialize the CPM interrupt controller.
- * NOTE THAT pquicc had better have been initialized!
- * reference: MC68360UM p. 7-377
- */
- pquicc->intr_cicr =
- (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
- (CPM_INTERRUPT << 13) |
- CICR_HP_MASK |
- (CPM_VECTOR_BASE << 5) |
- CICR_SPS;
-
- /* mask all CPM interrupts from reaching the cpu32 core: */
- pquicc->intr_cimr = 0;
-
-
- /* mles - If I understand correctly, the 360 just pops over to the CPM
- * specific vector, obviating the necessity to vector through the IRQ
- * whose priority the CPM is set to. This needs a closer look, though.
- */
-
- /* Set our interrupt handler with the core CPU. */
-/* if (request_irq(CPM_INTERRUPT, cpm_interrupt, 0, "cpm", NULL) != 0) */
-/* panic("Could not allocate CPM IRQ!"); */
-
- /* Install our own error handler.
- */
- /* I think we want to hold off on this one for the moment - mles */
- /* cpm_install_handler(CPMVEC_ERROR, cpm_error_interrupt, NULL); */
-
- /* master CPM interrupt enable */
- /* pquicc->intr_cicr |= CICR_IEN; */ /* no such animal for 360 */
-}
-
-
-
-/* CPM interrupt controller interrupt.
-*/
-static void
-cpm_interrupt(int irq, void * dev, struct pt_regs * regs)
-{
- /* uint vec; */
-
- /* mles: Note that this stuff is currently being performed by
- * M68360_do_irq(int vec, struct pt_regs *fp), in ../ints.c */
-
- /* figure out the vector */
- /* call that vector's handler */
- /* clear the irq's bit in the service register */
-
-#if 0 /* old 860 stuff: */
- /* Get the vector by setting the ACK bit and then reading
- * the register.
- */
- ((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr = 1;
- vec = ((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr;
- vec >>= 11;
-
-
- if (cpm_vecs[vec].handler != 0)
- (*cpm_vecs[vec].handler)(cpm_vecs[vec].dev_id);
- else
- ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr &= ~(1 << vec);
-
- /* After servicing the interrupt, we have to remove the status
- * indicator.
- */
- ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cisr |= (1 << vec);
-#endif
-
-}
-
-/* The CPM can generate the error interrupt when there is a race condition
- * between generating and masking interrupts. All we have to do is ACK it
- * and return. This is a no-op function so we don't need any special
- * tests in the interrupt handler.
- */
-static void
-cpm_error_interrupt(void *dev)
-{
-}
-
-/* Install a CPM interrupt handler.
-*/
-void
-cpm_install_handler(int vec, irq_handler_t handler, void *dev_id)
-{
-
- request_irq(vec, handler, 0, "timer", dev_id);
-
-/* if (cpm_vecs[vec].handler != 0) */
-/* printk(KERN_INFO "CPM interrupt %x replacing %x\n", */
-/* (uint)handler, (uint)cpm_vecs[vec].handler); */
-/* cpm_vecs[vec].handler = handler; */
-/* cpm_vecs[vec].dev_id = dev_id; */
-
- /* ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr |= (1 << vec); */
-/* pquicc->intr_cimr |= (1 << vec); */
-
-}
-
-/* Free a CPM interrupt handler.
-*/
-void
-cpm_free_handler(int vec)
-{
- cpm_vecs[vec].handler = NULL;
- cpm_vecs[vec].dev_id = NULL;
- /* ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr &= ~(1 << vec); */
- pquicc->intr_cimr &= ~(1 << vec);
-}
-
-
-
-
-/* Allocate some memory from the dual ported ram. We may want to
- * enforce alignment restrictions, but right now everyone is a good
- * citizen.
- */
-uint
-m360_cpm_dpalloc(uint size)
-{
- uint retloc;
-
- if ((dp_alloc_base + size) >= dp_alloc_top)
- return(CPM_DP_NOSPACE);
-
- retloc = dp_alloc_base;
- dp_alloc_base += size;
-
- return(retloc);
-}
-
-
-#if 0 /* mleslie - for now these are simply kmalloc'd */
-/* We also own one page of host buffer space for the allocation of
- * UART "fifos" and the like.
- */
-uint
-m360_cpm_hostalloc(uint size)
-{
- uint retloc;
-
- if ((host_buffer + size) >= host_end)
- return(0);
-
- retloc = host_buffer;
- host_buffer += size;
-
- return(retloc);
-}
-#endif
-
-
-/* Set a baud rate generator. This needs lots of work. There are
- * four BRGs, any of which can be wired to any channel.
- * The internal baud rate clock is the system clock divided by 16.
- * This assumes the baudrate is 16x oversampled by the uart.
- */
-/* #define BRG_INT_CLK (((bd_t *)__res)->bi_intfreq * 1000000) */
-#define BRG_INT_CLK system_clock
-#define BRG_UART_CLK (BRG_INT_CLK/16)
-
-void
-m360_cpm_setbrg(uint brg, uint rate)
-{
- volatile uint *bp;
-
- /* This is good enough to get SMCs running.....
- */
- /* bp = (uint *)&cpmp->cp_brgc1; */
- bp = (volatile uint *)(&pquicc->brgc[0].l);
- bp += brg;
- *bp = ((BRG_UART_CLK / rate - 1) << 1) | CPM_BRG_EN;
-}
-
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 4
- * End:
- */
diff --git a/arch/m68k/68360/config.c b/arch/m68k/68360/config.c
deleted file mode 100644
index b65fe4eed38e..000000000000
--- a/arch/m68k/68360/config.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * config.c - non-mmu 68360 platform initialization code
- *
- * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com>
- * Copyright (C) 1993 Hamish Macdonald
- * Copyright (C) 1999 D. Jeff Dionne <jeff@uclinux.org>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-#include <stdarg.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-
-#include <asm/setup.h>
-#include <asm/pgtable.h>
-#include <asm/machdep.h>
-#include <asm/m68360.h>
-
-#ifdef CONFIG_UCQUICC
-#include <asm/bootstd.h>
-#endif
-
-extern void m360_cpm_reset(void);
-
-// Mask to select if the PLL prescaler is enabled.
-#define MCU_PREEN ((unsigned short)(0x0001 << 13))
-
-#if defined(CONFIG_UCQUICC)
-#define OSCILLATOR (unsigned long int)33000000
-#endif
-
-static irq_handler_t timer_interrupt;
-unsigned long int system_clock;
-
-extern QUICC *pquicc;
-
-/* TODO DON"T Hard Code this */
-/* calculate properly using the right PLL and prescaller */
-// unsigned int system_clock = 33000000l;
-extern unsigned long int system_clock; //In kernel setup.c
-
-
-static irqreturn_t hw_tick(int irq, void *dummy)
-{
- /* Reset Timer1 */
- /* TSTAT &= 0; */
-
- pquicc->timer_ter1 = 0x0002; /* clear timer event */
-
- return timer_interrupt(irq, dummy);
-}
-
-static struct irqaction m68360_timer_irq = {
- .name = "timer",
- .flags = IRQF_TIMER,
- .handler = hw_tick,
-};
-
-void hw_timer_init(irq_handler_t handler)
-{
- unsigned char prescaler;
- unsigned short tgcr_save;
-
-#if 0
- /* Restart mode, Enable int, 32KHz, Enable timer */
- TCTL = TCTL_OM | TCTL_IRQEN | TCTL_CLKSOURCE_32KHZ | TCTL_TEN;
- /* Set prescaler (Divide 32KHz by 32)*/
- TPRER = 31;
- /* Set compare register 32Khz / 32 / 10 = 100 */
- TCMP = 10;
-
- request_irq(IRQ_MACHSPEC | 1, timer_routine, 0, "timer", NULL);
-#endif
-
- /* General purpose quicc timers: MC68360UM p7-20 */
-
- /* Set up timer 1 (in [1..4]) to do 100Hz */
- tgcr_save = pquicc->timer_tgcr & 0xfff0;
- pquicc->timer_tgcr = tgcr_save; /* stop and reset timer 1 */
- /* pquicc->timer_tgcr |= 0x4444; */ /* halt timers when FREEZE (ie bdm freeze) */
-
- prescaler = 8;
- pquicc->timer_tmr1 = 0x001a | /* or=1, frr=1, iclk=01b */
- (unsigned short)((prescaler - 1) << 8);
-
- pquicc->timer_tcn1 = 0x0000; /* initial count */
- /* calculate interval for 100Hz based on the _system_clock: */
- pquicc->timer_trr1 = (system_clock/ prescaler) / HZ; /* reference count */
-
- pquicc->timer_ter1 = 0x0003; /* clear timer events */
-
- timer_interrupt = handler;
-
- /* enable timer 1 interrupt in CIMR */
- setup_irq(CPMVEC_TIMER1, &m68360_timer_irq);
-
- /* Start timer 1: */
- tgcr_save = (pquicc->timer_tgcr & 0xfff0) | 0x0001;
- pquicc->timer_tgcr = tgcr_save;
-}
-
-void BSP_reset (void)
-{
- local_irq_disable();
- asm volatile (
- "moveal #_start, %a0;\n"
- "moveb #0, 0xFFFFF300;\n"
- "moveal 0(%a0), %sp;\n"
- "moveal 4(%a0), %a0;\n"
- "jmp (%a0);\n"
- );
-}
-
-unsigned char *scc1_hwaddr;
-static int errno;
-
-#if defined (CONFIG_UCQUICC)
-_bsc0(char *, getserialnum)
-_bsc1(unsigned char *, gethwaddr, int, a)
-_bsc1(char *, getbenv, char *, a)
-#endif
-
-
-void __init config_BSP(char *command, int len)
-{
- unsigned char *p;
-
- m360_cpm_reset();
-
- /* Calculate the real system clock value. */
- {
- unsigned int local_pllcr = (unsigned int)(pquicc->sim_pllcr);
- if( local_pllcr & MCU_PREEN ) // If the prescaler is dividing by 128
- {
- int mf = (int)(pquicc->sim_pllcr & 0x0fff);
- system_clock = (OSCILLATOR / 128) * (mf + 1);
- }
- else
- {
- int mf = (int)(pquicc->sim_pllcr & 0x0fff);
- system_clock = (OSCILLATOR) * (mf + 1);
- }
- }
-
- printk(KERN_INFO "\n68360 QUICC support (C) 2000 Lineo Inc.\n");
-
-#if defined(CONFIG_UCQUICC) && 0
- printk(KERN_INFO "uCquicc serial string [%s]\n",getserialnum());
- p = scc1_hwaddr = gethwaddr(0);
- printk(KERN_INFO "uCquicc hwaddr %pM\n", p);
-
- p = getbenv("APPEND");
- if (p)
- strcpy(p,command);
- else
- command[0] = 0;
-#else
- scc1_hwaddr = "\00\01\02\03\04\05";
-#endif
-
- mach_reset = BSP_reset;
-}
diff --git a/arch/m68k/68360/entry.S b/arch/m68k/68360/entry.S
deleted file mode 100644
index 22eb3022f9ee..000000000000
--- a/arch/m68k/68360/entry.S
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * entry.S - non-mmu 68360 interrupt and exceptions entry points
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2001 SED Systems, a Division of Calian Ltd.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file README.legal in the main directory of this archive
- * for more details.
- *
- * Linux/m68k support by Hamish Macdonald
- * M68360 Port by SED Systems, and Lineo.
- */
-
-#include <linux/linkage.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-#include <asm/errno.h>
-#include <asm/setup.h>
-#include <asm/segment.h>
-#include <asm/traps.h>
-#include <asm/asm-offsets.h>
-#include <asm/entry.h>
-
-.text
-
-.globl system_call
-.globl resume
-.globl ret_from_exception
-.globl ret_from_signal
-.globl sys_call_table
-.globl bad_interrupt
-.globl inthandler
-
-badsys:
- movel #-ENOSYS,%sp@(PT_OFF_D0)
- jra ret_from_exception
-
-do_trace:
- movel #-ENOSYS,%sp@(PT_OFF_D0) /* needed for strace*/
- subql #4,%sp
- SAVE_SWITCH_STACK
- jbsr syscall_trace_enter
- RESTORE_SWITCH_STACK
- addql #4,%sp
- movel %sp@(PT_OFF_ORIG_D0),%d1
- movel #-ENOSYS,%d0
- cmpl #NR_syscalls,%d1
- jcc 1f
- lsl #2,%d1
- lea sys_call_table, %a0
- jbsr %a0@(%d1)
-
-1: movel %d0,%sp@(PT_OFF_D0) /* save the return value */
- subql #4,%sp /* dummy return address */
- SAVE_SWITCH_STACK
- jbsr syscall_trace_leave
-
-ret_from_signal:
- RESTORE_SWITCH_STACK
- addql #4,%sp
- jra ret_from_exception
-
-ENTRY(system_call)
- SAVE_ALL_SYS
-
- /* save top of frame*/
- pea %sp@
- jbsr set_esp0
- addql #4,%sp
-
- movel %sp@(PT_OFF_ORIG_D0),%d0
-
- movel %sp,%d1 /* get thread_info pointer */
- andl #-THREAD_SIZE,%d1
- movel %d1,%a2
- btst #(TIF_SYSCALL_TRACE%8),%a2@(TINFO_FLAGS+(31-TIF_SYSCALL_TRACE)/8)
- jne do_trace
- cmpl #NR_syscalls,%d0
- jcc badsys
- lsl #2,%d0
- lea sys_call_table,%a0
- movel %a0@(%d0), %a0
- jbsr %a0@
- movel %d0,%sp@(PT_OFF_D0) /* save the return value*/
-
-ret_from_exception:
- btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel*/
- jeq Luser_return /* if so, skip resched, signals*/
-
-Lkernel_return:
- RESTORE_ALL
-
-Luser_return:
- /* only allow interrupts when we are really the last one on the*/
- /* kernel stack, otherwise stack overflow can occur during*/
- /* heavy interrupt load*/
- andw #ALLOWINT,%sr
-
- movel %sp,%d1 /* get thread_info pointer */
- andl #-THREAD_SIZE,%d1
- movel %d1,%a2
-1:
- move %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */
- jne Lwork_to_do
- RESTORE_ALL
-
-Lwork_to_do:
- movel %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */
- btst #TIF_NEED_RESCHED,%d1
- jne reschedule
-
-Lsignal_return:
- subql #4,%sp /* dummy return address*/
- SAVE_SWITCH_STACK
- pea %sp@(SWITCH_STACK_SIZE)
- bsrw do_notify_resume
- addql #4,%sp
- RESTORE_SWITCH_STACK
- addql #4,%sp
- jra 1b
-
-/*
- * This is the main interrupt handler, responsible for calling do_IRQ()
- */
-inthandler:
- SAVE_ALL_INT
- movew %sp@(PT_OFF_FORMATVEC), %d0
- and.l #0x3ff, %d0
- lsr.l #0x02, %d0
-
- movel %sp,%sp@-
- movel %d0,%sp@- /* put vector # on stack*/
- jbsr do_IRQ /* process the IRQ */
- addql #8,%sp /* pop parameters off stack*/
- jra ret_from_exception
-
-/*
- * Handler for uninitialized and spurious interrupts.
- */
-bad_interrupt:
- addql #1,irq_err_count
- rte
-
-/*
- * Beware - when entering resume, prev (the current task) is
- * in a0, next (the new task) is in a1, so don't change these
- * registers until their contents are no longer needed.
- */
-ENTRY(resume)
- movel %a0,%d1 /* save prev thread in d1 */
- movew %sr,%a0@(TASK_THREAD+THREAD_SR) /* save sr */
- SAVE_SWITCH_STACK
- movel %sp,%a0@(TASK_THREAD+THREAD_KSP) /* save kernel stack */
- movel %usp,%a3 /* save usp */
- movel %a3,%a0@(TASK_THREAD+THREAD_USP)
-
- movel %a1@(TASK_THREAD+THREAD_USP),%a3 /* restore user stack */
- movel %a3,%usp
- movel %a1@(TASK_THREAD+THREAD_KSP),%sp /* restore new thread stack */
- RESTORE_SWITCH_STACK
- movew %a1@(TASK_THREAD+THREAD_SR),%sr /* restore thread status reg */
- rts
-
diff --git a/arch/m68k/68360/head-ram.S b/arch/m68k/68360/head-ram.S
deleted file mode 100644
index 62bc56f41d57..000000000000
--- a/arch/m68k/68360/head-ram.S
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * head-ram.S - startup code for Motorola 68360
- *
- * Copyright 2001 (C) SED Systems, a Division of Calian Ltd.
- * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S
- * Based on: arch/m68knommu/platform/68360/uCquicc/crt0_rom.S, 2.0.38.1.pre7
- * uClinux Kernel
- * Copyright (C) Michael Leslie <mleslie@lineo.com>
- * Based on: arch/m68knommu/platform/68EZ328/ucsimm/crt0_rom.S
- * Copyright (C) 1998 D. Jeff Dionne <jeff@uclinux.org>,
- *
- */
-#define ASSEMBLY
-
-.global _stext
-.global _start
-
-.global _rambase
-.global _ramvec
-.global _ramstart
-.global _ramend
-
-.global _quicc_base
-.global _periph_base
-
-#define RAMEND (CONFIG_RAMBASE + CONFIG_RAMSIZE)
-#define ROMEND (CONFIG_ROMBASE + CONFIG_ROMSIZE)
-
-#define REGB 0x1000
-#define PEPAR (_dprbase + REGB + 0x0016)
-#define GMR (_dprbase + REGB + 0x0040)
-#define OR0 (_dprbase + REGB + 0x0054)
-#define BR0 (_dprbase + REGB + 0x0050)
-#define OR1 (_dprbase + REGB + 0x0064)
-#define BR1 (_dprbase + REGB + 0x0060)
-#define OR4 (_dprbase + REGB + 0x0094)
-#define BR4 (_dprbase + REGB + 0x0090)
-#define OR6 (_dprbase + REGB + 0x00b4)
-#define BR6 (_dprbase + REGB + 0x00b0)
-#define OR7 (_dprbase + REGB + 0x00c4)
-#define BR7 (_dprbase + REGB + 0x00c0)
-
-#define MCR (_dprbase + REGB + 0x0000)
-#define AVR (_dprbase + REGB + 0x0008)
-
-#define SYPCR (_dprbase + REGB + 0x0022)
-
-#define PLLCR (_dprbase + REGB + 0x0010)
-#define CLKOCR (_dprbase + REGB + 0x000C)
-#define CDVCR (_dprbase + REGB + 0x0014)
-
-#define BKAR (_dprbase + REGB + 0x0030)
-#define BKCR (_dprbase + REGB + 0x0034)
-#define SWIV (_dprbase + REGB + 0x0023)
-#define PICR (_dprbase + REGB + 0x0026)
-#define PITR (_dprbase + REGB + 0x002A)
-
-/* Define for all memory configuration */
-#define MCU_SIM_GMR 0x00000000
-#define SIM_OR_MASK 0x0fffffff
-
-/* Defines for chip select zero - the flash */
-#define SIM_OR0_MASK 0x20000002
-#define SIM_BR0_MASK 0x00000001
-
-
-/* Defines for chip select one - the RAM */
-#define SIM_OR1_MASK 0x10000000
-#define SIM_BR1_MASK 0x00000001
-
-#define MCU_SIM_MBAR_ADRS 0x0003ff00
-#define MCU_SIM_MBAR_BA_MASK 0xfffff000
-#define MCU_SIM_MBAR_AS_MASK 0x00000001
-
-#define MCU_SIM_PEPAR 0x00B4
-
-#define MCU_DISABLE_INTRPTS 0x2700
-#define MCU_SIM_AVR 0x00
-
-#define MCU_SIM_MCR 0x00005cff
-
-#define MCU_SIM_CLKOCR 0x00
-#define MCU_SIM_PLLCR 0x8000
-#define MCU_SIM_CDVCR 0x0000
-
-#define MCU_SIM_SYPCR 0x0000
-#define MCU_SIM_SWIV 0x00
-#define MCU_SIM_PICR 0x0000
-#define MCU_SIM_PITR 0x0000
-
-
-#include <asm/m68360_regs.h>
-
-
-/*
- * By the time this RAM specific code begins to execute, DPRAM
- * and DRAM should already be mapped and accessible.
- */
-
- .text
-_start:
-_stext:
- nop
- ori.w #MCU_DISABLE_INTRPTS, %sr /* disable interrupts: */
- /* We should not need to setup the boot stack the reset should do it. */
- movea.l #RAMEND, %sp /*set up stack at the end of DRAM:*/
-
-set_mbar_register:
- moveq.l #0x07, %d1 /* Setup MBAR */
- movec %d1, %dfc
-
- lea.l MCU_SIM_MBAR_ADRS, %a0
- move.l #_dprbase, %d0
- andi.l #MCU_SIM_MBAR_BA_MASK, %d0
- ori.l #MCU_SIM_MBAR_AS_MASK, %d0
- moves.l %d0, %a0@
-
- moveq.l #0x05, %d1
- movec.l %d1, %dfc
-
- /* Now we can begin to access registers in DPRAM */
-
-set_sim_mcr:
- /* Set Module Configuration Register */
- move.l #MCU_SIM_MCR, MCR
-
- /* to do: Determine cause of reset */
-
- /*
- * configure system clock MC68360 p. 6-40
- * (value +1)*osc/128 = system clock
- */
-set_sim_clock:
- move.w #MCU_SIM_PLLCR, PLLCR
- move.b #MCU_SIM_CLKOCR, CLKOCR
- move.w #MCU_SIM_CDVCR, CDVCR
-
- /* Wait for the PLL to settle */
- move.w #16384, %d0
-pll_settle_wait:
- subi.w #1, %d0
- bne pll_settle_wait
-
- /* Setup the system protection register, and watchdog timer register */
- move.b #MCU_SIM_SWIV, SWIV
- move.w #MCU_SIM_PICR, PICR
- move.w #MCU_SIM_PITR, PITR
- move.w #MCU_SIM_SYPCR, SYPCR
-
- /* Clear DPRAM - system + parameter */
- movea.l #_dprbase, %a0
- movea.l #_dprbase+0x2000, %a1
-
- /* Copy 0 to %a0 until %a0 == %a1 */
-clear_dpram:
- movel #0, %a0@+
- cmpal %a0, %a1
- bhi clear_dpram
-
-configure_memory_controller:
- /* Set up Global Memory Register (GMR) */
- move.l #MCU_SIM_GMR, %d0
- move.l %d0, GMR
-
-configure_chip_select_0:
- move.l #RAMEND, %d0
- subi.l #__ramstart, %d0
- subq.l #0x01, %d0
- eori.l #SIM_OR_MASK, %d0
- ori.l #SIM_OR0_MASK, %d0
- move.l %d0, OR0
-
- move.l #__ramstart, %d0
- ori.l #SIM_BR0_MASK, %d0
- move.l %d0, BR0
-
-configure_chip_select_1:
- move.l #ROMEND, %d0
- subi.l #__rom_start, %d0
- subq.l #0x01, %d0
- eori.l #SIM_OR_MASK, %d0
- ori.l #SIM_OR1_MASK, %d0
- move.l %d0, OR1
-
- move.l #__rom_start, %d0
- ori.l #SIM_BR1_MASK, %d0
- move.l %d0, BR1
-
- move.w #MCU_SIM_PEPAR, PEPAR
-
- /* point to vector table: */
- move.l #_romvec, %a0
- move.l #_ramvec, %a1
-copy_vectors:
- move.l %a0@, %d0
- move.l %d0, %a1@
- move.l %a0@, %a1@
- addq.l #0x04, %a0
- addq.l #0x04, %a1
- cmp.l #_start, %a0
- blt copy_vectors
-
- move.l #_ramvec, %a1
- movec %a1, %vbr
-
-
- /* Copy data segment from ROM to RAM */
- moveal #_stext, %a0
- moveal #_sdata, %a1
- moveal #_edata, %a2
-
- /* Copy %a0 to %a1 until %a1 == %a2 */
-LD1:
- move.l %a0@, %d0
- addq.l #0x04, %a0
- move.l %d0, %a1@
- addq.l #0x04, %a1
- cmp.l #_edata, %a1
- blt LD1
-
- moveal #__bss_start, %a0
- moveal #__bss_stop, %a1
-
- /* Copy 0 to %a0 until %a0 == %a1 */
-L1:
- movel #0, %a0@+
- cmpal %a0, %a1
- bhi L1
-
-load_quicc:
- move.l #_dprbase, _quicc_base
-
-store_ram_size:
- /* Set ram size information */
- move.l #_sdata, _rambase
- move.l #__bss_stop, _ramstart
- move.l #RAMEND, %d0
- sub.l #0x1000, %d0 /* Reserve 4K for stack space.*/
- move.l %d0, _ramend /* Different from RAMEND.*/
-
- pea 0
- pea env
- pea %sp@(4)
- pea 0
-
- lea init_thread_union, %a2
- lea 0x2000(%a2), %sp
-
-lp:
- jsr start_kernel
-
-_exit:
- jmp _exit
-
-
- .data
- .align 4
-env:
- .long 0
-_quicc_base:
- .long 0
-_periph_base:
- .long 0
-_ramvec:
- .long 0
-_rambase:
- .long 0
-_ramstart:
- .long 0
-_ramend:
- .long 0
-_dprbase:
- .long 0xffffe000
-
- .text
-
- /*
- * These are the exception vectors at boot up, they are copied into RAM
- * and then overwritten as needed.
- */
-
-.section ".data..initvect","awx"
- .long RAMEND /* Reset: Initial Stack Pointer - 0. */
- .long _start /* Reset: Initial Program Counter - 1. */
- .long buserr /* Bus Error - 2. */
- .long trap /* Address Error - 3. */
- .long trap /* Illegal Instruction - 4. */
- .long trap /* Divide by zero - 5. */
- .long trap /* CHK, CHK2 Instructions - 6. */
- .long trap /* TRAPcc, TRAPV Instructions - 7. */
- .long trap /* Privilege Violation - 8. */
- .long trap /* Trace - 9. */
- .long trap /* Line 1010 Emulator - 10. */
- .long trap /* Line 1111 Emualtor - 11. */
- .long trap /* Harware Breakpoint - 12. */
- .long trap /* (Reserved for Coprocessor Protocol Violation)- 13. */
- .long trap /* Format Error - 14. */
- .long trap /* Uninitialized Interrupt - 15. */
- .long trap /* (Unassigned, Reserver) - 16. */
- .long trap /* (Unassigned, Reserver) - 17. */
- .long trap /* (Unassigned, Reserver) - 18. */
- .long trap /* (Unassigned, Reserver) - 19. */
- .long trap /* (Unassigned, Reserver) - 20. */
- .long trap /* (Unassigned, Reserver) - 21. */
- .long trap /* (Unassigned, Reserver) - 22. */
- .long trap /* (Unassigned, Reserver) - 23. */
- .long trap /* Spurious Interrupt - 24. */
- .long trap /* Level 1 Interrupt Autovector - 25. */
- .long trap /* Level 2 Interrupt Autovector - 26. */
- .long trap /* Level 3 Interrupt Autovector - 27. */
- .long trap /* Level 4 Interrupt Autovector - 28. */
- .long trap /* Level 5 Interrupt Autovector - 29. */
- .long trap /* Level 6 Interrupt Autovector - 30. */
- .long trap /* Level 7 Interrupt Autovector - 31. */
- .long system_call /* Trap Instruction Vectors 0 - 32. */
- .long trap /* Trap Instruction Vectors 1 - 33. */
- .long trap /* Trap Instruction Vectors 2 - 34. */
- .long trap /* Trap Instruction Vectors 3 - 35. */
- .long trap /* Trap Instruction Vectors 4 - 36. */
- .long trap /* Trap Instruction Vectors 5 - 37. */
- .long trap /* Trap Instruction Vectors 6 - 38. */
- .long trap /* Trap Instruction Vectors 7 - 39. */
- .long trap /* Trap Instruction Vectors 8 - 40. */
- .long trap /* Trap Instruction Vectors 9 - 41. */
- .long trap /* Trap Instruction Vectors 10 - 42. */
- .long trap /* Trap Instruction Vectors 11 - 43. */
- .long trap /* Trap Instruction Vectors 12 - 44. */
- .long trap /* Trap Instruction Vectors 13 - 45. */
- .long trap /* Trap Instruction Vectors 14 - 46. */
- .long trap /* Trap Instruction Vectors 15 - 47. */
- .long 0 /* (Reserved for Coprocessor) - 48. */
- .long 0 /* (Reserved for Coprocessor) - 49. */
- .long 0 /* (Reserved for Coprocessor) - 50. */
- .long 0 /* (Reserved for Coprocessor) - 51. */
- .long 0 /* (Reserved for Coprocessor) - 52. */
- .long 0 /* (Reserved for Coprocessor) - 53. */
- .long 0 /* (Reserved for Coprocessor) - 54. */
- .long 0 /* (Reserved for Coprocessor) - 55. */
- .long 0 /* (Reserved for Coprocessor) - 56. */
- .long 0 /* (Reserved for Coprocessor) - 57. */
- .long 0 /* (Reserved for Coprocessor) - 58. */
- .long 0 /* (Unassigned, Reserved) - 59. */
- .long 0 /* (Unassigned, Reserved) - 60. */
- .long 0 /* (Unassigned, Reserved) - 61. */
- .long 0 /* (Unassigned, Reserved) - 62. */
- .long 0 /* (Unassigned, Reserved) - 63. */
- /* The assignment of these vectors to the CPM is */
- /* dependent on the configuration of the CPM vba */
- /* fields. */
- .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */
- .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */
- .long 0 /* (User-Defined Vectors 3) CPM Parallel IO PC10- 66. */
- .long 0 /* (User-Defined Vectors 4) CPM SMC2 / PIP - 67. */
- .long 0 /* (User-Defined Vectors 5) CPM SMC1 - 68. */
- .long 0 /* (User-Defined Vectors 6) CPM SPI - 69. */
- .long 0 /* (User-Defined Vectors 7) CPM Parallel IO PC9 - 70. */
- .long 0 /* (User-Defined Vectors 8) CPM Timer 4 - 71. */
- .long 0 /* (User-Defined Vectors 9) CPM Reserved - 72. */
- .long 0 /* (User-Defined Vectors 10) CPM Parallel IO PC8- 73. */
- .long 0 /* (User-Defined Vectors 11) CPM Parallel IO PC7- 74. */
- .long 0 /* (User-Defined Vectors 12) CPM Parallel IO PC6- 75. */
- .long 0 /* (User-Defined Vectors 13) CPM Timer 3 - 76. */
- .long 0 /* (User-Defined Vectors 14) CPM Reserved - 77. */
- .long 0 /* (User-Defined Vectors 15) CPM Parallel IO PC5- 78. */
- .long 0 /* (User-Defined Vectors 16) CPM Parallel IO PC4- 79. */
- .long 0 /* (User-Defined Vectors 17) CPM Reserved - 80. */
- .long 0 /* (User-Defined Vectors 18) CPM RISC Timer Tbl - 81. */
- .long 0 /* (User-Defined Vectors 19) CPM Timer 2 - 82. */
- .long 0 /* (User-Defined Vectors 21) CPM Reserved - 83. */
- .long 0 /* (User-Defined Vectors 22) CPM IDMA2 - 84. */
- .long 0 /* (User-Defined Vectors 23) CPM IDMA1 - 85. */
- .long 0 /* (User-Defined Vectors 24) CPM SDMA Bus Err - 86. */
- .long 0 /* (User-Defined Vectors 25) CPM Parallel IO PC3- 87. */
- .long 0 /* (User-Defined Vectors 26) CPM Parallel IO PC2- 88. */
- .long 0 /* (User-Defined Vectors 27) CPM Timer 1 - 89. */
- .long 0 /* (User-Defined Vectors 28) CPM Parallel IO PC1- 90. */
- .long 0 /* (User-Defined Vectors 29) CPM SCC 4 - 91. */
- .long 0 /* (User-Defined Vectors 30) CPM SCC 3 - 92. */
- .long 0 /* (User-Defined Vectors 31) CPM SCC 2 - 93. */
- .long 0 /* (User-Defined Vectors 32) CPM SCC 1 - 94. */
- .long 0 /* (User-Defined Vectors 33) CPM Parallel IO PC0- 95. */
- /* I don't think anything uses the vectors after here. */
- .long 0 /* (User-Defined Vectors 34) - 96. */
- .long 0,0,0,0,0 /* (User-Defined Vectors 35 - 39). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 40 - 49). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 50 - 59). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 60 - 69). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 70 - 79). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 80 - 89). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 90 - 99). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 100 - 109). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 110 - 119). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 120 - 129). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 130 - 139). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 140 - 149). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 150 - 159). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 160 - 169). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 170 - 179). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 180 - 189). */
- .long 0,0,0 /* (User-Defined Vectors 190 - 192). */
-.text
-ignore: rte
diff --git a/arch/m68k/68360/head-rom.S b/arch/m68k/68360/head-rom.S
deleted file mode 100644
index b3a7e40f35e1..000000000000
--- a/arch/m68k/68360/head-rom.S
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * head-rom.S - startup code for Motorola 68360
- *
- * Copyright (C) SED Systems, a Division of Calian Ltd.
- * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S
- * Based on: arch/m68knommu/platform/68360/uCquicc/crt0_rom.S, 2.0.38.1.pre7
- * uClinux Kernel
- * Copyright (C) Michael Leslie <mleslie@lineo.com>
- * Based on: arch/m68knommu/platform/68EZ328/ucsimm/crt0_rom.S
- * Copyright (C) 1998 D. Jeff Dionne <jeff@uclinux.org>,
- *
- */
-
-.global _stext
-.global __bss_start
-.global _start
-
-.global _rambase
-.global _ramvec
-.global _ramstart
-.global _ramend
-
-.global _quicc_base
-.global _periph_base
-
-#define RAMEND (CONFIG_RAMBASE + CONFIG_RAMSIZE)
-
-#define REGB 0x1000
-#define PEPAR (_dprbase + REGB + 0x0016)
-#define GMR (_dprbase + REGB + 0x0040)
-#define OR0 (_dprbase + REGB + 0x0054)
-#define BR0 (_dprbase + REGB + 0x0050)
-
-#define OR1 (_dprbase + REGB + 0x0064)
-#define BR1 (_dprbase + REGB + 0x0060)
-
-#define OR2 (_dprbase + REGB + 0x0074)
-#define BR2 (_dprbase + REGB + 0x0070)
-
-#define OR3 (_dprbase + REGB + 0x0084)
-#define BR3 (_dprbase + REGB + 0x0080)
-
-#define OR4 (_dprbase + REGB + 0x0094)
-#define BR4 (_dprbase + REGB + 0x0090)
-
-#define OR5 (_dprbase + REGB + 0x00A4)
-#define BR5 (_dprbase + REGB + 0x00A0)
-
-#define OR6 (_dprbase + REGB + 0x00b4)
-#define BR6 (_dprbase + REGB + 0x00b0)
-
-#define OR7 (_dprbase + REGB + 0x00c4)
-#define BR7 (_dprbase + REGB + 0x00c0)
-
-#define MCR (_dprbase + REGB + 0x0000)
-#define AVR (_dprbase + REGB + 0x0008)
-
-#define SYPCR (_dprbase + REGB + 0x0022)
-
-#define PLLCR (_dprbase + REGB + 0x0010)
-#define CLKOCR (_dprbase + REGB + 0x000C)
-#define CDVCR (_dprbase + REGB + 0x0014)
-
-#define BKAR (_dprbase + REGB + 0x0030)
-#define BKCR (_dprbase + REGB + 0x0034)
-#define SWIV (_dprbase + REGB + 0x0023)
-#define PICR (_dprbase + REGB + 0x0026)
-#define PITR (_dprbase + REGB + 0x002A)
-
-/* Define for all memory configuration */
-#define MCU_SIM_GMR 0x00000000
-#define SIM_OR_MASK 0x0fffffff
-
-/* Defines for chip select zero - the flash */
-#define SIM_OR0_MASK 0x20000000
-#define SIM_BR0_MASK 0x00000001
-
-/* Defines for chip select one - the RAM */
-#define SIM_OR1_MASK 0x10000000
-#define SIM_BR1_MASK 0x00000001
-
-#define MCU_SIM_MBAR_ADRS 0x0003ff00
-#define MCU_SIM_MBAR_BA_MASK 0xfffff000
-#define MCU_SIM_MBAR_AS_MASK 0x00000001
-
-#define MCU_SIM_PEPAR 0x00B4
-
-#define MCU_DISABLE_INTRPTS 0x2700
-#define MCU_SIM_AVR 0x00
-
-#define MCU_SIM_MCR 0x00005cff
-
-#define MCU_SIM_CLKOCR 0x00
-#define MCU_SIM_PLLCR 0x8000
-#define MCU_SIM_CDVCR 0x0000
-
-#define MCU_SIM_SYPCR 0x0000
-#define MCU_SIM_SWIV 0x00
-#define MCU_SIM_PICR 0x0000
-#define MCU_SIM_PITR 0x0000
-
-
-#include <asm/m68360_regs.h>
-
-
-/*
- * By the time this RAM specific code begins to execute, DPRAM
- * and DRAM should already be mapped and accessible.
- */
-
- .text
-_start:
-_stext:
- nop
- ori.w #MCU_DISABLE_INTRPTS, %sr /* disable interrupts: */
- /* We should not need to setup the boot stack the reset should do it. */
- movea.l #RAMEND, %sp /* set up stack at the end of DRAM:*/
-
-
-set_mbar_register:
- moveq.l #0x07, %d1 /* Setup MBAR */
- movec %d1, %dfc
-
- lea.l MCU_SIM_MBAR_ADRS, %a0
- move.l #_dprbase, %d0
- andi.l #MCU_SIM_MBAR_BA_MASK, %d0
- ori.l #MCU_SIM_MBAR_AS_MASK, %d0
- moves.l %d0, %a0@
-
- moveq.l #0x05, %d1
- movec.l %d1, %dfc
-
- /* Now we can begin to access registers in DPRAM */
-
-set_sim_mcr:
- /* Set Module Configuration Register */
- move.l #MCU_SIM_MCR, MCR
-
- /* to do: Determine cause of reset */
-
- /*
- * configure system clock MC68360 p. 6-40
- * (value +1)*osc/128 = system clock
- * or
- * (value + 1)*osc = system clock
- * You do not need to divide the oscillator by 128 unless you want to.
- */
-set_sim_clock:
- move.w #MCU_SIM_PLLCR, PLLCR
- move.b #MCU_SIM_CLKOCR, CLKOCR
- move.w #MCU_SIM_CDVCR, CDVCR
-
- /* Wait for the PLL to settle */
- move.w #16384, %d0
-pll_settle_wait:
- subi.w #1, %d0
- bne pll_settle_wait
-
- /* Setup the system protection register, and watchdog timer register */
- move.b #MCU_SIM_SWIV, SWIV
- move.w #MCU_SIM_PICR, PICR
- move.w #MCU_SIM_PITR, PITR
- move.w #MCU_SIM_SYPCR, SYPCR
-
- /* Clear DPRAM - system + parameter */
- movea.l #_dprbase, %a0
- movea.l #_dprbase+0x2000, %a1
-
- /* Copy 0 to %a0 until %a0 == %a1 */
-clear_dpram:
- movel #0, %a0@+
- cmpal %a0, %a1
- bhi clear_dpram
-
-configure_memory_controller:
- /* Set up Global Memory Register (GMR) */
- move.l #MCU_SIM_GMR, %d0
- move.l %d0, GMR
-
-configure_chip_select_0:
- move.l #0x00400000, %d0
- subq.l #0x01, %d0
- eori.l #SIM_OR_MASK, %d0
- ori.l #SIM_OR0_MASK, %d0
- move.l %d0, OR0
-
- move.l #__rom_start, %d0
- ori.l #SIM_BR0_MASK, %d0
- move.l %d0, BR0
-
- move.l #0x0, BR1
- move.l #0x0, BR2
- move.l #0x0, BR3
- move.l #0x0, BR4
- move.l #0x0, BR5
- move.l #0x0, BR6
- move.l #0x0, BR7
-
- move.w #MCU_SIM_PEPAR, PEPAR
-
- /* point to vector table: */
- move.l #_romvec, %a0
- move.l #_ramvec, %a1
-copy_vectors:
- move.l %a0@, %d0
- move.l %d0, %a1@
- move.l %a0@, %a1@
- addq.l #0x04, %a0
- addq.l #0x04, %a1
- cmp.l #_start, %a0
- blt copy_vectors
-
- move.l #_ramvec, %a1
- movec %a1, %vbr
-
-
- /* Copy data segment from ROM to RAM */
- moveal #_etext, %a0
- moveal #_sdata, %a1
- moveal #_edata, %a2
-
- /* Copy %a0 to %a1 until %a1 == %a2 */
-LD1:
- move.l %a0@, %d0
- addq.l #0x04, %a0
- move.l %d0, %a1@
- addq.l #0x04, %a1
- cmp.l #_edata, %a1
- blt LD1
-
- moveal #__bss_start, %a0
- moveal #__bss_stop, %a1
-
- /* Copy 0 to %a0 until %a0 == %a1 */
-L1:
- movel #0, %a0@+
- cmpal %a0, %a1
- bhi L1
-
-load_quicc:
- move.l #_dprbase, _quicc_base
-
-store_ram_size:
- /* Set ram size information */
- move.l #_sdata, _rambase
- move.l #__bss_stop, _ramstart
- move.l #RAMEND, %d0
- sub.l #0x1000, %d0 /* Reserve 4K for stack space.*/
- move.l %d0, _ramend /* Different from RAMEND.*/
-
- pea 0
- pea env
- pea %sp@(4)
- pea 0
-
- lea init_thread_union, %a2
- lea 0x2000(%a2), %sp
-
-lp:
- jsr start_kernel
-
-_exit:
- jmp _exit
-
-
- .data
- .align 4
-env:
- .long 0
-_quicc_base:
- .long 0
-_periph_base:
- .long 0
-_ramvec:
- .long 0
-_rambase:
- .long 0
-_ramstart:
- .long 0
-_ramend:
- .long 0
-_dprbase:
- .long 0xffffe000
-
-
- .text
-
- /*
- * These are the exception vectors at boot up, they are copied into RAM
- * and then overwritten as needed.
- */
-
-.section ".data..initvect","awx"
- .long RAMEND /* Reset: Initial Stack Pointer - 0. */
- .long _start /* Reset: Initial Program Counter - 1. */
- .long buserr /* Bus Error - 2. */
- .long trap /* Address Error - 3. */
- .long trap /* Illegal Instruction - 4. */
- .long trap /* Divide by zero - 5. */
- .long trap /* CHK, CHK2 Instructions - 6. */
- .long trap /* TRAPcc, TRAPV Instructions - 7. */
- .long trap /* Privilege Violation - 8. */
- .long trap /* Trace - 9. */
- .long trap /* Line 1010 Emulator - 10. */
- .long trap /* Line 1111 Emualtor - 11. */
- .long trap /* Harware Breakpoint - 12. */
- .long trap /* (Reserved for Coprocessor Protocol Violation)- 13. */
- .long trap /* Format Error - 14. */
- .long trap /* Uninitialized Interrupt - 15. */
- .long trap /* (Unassigned, Reserver) - 16. */
- .long trap /* (Unassigned, Reserver) - 17. */
- .long trap /* (Unassigned, Reserver) - 18. */
- .long trap /* (Unassigned, Reserver) - 19. */
- .long trap /* (Unassigned, Reserver) - 20. */
- .long trap /* (Unassigned, Reserver) - 21. */
- .long trap /* (Unassigned, Reserver) - 22. */
- .long trap /* (Unassigned, Reserver) - 23. */
- .long trap /* Spurious Interrupt - 24. */
- .long trap /* Level 1 Interrupt Autovector - 25. */
- .long trap /* Level 2 Interrupt Autovector - 26. */
- .long trap /* Level 3 Interrupt Autovector - 27. */
- .long trap /* Level 4 Interrupt Autovector - 28. */
- .long trap /* Level 5 Interrupt Autovector - 29. */
- .long trap /* Level 6 Interrupt Autovector - 30. */
- .long trap /* Level 7 Interrupt Autovector - 31. */
- .long system_call /* Trap Instruction Vectors 0 - 32. */
- .long trap /* Trap Instruction Vectors 1 - 33. */
- .long trap /* Trap Instruction Vectors 2 - 34. */
- .long trap /* Trap Instruction Vectors 3 - 35. */
- .long trap /* Trap Instruction Vectors 4 - 36. */
- .long trap /* Trap Instruction Vectors 5 - 37. */
- .long trap /* Trap Instruction Vectors 6 - 38. */
- .long trap /* Trap Instruction Vectors 7 - 39. */
- .long trap /* Trap Instruction Vectors 8 - 40. */
- .long trap /* Trap Instruction Vectors 9 - 41. */
- .long trap /* Trap Instruction Vectors 10 - 42. */
- .long trap /* Trap Instruction Vectors 11 - 43. */
- .long trap /* Trap Instruction Vectors 12 - 44. */
- .long trap /* Trap Instruction Vectors 13 - 45. */
- .long trap /* Trap Instruction Vectors 14 - 46. */
- .long trap /* Trap Instruction Vectors 15 - 47. */
- .long 0 /* (Reserved for Coprocessor) - 48. */
- .long 0 /* (Reserved for Coprocessor) - 49. */
- .long 0 /* (Reserved for Coprocessor) - 50. */
- .long 0 /* (Reserved for Coprocessor) - 51. */
- .long 0 /* (Reserved for Coprocessor) - 52. */
- .long 0 /* (Reserved for Coprocessor) - 53. */
- .long 0 /* (Reserved for Coprocessor) - 54. */
- .long 0 /* (Reserved for Coprocessor) - 55. */
- .long 0 /* (Reserved for Coprocessor) - 56. */
- .long 0 /* (Reserved for Coprocessor) - 57. */
- .long 0 /* (Reserved for Coprocessor) - 58. */
- .long 0 /* (Unassigned, Reserved) - 59. */
- .long 0 /* (Unassigned, Reserved) - 60. */
- .long 0 /* (Unassigned, Reserved) - 61. */
- .long 0 /* (Unassigned, Reserved) - 62. */
- .long 0 /* (Unassigned, Reserved) - 63. */
- /* The assignment of these vectors to the CPM is */
- /* dependent on the configuration of the CPM vba */
- /* fields. */
- .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */
- .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */
- .long 0 /* (User-Defined Vectors 3) CPM Parallel IO PC10- 66. */
- .long 0 /* (User-Defined Vectors 4) CPM SMC2 / PIP - 67. */
- .long 0 /* (User-Defined Vectors 5) CPM SMC1 - 68. */
- .long 0 /* (User-Defined Vectors 6) CPM SPI - 69. */
- .long 0 /* (User-Defined Vectors 7) CPM Parallel IO PC9 - 70. */
- .long 0 /* (User-Defined Vectors 8) CPM Timer 4 - 71. */
- .long 0 /* (User-Defined Vectors 9) CPM Reserved - 72. */
- .long 0 /* (User-Defined Vectors 10) CPM Parallel IO PC8- 73. */
- .long 0 /* (User-Defined Vectors 11) CPM Parallel IO PC7- 74. */
- .long 0 /* (User-Defined Vectors 12) CPM Parallel IO PC6- 75. */
- .long 0 /* (User-Defined Vectors 13) CPM Timer 3 - 76. */
- .long 0 /* (User-Defined Vectors 14) CPM Reserved - 77. */
- .long 0 /* (User-Defined Vectors 15) CPM Parallel IO PC5- 78. */
- .long 0 /* (User-Defined Vectors 16) CPM Parallel IO PC4- 79. */
- .long 0 /* (User-Defined Vectors 17) CPM Reserved - 80. */
- .long 0 /* (User-Defined Vectors 18) CPM RISC Timer Tbl - 81. */
- .long 0 /* (User-Defined Vectors 19) CPM Timer 2 - 82. */
- .long 0 /* (User-Defined Vectors 21) CPM Reserved - 83. */
- .long 0 /* (User-Defined Vectors 22) CPM IDMA2 - 84. */
- .long 0 /* (User-Defined Vectors 23) CPM IDMA1 - 85. */
- .long 0 /* (User-Defined Vectors 24) CPM SDMA Bus Err - 86. */
- .long 0 /* (User-Defined Vectors 25) CPM Parallel IO PC3- 87. */
- .long 0 /* (User-Defined Vectors 26) CPM Parallel IO PC2- 88. */
- .long 0 /* (User-Defined Vectors 27) CPM Timer 1 - 89. */
- .long 0 /* (User-Defined Vectors 28) CPM Parallel IO PC1- 90. */
- .long 0 /* (User-Defined Vectors 29) CPM SCC 4 - 91. */
- .long 0 /* (User-Defined Vectors 30) CPM SCC 3 - 92. */
- .long 0 /* (User-Defined Vectors 31) CPM SCC 2 - 93. */
- .long 0 /* (User-Defined Vectors 32) CPM SCC 1 - 94. */
- .long 0 /* (User-Defined Vectors 33) CPM Parallel IO PC0- 95. */
- /* I don't think anything uses the vectors after here. */
- .long 0 /* (User-Defined Vectors 34) - 96. */
- .long 0,0,0,0,0 /* (User-Defined Vectors 35 - 39). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 40 - 49). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 50 - 59). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 60 - 69). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 70 - 79). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 80 - 89). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 90 - 99). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 100 - 109). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 110 - 119). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 120 - 129). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 130 - 139). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 140 - 149). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 150 - 159). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 160 - 169). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 170 - 179). */
- .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 180 - 189). */
- .long 0,0,0 /* (User-Defined Vectors 190 - 192). */
-.text
-ignore: rte
diff --git a/arch/m68k/68360/ints.c b/arch/m68k/68360/ints.c
deleted file mode 100644
index 2360fc046681..000000000000
--- a/arch/m68k/68360/ints.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * ints.c - first level interrupt handlers
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com>
- * Copyright (c) 1996 Roman Zippel
- * Copyright (c) 1999 D. Jeff Dionne <jeff@uclinux.org>
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <asm/traps.h>
-#include <asm/machdep.h>
-#include <asm/m68360.h>
-
-/* from quicc/commproc.c: */
-extern QUICC *pquicc;
-extern void cpm_interrupt_init(void);
-
-#define INTERNAL_IRQS (96)
-
-/* assembler routines */
-asmlinkage void system_call(void);
-asmlinkage void buserr(void);
-asmlinkage void trap(void);
-asmlinkage void bad_interrupt(void);
-asmlinkage void inthandler(void);
-
-static void intc_irq_unmask(struct irq_data *d)
-{
- pquicc->intr_cimr |= (1 << d->irq);
-}
-
-static void intc_irq_mask(struct irq_data *d)
-{
- pquicc->intr_cimr &= ~(1 << d->irq);
-}
-
-static void intc_irq_ack(struct irq_data *d)
-{
- pquicc->intr_cisr = (1 << d->irq);
-}
-
-static struct irq_chip intc_irq_chip = {
- .name = "M68K-INTC",
- .irq_mask = intc_irq_mask,
- .irq_unmask = intc_irq_unmask,
- .irq_ack = intc_irq_ack,
-};
-
-/*
- * This function should be called during kernel startup to initialize
- * the vector table.
- */
-void __init trap_init(void)
-{
- int vba = (CPM_VECTOR_BASE<<4);
-
- /* set up the vectors */
- _ramvec[2] = buserr;
- _ramvec[3] = trap;
- _ramvec[4] = trap;
- _ramvec[5] = trap;
- _ramvec[6] = trap;
- _ramvec[7] = trap;
- _ramvec[8] = trap;
- _ramvec[9] = trap;
- _ramvec[10] = trap;
- _ramvec[11] = trap;
- _ramvec[12] = trap;
- _ramvec[13] = trap;
- _ramvec[14] = trap;
- _ramvec[15] = trap;
-
- _ramvec[32] = system_call;
- _ramvec[33] = trap;
-
- cpm_interrupt_init();
-
- /* set up CICR for vector base address and irq level */
- /* irl = 4, hp = 1f - see MC68360UM p 7-377 */
- pquicc->intr_cicr = 0x00e49f00 | vba;
-
- /* CPM interrupt vectors: (p 7-376) */
- _ramvec[vba+CPMVEC_ERROR] = bad_interrupt; /* Error */
- _ramvec[vba+CPMVEC_PIO_PC11] = inthandler; /* pio - pc11 */
- _ramvec[vba+CPMVEC_PIO_PC10] = inthandler; /* pio - pc10 */
- _ramvec[vba+CPMVEC_SMC2] = inthandler; /* smc2/pip */
- _ramvec[vba+CPMVEC_SMC1] = inthandler; /* smc1 */
- _ramvec[vba+CPMVEC_SPI] = inthandler; /* spi */
- _ramvec[vba+CPMVEC_PIO_PC9] = inthandler; /* pio - pc9 */
- _ramvec[vba+CPMVEC_TIMER4] = inthandler; /* timer 4 */
- _ramvec[vba+CPMVEC_RESERVED1] = inthandler; /* reserved */
- _ramvec[vba+CPMVEC_PIO_PC8] = inthandler; /* pio - pc8 */
- _ramvec[vba+CPMVEC_PIO_PC7] = inthandler; /* pio - pc7 */
- _ramvec[vba+CPMVEC_PIO_PC6] = inthandler; /* pio - pc6 */
- _ramvec[vba+CPMVEC_TIMER3] = inthandler; /* timer 3 */
- _ramvec[vba+CPMVEC_PIO_PC5] = inthandler; /* pio - pc5 */
- _ramvec[vba+CPMVEC_PIO_PC4] = inthandler; /* pio - pc4 */
- _ramvec[vba+CPMVEC_RESERVED2] = inthandler; /* reserved */
- _ramvec[vba+CPMVEC_RISCTIMER] = inthandler; /* timer table */
- _ramvec[vba+CPMVEC_TIMER2] = inthandler; /* timer 2 */
- _ramvec[vba+CPMVEC_RESERVED3] = inthandler; /* reserved */
- _ramvec[vba+CPMVEC_IDMA2] = inthandler; /* idma 2 */
- _ramvec[vba+CPMVEC_IDMA1] = inthandler; /* idma 1 */
- _ramvec[vba+CPMVEC_SDMA_CB_ERR] = inthandler; /* sdma channel bus error */
- _ramvec[vba+CPMVEC_PIO_PC3] = inthandler; /* pio - pc3 */
- _ramvec[vba+CPMVEC_PIO_PC2] = inthandler; /* pio - pc2 */
- /* _ramvec[vba+CPMVEC_TIMER1] = cpm_isr_timer1; */ /* timer 1 */
- _ramvec[vba+CPMVEC_TIMER1] = inthandler; /* timer 1 */
- _ramvec[vba+CPMVEC_PIO_PC1] = inthandler; /* pio - pc1 */
- _ramvec[vba+CPMVEC_SCC4] = inthandler; /* scc 4 */
- _ramvec[vba+CPMVEC_SCC3] = inthandler; /* scc 3 */
- _ramvec[vba+CPMVEC_SCC2] = inthandler; /* scc 2 */
- _ramvec[vba+CPMVEC_SCC1] = inthandler; /* scc 1 */
- _ramvec[vba+CPMVEC_PIO_PC0] = inthandler; /* pio - pc0 */
-
-
- /* turn off all CPM interrupts */
- pquicc->intr_cimr = 0x00000000;
-}
-
-void init_IRQ(void)
-{
- int i;
-
- for (i = 0; (i < NR_IRQS); i++) {
- irq_set_chip(i, &intc_irq_chip);
- irq_set_handler(i, handle_level_irq);
- }
-}
-
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index c496d48a8c8d..0dfcf1281e9c 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -114,13 +114,6 @@ config M68VZ328
help
Motorola 68VZ328 processor support.
-config M68360
- bool "MC68360"
- depends on !MMU
- select MCPU32
- help
- Motorola 68360 processor support.
-
endif # M68KCLASSIC
if COLDFIRE
diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug
index 64776d7ac199..50a67d08aec4 100644
--- a/arch/m68k/Kconfig.debug
+++ b/arch/m68k/Kconfig.debug
@@ -12,7 +12,7 @@ config BOOTPARAM_STRING
config EARLY_PRINTK
bool "Early printk"
- depends on !(SUN3 || M68360 || M68000 || COLDFIRE)
+ depends on !(SUN3 || M68000 || COLDFIRE)
help
Write kernel log output directly to a serial port.
Where implemented, output goes to the framebuffer as well.
diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine
index 61dc643c0b05..2a5c7abb2896 100644
--- a/arch/m68k/Kconfig.machine
+++ b/arch/m68k/Kconfig.machine
@@ -187,12 +187,6 @@ config MEMORY_RESERVE
help
Reserve certain memory regions on 68x328 based boards.
-config UCQUICC
- bool "Lineo uCquicc board support"
- depends on M68360
- help
- Support for the Lineo uCquicc board.
-
config ARN5206
bool "Arnewsh 5206 board support"
depends on M5206
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index 0b29dcfef69f..f0dd9fc84002 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -39,7 +39,6 @@ cpuflags-$(CONFIG_M68040) := -m68040
endif
cpuflags-$(CONFIG_M68030) :=
cpuflags-$(CONFIG_M68020) :=
-cpuflags-$(CONFIG_M68360) := -m68332
cpuflags-$(CONFIG_M68000) := -m68000
cpuflags-$(CONFIG_M5441x) := $(call cc-option,-mcpu=54455,-mcfv4e)
cpuflags-$(CONFIG_M54xx) := $(call cc-option,-mcpu=5475,-m5200)
@@ -92,7 +91,6 @@ endif
#
head-y := arch/m68k/kernel/head.o
head-$(CONFIG_SUN3) := arch/m68k/kernel/sun3-head.o
-head-$(CONFIG_M68360) := arch/m68k/68360/head.o
head-$(CONFIG_M68000) := arch/m68k/68000/head.o
head-$(CONFIG_COLDFIRE) := arch/m68k/coldfire/head.o
@@ -114,7 +112,6 @@ core-$(CONFIG_NATFEAT) += arch/m68k/emu/
core-$(CONFIG_M68040) += arch/m68k/fpsp040/
core-$(CONFIG_M68060) += arch/m68k/ifpsp060/
core-$(CONFIG_M68KFPU_EMU) += arch/m68k/math-emu/
-core-$(CONFIG_M68360) += arch/m68k/68360/
core-$(CONFIG_M68000) += arch/m68k/68000/
core-$(CONFIG_COLDFIRE) += arch/m68k/coldfire/
diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index 71ea4c02795d..a0fc0c192427 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -89,7 +89,7 @@ static struct platform_device mcf_uart = {
.dev.platform_data = mcf_uart_platform_data,
};
-#ifdef CONFIG_FEC
+#if IS_ENABLED(CONFIG_FEC)
#ifdef CONFIG_M5441x
#define FEC_NAME "enet-fec"
@@ -329,7 +329,7 @@ static struct platform_device mcf_qspi = {
static struct platform_device *mcf_devices[] __initdata = {
&mcf_uart,
-#ifdef CONFIG_FEC
+#if IS_ENABLED(CONFIG_FEC)
&mcf_fec0,
#ifdef MCFFEC_BASE1
&mcf_fec1,
diff --git a/arch/m68k/coldfire/gpio.c b/arch/m68k/coldfire/gpio.c
index 37a83e27c7a6..8832083e1cb8 100644
--- a/arch/m68k/coldfire/gpio.c
+++ b/arch/m68k/coldfire/gpio.c
@@ -178,7 +178,7 @@ static struct gpio_chip mcfgpio_chip = {
static int __init mcfgpio_sysinit(void)
{
- gpiochip_add(&mcfgpio_chip);
+ gpiochip_add_data(&mcfgpio_chip, NULL);
return subsys_system_register(&mcfgpio_subsys, NULL);
}
diff --git a/arch/m68k/include/asm/MC68328.h b/arch/m68k/include/asm/MC68328.h
index 4ebf098b8a1f..1a8080c4cc40 100644
--- a/arch/m68k/include/asm/MC68328.h
+++ b/arch/m68k/include/asm/MC68328.h
@@ -798,7 +798,7 @@
/**********
*
- * 0xFFFFF7xx -- Serial Periferial Interface Slave (SPIS)
+ * 0xFFFFF7xx -- Serial Peripheral Interface Slave (SPIS)
*
**********/
@@ -824,7 +824,7 @@
/**********
*
- * 0xFFFFF8xx -- Serial Periferial Interface Master (SPIM)
+ * 0xFFFFF8xx -- Serial Peripheral Interface Master (SPIM)
*
**********/
@@ -904,7 +904,7 @@
#define UBAUD_PRESCALER_MASK 0x003f /* Actual divisor is 65 - PRESCALER */
#define UBAUD_PRESCALER_SHIFT 0
-#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divizor */
+#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divisor */
#define UBAUD_DIVIDE_SHIFT 8
#define UBAUD_BAUD_SRC 0x0800 /* Baud Rate Source */
#define UBAUD_GPIOSRC 0x1000 /* GPIO source */
diff --git a/arch/m68k/include/asm/MC68EZ328.h b/arch/m68k/include/asm/MC68EZ328.h
index d1bde58ab0dd..fedac87c5d13 100644
--- a/arch/m68k/include/asm/MC68EZ328.h
+++ b/arch/m68k/include/asm/MC68EZ328.h
@@ -631,7 +631,7 @@
/**********
*
- * 0xFFFFF8xx -- Serial Periferial Interface Master (SPIM)
+ * 0xFFFFF8xx -- Serial Peripheral Interface Master (SPIM)
*
**********/
@@ -712,7 +712,7 @@
#define UBAUD_PRESCALER_MASK 0x003f /* Actual divisor is 65 - PRESCALER */
#define UBAUD_PRESCALER_SHIFT 0
-#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divizor */
+#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divisor */
#define UBAUD_DIVIDE_SHIFT 8
#define UBAUD_BAUD_SRC 0x0800 /* Baud Rate Source */
#define UBAUD_UCLKDIR 0x2000 /* UCLK Direction */
@@ -1160,7 +1160,7 @@ typedef volatile struct {
#define DRAMMC_COL10 0x0080 /* Col address bit for MD10 PA11/PA0 */
#define DRAMMC_COL9 0x0040 /* Col address bit for MD9 PA10/PA0 */
#define DRAMMC_COL8 0x0020 /* Col address bit for MD8 PA9/PA0 */
-#define DRAMMC_REF_MASK 0x001f /* Reresh Cycle */
+#define DRAMMC_REF_MASK 0x001f /* Refresh Cycle */
#define DRAMMC_REF_SHIFT 0
/*
diff --git a/arch/m68k/include/asm/MC68VZ328.h b/arch/m68k/include/asm/MC68VZ328.h
index 6bd1bf1f85ea..34a51b2c784f 100644
--- a/arch/m68k/include/asm/MC68VZ328.h
+++ b/arch/m68k/include/asm/MC68VZ328.h
@@ -724,7 +724,7 @@
/**********
*
- * 0xFFFFF8xx -- Serial Periferial Interface Master (SPIM)
+ * 0xFFFFF8xx -- Serial Peripheral Interface Master (SPIM)
*
**********/
@@ -806,7 +806,7 @@
#define UBAUD_PRESCALER_MASK 0x003f /* Actual divisor is 65 - PRESCALER */
#define UBAUD_PRESCALER_SHIFT 0
-#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divizor */
+#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divisor */
#define UBAUD_DIVIDE_SHIFT 8
#define UBAUD_BAUD_SRC 0x0800 /* Baud Rate Source */
#define UBAUD_UCLKDIR 0x2000 /* UCLK Direction */
@@ -1256,7 +1256,7 @@ typedef struct {
#define DRAMMC_COL10 0x0080 /* Col address bit for MD10 PA11/PA0 */
#define DRAMMC_COL9 0x0040 /* Col address bit for MD9 PA10/PA0 */
#define DRAMMC_COL8 0x0020 /* Col address bit for MD8 PA9/PA0 */
-#define DRAMMC_REF_MASK 0x001f /* Reresh Cycle */
+#define DRAMMC_REF_MASK 0x001f /* Refresh Cycle */
#define DRAMMC_REF_SHIFT 0
/*
diff --git a/arch/m68k/include/asm/commproc.h b/arch/m68k/include/asm/commproc.h
deleted file mode 100644
index f41c96863e98..000000000000
--- a/arch/m68k/include/asm/commproc.h
+++ /dev/null
@@ -1,664 +0,0 @@
-
-/*
- * 68360 Communication Processor Module.
- * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com> (mc68360) after:
- * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> (mpc8xx)
- *
- * This file contains structures and information for the communication
- * processor channels. Some CPM control and status is available
- * through the 68360 internal memory map. See include/asm/360_immap.h for details.
- * This file is not a complete map of all of the 360 QUICC's capabilities
- *
- * On the MBX board, EPPC-Bug loads CPM microcode into the first 512
- * bytes of the DP RAM and relocates the I2C parameter area to the
- * IDMA1 space. The remaining DP RAM is available for buffer descriptors
- * or other use.
- */
-#ifndef __CPM_360__
-#define __CPM_360__
-
-
-/* CPM Command register masks: */
-#define CPM_CR_RST ((ushort)0x8000)
-#define CPM_CR_OPCODE ((ushort)0x0f00)
-#define CPM_CR_CHAN ((ushort)0x00f0)
-#define CPM_CR_FLG ((ushort)0x0001)
-
-/* CPM Command set (opcodes): */
-#define CPM_CR_INIT_TRX ((ushort)0x0000)
-#define CPM_CR_INIT_RX ((ushort)0x0001)
-#define CPM_CR_INIT_TX ((ushort)0x0002)
-#define CPM_CR_HUNT_MODE ((ushort)0x0003)
-#define CPM_CR_STOP_TX ((ushort)0x0004)
-#define CPM_CR_GRSTOP_TX ((ushort)0x0005)
-#define CPM_CR_RESTART_TX ((ushort)0x0006)
-#define CPM_CR_CLOSE_RXBD ((ushort)0x0007)
-#define CPM_CR_SET_GADDR ((ushort)0x0008)
-#define CPM_CR_GCI_TIMEOUT ((ushort)0x0009)
-#define CPM_CR_GCI_ABORT ((ushort)0x000a)
-#define CPM_CR_RESET_BCS ((ushort)0x000a)
-
-/* CPM Channel numbers. */
-#define CPM_CR_CH_SCC1 ((ushort)0x0000)
-#define CPM_CR_CH_SCC2 ((ushort)0x0004)
-#define CPM_CR_CH_SPI ((ushort)0x0005) /* SPI / Timers */
-#define CPM_CR_CH_TMR ((ushort)0x0005)
-#define CPM_CR_CH_SCC3 ((ushort)0x0008)
-#define CPM_CR_CH_SMC1 ((ushort)0x0009) /* SMC1 / IDMA1 */
-#define CPM_CR_CH_IDMA1 ((ushort)0x0009)
-#define CPM_CR_CH_SCC4 ((ushort)0x000c)
-#define CPM_CR_CH_SMC2 ((ushort)0x000d) /* SMC2 / IDMA2 */
-#define CPM_CR_CH_IDMA2 ((ushort)0x000d)
-
-
-#define mk_cr_cmd(CH, CMD) ((CMD << 8) | (CH << 4))
-
-#if 1 /* mleslie: I dinna think we have any such restrictions on
- * DP RAM aboard the 360 board - see the MC68360UM p.3-3 */
-
-/* The dual ported RAM is multi-functional. Some areas can be (and are
- * being) used for microcode. There is an area that can only be used
- * as data ram for buffer descriptors, which is all we use right now.
- * Currently the first 512 and last 256 bytes are used for microcode.
- */
-/* mleslie: The uCquicc board is using no extra microcode in DPRAM */
-#define CPM_DATAONLY_BASE ((uint)0x0000)
-#define CPM_DATAONLY_SIZE ((uint)0x0800)
-#define CPM_DP_NOSPACE ((uint)0x7fffffff)
-
-#endif
-
-
-/* Export the base address of the communication processor registers
- * and dual port ram. */
-/* extern cpm360_t *cpmp; */ /* Pointer to comm processor */
-extern QUICC *pquicc;
-uint m360_cpm_dpalloc(uint size);
-/* void *m360_cpm_hostalloc(uint size); */
-void m360_cpm_setbrg(uint brg, uint rate);
-
-#if 0 /* use QUICC_BD declared in include/asm/m68360_quicc.h */
-/* Buffer descriptors used by many of the CPM protocols. */
-typedef struct cpm_buf_desc {
- ushort cbd_sc; /* Status and Control */
- ushort cbd_datlen; /* Data length in buffer */
- uint cbd_bufaddr; /* Buffer address in host memory */
-} cbd_t;
-#endif
-
-
-/* rx bd status/control bits */
-#define BD_SC_EMPTY ((ushort)0x8000) /* Receive is empty */
-#define BD_SC_WRAP ((ushort)0x2000) /* Last buffer descriptor in table */
-#define BD_SC_INTRPT ((ushort)0x1000) /* Interrupt on change */
-#define BD_SC_LAST ((ushort)0x0800) /* Last buffer in frame OR control char */
-
-#define BD_SC_FIRST ((ushort)0x0400) /* 1st buffer in an HDLC frame */
-#define BD_SC_ADDR ((ushort)0x0400) /* 1st byte is a multidrop address */
-
-#define BD_SC_CM ((ushort)0x0200) /* Continuous mode */
-#define BD_SC_ID ((ushort)0x0100) /* Received too many idles */
-
-#define BD_SC_AM ((ushort)0x0080) /* Multidrop address match */
-#define BD_SC_DE ((ushort)0x0080) /* DPLL Error (HDLC) */
-
-#define BD_SC_BR ((ushort)0x0020) /* Break received */
-#define BD_SC_LG ((ushort)0x0020) /* Frame length violation (HDLC) */
-
-#define BD_SC_FR ((ushort)0x0010) /* Framing error */
-#define BD_SC_NO ((ushort)0x0010) /* Nonoctet aligned frame (HDLC) */
-
-#define BD_SC_PR ((ushort)0x0008) /* Parity error */
-#define BD_SC_AB ((ushort)0x0008) /* Received abort Sequence (HDLC) */
-
-#define BD_SC_OV ((ushort)0x0002) /* Overrun */
-#define BD_SC_CD ((ushort)0x0001) /* Carrier Detect lost */
-
-/* tx bd status/control bits (as differ from rx bd) */
-#define BD_SC_READY ((ushort)0x8000) /* Transmit is ready */
-#define BD_SC_TC ((ushort)0x0400) /* Transmit CRC */
-#define BD_SC_P ((ushort)0x0100) /* xmt preamble */
-#define BD_SC_UN ((ushort)0x0002) /* Underrun */
-
-
-
-
-/* Parameter RAM offsets. */
-
-
-
-/* In 2.4 ppc, the PROFF_S?C? are used as byte offsets into DPRAM.
- * In 2.0, we use a more structured C struct map of DPRAM, and so
- * instead, we need only a parameter ram `slot' */
-
-#define PRSLOT_SCC1 0
-#define PRSLOT_SCC2 1
-#define PRSLOT_SCC3 2
-#define PRSLOT_SMC1 2
-#define PRSLOT_SCC4 3
-#define PRSLOT_SMC2 3
-
-
-/* #define PROFF_SCC1 ((uint)0x0000) */
-/* #define PROFF_SCC2 ((uint)0x0100) */
-/* #define PROFF_SCC3 ((uint)0x0200) */
-/* #define PROFF_SMC1 ((uint)0x0280) */
-/* #define PROFF_SCC4 ((uint)0x0300) */
-/* #define PROFF_SMC2 ((uint)0x0380) */
-
-
-/* Define enough so I can at least use the serial port as a UART.
- * The MBX uses SMC1 as the host serial port.
- */
-typedef struct smc_uart {
- ushort smc_rbase; /* Rx Buffer descriptor base address */
- ushort smc_tbase; /* Tx Buffer descriptor base address */
- u_char smc_rfcr; /* Rx function code */
- u_char smc_tfcr; /* Tx function code */
- ushort smc_mrblr; /* Max receive buffer length */
- uint smc_rstate; /* Internal */
- uint smc_idp; /* Internal */
- ushort smc_rbptr; /* Internal */
- ushort smc_ibc; /* Internal */
- uint smc_rxtmp; /* Internal */
- uint smc_tstate; /* Internal */
- uint smc_tdp; /* Internal */
- ushort smc_tbptr; /* Internal */
- ushort smc_tbc; /* Internal */
- uint smc_txtmp; /* Internal */
- ushort smc_maxidl; /* Maximum idle characters */
- ushort smc_tmpidl; /* Temporary idle counter */
- ushort smc_brklen; /* Last received break length */
- ushort smc_brkec; /* rcv'd break condition counter */
- ushort smc_brkcr; /* xmt break count register */
- ushort smc_rmask; /* Temporary bit mask */
-} smc_uart_t;
-
-/* Function code bits.
-*/
-#define SMC_EB ((u_char)0x10) /* Set big endian byte order */
-
-/* SMC uart mode register.
-*/
-#define SMCMR_REN ((ushort)0x0001)
-#define SMCMR_TEN ((ushort)0x0002)
-#define SMCMR_DM ((ushort)0x000c)
-#define SMCMR_SM_GCI ((ushort)0x0000)
-#define SMCMR_SM_UART ((ushort)0x0020)
-#define SMCMR_SM_TRANS ((ushort)0x0030)
-#define SMCMR_SM_MASK ((ushort)0x0030)
-#define SMCMR_PM_EVEN ((ushort)0x0100) /* Even parity, else odd */
-#define SMCMR_REVD SMCMR_PM_EVEN
-#define SMCMR_PEN ((ushort)0x0200) /* Parity enable */
-#define SMCMR_BS SMCMR_PEN
-#define SMCMR_SL ((ushort)0x0400) /* Two stops, else one */
-#define SMCR_CLEN_MASK ((ushort)0x7800) /* Character length */
-#define smcr_mk_clen(C) (((C) << 11) & SMCR_CLEN_MASK)
-
-/* SMC2 as Centronics parallel printer. It is half duplex, in that
- * it can only receive or transmit. The parameter ram values for
- * each direction are either unique or properly overlap, so we can
- * include them in one structure.
- */
-typedef struct smc_centronics {
- ushort scent_rbase;
- ushort scent_tbase;
- u_char scent_cfcr;
- u_char scent_smask;
- ushort scent_mrblr;
- uint scent_rstate;
- uint scent_r_ptr;
- ushort scent_rbptr;
- ushort scent_r_cnt;
- uint scent_rtemp;
- uint scent_tstate;
- uint scent_t_ptr;
- ushort scent_tbptr;
- ushort scent_t_cnt;
- uint scent_ttemp;
- ushort scent_max_sl;
- ushort scent_sl_cnt;
- ushort scent_character1;
- ushort scent_character2;
- ushort scent_character3;
- ushort scent_character4;
- ushort scent_character5;
- ushort scent_character6;
- ushort scent_character7;
- ushort scent_character8;
- ushort scent_rccm;
- ushort scent_rccr;
-} smc_cent_t;
-
-/* Centronics Status Mask Register.
-*/
-#define SMC_CENT_F ((u_char)0x08)
-#define SMC_CENT_PE ((u_char)0x04)
-#define SMC_CENT_S ((u_char)0x02)
-
-/* SMC Event and Mask register.
-*/
-#define SMCM_BRKE ((unsigned char)0x40) /* When in UART Mode */
-#define SMCM_BRK ((unsigned char)0x10) /* When in UART Mode */
-#define SMCM_TXE ((unsigned char)0x10) /* When in Transparent Mode */
-#define SMCM_BSY ((unsigned char)0x04)
-#define SMCM_TX ((unsigned char)0x02)
-#define SMCM_RX ((unsigned char)0x01)
-
-/* Baud rate generators.
-*/
-#define CPM_BRG_RST ((uint)0x00020000)
-#define CPM_BRG_EN ((uint)0x00010000)
-#define CPM_BRG_EXTC_INT ((uint)0x00000000)
-#define CPM_BRG_EXTC_CLK2 ((uint)0x00004000)
-#define CPM_BRG_EXTC_CLK6 ((uint)0x00008000)
-#define CPM_BRG_ATB ((uint)0x00002000)
-#define CPM_BRG_CD_MASK ((uint)0x00001ffe)
-#define CPM_BRG_DIV16 ((uint)0x00000001)
-
-/* SCCs.
-*/
-#define SCC_GSMRH_IRP ((uint)0x00040000)
-#define SCC_GSMRH_GDE ((uint)0x00010000)
-#define SCC_GSMRH_TCRC_CCITT ((uint)0x00008000)
-#define SCC_GSMRH_TCRC_BISYNC ((uint)0x00004000)
-#define SCC_GSMRH_TCRC_HDLC ((uint)0x00000000)
-#define SCC_GSMRH_REVD ((uint)0x00002000)
-#define SCC_GSMRH_TRX ((uint)0x00001000)
-#define SCC_GSMRH_TTX ((uint)0x00000800)
-#define SCC_GSMRH_CDP ((uint)0x00000400)
-#define SCC_GSMRH_CTSP ((uint)0x00000200)
-#define SCC_GSMRH_CDS ((uint)0x00000100)
-#define SCC_GSMRH_CTSS ((uint)0x00000080)
-#define SCC_GSMRH_TFL ((uint)0x00000040)
-#define SCC_GSMRH_RFW ((uint)0x00000020)
-#define SCC_GSMRH_TXSY ((uint)0x00000010)
-#define SCC_GSMRH_SYNL16 ((uint)0x0000000c)
-#define SCC_GSMRH_SYNL8 ((uint)0x00000008)
-#define SCC_GSMRH_SYNL4 ((uint)0x00000004)
-#define SCC_GSMRH_RTSM ((uint)0x00000002)
-#define SCC_GSMRH_RSYN ((uint)0x00000001)
-
-#define SCC_GSMRL_SIR ((uint)0x80000000) /* SCC2 only */
-#define SCC_GSMRL_EDGE_NONE ((uint)0x60000000)
-#define SCC_GSMRL_EDGE_NEG ((uint)0x40000000)
-#define SCC_GSMRL_EDGE_POS ((uint)0x20000000)
-#define SCC_GSMRL_EDGE_BOTH ((uint)0x00000000)
-#define SCC_GSMRL_TCI ((uint)0x10000000)
-#define SCC_GSMRL_TSNC_3 ((uint)0x0c000000)
-#define SCC_GSMRL_TSNC_4 ((uint)0x08000000)
-#define SCC_GSMRL_TSNC_14 ((uint)0x04000000)
-#define SCC_GSMRL_TSNC_INF ((uint)0x00000000)
-#define SCC_GSMRL_RINV ((uint)0x02000000)
-#define SCC_GSMRL_TINV ((uint)0x01000000)
-#define SCC_GSMRL_TPL_128 ((uint)0x00c00000)
-#define SCC_GSMRL_TPL_64 ((uint)0x00a00000)
-#define SCC_GSMRL_TPL_48 ((uint)0x00800000)
-#define SCC_GSMRL_TPL_32 ((uint)0x00600000)
-#define SCC_GSMRL_TPL_16 ((uint)0x00400000)
-#define SCC_GSMRL_TPL_8 ((uint)0x00200000)
-#define SCC_GSMRL_TPL_NONE ((uint)0x00000000)
-#define SCC_GSMRL_TPP_ALL1 ((uint)0x00180000)
-#define SCC_GSMRL_TPP_01 ((uint)0x00100000)
-#define SCC_GSMRL_TPP_10 ((uint)0x00080000)
-#define SCC_GSMRL_TPP_ZEROS ((uint)0x00000000)
-#define SCC_GSMRL_TEND ((uint)0x00040000)
-#define SCC_GSMRL_TDCR_32 ((uint)0x00030000)
-#define SCC_GSMRL_TDCR_16 ((uint)0x00020000)
-#define SCC_GSMRL_TDCR_8 ((uint)0x00010000)
-#define SCC_GSMRL_TDCR_1 ((uint)0x00000000)
-#define SCC_GSMRL_RDCR_32 ((uint)0x0000c000)
-#define SCC_GSMRL_RDCR_16 ((uint)0x00008000)
-#define SCC_GSMRL_RDCR_8 ((uint)0x00004000)
-#define SCC_GSMRL_RDCR_1 ((uint)0x00000000)
-#define SCC_GSMRL_RENC_DFMAN ((uint)0x00003000)
-#define SCC_GSMRL_RENC_MANCH ((uint)0x00002000)
-#define SCC_GSMRL_RENC_FM0 ((uint)0x00001000)
-#define SCC_GSMRL_RENC_NRZI ((uint)0x00000800)
-#define SCC_GSMRL_RENC_NRZ ((uint)0x00000000)
-#define SCC_GSMRL_TENC_DFMAN ((uint)0x00000600)
-#define SCC_GSMRL_TENC_MANCH ((uint)0x00000400)
-#define SCC_GSMRL_TENC_FM0 ((uint)0x00000200)
-#define SCC_GSMRL_TENC_NRZI ((uint)0x00000100)
-#define SCC_GSMRL_TENC_NRZ ((uint)0x00000000)
-#define SCC_GSMRL_DIAG_LE ((uint)0x000000c0) /* Loop and echo */
-#define SCC_GSMRL_DIAG_ECHO ((uint)0x00000080)
-#define SCC_GSMRL_DIAG_LOOP ((uint)0x00000040)
-#define SCC_GSMRL_DIAG_NORM ((uint)0x00000000)
-#define SCC_GSMRL_ENR ((uint)0x00000020)
-#define SCC_GSMRL_ENT ((uint)0x00000010)
-#define SCC_GSMRL_MODE_ENET ((uint)0x0000000c)
-#define SCC_GSMRL_MODE_DDCMP ((uint)0x00000009)
-#define SCC_GSMRL_MODE_BISYNC ((uint)0x00000008)
-#define SCC_GSMRL_MODE_V14 ((uint)0x00000007)
-#define SCC_GSMRL_MODE_AHDLC ((uint)0x00000006)
-#define SCC_GSMRL_MODE_PROFIBUS ((uint)0x00000005)
-#define SCC_GSMRL_MODE_UART ((uint)0x00000004)
-#define SCC_GSMRL_MODE_SS7 ((uint)0x00000003)
-#define SCC_GSMRL_MODE_ATALK ((uint)0x00000002)
-#define SCC_GSMRL_MODE_HDLC ((uint)0x00000000)
-
-#define SCC_TODR_TOD ((ushort)0x8000)
-
-/* SCC Event and Mask register.
-*/
-#define SCCM_TXE ((unsigned char)0x10)
-#define SCCM_BSY ((unsigned char)0x04)
-#define SCCM_TX ((unsigned char)0x02)
-#define SCCM_RX ((unsigned char)0x01)
-
-typedef struct scc_param {
- ushort scc_rbase; /* Rx Buffer descriptor base address */
- ushort scc_tbase; /* Tx Buffer descriptor base address */
- u_char scc_rfcr; /* Rx function code */
- u_char scc_tfcr; /* Tx function code */
- ushort scc_mrblr; /* Max receive buffer length */
- uint scc_rstate; /* Internal */
- uint scc_idp; /* Internal */
- ushort scc_rbptr; /* Internal */
- ushort scc_ibc; /* Internal */
- uint scc_rxtmp; /* Internal */
- uint scc_tstate; /* Internal */
- uint scc_tdp; /* Internal */
- ushort scc_tbptr; /* Internal */
- ushort scc_tbc; /* Internal */
- uint scc_txtmp; /* Internal */
- uint scc_rcrc; /* Internal */
- uint scc_tcrc; /* Internal */
-} sccp_t;
-
-
-/* Function code bits.
- */
-#define SCC_EB ((u_char)0x10) /* Set big endian byte order */
-#define SCC_FC_DMA ((u_char)0x08) /* Set SDMA */
-
-/* CPM Ethernet through SCC1.
- */
-typedef struct scc_enet {
- sccp_t sen_genscc;
- uint sen_cpres; /* Preset CRC */
- uint sen_cmask; /* Constant mask for CRC */
- uint sen_crcec; /* CRC Error counter */
- uint sen_alec; /* alignment error counter */
- uint sen_disfc; /* discard frame counter */
- ushort sen_pads; /* Tx short frame pad character */
- ushort sen_retlim; /* Retry limit threshold */
- ushort sen_retcnt; /* Retry limit counter */
- ushort sen_maxflr; /* maximum frame length register */
- ushort sen_minflr; /* minimum frame length register */
- ushort sen_maxd1; /* maximum DMA1 length */
- ushort sen_maxd2; /* maximum DMA2 length */
- ushort sen_maxd; /* Rx max DMA */
- ushort sen_dmacnt; /* Rx DMA counter */
- ushort sen_maxb; /* Max BD byte count */
- ushort sen_gaddr1; /* Group address filter */
- ushort sen_gaddr2;
- ushort sen_gaddr3;
- ushort sen_gaddr4;
- uint sen_tbuf0data0; /* Save area 0 - current frame */
- uint sen_tbuf0data1; /* Save area 1 - current frame */
- uint sen_tbuf0rba; /* Internal */
- uint sen_tbuf0crc; /* Internal */
- ushort sen_tbuf0bcnt; /* Internal */
- ushort sen_paddrh; /* physical address (MSB) */
- ushort sen_paddrm;
- ushort sen_paddrl; /* physical address (LSB) */
- ushort sen_pper; /* persistence */
- ushort sen_rfbdptr; /* Rx first BD pointer */
- ushort sen_tfbdptr; /* Tx first BD pointer */
- ushort sen_tlbdptr; /* Tx last BD pointer */
- uint sen_tbuf1data0; /* Save area 0 - current frame */
- uint sen_tbuf1data1; /* Save area 1 - current frame */
- uint sen_tbuf1rba; /* Internal */
- uint sen_tbuf1crc; /* Internal */
- ushort sen_tbuf1bcnt; /* Internal */
- ushort sen_txlen; /* Tx Frame length counter */
- ushort sen_iaddr1; /* Individual address filter */
- ushort sen_iaddr2;
- ushort sen_iaddr3;
- ushort sen_iaddr4;
- ushort sen_boffcnt; /* Backoff counter */
-
- /* NOTE: Some versions of the manual have the following items
- * incorrectly documented. Below is the proper order.
- */
- ushort sen_taddrh; /* temp address (MSB) */
- ushort sen_taddrm;
- ushort sen_taddrl; /* temp address (LSB) */
-} scc_enet_t;
-
-
-
-#if defined (CONFIG_UCQUICC)
-/* uCquicc has the following signals connected to Ethernet:
- * 68360 - lxt905
- * PA0/RXD1 - rxd
- * PA1/TXD1 - txd
- * PA8/CLK1 - tclk
- * PA9/CLK2 - rclk
- * PC0/!RTS1 - t_en
- * PC1/!CTS1 - col
- * PC5/!CD1 - cd
- */
-#define PA_ENET_RXD PA_RXD1
-#define PA_ENET_TXD PA_TXD1
-#define PA_ENET_TCLK PA_CLK1
-#define PA_ENET_RCLK PA_CLK2
-#define PC_ENET_TENA PC_RTS1
-#define PC_ENET_CLSN PC_CTS1
-#define PC_ENET_RENA PC_CD1
-
-/* Control bits in the SICR to route TCLK (CLK1) and RCLK (CLK2) to
- * SCC1.
- */
-#define SICR_ENET_MASK ((uint)0x000000ff)
-#define SICR_ENET_CLKRT ((uint)0x0000002c)
-
-#endif /* config_ucquicc */
-
-
-#ifdef MBX
-/* Bits in parallel I/O port registers that have to be set/cleared
- * to configure the pins for SCC1 use. The TCLK and RCLK seem unique
- * to the MBX860 board. Any two of the four available clocks could be
- * used, and the MPC860 cookbook manual has an example using different
- * clock pins.
- */
-#define PA_ENET_RXD ((ushort)0x0001)
-#define PA_ENET_TXD ((ushort)0x0002)
-#define PA_ENET_TCLK ((ushort)0x0200)
-#define PA_ENET_RCLK ((ushort)0x0800)
-#define PC_ENET_TENA ((ushort)0x0001)
-#define PC_ENET_CLSN ((ushort)0x0010)
-#define PC_ENET_RENA ((ushort)0x0020)
-
-/* Control bits in the SICR to route TCLK (CLK2) and RCLK (CLK4) to
- * SCC1. Also, make sure GR1 (bit 24) and SC1 (bit 25) are zero.
- */
-#define SICR_ENET_MASK ((uint)0x000000ff)
-#define SICR_ENET_CLKRT ((uint)0x0000003d)
-#endif
-
-/* SCC Event register as used by Ethernet.
-*/
-#define SCCE_ENET_GRA ((ushort)0x0080) /* Graceful stop complete */
-#define SCCE_ENET_TXE ((ushort)0x0010) /* Transmit Error */
-#define SCCE_ENET_RXF ((ushort)0x0008) /* Full frame received */
-#define SCCE_ENET_BSY ((ushort)0x0004) /* All incoming buffers full */
-#define SCCE_ENET_TXB ((ushort)0x0002) /* A buffer was transmitted */
-#define SCCE_ENET_RXB ((ushort)0x0001) /* A buffer was received */
-
-/* SCC Mode Register (PMSR) as used by Ethernet.
-*/
-#define SCC_PMSR_HBC ((ushort)0x8000) /* Enable heartbeat */
-#define SCC_PMSR_FC ((ushort)0x4000) /* Force collision */
-#define SCC_PMSR_RSH ((ushort)0x2000) /* Receive short frames */
-#define SCC_PMSR_IAM ((ushort)0x1000) /* Check individual hash */
-#define SCC_PMSR_ENCRC ((ushort)0x0800) /* Ethernet CRC mode */
-#define SCC_PMSR_PRO ((ushort)0x0200) /* Promiscuous mode */
-#define SCC_PMSR_BRO ((ushort)0x0100) /* Catch broadcast pkts */
-#define SCC_PMSR_SBT ((ushort)0x0080) /* Special backoff timer */
-#define SCC_PMSR_LPB ((ushort)0x0040) /* Set Loopback mode */
-#define SCC_PMSR_SIP ((ushort)0x0020) /* Sample Input Pins */
-#define SCC_PMSR_LCW ((ushort)0x0010) /* Late collision window */
-#define SCC_PMSR_NIB22 ((ushort)0x000a) /* Start frame search */
-#define SCC_PMSR_FDE ((ushort)0x0001) /* Full duplex enable */
-
-/* Buffer descriptor control/status used by Ethernet receive.
-*/
-#define BD_ENET_RX_EMPTY ((ushort)0x8000)
-#define BD_ENET_RX_WRAP ((ushort)0x2000)
-#define BD_ENET_RX_INTR ((ushort)0x1000)
-#define BD_ENET_RX_LAST ((ushort)0x0800)
-#define BD_ENET_RX_FIRST ((ushort)0x0400)
-#define BD_ENET_RX_MISS ((ushort)0x0100)
-#define BD_ENET_RX_LG ((ushort)0x0020)
-#define BD_ENET_RX_NO ((ushort)0x0010)
-#define BD_ENET_RX_SH ((ushort)0x0008)
-#define BD_ENET_RX_CR ((ushort)0x0004)
-#define BD_ENET_RX_OV ((ushort)0x0002)
-#define BD_ENET_RX_CL ((ushort)0x0001)
-#define BD_ENET_RX_STATS ((ushort)0x013f) /* All status bits */
-
-/* Buffer descriptor control/status used by Ethernet transmit.
-*/
-#define BD_ENET_TX_READY ((ushort)0x8000)
-#define BD_ENET_TX_PAD ((ushort)0x4000)
-#define BD_ENET_TX_WRAP ((ushort)0x2000)
-#define BD_ENET_TX_INTR ((ushort)0x1000)
-#define BD_ENET_TX_LAST ((ushort)0x0800)
-#define BD_ENET_TX_TC ((ushort)0x0400)
-#define BD_ENET_TX_DEF ((ushort)0x0200)
-#define BD_ENET_TX_HB ((ushort)0x0100)
-#define BD_ENET_TX_LC ((ushort)0x0080)
-#define BD_ENET_TX_RL ((ushort)0x0040)
-#define BD_ENET_TX_RCMASK ((ushort)0x003c)
-#define BD_ENET_TX_UN ((ushort)0x0002)
-#define BD_ENET_TX_CSL ((ushort)0x0001)
-#define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */
-
-/* SCC as UART
-*/
-typedef struct scc_uart {
- sccp_t scc_genscc;
- uint scc_res1; /* Reserved */
- uint scc_res2; /* Reserved */
- ushort scc_maxidl; /* Maximum idle chars */
- ushort scc_idlc; /* temp idle counter */
- ushort scc_brkcr; /* Break count register */
- ushort scc_parec; /* receive parity error counter */
- ushort scc_frmec; /* receive framing error counter */
- ushort scc_nosec; /* receive noise counter */
- ushort scc_brkec; /* receive break condition counter */
- ushort scc_brkln; /* last received break length */
- ushort scc_uaddr1; /* UART address character 1 */
- ushort scc_uaddr2; /* UART address character 2 */
- ushort scc_rtemp; /* Temp storage */
- ushort scc_toseq; /* Transmit out of sequence char */
- ushort scc_char1; /* control character 1 */
- ushort scc_char2; /* control character 2 */
- ushort scc_char3; /* control character 3 */
- ushort scc_char4; /* control character 4 */
- ushort scc_char5; /* control character 5 */
- ushort scc_char6; /* control character 6 */
- ushort scc_char7; /* control character 7 */
- ushort scc_char8; /* control character 8 */
- ushort scc_rccm; /* receive control character mask */
- ushort scc_rccr; /* receive control character register */
- ushort scc_rlbc; /* receive last break character */
-} scc_uart_t;
-
-/* SCC Event and Mask registers when it is used as a UART.
-*/
-#define UART_SCCM_GLR ((ushort)0x1000)
-#define UART_SCCM_GLT ((ushort)0x0800)
-#define UART_SCCM_AB ((ushort)0x0200)
-#define UART_SCCM_IDL ((ushort)0x0100)
-#define UART_SCCM_GRA ((ushort)0x0080)
-#define UART_SCCM_BRKE ((ushort)0x0040)
-#define UART_SCCM_BRKS ((ushort)0x0020)
-#define UART_SCCM_CCR ((ushort)0x0008)
-#define UART_SCCM_BSY ((ushort)0x0004)
-#define UART_SCCM_TX ((ushort)0x0002)
-#define UART_SCCM_RX ((ushort)0x0001)
-
-/* The SCC PMSR when used as a UART.
-*/
-#define SCU_PMSR_FLC ((ushort)0x8000)
-#define SCU_PMSR_SL ((ushort)0x4000)
-#define SCU_PMSR_CL ((ushort)0x3000)
-#define SCU_PMSR_UM ((ushort)0x0c00)
-#define SCU_PMSR_FRZ ((ushort)0x0200)
-#define SCU_PMSR_RZS ((ushort)0x0100)
-#define SCU_PMSR_SYN ((ushort)0x0080)
-#define SCU_PMSR_DRT ((ushort)0x0040)
-#define SCU_PMSR_PEN ((ushort)0x0010)
-#define SCU_PMSR_RPM ((ushort)0x000c)
-#define SCU_PMSR_REVP ((ushort)0x0008)
-#define SCU_PMSR_TPM ((ushort)0x0003)
-#define SCU_PMSR_TEVP ((ushort)0x0003)
-
-/* CPM Transparent mode SCC.
- */
-typedef struct scc_trans {
- sccp_t st_genscc;
- uint st_cpres; /* Preset CRC */
- uint st_cmask; /* Constant mask for CRC */
-} scc_trans_t;
-
-#define BD_SCC_TX_LAST ((ushort)0x0800)
-
-
-
-/* CPM interrupts. There are nearly 32 interrupts generated by CPM
- * channels or devices. All of these are presented to the PPC core
- * as a single interrupt. The CPM interrupt handler dispatches its
- * own handlers, in a similar fashion to the PPC core handler. We
- * use the table as defined in the manuals (i.e. no special high
- * priority and SCC1 == SCCa, etc...).
- */
-/* #define CPMVEC_NR 32 */
-/* #define CPMVEC_PIO_PC15 ((ushort)0x1f) */
-/* #define CPMVEC_SCC1 ((ushort)0x1e) */
-/* #define CPMVEC_SCC2 ((ushort)0x1d) */
-/* #define CPMVEC_SCC3 ((ushort)0x1c) */
-/* #define CPMVEC_SCC4 ((ushort)0x1b) */
-/* #define CPMVEC_PIO_PC14 ((ushort)0x1a) */
-/* #define CPMVEC_TIMER1 ((ushort)0x19) */
-/* #define CPMVEC_PIO_PC13 ((ushort)0x18) */
-/* #define CPMVEC_PIO_PC12 ((ushort)0x17) */
-/* #define CPMVEC_SDMA_CB_ERR ((ushort)0x16) */
-/* #define CPMVEC_IDMA1 ((ushort)0x15) */
-/* #define CPMVEC_IDMA2 ((ushort)0x14) */
-/* #define CPMVEC_TIMER2 ((ushort)0x12) */
-/* #define CPMVEC_RISCTIMER ((ushort)0x11) */
-/* #define CPMVEC_I2C ((ushort)0x10) */
-/* #define CPMVEC_PIO_PC11 ((ushort)0x0f) */
-/* #define CPMVEC_PIO_PC10 ((ushort)0x0e) */
-/* #define CPMVEC_TIMER3 ((ushort)0x0c) */
-/* #define CPMVEC_PIO_PC9 ((ushort)0x0b) */
-/* #define CPMVEC_PIO_PC8 ((ushort)0x0a) */
-/* #define CPMVEC_PIO_PC7 ((ushort)0x09) */
-/* #define CPMVEC_TIMER4 ((ushort)0x07) */
-/* #define CPMVEC_PIO_PC6 ((ushort)0x06) */
-/* #define CPMVEC_SPI ((ushort)0x05) */
-/* #define CPMVEC_SMC1 ((ushort)0x04) */
-/* #define CPMVEC_SMC2 ((ushort)0x03) */
-/* #define CPMVEC_PIO_PC5 ((ushort)0x02) */
-/* #define CPMVEC_PIO_PC4 ((ushort)0x01) */
-/* #define CPMVEC_ERROR ((ushort)0x00) */
-
-extern void cpm_install_handler(int vec, irq_handler_t handler, void *dev_id);
-
-/* CPM interrupt configuration vector.
-*/
-#define CICR_SCD_SCC4 ((uint)0x00c00000) /* SCC4 @ SCCd */
-#define CICR_SCC_SCC3 ((uint)0x00200000) /* SCC3 @ SCCc */
-#define CICR_SCB_SCC2 ((uint)0x00040000) /* SCC2 @ SCCb */
-#define CICR_SCA_SCC1 ((uint)0x00000000) /* SCC1 @ SCCa */
-#define CICR_IRL_MASK ((uint)0x0000e000) /* Core interrupt */
-#define CICR_HP_MASK ((uint)0x00001f00) /* Hi-pri int. */
-#define CICR_IEN ((uint)0x00000080) /* Int. enable */
-#define CICR_SPS ((uint)0x00000001) /* SCC Spread */
-#endif /* __CPM_360__ */
diff --git a/arch/m68k/include/asm/m54xxacr.h b/arch/m68k/include/asm/m54xxacr.h
index 6d13cae44af5..59e171063c2f 100644
--- a/arch/m68k/include/asm/m54xxacr.h
+++ b/arch/m68k/include/asm/m54xxacr.h
@@ -23,8 +23,8 @@
#define CACR_IEC 0x00008000 /* Enable instruction cache */
#define CACR_DNFB 0x00002000 /* Inhibited fill buffer */
#define CACR_IDPI 0x00001000 /* Disable CPUSHL */
-#define CACR_IHLCK 0x00000800 /* Intruction cache half lock */
-#define CACR_IDCM 0x00000400 /* Intruction cache inhibit */
+#define CACR_IHLCK 0x00000800 /* Instruction cache half lock */
+#define CACR_IDCM 0x00000400 /* Instruction cache inhibit */
#define CACR_ICINVA 0x00000100 /* Invalidate instr cache */
#define CACR_EUSP 0x00000020 /* Enable separate user a7 */
diff --git a/arch/m68k/include/asm/m68360.h b/arch/m68k/include/asm/m68360.h
deleted file mode 100644
index 4664180a3ab3..000000000000
--- a/arch/m68k/include/asm/m68360.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <asm/m68360_regs.h>
-#include <asm/m68360_pram.h>
-#include <asm/m68360_quicc.h>
-#include <asm/m68360_enet.h>
-
-#ifdef CONFIG_M68360
-
-#define CPM_INTERRUPT 4
-
-/* see MC68360 User's Manual, p. 7-377 */
-#define CPM_VECTOR_BASE 0x04 /* 3 MSbits of CPM vector */
-
-#endif /* CONFIG_M68360 */
diff --git a/arch/m68k/include/asm/m68360_enet.h b/arch/m68k/include/asm/m68360_enet.h
deleted file mode 100644
index 4d04037c78a2..000000000000
--- a/arch/m68k/include/asm/m68360_enet.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/***********************************
- * $Id: m68360_enet.h,v 1.1 2002/03/02 15:01:07 gerg Exp $
- ***********************************
- *
- ***************************************
- * Definitions for the ETHERNET controllers
- ***************************************
- */
-
-#ifndef __ETHER_H
-#define __ETHER_H
-
-#include <asm/quicc_simple.h>
-
-/*
- * transmit BD's
- */
-#define T_R 0x8000 /* ready bit */
-#define E_T_PAD 0x4000 /* short frame padding */
-#define T_W 0x2000 /* wrap bit */
-#define T_I 0x1000 /* interrupt on completion */
-#define T_L 0x0800 /* last in frame */
-#define T_TC 0x0400 /* transmit CRC (when last) */
-
-#define T_DEF 0x0200 /* defer indication */
-#define T_HB 0x0100 /* heartbeat */
-#define T_LC 0x0080 /* error: late collision */
-#define T_RL 0x0040 /* error: retransmission limit */
-#define T_RC 0x003c /* retry count */
-#define T_UN 0x0002 /* error: underrun */
-#define T_CSL 0x0001 /* carier sense lost */
-#define T_ERROR (T_HB | T_LC | T_RL | T_UN | T_CSL)
-
-/*
- * receive BD's
- */
-#define R_E 0x8000 /* buffer empty */
-#define R_W 0x2000 /* wrap bit */
-#define R_I 0x1000 /* interrupt on reception */
-#define R_L 0x0800 /* last BD in frame */
-#define R_F 0x0400 /* first BD in frame */
-#define R_M 0x0100 /* received because of promisc. mode */
-
-#define R_LG 0x0020 /* frame too long */
-#define R_NO 0x0010 /* non-octet aligned */
-#define R_SH 0x0008 /* short frame */
-#define R_CR 0x0004 /* receive CRC error */
-#define R_OV 0x0002 /* receive overrun */
-#define R_CL 0x0001 /* collision */
-#define ETHER_R_ERROR (R_LG | R_NO | R_SH | R_CR | R_OV | R_CL)
-
-
-/*
- * ethernet interrupts
- */
-#define ETHERNET_GRA 0x0080 /* graceful stop complete */
-#define ETHERNET_TXE 0x0010 /* transmit error */
-#define ETHERNET_RXF 0x0008 /* receive frame */
-#define ETHERNET_BSY 0x0004 /* busy condition */
-#define ETHERNET_TXB 0x0002 /* transmit buffer */
-#define ETHERNET_RXB 0x0001 /* receive buffer */
-
-/*
- * ethernet protocol specific mode register (PSMR)
- */
-#define ETHER_HBC 0x8000 /* heartbeat checking */
-#define ETHER_FC 0x4000 /* force collision */
-#define ETHER_RSH 0x2000 /* receive short frames */
-#define ETHER_IAM 0x1000 /* individual address mode */
-#define ETHER_CRC_32 (0x2<<10) /* Enable CRC */
-#define ETHER_PRO 0x0200 /* promiscuous */
-#define ETHER_BRO 0x0100 /* broadcast address */
-#define ETHER_SBT 0x0080 /* stop backoff timer */
-#define ETHER_LPB 0x0040 /* Loop Back Mode */
-#define ETHER_SIP 0x0020 /* sample input pins */
-#define ETHER_LCW 0x0010 /* late collision window */
-#define ETHER_NIB_13 (0x0<<1) /* # of ignored bits 13 */
-#define ETHER_NIB_14 (0x1<<1) /* # of ignored bits 14 */
-#define ETHER_NIB_15 (0x2<<1) /* # of ignored bits 15 */
-#define ETHER_NIB_16 (0x3<<1) /* # of ignored bits 16 */
-#define ETHER_NIB_21 (0x4<<1) /* # of ignored bits 21 */
-#define ETHER_NIB_22 (0x5<<1) /* # of ignored bits 22 */
-#define ETHER_NIB_23 (0x6<<1) /* # of ignored bits 23 */
-#define ETHER_NIB_24 (0x7<<1) /* # of ignored bits 24 */
-
-/*
- * ethernet specific parameters
- */
-#define CRC_WORD 4 /* Length in bytes of CRC */
-#define C_PRES 0xffffffff /* preform 32 bit CRC */
-#define C_MASK 0xdebb20e3 /* comply with 32 bit CRC */
-#define CRCEC 0x00000000
-#define ALEC 0x00000000
-#define DISFC 0x00000000
-#define PADS 0x00000000
-#define RET_LIM 0x000f /* retry 15 times to send a frame before interrupt */
-#define ETH_MFLR 0x05ee /* 1518 max frame size */
-#define MINFLR 0x0040 /* Minimum frame size 64 */
-#define MAXD1 0x05ee /* Max dma count 1518 */
-#define MAXD2 0x05ee
-#define GADDR1 0x00000000 /* Clear group address */
-#define GADDR2 0x00000000
-#define GADDR3 0x00000000
-#define GADDR4 0x00000000
-#define P_PER 0x00000000 /*not used */
-#define IADDR1 0x00000000 /* Individual hash table not used */
-#define IADDR2 0x00000000
-#define IADDR3 0x00000000
-#define IADDR4 0x00000000
-#define TADDR_H 0x00000000 /* clear this regs */
-#define TADDR_M 0x00000000
-#define TADDR_L 0x00000000
-
-/* SCC Parameter Ram */
-#define RFCR 0x18 /* normal operation */
-#define TFCR 0x18 /* normal operation */
-#define E_MRBLR 1518 /* Max ethernet frame length */
-
-/*
- * ethernet specific structure
- */
-typedef union {
- unsigned char b[6];
- struct {
- unsigned short high;
- unsigned short middl;
- unsigned short low;
- } w;
-} ETHER_ADDR;
-
-typedef struct {
- int max_frame_length;
- int promisc_mode;
- int reject_broadcast;
- ETHER_ADDR phys_adr;
-} ETHER_SPECIFIC;
-
-typedef struct {
- ETHER_ADDR dst_addr;
- ETHER_ADDR src_addr;
- unsigned short type_or_len;
- unsigned char data[1];
-} ETHER_FRAME;
-
-#define MAX_DATALEN 1500
-typedef struct {
- ETHER_ADDR dst_addr;
- ETHER_ADDR src_addr;
- unsigned short type_or_len;
- unsigned char data[MAX_DATALEN];
- unsigned char fcs[CRC_WORD];
-} ETHER_MAX_FRAME;
-
-
-/*
- * Internal ethernet function prototypes
- */
-void ether_interrupt(int scc_num);
-/* mleslie: debug */
-/* static void ethernet_rx_internal(int scc_num); */
-/* static void ethernet_tx_internal(int scc_num); */
-
-/*
- * User callable routines prototypes (ethernet specific)
- */
-void ethernet_init(int scc_number,
- alloc_routine *alloc_buffer,
- free_routine *free_buffer,
- store_rx_buffer_routine *store_rx_buffer,
- handle_tx_error_routine *handle_tx_error,
- handle_rx_error_routine *handle_rx_error,
- handle_lost_error_routine *handle_lost_error,
- ETHER_SPECIFIC *ether_spec);
-int ethernet_tx(int scc_number, void *buf, int length);
-
-#endif
-
diff --git a/arch/m68k/include/asm/m68360_pram.h b/arch/m68k/include/asm/m68360_pram.h
deleted file mode 100644
index c0cbd96f09bc..000000000000
--- a/arch/m68k/include/asm/m68360_pram.h
+++ /dev/null
@@ -1,431 +0,0 @@
-/***********************************
- * $Id: m68360_pram.h,v 1.1 2002/03/02 15:01:07 gerg Exp $
- ***********************************
- *
- ***************************************
- * Definitions of the parameter area RAM.
- * Note that different structures are overlaid
- * at the same offsets for the different modes
- * of operation.
- ***************************************
- */
-
-#ifndef __PRAM_H
-#define __PRAM_H
-
-/* Time slot assignment table */
-#define VALID_SLOT 0x8000
-#define WRAP_SLOT 0x4000
-
-/*****************************************************************
- Global Multichannel parameter RAM
-*****************************************************************/
-struct global_multi_pram {
- /*
- * Global Multichannel parameter RAM
- */
- unsigned long mcbase; /* Multichannel Base pointer */
- unsigned short qmcstate; /* Multichannel Controller state */
- unsigned short mrblr; /* Maximum Receive Buffer Length */
- unsigned short tx_s_ptr; /* TSTATx Pointer */
- unsigned short rxptr; /* Current Time slot entry in TSATRx */
- unsigned short grfthr; /* Global Receive frame threshold */
- unsigned short grfcnt; /* Global Receive Frame Count */
- unsigned long intbase; /* Multichannel Base address */
- unsigned long iintptr; /* Pointer to interrupt queue */
- unsigned short rx_s_ptr; /* TSTARx Pointer */
-
- unsigned short txptr; /* Current Time slot entry in TSATTx */
- unsigned long c_mask32; /* CRC Constant (debb20e3) */
- unsigned short tsatrx[32]; /* Time Slot Assignment Table Rx */
- unsigned short tsattx[32]; /* Time Slot Assignment Table Tx */
- unsigned short c_mask16; /* CRC Constant (f0b8) */
-};
-
-/*****************************************************************
- Quicc32 HDLC parameter RAM
-*****************************************************************/
-struct quicc32_pram {
-
- unsigned short tbase; /* Tx Buffer Descriptors Base Address */
- unsigned short chamr; /* Channel Mode Register */
- unsigned long tstate; /* Tx Internal State */
- unsigned long txintr; /* Tx Internal Data Pointer */
- unsigned short tbptr; /* Tx Buffer Descriptor Pointer */
- unsigned short txcntr; /* Tx Internal Byte Count */
- unsigned long tupack; /* (Tx Temp) */
- unsigned long zistate; /* Zero Insertion machine state */
- unsigned long tcrc; /* Temp Transmit CRC */
- unsigned short intmask; /* Channel's interrupt mask flags */
- unsigned short bdflags;
- unsigned short rbase; /* Rx Buffer Descriptors Base Address */
- unsigned short mflr; /* Max Frame Length Register */
- unsigned long rstate; /* Rx Internal State */
- unsigned long rxintr; /* Rx Internal Data Pointer */
- unsigned short rbptr; /* Rx Buffer Descriptor Pointer */
- unsigned short rxbyc; /* Rx Internal Byte Count */
- unsigned long rpack; /* (Rx Temp) */
- unsigned long zdstate; /* Zero Deletion machine state */
- unsigned long rcrc; /* Temp Transmit CRC */
- unsigned short maxc; /* Max_length counter */
- unsigned short tmp_mb; /* Temp */
-};
-
-
-/*****************************************************************
- HDLC parameter RAM
-*****************************************************************/
-
-struct hdlc_pram {
- /*
- * SCC parameter RAM
- */
- unsigned short rbase; /* RX BD base address */
- unsigned short tbase; /* TX BD base address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rtemp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
- unsigned long rcrc; /* temp receive CRC */
- unsigned long tcrc; /* temp transmit CRC */
-
- /*
- * HDLC specific parameter RAM
- */
- unsigned char RESERVED1[4]; /* Reserved area */
- unsigned long c_mask; /* CRC constant */
- unsigned long c_pres; /* CRC preset */
- unsigned short disfc; /* discarded frame counter */
- unsigned short crcec; /* CRC error counter */
- unsigned short abtsc; /* abort sequence counter */
- unsigned short nmarc; /* nonmatching address rx cnt */
- unsigned short retrc; /* frame retransmission cnt */
- unsigned short mflr; /* maximum frame length reg */
- unsigned short max_cnt; /* maximum length counter */
- unsigned short rfthr; /* received frames threshold */
- unsigned short rfcnt; /* received frames count */
- unsigned short hmask; /* user defined frm addr mask */
- unsigned short haddr1; /* user defined frm address 1 */
- unsigned short haddr2; /* user defined frm address 2 */
- unsigned short haddr3; /* user defined frm address 3 */
- unsigned short haddr4; /* user defined frm address 4 */
- unsigned short tmp; /* temp */
- unsigned short tmp_mb; /* temp */
-};
-
-
-
-/*****************************************************************
- UART parameter RAM
-*****************************************************************/
-
-/*
- * bits in uart control characters table
- */
-#define CC_INVALID 0x8000 /* control character is valid */
-#define CC_REJ 0x4000 /* don't store char in buffer */
-#define CC_CHAR 0x00ff /* control character */
-
-/* UART */
-struct uart_pram {
- /*
- * SCC parameter RAM
- */
- unsigned short rbase; /* RX BD base address */
- unsigned short tbase; /* TX BD base address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rx_temp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
- unsigned long rcrc; /* temp receive CRC */
- unsigned long tcrc; /* temp transmit CRC */
-
- /*
- * UART specific parameter RAM
- */
- unsigned char RESERVED1[8]; /* Reserved area */
- unsigned short max_idl; /* maximum idle characters */
- unsigned short idlc; /* rx idle counter (internal) */
- unsigned short brkcr; /* break count register */
-
- unsigned short parec; /* Rx parity error counter */
- unsigned short frmer; /* Rx framing error counter */
- unsigned short nosec; /* Rx noise counter */
- unsigned short brkec; /* Rx break character counter */
- unsigned short brkln; /* Receive break length */
-
- unsigned short uaddr1; /* address character 1 */
- unsigned short uaddr2; /* address character 2 */
- unsigned short rtemp; /* temp storage */
- unsigned short toseq; /* Tx out of sequence char */
- unsigned short cc[8]; /* Rx control characters */
- unsigned short rccm; /* Rx control char mask */
- unsigned short rccr; /* Rx control char register */
- unsigned short rlbc; /* Receive last break char */
-};
-
-
-
-/*****************************************************************
- BISYNC parameter RAM
-*****************************************************************/
-
-struct bisync_pram {
- /*
- * SCC parameter RAM
- */
- unsigned short rbase; /* RX BD base address */
- unsigned short tbase; /* TX BD base address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rtemp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
- unsigned long rcrc; /* temp receive CRC */
- unsigned long tcrc; /* temp transmit CRC */
-
- /*
- * BISYNC specific parameter RAM
- */
- unsigned char RESERVED1[4]; /* Reserved area */
- unsigned long crcc; /* CRC Constant Temp Value */
- unsigned short prcrc; /* Preset Receiver CRC-16/LRC */
- unsigned short ptcrc; /* Preset Transmitter CRC-16/LRC */
- unsigned short parec; /* Receive Parity Error Counter */
- unsigned short bsync; /* BISYNC SYNC Character */
- unsigned short bdle; /* BISYNC DLE Character */
- unsigned short cc[8]; /* Rx control characters */
- unsigned short rccm; /* Receive Control Character Mask */
-};
-
-/*****************************************************************
- IOM2 parameter RAM
- (overlaid on tx bd[5] of SCC channel[2])
-*****************************************************************/
-struct iom2_pram {
- unsigned short ci_data; /* ci data */
- unsigned short monitor_data; /* monitor data */
- unsigned short tstate; /* transmitter state */
- unsigned short rstate; /* receiver state */
-};
-
-/*****************************************************************
- SPI/SMC parameter RAM
- (overlaid on tx bd[6,7] of SCC channel[2])
-*****************************************************************/
-
-#define SPI_R 0x8000 /* Ready bit in BD */
-
-struct spi_pram {
- unsigned short rbase; /* Rx BD Base Address */
- unsigned short tbase; /* Tx BD Base Address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rtemp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
-};
-
-struct smc_uart_pram {
- unsigned short rbase; /* Rx BD Base Address */
- unsigned short tbase; /* Tx BD Base Address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rtemp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
- unsigned short max_idl; /* Maximum IDLE Characters */
- unsigned short idlc; /* Temporary IDLE Counter */
- unsigned short brkln; /* Last Rx Break Length */
- unsigned short brkec; /* Rx Break Condition Counter */
- unsigned short brkcr; /* Break Count Register (Tx) */
- unsigned short r_mask; /* Temporary bit mask */
-};
-
-struct smc_trnsp_pram {
- unsigned short rbase; /* rx BD Base Address */
- unsigned short tbase; /* Tx BD Base Address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rtemp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
- unsigned short reserved[5]; /* Reserved */
-};
-
-struct idma_pram {
- unsigned short ibase; /* IDMA BD Base Address */
- unsigned short ibptr; /* IDMA buffer descriptor pointer */
- unsigned long istate; /* IDMA internal state */
- unsigned long itemp; /* IDMA temp */
-};
-
-struct ethernet_pram {
- /*
- * SCC parameter RAM
- */
- unsigned short rbase; /* RX BD base address */
- unsigned short tbase; /* TX BD base address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rtemp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
- unsigned long rcrc; /* temp receive CRC */
- unsigned long tcrc; /* temp transmit CRC */
-
- /*
- * ETHERNET specific parameter RAM
- */
- unsigned long c_pres; /* preset CRC */
- unsigned long c_mask; /* constant mask for CRC */
- unsigned long crcec; /* CRC error counter */
- unsigned long alec; /* alignment error counter */
- unsigned long disfc; /* discard frame counter */
- unsigned short pads; /* short frame PAD characters */
- unsigned short ret_lim; /* retry limit threshold */
- unsigned short ret_cnt; /* retry limit counter */
- unsigned short mflr; /* maximum frame length reg */
- unsigned short minflr; /* minimum frame length reg */
- unsigned short maxd1; /* maximum DMA1 length reg */
- unsigned short maxd2; /* maximum DMA2 length reg */
- unsigned short maxd; /* rx max DMA */
- unsigned short dma_cnt; /* rx dma counter */
- unsigned short max_b; /* max bd byte count */
- unsigned short gaddr1; /* group address filter 1 */
- unsigned short gaddr2; /* group address filter 2 */
- unsigned short gaddr3; /* group address filter 3 */
- unsigned short gaddr4; /* group address filter 4 */
- unsigned long tbuf0_data0; /* save area 0 - current frm */
- unsigned long tbuf0_data1; /* save area 1 - current frm */
- unsigned long tbuf0_rba0;
- unsigned long tbuf0_crc;
- unsigned short tbuf0_bcnt;
- union {
- unsigned char b[6];
- struct {
- unsigned short high;
- unsigned short middl;
- unsigned short low;
- } w;
- } paddr;
- unsigned short p_per; /* persistence */
- unsigned short rfbd_ptr; /* rx first bd pointer */
- unsigned short tfbd_ptr; /* tx first bd pointer */
- unsigned short tlbd_ptr; /* tx last bd pointer */
- unsigned long tbuf1_data0; /* save area 0 - next frame */
- unsigned long tbuf1_data1; /* save area 1 - next frame */
- unsigned long tbuf1_rba0;
- unsigned long tbuf1_crc;
- unsigned short tbuf1_bcnt;
- unsigned short tx_len; /* tx frame length counter */
- unsigned short iaddr1; /* individual address filter 1*/
- unsigned short iaddr2; /* individual address filter 2*/
- unsigned short iaddr3; /* individual address filter 3*/
- unsigned short iaddr4; /* individual address filter 4*/
- unsigned short boff_cnt; /* back-off counter */
- unsigned short taddr_h; /* temp address (MSB) */
- unsigned short taddr_m; /* temp address */
- unsigned short taddr_l; /* temp address (LSB) */
-};
-
-struct transparent_pram {
- /*
- * SCC parameter RAM
- */
- unsigned short rbase; /* RX BD base address */
- unsigned short tbase; /* TX BD base address */
- unsigned char rfcr; /* Rx function code */
- unsigned char tfcr; /* Tx function code */
- unsigned short mrblr; /* Rx buffer length */
- unsigned long rstate; /* Rx internal state */
- unsigned long rptr; /* Rx internal data pointer */
- unsigned short rbptr; /* rb BD Pointer */
- unsigned short rcount; /* Rx internal byte count */
- unsigned long rtemp; /* Rx temp */
- unsigned long tstate; /* Tx internal state */
- unsigned long tptr; /* Tx internal data pointer */
- unsigned short tbptr; /* Tx BD pointer */
- unsigned short tcount; /* Tx byte count */
- unsigned long ttemp; /* Tx temp */
- unsigned long rcrc; /* temp receive CRC */
- unsigned long tcrc; /* temp transmit CRC */
-
- /*
- * TRANSPARENT specific parameter RAM
- */
- unsigned long crc_p; /* CRC Preset */
- unsigned long crc_c; /* CRC constant */
-};
-
-struct timer_pram {
- /*
- * RISC timers parameter RAM
- */
- unsigned short tm_base; /* RISC timer table base adr */
- unsigned short tm_ptr; /* RISC timer table pointer */
- unsigned short r_tmr; /* RISC timer mode register */
- unsigned short r_tmv; /* RISC timer valid register */
- unsigned long tm_cmd; /* RISC timer cmd register */
- unsigned long tm_cnt; /* RISC timer internal cnt */
-};
-
-#endif
diff --git a/arch/m68k/include/asm/m68360_quicc.h b/arch/m68k/include/asm/m68360_quicc.h
deleted file mode 100644
index 59414cc108d3..000000000000
--- a/arch/m68k/include/asm/m68360_quicc.h
+++ /dev/null
@@ -1,362 +0,0 @@
-/***********************************
- * $Id: m68360_quicc.h,v 1.1 2002/03/02 15:01:07 gerg Exp $
- ***********************************
- *
- ***************************************
- * Definitions of QUICC memory structures
- ***************************************
- */
-
-#ifndef __M68360_QUICC_H
-#define __M68360_QUICC_H
-
-/*
- * include registers and
- * parameter ram definitions files
- */
-#include <asm/m68360_regs.h>
-#include <asm/m68360_pram.h>
-
-
-
-/* Buffer Descriptors */
-typedef struct quicc_bd {
- volatile unsigned short status;
- volatile unsigned short length;
- volatile unsigned char *buf; /* WARNING: This is only true if *char is 32 bits */
-} QUICC_BD;
-
-
-#ifdef MOTOROLA_ORIGINAL
-struct user_data {
- /* BASE + 0x000: user data memory */
- volatile unsigned char udata_bd_ucode[0x400]; /*user data bd's Ucode*/
- volatile unsigned char udata_bd[0x200]; /*user data Ucode */
- volatile unsigned char ucode_ext[0x100]; /*Ucode Extension ram */
- volatile unsigned char RESERVED1[0x500]; /* Reserved area */
-};
-#else
-struct user_data {
- /* BASE + 0x000: user data memory */
- volatile unsigned char udata_bd_ucode[0x400]; /* user data, bds, Ucode*/
- volatile unsigned char udata_bd1[0x200]; /* user, bds */
- volatile unsigned char ucode_bd_scratch[0x100]; /* user, bds, ucode scratch */
- volatile unsigned char udata_bd2[0x100]; /* user, bds */
- volatile unsigned char RESERVED1[0x400]; /* Reserved area */
-};
-#endif
-
-
-/*
- * internal ram
- */
-typedef struct quicc {
- union {
- struct quicc32_pram ch_pram_tbl[32]; /* 32*64(bytes) per channel */
- struct user_data u;
- }ch_or_u; /* multipul or user space */
-
- /* BASE + 0xc00: PARAMETER RAM */
- union {
- struct scc_pram {
- union {
- struct hdlc_pram h;
- struct uart_pram u;
- struct bisync_pram b;
- struct transparent_pram t;
- unsigned char RESERVED66[0x70];
- } pscc; /* scc parameter area (protocol dependent) */
- union {
- struct {
- unsigned char RESERVED70[0x10];
- struct spi_pram spi;
- unsigned char RESERVED72[0x8];
- struct timer_pram timer;
- } timer_spi;
- struct {
- struct idma_pram idma;
- unsigned char RESERVED67[0x4];
- union {
- struct smc_uart_pram u;
- struct smc_trnsp_pram t;
- } psmc;
- } idma_smc;
- } pothers;
- } scc;
- struct ethernet_pram enet_scc;
- struct global_multi_pram m;
- unsigned char pr[0x100];
- } pram[4];
-
- /* reserved */
-
- /* BASE + 0x1000: INTERNAL REGISTERS */
- /* SIM */
- volatile unsigned long sim_mcr; /* module configuration reg */
- volatile unsigned short sim_simtr; /* module test register */
- volatile unsigned char RESERVED2[0x2]; /* Reserved area */
- volatile unsigned char sim_avr; /* auto vector reg */
- volatile unsigned char sim_rsr; /* reset status reg */
- volatile unsigned char RESERVED3[0x2]; /* Reserved area */
- volatile unsigned char sim_clkocr; /* CLCO control register */
- volatile unsigned char RESERVED62[0x3]; /* Reserved area */
- volatile unsigned short sim_pllcr; /* PLL control register */
- volatile unsigned char RESERVED63[0x2]; /* Reserved area */
- volatile unsigned short sim_cdvcr; /* Clock devider control register */
- volatile unsigned short sim_pepar; /* Port E pin assignment register */
- volatile unsigned char RESERVED64[0xa]; /* Reserved area */
- volatile unsigned char sim_sypcr; /* system protection control*/
- volatile unsigned char sim_swiv; /* software interrupt vector*/
- volatile unsigned char RESERVED6[0x2]; /* Reserved area */
- volatile unsigned short sim_picr; /* periodic interrupt control reg */
- volatile unsigned char RESERVED7[0x2]; /* Reserved area */
- volatile unsigned short sim_pitr; /* periodic interrupt timing reg */
- volatile unsigned char RESERVED8[0x3]; /* Reserved area */
- volatile unsigned char sim_swsr; /* software service */
- volatile unsigned long sim_bkar; /* breakpoint address register*/
- volatile unsigned long sim_bkcr; /* breakpoint control register*/
- volatile unsigned char RESERVED10[0x8]; /* Reserved area */
- /* MEMC */
- volatile unsigned long memc_gmr; /* Global memory register */
- volatile unsigned short memc_mstat; /* MEMC status register */
- volatile unsigned char RESERVED11[0xa]; /* Reserved area */
- volatile unsigned long memc_br0; /* base register 0 */
- volatile unsigned long memc_or0; /* option register 0 */
- volatile unsigned char RESERVED12[0x8]; /* Reserved area */
- volatile unsigned long memc_br1; /* base register 1 */
- volatile unsigned long memc_or1; /* option register 1 */
- volatile unsigned char RESERVED13[0x8]; /* Reserved area */
- volatile unsigned long memc_br2; /* base register 2 */
- volatile unsigned long memc_or2; /* option register 2 */
- volatile unsigned char RESERVED14[0x8]; /* Reserved area */
- volatile unsigned long memc_br3; /* base register 3 */
- volatile unsigned long memc_or3; /* option register 3 */
- volatile unsigned char RESERVED15[0x8]; /* Reserved area */
- volatile unsigned long memc_br4; /* base register 3 */
- volatile unsigned long memc_or4; /* option register 3 */
- volatile unsigned char RESERVED16[0x8]; /* Reserved area */
- volatile unsigned long memc_br5; /* base register 3 */
- volatile unsigned long memc_or5; /* option register 3 */
- volatile unsigned char RESERVED17[0x8]; /* Reserved area */
- volatile unsigned long memc_br6; /* base register 3 */
- volatile unsigned long memc_or6; /* option register 3 */
- volatile unsigned char RESERVED18[0x8]; /* Reserved area */
- volatile unsigned long memc_br7; /* base register 3 */
- volatile unsigned long memc_or7; /* option register 3 */
- volatile unsigned char RESERVED9[0x28]; /* Reserved area */
- /* TEST */
- volatile unsigned short test_tstmra; /* master shift a */
- volatile unsigned short test_tstmrb; /* master shift b */
- volatile unsigned short test_tstsc; /* shift count */
- volatile unsigned short test_tstrc; /* repetition counter */
- volatile unsigned short test_creg; /* control */
- volatile unsigned short test_dreg; /* destributed register */
- volatile unsigned char RESERVED58[0x404]; /* Reserved area */
- /* IDMA1 */
- volatile unsigned short idma_iccr; /* channel configuration reg*/
- volatile unsigned char RESERVED19[0x2]; /* Reserved area */
- volatile unsigned short idma1_cmr; /* dma mode reg */
- volatile unsigned char RESERVED68[0x2]; /* Reserved area */
- volatile unsigned long idma1_sapr; /* dma source addr ptr */
- volatile unsigned long idma1_dapr; /* dma destination addr ptr */
- volatile unsigned long idma1_bcr; /* dma byte count reg */
- volatile unsigned char idma1_fcr; /* function code reg */
- volatile unsigned char RESERVED20; /* Reserved area */
- volatile unsigned char idma1_cmar; /* channel mask reg */
- volatile unsigned char RESERVED21; /* Reserved area */
- volatile unsigned char idma1_csr; /* channel status reg */
- volatile unsigned char RESERVED22[0x3]; /* Reserved area */
- /* SDMA */
- volatile unsigned char sdma_sdsr; /* status reg */
- volatile unsigned char RESERVED23; /* Reserved area */
- volatile unsigned short sdma_sdcr; /* configuration reg */
- volatile unsigned long sdma_sdar; /* address reg */
- /* IDMA2 */
- volatile unsigned char RESERVED69[0x2]; /* Reserved area */
- volatile unsigned short idma2_cmr; /* dma mode reg */
- volatile unsigned long idma2_sapr; /* dma source addr ptr */
- volatile unsigned long idma2_dapr; /* dma destination addr ptr */
- volatile unsigned long idma2_bcr; /* dma byte count reg */
- volatile unsigned char idma2_fcr; /* function code reg */
- volatile unsigned char RESERVED24; /* Reserved area */
- volatile unsigned char idma2_cmar; /* channel mask reg */
- volatile unsigned char RESERVED25; /* Reserved area */
- volatile unsigned char idma2_csr; /* channel status reg */
- volatile unsigned char RESERVED26[0x7]; /* Reserved area */
- /* Interrupt Controller */
- volatile unsigned long intr_cicr; /* CP interrupt configuration reg*/
- volatile unsigned long intr_cipr; /* CP interrupt pending reg */
- volatile unsigned long intr_cimr; /* CP interrupt mask reg */
- volatile unsigned long intr_cisr; /* CP interrupt in service reg*/
- /* Parallel I/O */
- volatile unsigned short pio_padir; /* port A data direction reg */
- volatile unsigned short pio_papar; /* port A pin assignment reg */
- volatile unsigned short pio_paodr; /* port A open drain reg */
- volatile unsigned short pio_padat; /* port A data register */
- volatile unsigned char RESERVED28[0x8]; /* Reserved area */
- volatile unsigned short pio_pcdir; /* port C data direction reg*/
- volatile unsigned short pio_pcpar; /* port C pin assignment reg*/
- volatile unsigned short pio_pcso; /* port C special options */
- volatile unsigned short pio_pcdat; /* port C data register */
- volatile unsigned short pio_pcint; /* port C interrupt cntrl reg */
- volatile unsigned char RESERVED29[0x16]; /* Reserved area */
- /* Timer */
- volatile unsigned short timer_tgcr; /* timer global configuration reg */
- volatile unsigned char RESERVED30[0xe]; /* Reserved area */
- volatile unsigned short timer_tmr1; /* timer 1 mode reg */
- volatile unsigned short timer_tmr2; /* timer 2 mode reg */
- volatile unsigned short timer_trr1; /* timer 1 referance reg */
- volatile unsigned short timer_trr2; /* timer 2 referance reg */
- volatile unsigned short timer_tcr1; /* timer 1 capture reg */
- volatile unsigned short timer_tcr2; /* timer 2 capture reg */
- volatile unsigned short timer_tcn1; /* timer 1 counter reg */
- volatile unsigned short timer_tcn2; /* timer 2 counter reg */
- volatile unsigned short timer_tmr3; /* timer 3 mode reg */
- volatile unsigned short timer_tmr4; /* timer 4 mode reg */
- volatile unsigned short timer_trr3; /* timer 3 referance reg */
- volatile unsigned short timer_trr4; /* timer 4 referance reg */
- volatile unsigned short timer_tcr3; /* timer 3 capture reg */
- volatile unsigned short timer_tcr4; /* timer 4 capture reg */
- volatile unsigned short timer_tcn3; /* timer 3 counter reg */
- volatile unsigned short timer_tcn4; /* timer 4 counter reg */
- volatile unsigned short timer_ter1; /* timer 1 event reg */
- volatile unsigned short timer_ter2; /* timer 2 event reg */
- volatile unsigned short timer_ter3; /* timer 3 event reg */
- volatile unsigned short timer_ter4; /* timer 4 event reg */
- volatile unsigned char RESERVED34[0x8]; /* Reserved area */
- /* CP */
- volatile unsigned short cp_cr; /* command register */
- volatile unsigned char RESERVED35[0x2]; /* Reserved area */
- volatile unsigned short cp_rccr; /* main configuration reg */
- volatile unsigned char RESERVED37; /* Reserved area */
- volatile unsigned char cp_rmds; /* development support status reg */
- volatile unsigned long cp_rmdr; /* development support control reg */
- volatile unsigned short cp_rctr1; /* ram break register 1 */
- volatile unsigned short cp_rctr2; /* ram break register 2 */
- volatile unsigned short cp_rctr3; /* ram break register 3 */
- volatile unsigned short cp_rctr4; /* ram break register 4 */
- volatile unsigned char RESERVED59[0x2]; /* Reserved area */
- volatile unsigned short cp_rter; /* RISC timers event reg */
- volatile unsigned char RESERVED38[0x2]; /* Reserved area */
- volatile unsigned short cp_rtmr; /* RISC timers mask reg */
- volatile unsigned char RESERVED39[0x14]; /* Reserved area */
- /* BRG */
- union {
- volatile unsigned long l;
- struct {
- volatile unsigned short BRGC_RESERV:14;
- volatile unsigned short rst:1;
- volatile unsigned short en:1;
- volatile unsigned short extc:2;
- volatile unsigned short atb:1;
- volatile unsigned short cd:12;
- volatile unsigned short div16:1;
- } b;
- } brgc[4]; /* BRG1-BRG4 configuration regs*/
- /* SCC registers */
- struct scc_regs {
- union {
- struct {
- /* Low word. */
- volatile unsigned short GSMR_RESERV2:1;
- volatile unsigned short edge:2;
- volatile unsigned short tci:1;
- volatile unsigned short tsnc:2;
- volatile unsigned short rinv:1;
- volatile unsigned short tinv:1;
- volatile unsigned short tpl:3;
- volatile unsigned short tpp:2;
- volatile unsigned short tend:1;
- volatile unsigned short tdcr:2;
- volatile unsigned short rdcr:2;
- volatile unsigned short renc:3;
- volatile unsigned short tenc:3;
- volatile unsigned short diag:2;
- volatile unsigned short enr:1;
- volatile unsigned short ent:1;
- volatile unsigned short mode:4;
- /* High word. */
- volatile unsigned short GSMR_RESERV1:14;
- volatile unsigned short pri:1;
- volatile unsigned short gde:1;
- volatile unsigned short tcrc:2;
- volatile unsigned short revd:1;
- volatile unsigned short trx:1;
- volatile unsigned short ttx:1;
- volatile unsigned short cdp:1;
- volatile unsigned short ctsp:1;
- volatile unsigned short cds:1;
- volatile unsigned short ctss:1;
- volatile unsigned short tfl:1;
- volatile unsigned short rfw:1;
- volatile unsigned short txsy:1;
- volatile unsigned short synl:2;
- volatile unsigned short rtsm:1;
- volatile unsigned short rsyn:1;
- } b;
- struct {
- volatile unsigned long low;
- volatile unsigned long high;
- } w;
- } scc_gsmr; /* SCC general mode reg */
- volatile unsigned short scc_psmr; /* protocol specific mode reg */
- volatile unsigned char RESERVED42[0x2]; /* Reserved area */
- volatile unsigned short scc_todr; /* SCC transmit on demand */
- volatile unsigned short scc_dsr; /* SCC data sync reg */
- volatile unsigned short scc_scce; /* SCC event reg */
- volatile unsigned char RESERVED43[0x2];/* Reserved area */
- volatile unsigned short scc_sccm; /* SCC mask reg */
- volatile unsigned char RESERVED44[0x1];/* Reserved area */
- volatile unsigned char scc_sccs; /* SCC status reg */
- volatile unsigned char RESERVED45[0x8]; /* Reserved area */
- } scc_regs[4];
- /* SMC */
- struct smc_regs {
- volatile unsigned char RESERVED46[0x2]; /* Reserved area */
- volatile unsigned short smc_smcmr; /* SMC mode reg */
- volatile unsigned char RESERVED60[0x2]; /* Reserved area */
- volatile unsigned char smc_smce; /* SMC event reg */
- volatile unsigned char RESERVED47[0x3]; /* Reserved area */
- volatile unsigned char smc_smcm; /* SMC mask reg */
- volatile unsigned char RESERVED48[0x5]; /* Reserved area */
- } smc_regs[2];
- /* SPI */
- volatile unsigned short spi_spmode; /* SPI mode reg */
- volatile unsigned char RESERVED51[0x4]; /* Reserved area */
- volatile unsigned char spi_spie; /* SPI event reg */
- volatile unsigned char RESERVED52[0x3]; /* Reserved area */
- volatile unsigned char spi_spim; /* SPI mask reg */
- volatile unsigned char RESERVED53[0x2]; /* Reserved area */
- volatile unsigned char spi_spcom; /* SPI command reg */
- volatile unsigned char RESERVED54[0x4]; /* Reserved area */
- /* PIP */
- volatile unsigned short pip_pipc; /* pip configuration reg */
- volatile unsigned char RESERVED65[0x2]; /* Reserved area */
- volatile unsigned short pip_ptpr; /* pip timing parameters reg */
- volatile unsigned long pip_pbdir; /* port b data direction reg */
- volatile unsigned long pip_pbpar; /* port b pin assignment reg */
- volatile unsigned long pip_pbodr; /* port b open drain reg */
- volatile unsigned long pip_pbdat; /* port b data reg */
- volatile unsigned char RESERVED71[0x18]; /* Reserved area */
- /* Serial Interface */
- volatile unsigned long si_simode; /* SI mode register */
- volatile unsigned char si_sigmr; /* SI global mode register */
- volatile unsigned char RESERVED55; /* Reserved area */
- volatile unsigned char si_sistr; /* SI status register */
- volatile unsigned char si_sicmr; /* SI command register */
- volatile unsigned char RESERVED56[0x4]; /* Reserved area */
- volatile unsigned long si_sicr; /* SI clock routing */
- volatile unsigned long si_sirp; /* SI ram pointers */
- volatile unsigned char RESERVED57[0xc]; /* Reserved area */
- volatile unsigned short si_siram[0x80]; /* SI routing ram */
-} QUICC;
-
-#endif
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 4
- * End:
- */
diff --git a/arch/m68k/include/asm/m68360_regs.h b/arch/m68k/include/asm/m68360_regs.h
deleted file mode 100644
index d57217ca4f27..000000000000
--- a/arch/m68k/include/asm/m68360_regs.h
+++ /dev/null
@@ -1,408 +0,0 @@
-/***********************************
- * $Id: m68360_regs.h,v 1.2 2002/10/26 15:03:55 gerg Exp $
- ***********************************
- *
- ***************************************
- * Definitions of the QUICC registers
- ***************************************
- */
-
-#ifndef __REGISTERS_H
-#define __REGISTERS_H
-
-#define CLEAR_BIT(x, bit) x =bit
-
-/*****************************************************************
- Command Register
-*****************************************************************/
-
-/* bit fields within command register */
-#define SOFTWARE_RESET 0x8000
-#define CMD_OPCODE 0x0f00
-#define CMD_CHANNEL 0x00f0
-#define CMD_FLAG 0x0001
-
-/* general command opcodes */
-#define INIT_RXTX_PARAMS 0x0000
-#define INIT_RX_PARAMS 0x0100
-#define INIT_TX_PARAMS 0x0200
-#define ENTER_HUNT_MODE 0x0300
-#define STOP_TX 0x0400
-#define GR_STOP_TX 0x0500
-#define RESTART_TX 0x0600
-#define CLOSE_RX_BD 0x0700
-#define SET_ENET_GROUP 0x0800
-#define RESET_ENET_GROUP 0x0900
-
-/* quicc32 CP commands */
-#define STOP_TX_32 0x0e00 /*add chan# bits 2-6 */
-#define ENTER_HUNT_MODE_32 0x1e00
-
-/* quicc32 mask/event SCC register */
-#define GOV 0x01
-#define GUN 0x02
-#define GINT 0x04
-#define IQOV 0x08
-
-
-/* Timer commands */
-#define SET_TIMER 0x0800
-
-/* Multi channel Interrupt structure */
-#define INTR_VALID 0x8000 /* Valid interrupt entry */
-#define INTR_WRAP 0x4000 /* Wrap bit in the interrupt entry table */
-#define INTR_CH_NU 0x07c0 /* Channel Num in interrupt table */
-#define INTR_MASK_BITS 0x383f
-
-/*
- * General SCC mode register (GSMR)
- */
-
-#define MODE_HDLC 0x0
-#define MODE_APPLE_TALK 0x2
-#define MODE_SS7 0x3
-#define MODE_UART 0x4
-#define MODE_PROFIBUS 0x5
-#define MODE_ASYNC_HDLC 0x6
-#define MODE_V14 0x7
-#define MODE_BISYNC 0x8
-#define MODE_DDCMP 0x9
-#define MODE_MULTI_CHANNEL 0xa
-#define MODE_ETHERNET 0xc
-
-#define DIAG_NORMAL 0x0
-#define DIAG_LOCAL_LPB 0x1
-#define DIAG_AUTO_ECHO 0x2
-#define DIAG_LBP_ECHO 0x3
-
-/* For RENC and TENC fields in GSMR */
-#define ENC_NRZ 0x0
-#define ENC_NRZI 0x1
-#define ENC_FM0 0x2
-#define ENC_MANCH 0x4
-#define ENC_DIFF_MANC 0x6
-
-/* For TDCR and RDCR fields in GSMR */
-#define CLOCK_RATE_1 0x0
-#define CLOCK_RATE_8 0x1
-#define CLOCK_RATE_16 0x2
-#define CLOCK_RATE_32 0x3
-
-#define TPP_00 0x0
-#define TPP_10 0x1
-#define TPP_01 0x2
-#define TPP_11 0x3
-
-#define TPL_NO 0x0
-#define TPL_8 0x1
-#define TPL_16 0x2
-#define TPL_32 0x3
-#define TPL_48 0x4
-#define TPL_64 0x5
-#define TPL_128 0x6
-
-#define TSNC_INFINITE 0x0
-#define TSNC_14_65 0x1
-#define TSNC_4_15 0x2
-#define TSNC_3_1 0x3
-
-#define EDGE_BOTH 0x0
-#define EDGE_POS 0x1
-#define EDGE_NEG 0x2
-#define EDGE_NO 0x3
-
-#define SYNL_NO 0x0
-#define SYNL_4 0x1
-#define SYNL_8 0x2
-#define SYNL_16 0x3
-
-#define TCRC_CCITT16 0x0
-#define TCRC_CRC16 0x1
-#define TCRC_CCITT32 0x2
-
-
-/*****************************************************************
- TODR (Transmit on demand) Register
-*****************************************************************/
-#define TODR_TOD 0x8000 /* Transmit on demand */
-
-
-/*****************************************************************
- CICR register settings
-*****************************************************************/
-
-/* note that relative irq priorities of the SCCs can be reordered
- * if desired - see p. 7-377 of the MC68360UM */
-#define CICR_SCA_SCC1 ((uint)0x00000000) /* SCC1 @ SCCa */
-#define CICR_SCB_SCC2 ((uint)0x00040000) /* SCC2 @ SCCb */
-#define CICR_SCC_SCC3 ((uint)0x00200000) /* SCC3 @ SCCc */
-#define CICR_SCD_SCC4 ((uint)0x00c00000) /* SCC4 @ SCCd */
-
-#define CICR_IRL_MASK ((uint)0x0000e000) /* Core interrupt */
-#define CICR_HP_MASK ((uint)0x00001f00) /* Hi-pri int. */
-#define CICR_VBA_MASK ((uint)0x000000e0) /* Vector Base Address */
-#define CICR_SPS ((uint)0x00000001) /* SCC Spread */
-
-
-/*****************************************************************
- Interrupt bits for CIPR and CIMR (MC68360UM p. 7-379)
-*****************************************************************/
-
-#define INTR_PIO_PC0 0x80000000 /* parallel I/O C bit 0 */
-#define INTR_SCC1 0x40000000 /* SCC port 1 */
-#define INTR_SCC2 0x20000000 /* SCC port 2 */
-#define INTR_SCC3 0x10000000 /* SCC port 3 */
-#define INTR_SCC4 0x08000000 /* SCC port 4 */
-#define INTR_PIO_PC1 0x04000000 /* parallel i/o C bit 1 */
-#define INTR_TIMER1 0x02000000 /* timer 1 */
-#define INTR_PIO_PC2 0x01000000 /* parallel i/o C bit 2 */
-#define INTR_PIO_PC3 0x00800000 /* parallel i/o C bit 3 */
-#define INTR_SDMA_BERR 0x00400000 /* SDMA channel bus error */
-#define INTR_DMA1 0x00200000 /* idma 1 */
-#define INTR_DMA2 0x00100000 /* idma 2 */
-#define INTR_TIMER2 0x00040000 /* timer 2 */
-#define INTR_CP_TIMER 0x00020000 /* CP timer */
-#define INTR_PIP_STATUS 0x00010000 /* PIP status */
-#define INTR_PIO_PC4 0x00008000 /* parallel i/o C bit 4 */
-#define INTR_PIO_PC5 0x00004000 /* parallel i/o C bit 5 */
-#define INTR_TIMER3 0x00001000 /* timer 3 */
-#define INTR_PIO_PC6 0x00000800 /* parallel i/o C bit 6 */
-#define INTR_PIO_PC7 0x00000400 /* parallel i/o C bit 7 */
-#define INTR_PIO_PC8 0x00000200 /* parallel i/o C bit 8 */
-#define INTR_TIMER4 0x00000080 /* timer 4 */
-#define INTR_PIO_PC9 0x00000040 /* parallel i/o C bit 9 */
-#define INTR_SCP 0x00000020 /* SCP */
-#define INTR_SMC1 0x00000010 /* SMC 1 */
-#define INTR_SMC2 0x00000008 /* SMC 2 */
-#define INTR_PIO_PC10 0x00000004 /* parallel i/o C bit 10 */
-#define INTR_PIO_PC11 0x00000002 /* parallel i/o C bit 11 */
-#define INTR_ERR 0x00000001 /* error */
-
-
-/*****************************************************************
- CPM Interrupt vector encodings (MC68360UM p. 7-376)
-*****************************************************************/
-
-#define CPMVEC_NR 32
-#define CPMVEC_PIO_PC0 0x1f
-#define CPMVEC_SCC1 0x1e
-#define CPMVEC_SCC2 0x1d
-#define CPMVEC_SCC3 0x1c
-#define CPMVEC_SCC4 0x1b
-#define CPMVEC_PIO_PC1 0x1a
-#define CPMVEC_TIMER1 0x19
-#define CPMVEC_PIO_PC2 0x18
-#define CPMVEC_PIO_PC3 0x17
-#define CPMVEC_SDMA_CB_ERR 0x16
-#define CPMVEC_IDMA1 0x15
-#define CPMVEC_IDMA2 0x14
-#define CPMVEC_RESERVED3 0x13
-#define CPMVEC_TIMER2 0x12
-#define CPMVEC_RISCTIMER 0x11
-#define CPMVEC_RESERVED2 0x10
-#define CPMVEC_PIO_PC4 0x0f
-#define CPMVEC_PIO_PC5 0x0e
-#define CPMVEC_TIMER3 0x0c
-#define CPMVEC_PIO_PC6 0x0b
-#define CPMVEC_PIO_PC7 0x0a
-#define CPMVEC_PIO_PC8 0x09
-#define CPMVEC_RESERVED1 0x08
-#define CPMVEC_TIMER4 0x07
-#define CPMVEC_PIO_PC9 0x06
-#define CPMVEC_SPI 0x05
-#define CPMVEC_SMC1 0x04
-#define CPMVEC_SMC2 0x03
-#define CPMVEC_PIO_PC10 0x02
-#define CPMVEC_PIO_PC11 0x01
-#define CPMVEC_ERROR 0x00
-
-/* #define CPMVEC_PIO_PC0 ((ushort)0x1f) */
-/* #define CPMVEC_SCC1 ((ushort)0x1e) */
-/* #define CPMVEC_SCC2 ((ushort)0x1d) */
-/* #define CPMVEC_SCC3 ((ushort)0x1c) */
-/* #define CPMVEC_SCC4 ((ushort)0x1b) */
-/* #define CPMVEC_PIO_PC1 ((ushort)0x1a) */
-/* #define CPMVEC_TIMER1 ((ushort)0x19) */
-/* #define CPMVEC_PIO_PC2 ((ushort)0x18) */
-/* #define CPMVEC_PIO_PC3 ((ushort)0x17) */
-/* #define CPMVEC_SDMA_CB_ERR ((ushort)0x16) */
-/* #define CPMVEC_IDMA1 ((ushort)0x15) */
-/* #define CPMVEC_IDMA2 ((ushort)0x14) */
-/* #define CPMVEC_RESERVED3 ((ushort)0x13) */
-/* #define CPMVEC_TIMER2 ((ushort)0x12) */
-/* #define CPMVEC_RISCTIMER ((ushort)0x11) */
-/* #define CPMVEC_RESERVED2 ((ushort)0x10) */
-/* #define CPMVEC_PIO_PC4 ((ushort)0x0f) */
-/* #define CPMVEC_PIO_PC5 ((ushort)0x0e) */
-/* #define CPMVEC_TIMER3 ((ushort)0x0c) */
-/* #define CPMVEC_PIO_PC6 ((ushort)0x0b) */
-/* #define CPMVEC_PIO_PC7 ((ushort)0x0a) */
-/* #define CPMVEC_PIO_PC8 ((ushort)0x09) */
-/* #define CPMVEC_RESERVED1 ((ushort)0x08) */
-/* #define CPMVEC_TIMER4 ((ushort)0x07) */
-/* #define CPMVEC_PIO_PC9 ((ushort)0x06) */
-/* #define CPMVEC_SPI ((ushort)0x05) */
-/* #define CPMVEC_SMC1 ((ushort)0x04) */
-/* #define CPMVEC_SMC2 ((ushort)0x03) */
-/* #define CPMVEC_PIO_PC10 ((ushort)0x02) */
-/* #define CPMVEC_PIO_PC11 ((ushort)0x01) */
-/* #define CPMVEC_ERROR ((ushort)0x00) */
-
-
-/*****************************************************************
- * PIO control registers
- *****************************************************************/
-
-/* Port A - See 360UM p. 7-358
- *
- * Note that most of these pins have alternate functions
- */
-
-
-/* The macros are nice, but there are all sorts of references to 1-indexed
- * facilities on the 68360... */
-/* #define PA_RXD(n) ((ushort)(0x01<<(2*n))) */
-/* #define PA_TXD(n) ((ushort)(0x02<<(2*n))) */
-
-#define PA_RXD1 ((ushort)0x0001)
-#define PA_TXD1 ((ushort)0x0002)
-#define PA_RXD2 ((ushort)0x0004)
-#define PA_TXD2 ((ushort)0x0008)
-#define PA_RXD3 ((ushort)0x0010)
-#define PA_TXD3 ((ushort)0x0020)
-#define PA_RXD4 ((ushort)0x0040)
-#define PA_TXD4 ((ushort)0x0080)
-
-#define PA_CLK1 ((ushort)0x0100)
-#define PA_CLK2 ((ushort)0x0200)
-#define PA_CLK3 ((ushort)0x0400)
-#define PA_CLK4 ((ushort)0x0800)
-#define PA_CLK5 ((ushort)0x1000)
-#define PA_CLK6 ((ushort)0x2000)
-#define PA_CLK7 ((ushort)0x4000)
-#define PA_CLK8 ((ushort)0x8000)
-
-
-/* Port B - See 360UM p. 7-362
- */
-
-
-/* Port C - See 360UM p. 7-365
- */
-
-#define PC_RTS1 ((ushort)0x0001)
-#define PC_RTS2 ((ushort)0x0002)
-#define PC__RTS3 ((ushort)0x0004) /* !RTS3 */
-#define PC__RTS4 ((ushort)0x0008) /* !RTS4 */
-
-#define PC_CTS1 ((ushort)0x0010)
-#define PC_CD1 ((ushort)0x0020)
-#define PC_CTS2 ((ushort)0x0040)
-#define PC_CD2 ((ushort)0x0080)
-#define PC_CTS3 ((ushort)0x0100)
-#define PC_CD3 ((ushort)0x0200)
-#define PC_CTS4 ((ushort)0x0400)
-#define PC_CD4 ((ushort)0x0800)
-
-
-
-/*****************************************************************
- chip select option register
-*****************************************************************/
-#define DTACK 0xe000
-#define ADR_MASK 0x1ffc
-#define RDWR_MASK 0x0002
-#define FC_MASK 0x0001
-
-/*****************************************************************
- tbase and rbase registers
-*****************************************************************/
-#define TBD_ADDR(quicc,pram) ((struct quicc_bd *) \
- (quicc->ch_or_u.u.udata_bd_ucode + pram->tbase))
-#define RBD_ADDR(quicc,pram) ((struct quicc_bd *) \
- (quicc->ch_or_u.u.udata_bd_ucode + pram->rbase))
-#define TBD_CUR_ADDR(quicc,pram) ((struct quicc_bd *) \
- (quicc->ch_or_u.u.udata_bd_ucode + pram->tbptr))
-#define RBD_CUR_ADDR(quicc,pram) ((struct quicc_bd *) \
- (quicc->ch_or_u.u.udata_bd_ucode + pram->rbptr))
-#define TBD_SET_CUR_ADDR(bd,quicc,pram) pram->tbptr = \
- ((unsigned short)((char *)(bd) - (char *)(quicc->ch_or_u.u.udata_bd_ucode)))
-#define RBD_SET_CUR_ADDR(bd,quicc,pram) pram->rbptr = \
- ((unsigned short)((char *)(bd) - (char *)(quicc->ch_or_u.u.udata_bd_ucode)))
-#define INCREASE_TBD(bd,quicc,pram) { \
- if((bd)->status & T_W) \
- (bd) = TBD_ADDR(quicc,pram); \
- else \
- (bd)++; \
-}
-#define DECREASE_TBD(bd,quicc,pram) { \
- if ((bd) == TBD_ADDR(quicc, pram)) \
- while (!((bd)->status & T_W)) \
- (bd)++; \
- else \
- (bd)--; \
-}
-#define INCREASE_RBD(bd,quicc,pram) { \
- if((bd)->status & R_W) \
- (bd) = RBD_ADDR(quicc,pram); \
- else \
- (bd)++; \
-}
-#define DECREASE_RBD(bd,quicc,pram) { \
- if ((bd) == RBD_ADDR(quicc, pram)) \
- while (!((bd)->status & T_W)) \
- (bd)++; \
- else \
- (bd)--; \
-}
-
-/*****************************************************************
- Macros for Multi channel
-*****************************************************************/
-#define QMC_BASE(quicc,page) (struct global_multi_pram *)(&quicc->pram[page])
-#define MCBASE(quicc,page) (unsigned long)(quicc->pram[page].m.mcbase)
-#define CHANNEL_PRAM_BASE(quicc,channel) ((struct quicc32_pram *) \
- (&(quicc->ch_or_u.ch_pram_tbl[channel])))
-#define TBD_32_ADDR(quicc,page,channel) ((struct quicc_bd *) \
- (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->tbase)))
-#define RBD_32_ADDR(quicc,page,channel) ((struct quicc_bd *) \
- (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->rbase)))
-#define TBD_32_CUR_ADDR(quicc,page,channel) ((struct quicc_bd *) \
- (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->tbptr)))
-#define RBD_32_CUR_ADDR(quicc,page,channel) ((struct quicc_bd *) \
- (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->rbptr)))
-#define TBD_32_SET_CUR_ADDR(bd,quicc,page,channel) \
- CHANNEL_PRAM_BASE(quicc,channel)->tbptr = \
- ((unsigned short)((char *)(bd) - (char *)(MCBASE(quicc,page))))
-#define RBD_32_SET_CUR_ADDR(bd,quicc,page,channel) \
- CHANNEL_PRAM_BASE(quicc,channel)->rbptr = \
- ((unsigned short)((char *)(bd) - (char *)(MCBASE(quicc,page))))
-
-#define INCREASE_TBD_32(bd,quicc,page,channel) { \
- if((bd)->status & T_W) \
- (bd) = TBD_32_ADDR(quicc,page,channel); \
- else \
- (bd)++; \
-}
-#define DECREASE_TBD_32(bd,quicc,page,channel) { \
- if ((bd) == TBD_32_ADDR(quicc, page,channel)) \
- while (!((bd)->status & T_W)) \
- (bd)++; \
- else \
- (bd)--; \
-}
-#define INCREASE_RBD_32(bd,quicc,page,channel) { \
- if((bd)->status & R_W) \
- (bd) = RBD_32_ADDR(quicc,page,channel); \
- else \
- (bd)++; \
-}
-#define DECREASE_RBD_32(bd,quicc,page,channel) { \
- if ((bd) == RBD_32_ADDR(quicc, page,channel)) \
- while (!((bd)->status & T_W)) \
- (bd)++; \
- else \
- (bd)--; \
-}
-
-#endif
diff --git a/arch/m68k/include/asm/mac_iop.h b/arch/m68k/include/asm/mac_iop.h
index fde874a01e20..42566fd052bc 100644
--- a/arch/m68k/include/asm/mac_iop.h
+++ b/arch/m68k/include/asm/mac_iop.h
@@ -48,7 +48,7 @@
/* IOP message status codes */
-#define IOP_MSGSTATUS_UNUSED 0 /* Unusued message structure */
+#define IOP_MSGSTATUS_UNUSED 0 /* Unused message structure */
#define IOP_MSGSTATUS_WAITING 1 /* waiting for channel */
#define IOP_MSGSTATUS_SENT 2 /* message sent, awaiting reply */
#define IOP_MSGSTATUS_COMPLETE 3 /* message complete and reply rcvd */
diff --git a/arch/m68k/include/asm/mcftimer.h b/arch/m68k/include/asm/mcftimer.h
index 089f0f150bbf..1150e42c3f19 100644
--- a/arch/m68k/include/asm/mcftimer.h
+++ b/arch/m68k/include/asm/mcftimer.h
@@ -51,7 +51,7 @@
* Bit definitions for the Timer Event Registers (TER).
*/
#define MCFTIMER_TER_CAP 0x01 /* Capture event */
-#define MCFTIMER_TER_REF 0x02 /* Refernece event */
+#define MCFTIMER_TER_REF 0x02 /* Reference event */
/****************************************************************************/
#endif /* mcftimer_h */
diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h
index 848c3dfaad50..3a3dbcf4051d 100644
--- a/arch/m68k/include/asm/pci.h
+++ b/arch/m68k/include/asm/pci.h
@@ -1,7 +1,6 @@
#ifndef _ASM_M68K_PCI_H
#define _ASM_M68K_PCI_H
-#include <asm-generic/pci-dma-compat.h>
#include <asm-generic/pci.h>
/* The PCI address space does equal the physical memory
diff --git a/arch/m68k/include/asm/serial.h b/arch/m68k/include/asm/serial.h
index 06d0cb19b4e1..6d4497049b4b 100644
--- a/arch/m68k/include/asm/serial.h
+++ b/arch/m68k/include/asm/serial.h
@@ -18,11 +18,11 @@
/* Standard COM flags (except for COM4, because of the 8514 problem) */
#ifdef CONFIG_SERIAL_8250_DETECT_IRQ
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
-#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ)
+#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ)
+#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ)
#else
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF
+#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST)
+#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF
#endif
#ifdef CONFIG_ISA
diff --git a/arch/m68k/kernel/early_printk.c b/arch/m68k/kernel/early_printk.c
index ff9708d71921..7d3fe08a48eb 100644
--- a/arch/m68k/kernel/early_printk.c
+++ b/arch/m68k/kernel/early_printk.c
@@ -20,8 +20,8 @@ asmlinkage void __init debug_cons_nputs(const char *s, unsigned n);
static void __ref debug_cons_write(struct console *c,
const char *s, unsigned n)
{
-#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68360) || \
- defined(CONFIG_M68000) || defined(CONFIG_COLDFIRE))
+#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68000) || \
+ defined(CONFIG_COLDFIRE))
if (MACH_IS_MVME16x)
mvme16x_cons_write(c, s, n);
else
@@ -52,8 +52,8 @@ early_param("earlyprintk", setup_early_printk);
* debug_cons_nputs() defined in arch/m68k/kernel/head.S cannot be called
* after init sections are discarded (for platforms that use it).
*/
-#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68360) || \
- defined(CONFIG_M68000) || defined(CONFIG_COLDFIRE))
+#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68000) || \
+ defined(CONFIG_COLDFIRE))
static int __init unregister_early_console(void)
{
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index b54ac7aba850..97cd3ea5f10b 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -71,13 +71,19 @@ ENTRY(__sys_vfork)
ENTRY(sys_sigreturn)
SAVE_SWITCH_STACK
+ movel %sp,%sp@- | switch_stack pointer
+ pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer
jbsr do_sigreturn
+ addql #8,%sp
RESTORE_SWITCH_STACK
rts
ENTRY(sys_rt_sigreturn)
SAVE_SWITCH_STACK
+ movel %sp,%sp@- | switch_stack pointer
+ pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer
jbsr do_rt_sigreturn
+ addql #8,%sp
RESTORE_SWITCH_STACK
rts
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index 76b9113f3092..9309789215a8 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -68,9 +68,6 @@ void (*mach_power_off)(void);
#define CPU_NAME "MC68000"
#endif
#endif /* CONFIG_M68000 */
-#ifdef CONFIG_M68360
-#define CPU_NAME "MC68360"
-#endif
#ifndef CPU_NAME
#define CPU_NAME "UNKNOWN"
#endif
@@ -209,10 +206,6 @@ void __init setup_arch(char **cmdline_p)
#if defined( CONFIG_PILOT ) && defined( CONFIG_M68EZ328 )
printk(KERN_INFO "PalmV support by Lineo Inc. <jeff@uclinux.com>\n");
#endif
-#if defined (CONFIG_M68360)
- printk(KERN_INFO "QUICC port done by SED Systems <hamilton@sedsystems.ca>,\n");
- printk(KERN_INFO "based on 2.0.38 port by Lineo Inc. <mleslie@lineo.com>.\n");
-#endif
#ifdef CONFIG_DRAGEN2
printk(KERN_INFO "DragonEngine II board support by Georges Menie\n");
#endif
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index af1c4f330aef..2dcee3a88867 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -737,10 +737,8 @@ badframe:
return 1;
}
-asmlinkage int do_sigreturn(unsigned long __unused)
+asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
{
- struct switch_stack *sw = (struct switch_stack *) &__unused;
- struct pt_regs *regs = (struct pt_regs *) (sw + 1);
unsigned long usp = rdusp();
struct sigframe __user *frame = (struct sigframe __user *)(usp - 4);
sigset_t set;
@@ -764,10 +762,8 @@ badframe:
return 0;
}
-asmlinkage int do_rt_sigreturn(unsigned long __unused)
+asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
{
- struct switch_stack *sw = (struct switch_stack *) &__unused;
- struct pt_regs *regs = (struct pt_regs *) (sw + 1);
unsigned long usp = rdusp();
struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(usp - 4);
sigset_t set;
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index ce56e04386e7..920ff63d4a81 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -68,7 +68,7 @@ static int gIER,gIFR,gBufA,gBufB;
* interrupt. This limitation also seems to apply to VIA clone logic cores in
* Quadra-like ASICs. (RBV and OSS machines don't have this limitation.)
*
- * We used to fake it by configuring the relevent VIA pin as an output
+ * We used to fake it by configuring the relevant VIA pin as an output
* (to mask the interrupt) or input (to unmask). That scheme did not work on
* (at least) the Quadra 700. A NuBus card's /NMRQ signal is an open-collector
* circuit (see Designing Cards and Drivers for Macintosh II and Macintosh SE,
diff --git a/arch/metag/include/asm/gpio.h b/arch/metag/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/metag/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index c3c6f0864881..bad13232de51 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
index 53f0f6c47027..b38700ae4e84 100644
--- a/arch/metag/mm/hugetlbpage.c
+++ b/arch/metag/mm/hugetlbpage.c
@@ -67,7 +67,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pgd = pgd_offset(mm, addr);
pud = pud_offset(pgd, addr);
pmd = pmd_offset(pud, addr);
- pte = pte_alloc_map(mm, NULL, pmd, addr);
+ pte = pte_alloc_map(mm, pmd, addr);
pgd->pgd &= ~_PAGE_SZ_MASK;
pgd->pgd |= _PAGE_SZHUGE;
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 53b69deceb99..3d793b55f60c 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -267,6 +267,9 @@ config PCI
config PCI_DOMAINS
def_bool PCI
+config PCI_DOMAINS_GENERIC
+ def_bool PCI_DOMAINS
+
config PCI_SYSCALL
def_bool PCI
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index e2f6543b91e7..dc5dd5b69fde 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -87,5 +87,4 @@ CONFIG_KGDB_KDB=y
CONFIG_EARLY_PRINTK=y
CONFIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index a29ebd4a9fcb..4cdaf565e638 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -92,7 +92,6 @@ CONFIG_DEBUG_INFO=y
CONFIG_EARLY_PRINTK=y
CONFIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
diff --git a/arch/microblaze/include/asm/gpio.h b/arch/microblaze/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/microblaze/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index dc9eb6657e3a..fc3ecb55f1b2 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -22,8 +22,6 @@
#include <asm/prom.h>
#include <asm/pci-bridge.h>
-#include <asm-generic/pci-dma-compat.h>
-
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0x10000000
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 89a2a9394927..f31ebb5dc26c 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -130,8 +130,6 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
memset(__bss_start, 0, __bss_stop-__bss_start);
memset(_ssbss, 0, _esbss-_ssbss);
- lockdep_init();
-
/* initialize device tree for usage in early_printk */
early_init_devtree(_fdt_start);
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index ae838ed5fcf2..35654be3f1c0 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -123,17 +123,6 @@ unsigned long pci_address_to_pio(phys_addr_t address)
}
EXPORT_SYMBOL_GPL(pci_address_to_pio);
-/*
- * Return the domain number for this bus.
- */
-int pci_domain_nr(struct pci_bus *bus)
-{
- struct pci_controller *hose = pci_bus_to_host(bus);
-
- return hose->global_number;
-}
-EXPORT_SYMBOL(pci_domain_nr);
-
/* This routine is meant to be used early during boot, when the
* PCI bus numbers have not yet been assigned, and you need to
* issue PCI config cycles to an OF device.
@@ -863,26 +852,10 @@ void pcibios_setup_bus_devices(struct pci_bus *bus)
void pcibios_fixup_bus(struct pci_bus *bus)
{
- /* When called from the generic PCI probe, read PCI<->PCI bridge
- * bases. This is -not- called when generating the PCI tree from
- * the OF device-tree.
- */
- if (bus->self != NULL)
- pci_read_bridge_bases(bus);
-
- /* Now fixup the bus bus */
- pcibios_setup_bus_self(bus);
-
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
+ /* nothing to do */
}
EXPORT_SYMBOL(pcibios_fixup_bus);
-static int skip_isa_ioresource_align(struct pci_dev *dev)
-{
- return 0;
-}
-
/*
* We need to avoid collisions with `mirrored' VGA ports
* and other strange ISA hardware, so we always want the
@@ -899,20 +872,18 @@ static int skip_isa_ioresource_align(struct pci_dev *dev)
resource_size_t pcibios_align_resource(void *data, const struct resource *res,
resource_size_t size, resource_size_t align)
{
- struct pci_dev *dev = data;
- resource_size_t start = res->start;
-
- if (res->flags & IORESOURCE_IO) {
- if (skip_isa_ioresource_align(dev))
- return start;
- if (start & 0x300)
- start = (start + 0x3ff) & ~0x3ff;
- }
-
- return start;
+ return res->start;
}
EXPORT_SYMBOL(pcibios_align_resource);
+int pcibios_add_device(struct pci_dev *dev)
+{
+ dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
+
+ return 0;
+}
+EXPORT_SYMBOL(pcibios_add_device);
+
/*
* Reparent resource children of pr that conflict with res
* under res, and make res replace those children.
@@ -1333,13 +1304,6 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
(unsigned long)hose->io_base_virt - _IO_BASE);
}
-struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
-{
- struct pci_controller *hose = bus->sysdata;
-
- return of_node_get(hose->dn);
-}
-
static void pcibios_scan_phb(struct pci_controller *hose)
{
LIST_HEAD(resources);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 74a3db92da1b..7c4a4ce35603 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -151,6 +151,7 @@ config BMIPS_GENERIC
select CSRC_R4K
select SYNC_R4K
select COMMON_CLK
+ select BCM6345_L1_IRQ
select BCM7038_L1_IRQ
select BCM7120_L2_IRQ
select BRCMSTB_L2_IRQ
@@ -2169,7 +2170,6 @@ config MIPS_MT_SMP
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
select SYNC_R4K
- select MIPS_GIC_IPI
select MIPS_MT
select SMP
select SMP_UP
@@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT
config MIPS_CMP
bool "MIPS CMP framework support (DEPRECATED)"
depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
- select MIPS_GIC_IPI
select SMP
select SYNC_R4K
select SYS_SUPPORTS_SMP
@@ -2287,7 +2286,6 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPC
select MIPS_CPS_PM if HOTPLUG_CPU
- select MIPS_GIC_IPI
select SMP
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
@@ -2305,9 +2303,6 @@ config MIPS_CPS_PM
select MIPS_CPC
bool
-config MIPS_GIC_IPI
- bool
-
config MIPS_CM
bool
@@ -2876,8 +2871,6 @@ config PCI_DOMAINS
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
-
#
# ISA support is now enabled via select. Too many systems still have the one
# or other ISA chip on the board that users don't know about so don't expect
@@ -2937,8 +2930,6 @@ config ZONE_DMA32
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
config RAPIDIO
tristate "RapidIO support"
depends on PCI
diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c
index 84548f704035..e6b90e72c23f 100644
--- a/arch/mips/alchemy/common/gpiolib.c
+++ b/arch/mips/alchemy/common/gpiolib.c
@@ -160,14 +160,14 @@ static int __init alchemy_gpiochip_init(void)
switch (alchemy_get_cputype()) {
case ALCHEMY_CPU_AU1000:
- ret = gpiochip_add(&alchemy_gpio_chip[0]);
+ ret = gpiochip_add_data(&alchemy_gpio_chip[0], NULL);
break;
case ALCHEMY_CPU_AU1500...ALCHEMY_CPU_AU1200:
- ret = gpiochip_add(&alchemy_gpio_chip[0]);
- ret |= gpiochip_add(&alchemy_gpio_chip[1]);
+ ret = gpiochip_add_data(&alchemy_gpio_chip[0], NULL);
+ ret |= gpiochip_add_data(&alchemy_gpio_chip[1], NULL);
break;
case ALCHEMY_CPU_AU1300:
- ret = gpiochip_add(&au1300_gpiochip);
+ ret = gpiochip_add_data(&au1300_gpiochip, NULL);
break;
}
return ret;
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index f969f583c68c..ed5b3d297caf 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -33,8 +33,7 @@ struct ar7_gpio_chip {
static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT;
return !!(readl(gpio_in) & (1 << gpio));
@@ -42,8 +41,7 @@ static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_in0 = gpch->regs + TITAN_GPIO_INPUT_0;
void __iomem *gpio_in1 = gpch->regs + TITAN_GPIO_INPUT_1;
@@ -53,8 +51,7 @@ static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
static void ar7_gpio_set_value(struct gpio_chip *chip,
unsigned gpio, int value)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_out = gpch->regs + AR7_GPIO_OUTPUT;
unsigned tmp;
@@ -67,8 +64,7 @@ static void ar7_gpio_set_value(struct gpio_chip *chip,
static void titan_gpio_set_value(struct gpio_chip *chip,
unsigned gpio, int value)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_out0 = gpch->regs + TITAN_GPIO_OUTPUT_0;
void __iomem *gpio_out1 = gpch->regs + TITAN_GPIO_OUTPUT_1;
unsigned tmp;
@@ -81,8 +77,7 @@ static void titan_gpio_set_value(struct gpio_chip *chip,
static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR;
writel(readl(gpio_dir) | (1 << gpio), gpio_dir);
@@ -92,8 +87,7 @@ static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0;
void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1;
@@ -108,8 +102,7 @@ static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
static int ar7_gpio_direction_output(struct gpio_chip *chip,
unsigned gpio, int value)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR;
ar7_gpio_set_value(chip, gpio, value);
@@ -121,8 +114,7 @@ static int ar7_gpio_direction_output(struct gpio_chip *chip,
static int titan_gpio_direction_output(struct gpio_chip *chip,
unsigned gpio, int value)
{
- struct ar7_gpio_chip *gpch =
- container_of(chip, struct ar7_gpio_chip, chip);
+ struct ar7_gpio_chip *gpch = gpiochip_get_data(chip);
void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0;
void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1;
@@ -335,7 +327,7 @@ int __init ar7_gpio_init(void)
return -ENOMEM;
}
- ret = gpiochip_add(&gpch->chip);
+ ret = gpiochip_add_data(&gpch->chip, gpch);
if (ret) {
printk(KERN_ERR "%s: failed to add gpiochip\n",
gpch->chip.label);
diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index 511c06560dc1..2dfff1f19004 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c
@@ -26,90 +26,6 @@
#include "common.h"
#include "machtypes.h"
-static void __init ath79_misc_intc_domain_init(
- struct device_node *node, int irq);
-
-static void ath79_misc_irq_handler(struct irq_desc *desc)
-{
- struct irq_domain *domain = irq_desc_get_handler_data(desc);
- void __iomem *base = domain->host_data;
- u32 pending;
-
- pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- if (!pending) {
- spurious_interrupt();
- return;
- }
-
- while (pending) {
- int bit = __ffs(pending);
-
- generic_handle_irq(irq_linear_revmap(domain, bit));
- pending &= ~BIT(bit);
- }
-}
-
-static void ar71xx_misc_irq_unmask(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar71xx_misc_irq_mask(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar724x_misc_irq_ack(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
- __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
-}
-
-static struct irq_chip ath79_misc_irq_chip = {
- .name = "MISC",
- .irq_unmask = ar71xx_misc_irq_unmask,
- .irq_mask = ar71xx_misc_irq_mask,
-};
-
-static void __init ath79_misc_irq_init(void)
-{
- if (soc_is_ar71xx() || soc_is_ar913x())
- ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
- else if (soc_is_ar724x() ||
- soc_is_ar933x() ||
- soc_is_ar934x() ||
- soc_is_qca955x())
- ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
- else
- BUG();
-
- ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
-}
static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
{
@@ -212,142 +128,12 @@ static void qca955x_irq_init(void)
irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch);
}
-/*
- * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for
- * these devices typically allocate coherent DMA memory, however the
- * DMA controller may still have some unsynchronized data in the FIFO.
- * Issue a flush in the handlers to ensure that the driver sees
- * the update.
- *
- * This array map the interrupt lines to the DDR write buffer channels.
- */
-
-static unsigned irq_wb_chan[8] = {
- -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-asmlinkage void plat_irq_dispatch(void)
-{
- unsigned long pending;
- int irq;
-
- pending = read_c0_status() & read_c0_cause() & ST0_IM;
-
- if (!pending) {
- spurious_interrupt();
- return;
- }
-
- pending >>= CAUSEB_IP;
- while (pending) {
- irq = fls(pending) - 1;
- if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1)
- ath79_ddr_wb_flush(irq_wb_chan[irq]);
- do_IRQ(MIPS_CPU_IRQ_BASE + irq);
- pending &= ~BIT(irq);
- }
-}
-
-static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
-{
- irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
- irq_set_chip_data(irq, d->host_data);
- return 0;
-}
-
-static const struct irq_domain_ops misc_irq_domain_ops = {
- .xlate = irq_domain_xlate_onecell,
- .map = misc_map,
-};
-
-static void __init ath79_misc_intc_domain_init(
- struct device_node *node, int irq)
-{
- void __iomem *base = ath79_reset_base;
- struct irq_domain *domain;
-
- domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
- ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
- if (!domain)
- panic("Failed to add MISC irqdomain");
-
- /* Disable and clear all interrupts */
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
- irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
-}
-
-static int __init ath79_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- int irq;
-
- irq = irq_of_parse_and_map(node, 0);
- if (!irq)
- panic("Failed to get MISC IRQ");
-
- ath79_misc_intc_domain_init(node, irq);
- return 0;
-}
-
-static int __init ar7100_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
- return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
- ar7100_misc_intc_of_init);
-
-static int __init ar7240_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
- return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
- ar7240_misc_intc_of_init);
-
-static int __init ar79_cpu_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- int err, i, count;
-
- /* Fill the irq_wb_chan table */
- count = of_count_phandle_with_args(
- node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells");
-
- for (i = 0; i < count; i++) {
- struct of_phandle_args args;
- u32 irq = i;
-
- of_property_read_u32_index(
- node, "qca,ddr-wb-channel-interrupts", i, &irq);
- if (irq >= ARRAY_SIZE(irq_wb_chan))
- continue;
-
- err = of_parse_phandle_with_args(
- node, "qca,ddr-wb-channels",
- "#qca,ddr-wb-channel-cells",
- i, &args);
- if (err)
- return err;
-
- irq_wb_chan[irq] = args.args[0];
- pr_info("IRQ: Set flush channel of IRQ%d to %d\n",
- irq, args.args[0]);
- }
-
- return mips_cpu_irq_of_init(node, parent);
-}
-IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
- ar79_cpu_intc_of_init);
-
void __init arch_init_irq(void)
{
+ unsigned irq_wb_chan2 = -1;
+ unsigned irq_wb_chan3 = -1;
+ bool misc_is_ar71xx;
+
if (mips_machtype == ATH79_MACH_GENERIC_OF) {
irqchip_init();
return;
@@ -355,14 +141,26 @@ void __init arch_init_irq(void)
if (soc_is_ar71xx() || soc_is_ar724x() ||
soc_is_ar913x() || soc_is_ar933x()) {
- irq_wb_chan[2] = 3;
- irq_wb_chan[3] = 2;
+ irq_wb_chan2 = 3;
+ irq_wb_chan3 = 2;
} else if (soc_is_ar934x()) {
- irq_wb_chan[3] = 2;
+ irq_wb_chan3 = 2;
}
- mips_cpu_irq_init();
- ath79_misc_irq_init();
+ ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3);
+
+ if (soc_is_ar71xx() || soc_is_ar913x())
+ misc_is_ar71xx = true;
+ else if (soc_is_ar724x() ||
+ soc_is_ar933x() ||
+ soc_is_ar934x() ||
+ soc_is_qca955x())
+ misc_is_ar71xx = false;
+ else
+ BUG();
+ ath79_misc_irq_init(
+ ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS,
+ ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx);
if (soc_is_ar934x())
ar934x_ip2_irq_init();
diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c
index 468bc7b99cd3..7c256dadb166 100644
--- a/arch/mips/bcm63xx/gpio.c
+++ b/arch/mips/bcm63xx/gpio.c
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <bcm63xx_cpu.h>
#include <bcm63xx_gpio.h>
@@ -147,5 +147,5 @@ int __init bcm63xx_gpio_init(void)
bcm63xx_gpio_chip.ngpio = bcm63xx_gpio_count();
pr_info("registering %d GPIOs\n", bcm63xx_gpio_chip.ngpio);
- return gpiochip_add(&bcm63xx_gpio_chip);
+ return gpiochip_add_data(&bcm63xx_gpio_chip, NULL);
}
diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c
index e7fc6f9348ba..7efefcf44033 100644
--- a/arch/mips/bmips/irq.c
+++ b/arch/mips/bmips/irq.c
@@ -15,6 +15,12 @@
#include <asm/irq_cpu.h>
#include <asm/time.h>
+static const struct of_device_id smp_intc_dt_match[] = {
+ { .compatible = "brcm,bcm7038-l1-intc" },
+ { .compatible = "brcm,bcm6345-l1-intc" },
+ {}
+};
+
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
@@ -24,8 +30,8 @@ void __init arch_init_irq(void)
{
struct device_node *dn;
- /* Only the STB (bcm7038) controller supports SMP IRQ affinity */
- dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc");
+ /* Only these controllers support SMP IRQ affinity */
+ dn = of_find_matching_node(NULL, smp_intc_dt_match);
if (dn)
of_node_put(dn);
else
diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c
index 408799a839b4..f7521142deda 100644
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c
@@ -17,7 +17,7 @@
#define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
#endif
-#ifdef CONFIG_MACH_JZ4740
+#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
#include <asm/mach-jz4740/base.h>
#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
#endif
diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi
index d61b1616b604..9d19236f53e7 100644
--- a/arch/mips/boot/dts/brcm/bcm6328.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi
@@ -74,7 +74,7 @@
timer: timer@10000040 {
compatible = "syscon";
reg = <0x10000040 0x2c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm6368.dtsi b/arch/mips/boot/dts/brcm/bcm6368.dtsi
index 9c8d3fe28b31..1f6b9b5cddb4 100644
--- a/arch/mips/boot/dts/brcm/bcm6368.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm6368.dtsi
@@ -54,7 +54,7 @@
periph_cntl: syscon@10000000 {
compatible = "syscon";
reg = <0x10000000 0x14>;
- little-endian;
+ native-endian;
};
reboot: syscon-reboot@10000008 {
diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi
index 1a7efa883c5e..3ae16053a0c9 100644
--- a/arch/mips/boot/dts/brcm/bcm7125.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi
@@ -98,7 +98,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7125-sun-top-ctrl", "syscon";
reg = <0x404000 0x60c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi
index d4bf52cfcf17..be7991917d29 100644
--- a/arch/mips/boot/dts/brcm/bcm7346.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi
@@ -118,7 +118,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7346-sun-top-ctrl", "syscon";
reg = <0x404000 0x51c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi
index 8e2501694d03..060805be619a 100644
--- a/arch/mips/boot/dts/brcm/bcm7358.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi
@@ -112,7 +112,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7358-sun-top-ctrl", "syscon";
reg = <0x404000 0x51c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi
index 7e5f76040fb8..bcdb09bfe07b 100644
--- a/arch/mips/boot/dts/brcm/bcm7360.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi
@@ -112,7 +112,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7360-sun-top-ctrl", "syscon";
reg = <0x404000 0x51c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi
index c739ea77acb0..d3b1b762e6c3 100644
--- a/arch/mips/boot/dts/brcm/bcm7362.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi
@@ -118,7 +118,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7362-sun-top-ctrl", "syscon";
reg = <0x404000 0x51c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi
index 5f55d0a50a28..3302a1b8a5c9 100644
--- a/arch/mips/boot/dts/brcm/bcm7420.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi
@@ -99,7 +99,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7420-sun-top-ctrl", "syscon";
reg = <0x404000 0x60c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi
index e24d41ab4e30..15b27aae15a9 100644
--- a/arch/mips/boot/dts/brcm/bcm7425.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi
@@ -100,7 +100,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
reg = <0x404000 0x51c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi
index 8b9432cc062b..adb33e355043 100644
--- a/arch/mips/boot/dts/brcm/bcm7435.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi
@@ -114,7 +114,7 @@
sun_top_ctrl: syscon@404000 {
compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
reg = <0x404000 0x51c>;
- little-endian;
+ native-endian;
};
reboot {
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index b3e7a1b61220..e070dac071c8 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -247,7 +247,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 57ed466e00db..6ba9ce9fcdd5 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -358,7 +358,6 @@ CONFIG_DLM=m
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_CRYPTD=m
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 48e16d98b2cc..77e9f505f5e4 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -346,7 +346,6 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_DLM=m
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITYFS=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_NULL=m
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index fe48220157a9..f9af98f63cff 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -181,7 +181,6 @@ CONFIG_MAGIC_SYSRQ=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig
index 4f37a5985459..a5e85e1ee5de 100644
--- a/arch/mips/configs/jazz_defconfig
+++ b/arch/mips/configs/jazz_defconfig
@@ -362,7 +362,6 @@ CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
CONFIG_DLM=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 004cf52d1b7d..d1f198b072a0 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -412,7 +412,6 @@ CONFIG_DEBUG_FS=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_CRYPTD=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index db029f4ff759..82db4e3e4cf1 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -453,7 +453,6 @@ CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
CONFIG_DLM=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
index 51bab13ef6f8..7fca09fedb59 100644
--- a/arch/mips/configs/sb1250_swarm_defconfig
+++ b/arch/mips/configs/sb1250_swarm_defconfig
@@ -87,7 +87,6 @@ CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_DLM=m
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h
index 2b3487213d1e..441faa92c3cd 100644
--- a/arch/mips/include/asm/mach-ath79/ath79.h
+++ b/arch/mips/include/asm/mach-ath79/ath79.h
@@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg)
void ath79_device_reset_set(u32 mask);
void ath79_device_reset_clear(u32 mask);
+void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3);
+void ath79_misc_irq_init(void __iomem *regs, int irq,
+ int irq_base, bool is_ar71xx);
+
#endif /* __ASM_MACH_ATH79_H */
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 774bb45834cb..19e139c9f337 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -275,6 +275,11 @@ static inline void cvmx_write_csr(uint64_t csr_addr, uint64_t val)
cvmx_read64(CVMX_MIO_BOOT_BIST_STAT);
}
+static inline void cvmx_writeq_csr(void __iomem *csr_addr, uint64_t val)
+{
+ cvmx_write_csr((__force uint64_t)csr_addr, val);
+}
+
static inline void cvmx_write_io(uint64_t io_addr, uint64_t val)
{
cvmx_write64(io_addr, val);
@@ -287,6 +292,10 @@ static inline uint64_t cvmx_read_csr(uint64_t csr_addr)
return val;
}
+static inline uint64_t cvmx_readq_csr(void __iomem *csr_addr)
+{
+ return cvmx_read_csr((__force uint64_t) csr_addr);
+}
static inline void cvmx_send_single(uint64_t data)
{
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 98c31e5d9579..8c16fb7b8fdb 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -102,7 +102,6 @@ static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <asm/io.h>
-#include <asm-generic/pci-bridge.h>
struct pci_dev;
@@ -125,9 +124,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
#endif /* __KERNEL__ */
-/* implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
/* Do platform specific device initialization at pci_enable_device() time */
extern int pcibios_plat_dev_init(struct pci_dev *dev);
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 6ba1fb8b11e2..db7c322f057f 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -44,8 +44,9 @@ static inline void plat_smp_setup(void)
mp_ops->smp_setup();
}
-extern void gic_send_ipi_single(int cpu, unsigned int action);
-extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action);
+extern void mips_smp_send_ipi_single(int cpu, unsigned int action);
+extern void mips_smp_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action);
#else /* !CONFIG_SMP */
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index d9907e57e9b9..b765773ab8aa 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c
@@ -18,6 +18,8 @@
#include <linux/init.h>
#include <linux/io.h>
+#include <linux/gpio/driver.h>
+/* FIXME: needed for gpio_request(), try to remove consumer API from driver */
#include <linux/gpio.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
@@ -91,9 +93,9 @@ static inline struct jz_gpio_chip *gpio_to_jz_gpio_chip(unsigned int gpio)
return &jz4740_gpio_chips[gpio >> 5];
}
-static inline struct jz_gpio_chip *gpio_chip_to_jz_gpio_chip(struct gpio_chip *gpio_chip)
+static inline struct jz_gpio_chip *gpio_chip_to_jz_gpio_chip(struct gpio_chip *gc)
{
- return container_of(gpio_chip, struct jz_gpio_chip, gpio_chip);
+ return gpiochip_get_data(gc);
}
static inline struct jz_gpio_chip *irq_to_jz_gpio_chip(struct irq_data *data)
@@ -234,7 +236,7 @@ static int jz_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
static int jz_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
{
- struct jz_gpio_chip *jz_gpio = gpio_chip_to_jz_gpio_chip(chip);
+ struct jz_gpio_chip *jz_gpio = gpiochip_get_data(chip);
return jz_gpio->irq_base + gpio;
}
@@ -449,7 +451,7 @@ static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id)
irq_setup_generic_chip(gc, IRQ_MSK(chip->gpio_chip.ngpio),
IRQ_GC_INIT_NESTED_LOCK, 0, IRQ_NOPROBE | IRQ_LEVEL);
- gpiochip_add(&chip->gpio_chip);
+ gpiochip_add_data(&chip->gpio_chip, chip);
}
static int __init jz4740_gpio_init(void)
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 68e2b7db9348..b0988fd62fcc 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o
obj-$(CONFIG_MIPS_CMP) += smp-cmp.o
obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o
obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o
-obj-$(CONFIG_MIPS_GIC_IPI) += smp-gic.o
obj-$(CONFIG_MIPS_SPRAM) += spram.o
obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o
diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c
index 705be43c3533..cbd47f38073b 100644
--- a/arch/mips/kernel/gpio_txx9.c
+++ b/arch/mips/kernel/gpio_txx9.c
@@ -10,7 +10,7 @@
#include <linux/init.h>
#include <linux/spinlock.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <asm/txx9pio.h>
@@ -85,5 +85,5 @@ int __init txx9_gpio_init(unsigned long baseaddr,
return -ENODEV;
txx9_gpio_chip.base = base;
txx9_gpio_chip.ngpio = num;
- return gpiochip_add(&txx9_gpio_chip);
+ return gpiochip_add_data(&txx9_gpio_chip, NULL);
}
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5fdaf8bdcd2e..4f607341a793 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -732,21 +732,23 @@ static void __init resource_init(void)
end = HIGHMEM_START - 1;
res = alloc_bootmem(sizeof(struct resource));
+
+ res->start = start;
+ res->end = end;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
switch (boot_mem_map.map[i].type) {
case BOOT_MEM_RAM:
case BOOT_MEM_INIT_RAM:
case BOOT_MEM_ROM_DATA:
res->name = "System RAM";
+ res->flags |= IORESOURCE_SYSRAM;
break;
case BOOT_MEM_RESERVED:
default:
res->name = "reserved";
}
- res->start = start;
- res->end = end;
-
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
/*
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index d5e0f949dc48..76923349b4fe 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus)
}
struct plat_smp_ops cmp_smp_ops = {
- .send_ipi_single = gic_send_ipi_single,
- .send_ipi_mask = gic_send_ipi_mask,
+ .send_ipi_single = mips_smp_send_ipi_single,
+ .send_ipi_mask = mips_smp_send_ipi_mask,
.init_secondary = cmp_init_secondary,
.smp_finish = cmp_smp_finish,
.boot_secondary = cmp_boot_secondary,
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 2ad4e4c96d61..253e1409338c 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = {
.boot_secondary = cps_boot_secondary,
.init_secondary = cps_init_secondary,
.smp_finish = cps_smp_finish,
- .send_ipi_single = gic_send_ipi_single,
- .send_ipi_mask = gic_send_ipi_mask,
+ .send_ipi_single = mips_smp_send_ipi_single,
+ .send_ipi_mask = mips_smp_send_ipi_mask,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = cps_cpu_disable,
.cpu_die = cps_cpu_die,
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 86311a164ef1..4f9570a57e8d 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)
#ifdef CONFIG_MIPS_GIC
if (gic_present) {
- gic_send_ipi_single(cpu, action);
+ mips_smp_send_ipi_single(cpu, action);
return;
}
#endif
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index bd4385a8e6e8..37708d9af638 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -33,12 +33,16 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/ftrace.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/atomic.h>
#include <asm/cpu.h>
#include <asm/processor.h>
#include <asm/idle.h>
#include <asm/r4k-timer.h>
+#include <asm/mips-cpc.h>
#include <asm/mmu_context.h>
#include <asm/time.h>
#include <asm/setup.h>
@@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map;
cpumask_t cpu_coherent_mask;
+#ifdef CONFIG_GENERIC_IRQ_IPI
+static struct irq_desc *call_desc;
+static struct irq_desc *sched_desc;
+#endif
+
static inline void set_cpu_sibling_map(int cpu)
{
int i;
@@ -121,6 +130,7 @@ static inline void calculate_cpu_foreign_map(void)
cpumask_t temp_foreign_map;
/* Re-calculate the mask */
+ cpumask_clear(&temp_foreign_map);
for_each_online_cpu(i) {
core_present = 0;
for_each_cpu(k, &temp_foreign_map)
@@ -145,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops)
mp_ops = ops;
}
+#ifdef CONFIG_GENERIC_IRQ_IPI
+void mips_smp_send_ipi_single(int cpu, unsigned int action)
+{
+ mips_smp_send_ipi_mask(cpumask_of(cpu), action);
+}
+
+void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+ unsigned long flags;
+ unsigned int core;
+ int cpu;
+
+ local_irq_save(flags);
+
+ switch (action) {
+ case SMP_CALL_FUNCTION:
+ __ipi_send_mask(call_desc, mask);
+ break;
+
+ case SMP_RESCHEDULE_YOURSELF:
+ __ipi_send_mask(sched_desc, mask);
+ break;
+
+ default:
+ BUG();
+ }
+
+ if (mips_cpc_present()) {
+ for_each_cpu(cpu, mask) {
+ core = cpu_data[cpu].core;
+
+ if (core == current_cpu_data.core)
+ continue;
+
+ while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
+ mips_cpc_lock_other(core);
+ write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
+ mips_cpc_unlock_other();
+ }
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+
+static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
+{
+ scheduler_ipi();
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
+{
+ generic_smp_call_function_interrupt();
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction irq_resched = {
+ .handler = ipi_resched_interrupt,
+ .flags = IRQF_PERCPU,
+ .name = "IPI resched"
+};
+
+static struct irqaction irq_call = {
+ .handler = ipi_call_interrupt,
+ .flags = IRQF_PERCPU,
+ .name = "IPI call"
+};
+
+static __init void smp_ipi_init_one(unsigned int virq,
+ struct irqaction *action)
+{
+ int ret;
+
+ irq_set_handler(virq, handle_percpu_irq);
+ ret = setup_irq(virq, action);
+ BUG_ON(ret);
+}
+
+static int __init mips_smp_ipi_init(void)
+{
+ unsigned int call_virq, sched_virq;
+ struct irq_domain *ipidomain;
+ struct device_node *node;
+
+ node = of_irq_find_parent(of_root);
+ ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
+
+ /*
+ * Some platforms have half DT setup. So if we found irq node but
+ * didn't find an ipidomain, try to search for one that is not in the
+ * DT.
+ */
+ if (node && !ipidomain)
+ ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
+
+ BUG_ON(!ipidomain);
+
+ call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+ BUG_ON(!call_virq);
+
+ sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+ BUG_ON(!sched_virq);
+
+ if (irq_domain_is_ipi_per_cpu(ipidomain)) {
+ int cpu;
+
+ for_each_cpu(cpu, cpu_possible_mask) {
+ smp_ipi_init_one(call_virq + cpu, &irq_call);
+ smp_ipi_init_one(sched_virq + cpu, &irq_resched);
+ }
+ } else {
+ smp_ipi_init_one(call_virq, &irq_call);
+ smp_ipi_init_one(sched_virq, &irq_resched);
+ }
+
+ call_desc = irq_to_desc(call_virq);
+ sched_desc = irq_to_desc(sched_virq);
+
+ return 0;
+}
+early_initcall(mips_smp_ipi_init);
+#endif
+
/*
* First C code run on the secondary CPUs after being started up by
* the master.
@@ -191,7 +328,7 @@ asmlinkage void start_secondary(void)
WARN_ON_ONCE(!irqs_disabled());
mp_ops->smp_finish();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void stop_this_cpu(void *dummy)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 3110447ab1e9..70ef1a43c114 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -445,8 +445,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
dvcpu->arch.wait = 0;
- if (waitqueue_active(&dvcpu->wq))
- wake_up_interruptible(&dvcpu->wq);
+ if (swait_active(&dvcpu->wq))
+ swake_up(&dvcpu->wq);
return 0;
}
@@ -1174,8 +1174,8 @@ static void kvm_mips_comparecount_func(unsigned long data)
kvm_mips_callbacks->queue_timer_int(vcpu);
vcpu->arch.wait = 0;
- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ if (swait_active(&vcpu->wq))
+ swake_up(&vcpu->wq);
}
/* low level hrtimer wake routine */
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 1afd87c999b0..6cdffc76735c 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -64,7 +64,7 @@ static inline void get_head_page_multiple(struct page *page, int nr)
{
VM_BUG_ON(page != compound_head(page));
VM_BUG_ON(page_count(page) == 0);
- atomic_add(nr, &page->_count);
+ page_ref_add(page, nr);
SetPageReferenced(page);
}
diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c
index d708ae46d325..2b6d5e196f99 100644
--- a/arch/mips/pci/fixup-loongson3.c
+++ b/arch/mips/pci/fixup-loongson3.c
@@ -40,20 +40,25 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
static void pci_fixup_radeon(struct pci_dev *pdev)
{
- if (pdev->resource[PCI_ROM_RESOURCE].start)
+ struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
+
+ if (res->start)
return;
if (!loongson_sysconf.vgabios_addr)
return;
- pdev->resource[PCI_ROM_RESOURCE].start =
- loongson_sysconf.vgabios_addr;
- pdev->resource[PCI_ROM_RESOURCE].end =
- loongson_sysconf.vgabios_addr + 256*1024 - 1;
- pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_COPY;
+ pci_disable_rom(pdev);
+ if (res->parent)
+ release_resource(res);
+
+ res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr);
+ res->end = res->start + 256*1024 - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
+ IORESOURCE_PCI_FIXED;
dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n",
- PCI_ROM_RESOURCE, &pdev->resource[PCI_ROM_RESOURCE]);
+ PCI_ROM_RESOURCE, res);
}
DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c
index d304be22b963..8e6e8db8dd5f 100644
--- a/arch/mips/pmcs-msp71xx/msp_serial.c
+++ b/arch/mips/pmcs-msp71xx/msp_serial.c
@@ -110,7 +110,7 @@ void __init msp_serial_setup(void)
up.uartclk = uartclk;
up.regshift = 2;
up.iotype = UPIO_MEM;
- up.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST;
+ up.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
up.type = PORT_16550A;
up.line = 0;
up.serial_out = msp_serial_out;
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index fd1108543a71..fdc704abc8d4 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -32,7 +32,7 @@
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <asm/mach-rc32434/rb.h>
#include <asm/mach-rc32434/gpio.h>
@@ -88,7 +88,7 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset)
{
struct rb532_gpio_chip *gpch;
- gpch = container_of(chip, struct rb532_gpio_chip, chip);
+ gpch = gpiochip_get_data(chip);
return !!rb532_get_bit(offset, gpch->regbase + GPIOD);
}
@@ -100,7 +100,7 @@ static void rb532_gpio_set(struct gpio_chip *chip,
{
struct rb532_gpio_chip *gpch;
- gpch = container_of(chip, struct rb532_gpio_chip, chip);
+ gpch = gpiochip_get_data(chip);
rb532_set_bit(value, offset, gpch->regbase + GPIOD);
}
@@ -111,7 +111,7 @@ static int rb532_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
{
struct rb532_gpio_chip *gpch;
- gpch = container_of(chip, struct rb532_gpio_chip, chip);
+ gpch = gpiochip_get_data(chip);
/* disable alternate function in case it's set */
rb532_set_bit(0, offset, gpch->regbase + GPIOFUNC);
@@ -128,7 +128,7 @@ static int rb532_gpio_direction_output(struct gpio_chip *chip,
{
struct rb532_gpio_chip *gpch;
- gpch = container_of(chip, struct rb532_gpio_chip, chip);
+ gpch = gpiochip_get_data(chip);
/* disable alternate function in case it's set */
rb532_set_bit(0, offset, gpch->regbase + GPIOFUNC);
@@ -200,7 +200,7 @@ int __init rb532_gpio_init(void)
}
/* Register our GPIO chip */
- gpiochip_add(&rb532_gpio_chip->chip);
+ gpiochip_add_data(&rb532_gpio_chip->chip, rb532_gpio_chip);
return 0;
}
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 2fd350f31f4b..108f8a8d1640 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -17,7 +17,7 @@
#include <linux/module.h>
#include <linux/clk.h>
#include <linux/err.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/platform_device.h>
#include <linux/serial_core.h>
#include <linux/mtd/physmap.h>
@@ -687,16 +687,14 @@ struct txx9_iocled_data {
static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset)
{
- struct txx9_iocled_data *data =
- container_of(chip, struct txx9_iocled_data, chip);
+ struct txx9_iocled_data *data = gpiochip_get_data(chip);
return !!(data->cur_val & (1 << offset));
}
static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
int value)
{
- struct txx9_iocled_data *data =
- container_of(chip, struct txx9_iocled_data, chip);
+ struct txx9_iocled_data *data = gpiochip_get_data(chip);
unsigned long flags;
spin_lock_irqsave(&txx9_iocled_lock, flags);
if (value)
@@ -749,7 +747,7 @@ void __init txx9_iocled_init(unsigned long baseaddr,
iocled->chip.label = "iocled";
iocled->chip.base = basenum;
iocled->chip.ngpio = num;
- if (gpiochip_add(&iocled->chip))
+ if (gpiochip_add_data(&iocled->chip, iocled))
goto out_unmap;
if (basenum < 0)
basenum = iocled->chip.base;
diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c
index c9afd05020e0..54de66837103 100644
--- a/arch/mips/txx9/rbtx4938/setup.c
+++ b/arch/mips/txx9/rbtx4938/setup.c
@@ -14,6 +14,7 @@
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/platform_device.h>
+#include <linux/gpio/driver.h>
#include <linux/gpio.h>
#include <linux/mtd/physmap.h>
@@ -335,7 +336,7 @@ static void __init rbtx4938_mtd_init(void)
static void __init rbtx4938_arch_init(void)
{
- gpiochip_add(&rbtx4938_spi_gpio_chip);
+ gpiochip_add_data(&rbtx4938_spi_gpio_chip, NULL);
rbtx4938_pci_setup();
rbtx4938_spi_init();
}
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 10607f0d2bcd..06ddb5501ab1 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -53,6 +53,7 @@ config GENERIC_HWEIGHT
config GENERIC_BUG
def_bool y
+ depends on BUG
config QUICKLIST
def_bool y
diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h
index be3debb8fc02..51159fff025a 100644
--- a/arch/mn10300/include/asm/pci.h
+++ b/arch/mn10300/include/asm/pci.h
@@ -80,9 +80,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
#endif /* __KERNEL__ */
-/* implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
{
return channel ? 15 : 14;
diff --git a/arch/mn10300/include/asm/serial.h b/arch/mn10300/include/asm/serial.h
index c1990218f18c..594ebff15d3f 100644
--- a/arch/mn10300/include/asm/serial.h
+++ b/arch/mn10300/include/asm/serial.h
@@ -14,15 +14,15 @@
/* Standard COM flags (except for COM4, because of the 8514 problem) */
#ifdef CONFIG_SERIAL_8250_DETECT_IRQ
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
-#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ)
+#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ)
+#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ)
#else
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF
+#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST)
+#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF
#endif
#ifdef CONFIG_SERIAL_8250_MANY_PORTS
-#define FOURPORT_FLAGS ASYNC_FOURPORT
+#define FOURPORT_FLAGS UPF_FOURPORT
#define ACCENT_FLAGS 0
#define BOCA_FLAGS 0
#define HUB6_FLAGS 0
diff --git a/arch/mn10300/kernel/fpu-nofpu.c b/arch/mn10300/kernel/fpu-nofpu.c
index 31c765b92c5d..8d0e041aa798 100644
--- a/arch/mn10300/kernel/fpu-nofpu.c
+++ b/arch/mn10300/kernel/fpu-nofpu.c
@@ -9,6 +9,7 @@
* 2 of the Licence, or (at your option) any later version.
*/
#include <asm/fpu.h>
+#include <asm/elf.h>
/*
* handle an FPU operational exception
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index f984193718b1..426173c4b0b9 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -675,7 +675,7 @@ int __init start_secondary(void *unused)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
init_clockevents();
#endif
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/openrisc/include/asm/gpio.h b/arch/openrisc/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/openrisc/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig
index 9387cc2693f6..db8f56bf3883 100644
--- a/arch/parisc/configs/712_defconfig
+++ b/arch/parisc/configs/712_defconfig
@@ -183,7 +183,6 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_DEBUG_RODATA=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig
index 0490199d7b15..1a4f776b49b8 100644
--- a/arch/parisc/configs/a500_defconfig
+++ b/arch/parisc/configs/a500_defconfig
@@ -193,7 +193,6 @@ CONFIG_HEADERS_CHECK=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/parisc/configs/default_defconfig b/arch/parisc/configs/default_defconfig
index 4d8127e8428a..310b6657e4ac 100644
--- a/arch/parisc/configs/default_defconfig
+++ b/arch/parisc/configs/default_defconfig
@@ -211,7 +211,6 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_MD4=m
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 0ffb08ff5125..5b04d703a924 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -301,7 +301,6 @@ CONFIG_RCU_CPU_STALL_INFO=y
CONFIG_LATENCYTOP=y
CONFIG_LKDTM=m
CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 3d0e17bcc8e9..df0f52bd18b4 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -22,6 +22,9 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init __read_mostly
+
void parisc_cache_init(void); /* initializes cache-flushing */
void disable_sr_hashing_asm(int); /* low level support for above */
void disable_sr_hashing(void); /* turns off space register hashing */
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 845272ce9cc5..7bd69bd43a01 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma
}
}
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
#include <asm/kmap_types.h>
#define ARCH_HAS_KMAP
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
index 89c53bfff055..defebd956585 100644
--- a/arch/parisc/include/asm/pci.h
+++ b/arch/parisc/include/asm/pci.h
@@ -194,9 +194,6 @@ extern void pcibios_init_bridge(struct pci_dev *);
#define PCIBIOS_MIN_IO 0x10
#define PCIBIOS_MIN_MEM 0x1000 /* NBPG - but pci/setup-res.c dies */
-/* export the pci_ DMA API in terms of the dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
{
return channel ? 15 : 14;
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 52e85973a283..c2a9cc55a62f 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -305,7 +305,7 @@ void __init smp_callin(void)
local_irq_enable(); /* Interrupts have been off until now */
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* NOTREACHED */
panic("smp_callin() AAAAaaaaahhhh....\n");
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index 54ba39262b82..5d6eea925cf4 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -63,7 +63,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
- pte = pte_alloc_map(mm, NULL, pmd, addr);
+ pte = pte_alloc_map(mm, pmd, addr);
}
return pte;
}
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 1b366c477687..3c07d6b96877 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -55,12 +55,12 @@ signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource pdcdata_resource = {
@@ -201,7 +201,7 @@ static void __init setup_bootmem(void)
res->name = "System RAM";
res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT;
res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9faa18c4f3f7..a030e5ecb10b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -158,6 +158,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -828,14 +829,10 @@ config PCI_8260
select PPC_INDIRECT_PCI
default y
-source "drivers/pci/pcie/Kconfig"
-
source "drivers/pci/Kconfig"
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
config HAS_RAPIDIO
bool
default n
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index 91862292cd55..340685caa7b8 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -387,7 +387,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index e5d2c3dc07f1..99ccbebabfd3 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1175,7 +1175,6 @@ CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_XMON=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 93ee046d12cd..ab113198ed20 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -22,6 +22,7 @@
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
+#include <crypto/xts.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -126,6 +127,11 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
key_len >>= 1;
diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/powerpc/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2aa79c864e91..7529aab068f5 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
}
#endif
-#define SPAPR_TCE_SHIFT 12
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9d08d8cbed1a..d7b343170453 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
u64 liobn;
- u32 window_size;
+ struct rcu_head rcu;
+ u32 page_shift;
+ u64 offset; /* in pages */
+ u64 size; /* window size in pages */
struct page *pages[0];
};
@@ -289,7 +292,7 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
struct list_head preempt_list;
spinlock_t lock;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
@@ -629,7 +632,7 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2241d5357129..2544edabe7f3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args);
+ struct kvm_create_spapr_tce_64 *args);
+extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
+ struct kvm_vcpu *vcpu, unsigned long liobn);
+extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages);
+extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
+ unsigned long tce);
+extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ unsigned long *ua, unsigned long **prmap);
+extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
+ unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
+extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages);
+extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
+extern void kvmppc_alloc_host_rm_ops(void);
+extern void kvmppc_free_host_rm_ops(void);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xics_ipi_action(void);
+extern int h_ipi_redirect;
#else
+static inline void kvmppc_alloc_host_rm_ops(void) {};
+static inline void kvmppc_free_host_rm_ops(void) {};
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
+/*
+ * Host-side operations we want to set up while running in real
+ * mode in the guest operating on the xics.
+ * Currently only VCPU wakeup is supported.
+ */
+
+union kvmppc_rm_state {
+ unsigned long raw;
+ struct {
+ u32 in_host;
+ u32 rm_action;
+ };
+};
+
+struct kvmppc_host_rm_core {
+ union kvmppc_rm_state rm_state;
+ void *rm_data;
+ char pad[112];
+};
+
+struct kvmppc_host_rm_ops {
+ struct kvmppc_host_rm_core *rm_core;
+ void (*vcpu_kick)(struct kvm_vcpu *vcpu);
+};
+
+extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+
static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 54843ca5fa2b..78968c1ff931 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -10,7 +10,6 @@
#include <linux/pci.h>
#include <linux/list.h>
#include <linux/ioport.h>
-#include <asm-generic/pci-bridge.h>
struct device_node;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6f8065a7d487..a6f3ac0d4602 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -20,8 +20,6 @@
#include <asm/prom.h>
#include <asm/pci-bridge.h>
-#include <asm-generic/pci-dma-compat.h>
-
/* Return values for pci_controller_ops.probe_mode function */
#define PCI_PROBE_NONE -1 /* Don't look at this bus at all */
#define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index ac9fb114e25d..47897a30982d 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
}
return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
}
+
+unsigned long vmalloc_to_phys(void *vmalloc_addr);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 825663c30945..78083ed20792 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
#define PPC_MSG_TICK_BROADCAST 2
#define PPC_MSG_DEBUGGER_BREAK 3
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION 4
+
/* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[];
@@ -121,6 +124,7 @@ extern const char *smp_ipi_name[];
/* for irq controllers with only a single ipi */
extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_set_message(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void);
void smp_init_pSeries(void);
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0e25bdb190bb..254604856e69 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -30,6 +30,7 @@
#ifdef CONFIG_PPC_ICP_NATIVE
extern int icp_native_init(void);
extern void icp_native_flush_interrupt(void);
+extern void icp_native_cause_ipi_rm(int cpu);
#else
static inline int icp_native_init(void) { return -ENODEV; }
#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index ab4d4732c492..c93cf35ce379 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce {
__u32 window_size;
};
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+ __u64 liobn;
+ __u32 page_shift;
+ __u32 flags;
+ __u64 offset; /* in pages */
+ __u64 size; /* in pages */
+};
+
/* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma {
__u64 rma_size;
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 5a2c049c1c61..aa610ce8742f 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -49,7 +49,7 @@ static unsigned int rtas_error_log_buffer_max;
static unsigned int event_scan;
static unsigned int rtas_event_scan_rate;
-static int full_rtas_msgs = 0;
+static bool full_rtas_msgs;
/* Stop logging to nvram after first fatal error */
static int logging_enabled; /* Until we initialize everything,
@@ -592,11 +592,6 @@ __setup("surveillance=", surveillance_setup);
static int __init rtasmsgs_setup(char *str)
{
- if (strcmp(str, "on") == 0)
- full_rtas_msgs = 1;
- else if (strcmp(str, "off") == 0)
- full_rtas_msgs = 0;
-
- return 1;
+ return (kstrtobool(str, &full_rtas_msgs) == 0);
}
__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ad8c9db61237..d544fa311757 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
- lockdep_init();
-
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5c03a6a9b054..f98be8383a39 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr)
setup_paca(&boot_paca);
fixup_boot_paca();
- /* Initialize lockdep early or else spinlocks will blow */
- lockdep_init();
-
/* -------- printk is now safe to use ------- */
/* Enable early debugging if any specified (see udbg.h) */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec2058d2d..b7dea05f0725 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
- int messages; /* current messages */
+ long messages; /* current messages */
unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data;
}
-void smp_muxed_ipi_message_pass(int cpu, int msg)
+void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
*/
smp_mb();
message[msg] = 1;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+ struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+ smp_muxed_ipi_set_message(cpu, msg);
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
}
#ifdef __BIG_ENDIAN__
-#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
-#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
irqreturn_t smp_ipi_demux(void)
{
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
- unsigned int all;
+ unsigned long all;
mb(); /* order any irq clear */
do {
all = xchg(&info->messages, 0);
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ /*
+ * Must check for PPC_MSG_RM_HOST_ACTION messages
+ * before PPC_MSG_CALL_FUNCTION messages because when
+ * a VM is destroyed, we call kick_all_cpus_sync()
+ * to ensure that any pending PPC_MSG_RM_HOST_ACTION
+ * messages have completed before we free any VCPUs.
+ */
+ if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+ kvmppc_xics_ipi_action();
+#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
@@ -727,7 +745,7 @@ void start_secondary(void *unused)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
BUG();
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b6becc795bb5..33c47fcc455a 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -203,9 +203,8 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_SMP
printk("SMP NR_CPUS=%d ", NR_CPUS);
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC ");
#ifdef CONFIG_NUMA
printk("NUMA ");
#endif
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 0570eef83fba..7f7b6d86ac73 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
- $(KVM)/eventfd.o
+ $(KVM)/eventfd.o $(KVM)/vfio.o
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 638c6d9be9e0..b34220d2aa42 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
- INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 54cf9bc94dad..2c2d1030843a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@@ -36,28 +37,69 @@
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
+{
+ return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
-static long kvmppc_stt_npages(unsigned long window_size)
+static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
- return ALIGN((window_size >> SPAPR_TCE_SHIFT)
- * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+ unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
+ (tce_pages * sizeof(struct page *));
+
+ return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}
-static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
- struct kvm *kvm = stt->kvm;
- int i;
+ long ret = 0;
- mutex_lock(&kvm->lock);
- list_del(&stt->list);
- for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ if (!current || !current->mm)
+ return ret; /* process exited */
+
+ down_write(&current->mm->mmap_sem);
+
+ if (inc) {
+ unsigned long locked, lock_limit;
+
+ locked = current->mm->locked_vm + stt_pages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ ret = -ENOMEM;
+ else
+ current->mm->locked_vm += stt_pages;
+ } else {
+ if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
+ stt_pages = current->mm->locked_vm;
+
+ current->mm->locked_vm -= stt_pages;
+ }
+
+ pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
+ inc ? '+' : '-',
+ stt_pages << PAGE_SHIFT,
+ current->mm->locked_vm << PAGE_SHIFT,
+ rlimit(RLIMIT_MEMLOCK),
+ ret ? " - exceeded" : "");
+
+ up_write(&current->mm->mmap_sem);
+
+ return ret;
+}
+
+static void release_spapr_tce_table(struct rcu_head *head)
+{
+ struct kvmppc_spapr_tce_table *stt = container_of(head,
+ struct kvmppc_spapr_tce_table, rcu);
+ unsigned long i, npages = kvmppc_tce_pages(stt->size);
+
+ for (i = 0; i < npages; i++)
__free_page(stt->pages[i]);
- kfree(stt);
- mutex_unlock(&kvm->lock);
- kvm_put_kvm(kvm);
+ kfree(stt);
}
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
struct page *page;
- if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
page = stt->pages[vmf->pgoff];
@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
- release_spapr_tce_table(stt);
+ list_del_rcu(&stt->list);
+
+ kvm_put_kvm(stt->kvm);
+
+ kvmppc_account_memlimit(
+ kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+ call_rcu(&stt->rcu, release_spapr_tce_table);
+
return 0;
}
@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = {
};
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args)
+ struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
- long npages;
+ unsigned long npages, size;
int ret = -ENOMEM;
int i;
+ if (!args->size)
+ return -EINVAL;
+
/* Check this LIOBN hasn't been previously allocated */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == args->liobn)
return -EBUSY;
}
- npages = kvmppc_stt_npages(args->window_size);
+ size = args->size;
+ npages = kvmppc_tce_pages(size);
+ ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
+ if (ret) {
+ stt = NULL;
+ goto fail;
+ }
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
stt->liobn = args->liobn;
- stt->window_size = args->window_size;
+ stt->page_shift = args->page_shift;
+ stt->offset = args->offset;
+ stt->size = size;
stt->kvm = kvm;
for (i = 0; i < npages; i++) {
@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kvm_get_kvm(kvm);
mutex_lock(&kvm->lock);
- list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+ list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
mutex_unlock(&kvm->lock);
@@ -148,3 +208,59 @@ fail:
}
return ret;
}
+
+long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret = H_SUCCESS, idx;
+ unsigned long entry, ua = 0;
+ u64 __user *tces, tce;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ entry = ioba >> stt->page_shift;
+ /*
+ * SPAPR spec says that the maximum size of the list is 512 TCEs
+ * so the whole table fits in 4K page
+ */
+ if (npages > 512)
+ return H_PARAMETER;
+
+ if (tce_list & (SZ_4K - 1))
+ return H_PARAMETER;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tces = (u64 __user *) ua;
+
+ for (i = 0; i < npages; ++i) {
+ if (get_user(tce, tces + i)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tce = be64_to_cpu(tce);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
+
+ kvmppc_tce_put(stt, entry + i, tce);
+ }
+
+unlock_exit:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 89e96b3e0039..44be73e6aa26 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@@ -30,76 +31,321 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
+#include <asm/mmu_context.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/iommu.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-/* WARNING: This will be called in real-mode on HV KVM and virtual
+/*
+ * Finds a TCE table descriptor by LIOBN.
+ *
+ * WARNING: This will be called in real or virtual mode on HV KVM and virtual
* mode on PR KVM
*/
-long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba, unsigned long tce)
+struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
+ unsigned long liobn)
{
struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
+ list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
+ if (stt->liobn == liobn)
+ return stt;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_find_table);
+
+/*
+ * Validates IO address.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages)
+{
+ unsigned long mask = (1ULL << stt->page_shift) - 1;
+ unsigned long idx = ioba >> stt->page_shift;
+
+ if ((ioba & mask) || (idx < stt->offset) ||
+ (idx - stt->offset + npages > stt->size) ||
+ (idx + npages < idx))
+ return H_PARAMETER;
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
+
+/*
+ * Validates TCE address.
+ * At the moment flags and page mask are validated.
+ * As the host kernel does not access those addresses (just puts them
+ * to the table and user space is supposed to process them), we can skip
+ * checking other things (such as TCE is a guest RAM address or the page
+ * was actually allocated).
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
+{
+ unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
+ unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
+
+ if (tce & mask)
+ return H_PARAMETER;
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
+
+/* Note on the use of page_address() in real mode,
+ *
+ * It is safe to use page_address() in real mode on ppc64 because
+ * page_address() is always defined as lowmem_page_address()
+ * which returns __va(PFN_PHYS(page_to_pfn(page))) which is arithmetic
+ * operation and does not access page struct.
+ *
+ * Theoretically page_address() could be defined different
+ * but either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL
+ * would have to be enabled.
+ * WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64,
+ * HASHED_PAGE_VIRTUAL could be enabled for ppc32 only and only
+ * if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP
+ * is not expected to be enabled on ppc32, page_address()
+ * is safe for ppc32 as well.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+static u64 *kvmppc_page_address(struct page *page)
+{
+#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
+#error TODO: fix to avoid page_address() here
+#endif
+ return (u64 *) page_address(page);
+}
+
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Called in both real and virtual modes.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+
+ idx -= stt->offset;
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = kvmppc_page_address(page);
+
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+EXPORT_SYMBOL_GPL(kvmppc_tce_put);
+
+long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ unsigned long *ua, unsigned long **prmap)
+{
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (prmap)
+ *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+{
+ struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ long ret;
+
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
-
- /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
- /* liobn, stt, stt->window_size); */
- if (ioba >= stt->window_size)
- return H_PARAMETER;
-
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
-
- /* FIXME: Need to validate the TCE itself */
- /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
- tbl[idx % TCES_PER_PAGE] = tce;
- return H_SUCCESS;
- }
- }
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+
+ return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
-long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba)
+static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
+ unsigned long ua, unsigned long *phpa)
+{
+ pte_t *ptep, pte;
+ unsigned shift = 0;
+
+ ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
+ if (!ptep || !pte_present(*ptep))
+ return -ENXIO;
+ pte = *ptep;
+
+ if (!shift)
+ shift = PAGE_SHIFT;
+
+ /* Avoid handling anything potentially complicated in realmode */
+ if (shift > PAGE_SHIFT)
+ return -EAGAIN;
+
+ if (!pte_young(pte))
+ return -EAGAIN;
+
+ *phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) |
+ (ua & ~PAGE_MASK);
+
+ return 0;
+}
+
+long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages)
{
- struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
+ long i, ret = H_SUCCESS;
+ unsigned long tces, entry, ua = 0;
+ unsigned long *rmap = NULL;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ entry = ioba >> stt->page_shift;
+ /*
+ * The spec says that the maximum size of the list is 512 TCEs
+ * so the whole table addressed resides in 4K page
+ */
+ if (npages > 512)
+ return H_PARAMETER;
+
+ if (tce_list & (SZ_4K - 1))
+ return H_PARAMETER;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ return H_TOO_HARD;
- if (ioba >= stt->window_size)
- return H_PARAMETER;
+ rmap = (void *) vmalloc_to_phys(rmap);
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
+ /*
+ * Synchronize with the MMU notifier callbacks in
+ * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+ * While we have the rmap lock, code running on other CPUs
+ * cannot finish unmapping the host real page that backs
+ * this guest real page, so we are OK to access the host
+ * real page.
+ */
+ lock_rmap(rmap);
+ if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+
+ for (i = 0; i < npages; ++i) {
+ unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
- vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
- return H_SUCCESS;
- }
+ kvmppc_tce_put(stt, entry + i, tce);
}
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
+unlock_exit:
+ unlock_rmap(rmap);
+
+ return ret;
+}
+
+long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check permission bits only to allow userspace poison TCE for debug */
+ if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
+ return H_PARAMETER;
+
+ for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
+
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba)
+{
+ struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ long ret;
+ unsigned long idx;
+ struct page *page;
+ u64 *tbl;
+
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+ vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
+
+ return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
+
+#endif /* KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index baeddb06811d..84fb4fcfaa41 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,17 @@ static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
+#ifdef CONFIG_KVM_XICS
+static struct kernel_param_ops module_param_ops = {
+ .set = param_set_int,
+ .get = param_get_int,
+};
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+#endif
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,11 +125,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swait_active(wqp)) {
+ swake_up(wqp);
++vcpu->stat.halt_wakeup;
}
@@ -701,8 +712,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
tvcpu->arch.prodded = 1;
smp_mb();
if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swait_active(&vcpu->wq)) {
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req);
break;
- } /* fallthrough */
+ }
+ return RESUME_HOST;
+ case H_PUT_TCE:
+ ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PUT_TCE_INDIRECT:
+ ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_STUFF_TCE:
+ ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
default:
return RESUME_HOST;
}
@@ -1459,7 +1494,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_waitqueue_head(&vcore->wq);
+ init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
@@ -2279,6 +2314,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
}
/*
+ * Clear core from the list of active host cores as we are about to
+ * enter the guest. Only do this if it is the primary thread of the
+ * core (not if a subcore) that is entering the guest.
+ */
+static inline void kvmppc_clear_host_core(int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return;
+ /*
+ * Memory barrier can be omitted here as we will do a smp_wmb()
+ * later in kvmppc_start_thread and we need ensure that state is
+ * visible to other CPUs only after we enter guest.
+ */
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+}
+
+/*
+ * Advertise this core as an active host core since we exited the guest
+ * Only need to do this if it is the primary thread of the core that is
+ * exiting.
+ */
+static inline void kvmppc_set_host_core(int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return;
+
+ /*
+ * Memory barrier can be omitted here because we do a spin_unlock
+ * immediately after this which provides the memory barrier.
+ */
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+}
+
+/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
*/
@@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
+ kvmppc_clear_host_core(pcpu);
+
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i);
}
+ kvmppc_set_host_core(pcpu);
+
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
@@ -2531,10 +2610,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu;
int do_sleep = 1;
+ DECLARE_SWAITQUEUE(wait);
- DEFINE_WAIT(wait);
-
- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
/*
* Check one last time for pending exceptions and ceded state after
@@ -2548,7 +2626,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
if (!do_sleep) {
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
return;
}
@@ -2556,7 +2634,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -2612,7 +2690,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ swake_up(&vc->wq);
}
}
@@ -2984,6 +3062,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
}
+#ifdef CONFIG_KVM_XICS
+static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ kvmppc_set_host_core(cpu);
+ break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ kvmppc_clear_host_core(cpu);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kvmppc_cpu_notifier = {
+ .notifier_call = kvmppc_cpu_notify,
+};
+
+/*
+ * Allocate a per-core structure for managing state about which cores are
+ * running in the host versus the guest and for exchanging data between
+ * real mode KVM and CPU running in the host.
+ * This is only done for the first VM.
+ * The allocated structure stays even if all VMs have stopped.
+ * It is only freed when the kvm-hv module is unloaded.
+ * It's OK for this routine to fail, we just don't support host
+ * core operations like redirecting H_IPI wakeups.
+ */
+void kvmppc_alloc_host_rm_ops(void)
+{
+ struct kvmppc_host_rm_ops *ops;
+ unsigned long l_ops;
+ int cpu, core;
+ int size;
+
+ /* Not the first time here ? */
+ if (kvmppc_host_rm_ops_hv != NULL)
+ return;
+
+ ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
+ if (!ops)
+ return;
+
+ size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
+ ops->rm_core = kzalloc(size, GFP_KERNEL);
+
+ if (!ops->rm_core) {
+ kfree(ops);
+ return;
+ }
+
+ get_online_cpus();
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
+ if (!cpu_online(cpu))
+ continue;
+
+ core = cpu >> threads_shift;
+ ops->rm_core[core].rm_state.in_host = 1;
+ }
+
+ ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
+
+ /*
+ * Make the contents of the kvmppc_host_rm_ops structure visible
+ * to other CPUs before we assign it to the global variable.
+ * Do an atomic assignment (no locks used here), but if someone
+ * beats us to it, just free our copy and return.
+ */
+ smp_wmb();
+ l_ops = (unsigned long) ops;
+
+ if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
+ put_online_cpus();
+ kfree(ops->rm_core);
+ kfree(ops);
+ return;
+ }
+
+ register_cpu_notifier(&kvmppc_cpu_notifier);
+
+ put_online_cpus();
+}
+
+void kvmppc_free_host_rm_ops(void)
+{
+ if (kvmppc_host_rm_ops_hv) {
+ unregister_cpu_notifier(&kvmppc_cpu_notifier);
+ kfree(kvmppc_host_rm_ops_hv->rm_core);
+ kfree(kvmppc_host_rm_ops_hv);
+ kvmppc_host_rm_ops_hv = NULL;
+ }
+}
+#endif
+
static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
@@ -2996,6 +3182,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
return -ENOMEM;
kvm->arch.lpid = lpid;
+ kvmppc_alloc_host_rm_ops();
+
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
@@ -3229,6 +3417,7 @@ static int kvmppc_book3s_init_hv(void)
static void kvmppc_book3s_exit_hv(void)
{
+ kvmppc_free_host_rm_ops();
kvmppc_hv_ops = NULL;
}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fd7006bf6b1a..5f0380db3eab 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap)
kvmhv_interrupt_vcore(vc, ee);
}
}
+
+struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 24f58076d49e..980d8a6f7284 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -17,12 +17,16 @@
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
+#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
+int h_ipi_redirect = 1;
+EXPORT_SYMBOL(h_ipi_redirect);
+
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
/* -- ICP routines -- */
+#ifdef CONFIG_SMP
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+{
+ int hcpu;
+
+ hcpu = hcore << threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
+ smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+ icp_native_cause_ipi_rm(hcpu);
+}
+#else
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
+#endif
+
+/*
+ * We start the search from our current CPU Id in the core map
+ * and go in a circle until we get back to our ID looking for a
+ * core that is running in host context and that hasn't already
+ * been targeted for another rm_host_ops.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1, if no CPU could be found in the host
+ * Else, returns a CPU Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+ struct kvmppc_host_rm_core *rm_core, int max, int action)
+{
+ bool success;
+ int core;
+ union kvmppc_rm_state old, new;
+
+ for (core = start + 1; core < max; core++) {
+ old = new = READ_ONCE(rm_core[core].rm_state);
+
+ if (!old.in_host || old.rm_action)
+ continue;
+
+ /* Try to grab this host core if not taken already. */
+ new.rm_action = action;
+
+ success = cmpxchg64(&rm_core[core].rm_state.raw,
+ old.raw, new.raw) == old.raw;
+ if (success) {
+ /*
+ * Make sure that the store to the rm_action is made
+ * visible before we return to caller (and the
+ * subsequent store to rm_data) to synchronize with
+ * the IPI handler.
+ */
+ smp_wmb();
+ return core;
+ }
+ }
+
+ return -1;
+}
+
+static inline int find_available_hostcore(int action)
+{
+ int core;
+ int my_core = smp_processor_id() >> threads_shift;
+ struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
+
+ core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
+ if (core == -1)
+ core = grab_next_hostcore(core, rm_core, my_core, action);
+
+ return core;
+}
+
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu;
+ int hcore;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
- /* Check if the core is loaded, if not, too hard */
+ /*
+ * Check if the core is loaded,
+ * if not, find an available host core to post to wake the VCPU,
+ * if we can't find one, set up state to eventually return too hard.
+ */
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
- this_icp->rm_action |= XICS_RM_KICK_VCPU;
- this_icp->rm_kick_target = vcpu;
+ hcore = -1;
+ if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
+ hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
+ if (hcore != -1) {
+ icp_send_hcore_msg(hcore, vcpu);
+ } else {
+ this_icp->rm_action |= XICS_RM_KICK_VCPU;
+ this_icp->rm_kick_target = vcpu;
+ }
return;
}
@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
bail:
return check_too_hard(xics, icp);
}
+
+/* --- Non-real mode XICS-related built-in routines --- */
+
+/**
+ * Host Operations poked by RM KVM
+ */
+static void rm_host_ipi_action(int action, void *data)
+{
+ switch (action) {
+ case XICS_RM_KICK_VCPU:
+ kvmppc_host_rm_ops_hv->vcpu_kick(data);
+ break;
+ default:
+ WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
+ break;
+ }
+
+}
+
+void kvmppc_xics_ipi_action(void)
+{
+ int core;
+ unsigned int cpu = smp_processor_id();
+ struct kvmppc_host_rm_core *rm_corep;
+
+ core = cpu >> threads_shift;
+ rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
+
+ if (rm_corep->rm_data) {
+ rm_host_ipi_action(rm_corep->rm_state.rm_action,
+ rm_corep->rm_data);
+ /* Order these stores against the real mode KVM */
+ rm_corep->rm_data = NULL;
+ smp_wmb();
+ rm_corep->rm_state.rm_action = 0;
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6ee26de9a1de..85b32f16fa74 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_ACOP(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
+ /*
+ * Restore various registers to 0, where non-zero values
+ * set by the guest could disrupt the host.
+ */
+ li r0, 0
+ mtspr SPRN_IAMR, r0
+ mtspr SPRN_CIABR, r0
+ mtspr SPRN_DAWRX, r0
+ mtspr SPRN_TCSCR, r0
+ mtspr SPRN_WORT, r0
+ /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+ li r0, 1
+ sldi r0, r0, 31
+ mtspr SPRN_MMCRS, r0
8:
/* Save and reset AMR and UAMOR before turning on the MMU */
@@ -2006,8 +2020,8 @@ hcall_real_table:
.long 0 /* 0x12c */
.long 0 /* 0x130 */
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
- .long 0 /* 0x138 */
- .long 0 /* 0x13c */
+ .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
.long 0 /* 0x140 */
.long 0 /* 0x144 */
.long 0 /* 0x148 */
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index f2c75a1e0536..02176fd52f84 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
+ tce, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_bulk_remove(vcpu);
case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu);
+ case H_PUT_TCE_INDIRECT:
+ return kvmppc_h_pr_put_tce_indirect(vcpu);
+ case H_STUFF_TCE:
+ return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a3b182dcb823..19aa59b0850c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -33,6 +33,7 @@
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
+#include <asm/iommu.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
unsigned int i;
struct kvm_vcpu *vcpu;
+#ifdef CONFIG_KVM_XICS
+ /*
+ * We call kick_all_cpus_sync() to ensure that all
+ * CPUs have executed any pending IPIs before we
+ * continue and free VCPUs structures below.
+ */
+ if (is_kvmppc_hv_enabled(kvm))
+ kick_all_cpus_sync();
+#endif
+
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
+ case KVM_CAP_SPAPR_TCE_64:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_SMMU_INFO:
r = 1;
break;
+ case KVM_CAP_SPAPR_MULTITCE:
+ r = 1;
+ break;
#endif
default:
r = 0;
@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CREATE_SPAPR_TCE_64: {
+ struct kvm_create_spapr_tce_64 create_tce_64;
+
+ r = -EFAULT;
+ if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
+ goto out;
+ if (create_tce_64.flags) {
+ r = -EINVAL;
+ goto out;
+ }
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
+ goto out;
+ }
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
+ struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
goto out;
- r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+
+ create_tce_64.liobn = create_tce.liobn;
+ create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.offset = 0;
+ create_tce_64.size = create_tce.window_size >>
+ IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.flags = 0;
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_PPC_GET_SMMU_INFO: {
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ba59d5977f34..1005281be9a6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -255,8 +255,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (ret < 0)
break;
+
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
+ if (debug_pagealloc_enabled() &&
+ (paddr >> PAGE_SHIFT) < linear_map_hash_count)
linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
#endif /* CONFIG_DEBUG_PAGEALLOC */
}
@@ -512,17 +514,17 @@ static void __init htab_init_page_sizes(void)
if (mmu_has_feature(MMU_FTR_16M_PAGE))
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
sizeof(mmu_psize_defaults_gp));
- found:
-#ifndef CONFIG_DEBUG_PAGEALLOC
- /*
- * Pick a size for the linear mapping. Currently, we only support
- * 16M, 1M and 4K which is the default
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- mmu_linear_psize = MMU_PAGE_16M;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- mmu_linear_psize = MMU_PAGE_1M;
-#endif /* CONFIG_DEBUG_PAGEALLOC */
+found:
+ if (!debug_pagealloc_enabled()) {
+ /*
+ * Pick a size for the linear mapping. Currently, we only
+ * support 16M, 1M and 4K which is the default
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+ }
#ifdef CONFIG_PPC_64K_PAGES
/*
@@ -721,10 +723,12 @@ static void __init htab_initialize(void)
prot = pgprot_val(PAGE_KERNEL);
#ifdef CONFIG_DEBUG_PAGEALLOC
- linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
- linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
- 1, ppc64_rma_size));
- memset(linear_map_hash_slots, 0, linear_map_hash_count);
+ if (debug_pagealloc_enabled()) {
+ linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ linear_map_hash_slots = __va(memblock_alloc_base(
+ linear_map_hash_count, 1, ppc64_rma_size));
+ memset(linear_map_hash_slots, 0, linear_map_hash_count);
+ }
#endif /* CONFIG_DEBUG_PAGEALLOC */
/* On U3 based machines, we need to reserve the DART area and
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index a10be665b645..c2b771614d4f 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -112,10 +112,10 @@ void __init MMU_setup(void)
if (strstr(boot_command_line, "noltlbs")) {
__map_without_ltlbs = 1;
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
- __map_without_bats = 1;
- __map_without_ltlbs = 1;
-#endif
+ if (debug_pagealloc_enabled()) {
+ __map_without_bats = 1;
+ __map_without_ltlbs = 1;
+ }
}
/*
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0f0a514b04e..f078a1f94fc2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -541,7 +541,7 @@ static int __init add_system_ram_resources(void)
res->name = "System RAM";
res->start = base;
res->end = base + size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
WARN_ON(request_resource(&iomem_resource, res) < 0);
}
}
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 4e4efbc2658e..9ca6fe16cb29 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -118,8 +118,7 @@ static void destroy_pagetable_page(struct mm_struct *mm)
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
- if (!count) {
+ if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
pgtable_page_dtor(page);
free_hot_cold_page(page, 0);
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfd7925c72..de37ff445362 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* CONFIG_DEBUG_VM */
+unsigned long vmalloc_to_phys(void *va)
+{
+ unsigned long pfn = vmalloc_to_pfn(va);
+
+ BUG_ON(!pfn);
+ return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
+}
+EXPORT_SYMBOL_GPL(vmalloc_to_phys);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index cdf2123d46db..d9cc66cbdbb7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -403,7 +403,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
* count.
*/
if (likely(!mm->context.pte_frag)) {
- atomic_set(&page->_count, PTE_FRAG_NR);
+ set_page_count(page, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9f9dfda9ed2c..3b09ecfd0aee 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
}
}
-static unsigned long vmalloc_to_phys(void *v)
-{
- struct page *p = vmalloc_to_page(v);
-
- BUG_ON(!p);
- return page_to_phys(p) + offset_in_page(v);
-}
-
/* */
struct event_uniq {
struct rb_node node;
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 711f3d352af7..452da2391153 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -188,7 +188,7 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
static inline void mpc512x_free_bootmem(struct page *page)
{
BUG_ON(PageTail(page));
- BUG_ON(atomic_read(&page->_count) > 1);
+ BUG_ON(page_ref_count(page) > 1);
free_reserved_page(page);
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 32274f72fe3f..282837a1d74b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -47,20 +47,14 @@ static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
-static int cede_offline_enabled __read_mostly = 1;
+static bool cede_offline_enabled __read_mostly = true;
/*
* Enable/disable cede_offline when available.
*/
static int __init setup_cede_offline(char *str)
{
- if (!strcmp(str, "off"))
- cede_offline_enabled = 0;
- else if (!strcmp(str, "on"))
- cede_offline_enabled = 1;
- else
- return 0;
- return 1;
+ return (kstrtobool(str, &cede_offline_enabled) == 0);
}
__setup("cede_offline=", setup_cede_offline);
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/sysdev/ppc4xx_gpio.c
index fc65ad1b3293..d7a7ef135b9f 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/sysdev/ppc4xx_gpio.c
@@ -78,7 +78,7 @@ static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
- return in_be32(&regs->ir) & GPIO_MASK(gpio);
+ return !!(in_be32(&regs->ir) & GPIO_MASK(gpio));
}
static inline void
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index ff5e73230a36..56ce8ca3281b 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -46,7 +46,7 @@ static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- return in_8(mm_gc->regs) & u8_pin2mask(gpio);
+ return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio));
}
static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index eae32654bdf2..afdf62f2a695 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void icp_native_cause_ipi_rm(int cpu)
+{
+ /*
+ * Currently not used to send IPIs to another CPU
+ * on the same core. Only caller is KVM real mode.
+ * Need the physical address of the XICS to be
+ * previously saved in kvm_hstate in the paca.
+ */
+ unsigned long xics_phys;
+
+ /*
+ * Just like the cause_ipi functions, it is required to
+ * include a full barrier (out8 includes a sync) before
+ * causing the IPI.
+ */
+ xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
+}
+#endif
+
/*
* Called when an interrupt is received on an off-line CPU to
* clear the interrupt, so that the CPU can go back to nap mode.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 3be9c832dec1..7e3e8a8338d6 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -254,12 +254,12 @@ config MARCH_ZEC12
older machines.
config MARCH_Z13
- bool "IBM z13"
+ bool "IBM z13s and z13"
select HAVE_MARCH_Z13_FEATURES
help
- Select this to enable optimizations for IBM z13 (2964 series).
- The kernel will be slightly faster but will not work on older
- machines.
+ Select this to enable optimizations for IBM z13s and z13 (2965 and
+ 2964 series). The kernel will be slightly faster but will not work on
+ older machines.
endchoice
@@ -605,8 +605,6 @@ config PCI_NR_MSI
PCI devices.
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
endif # PCI
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 0b9b95f3c703..48e1a2d3e318 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -27,6 +27,7 @@
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
+#include <crypto/xts.h>
#include "crypt_s390.h"
#define AES_KEYLEN_128 1
@@ -587,6 +588,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
switch (key_len) {
case 32:
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index a0e71a501f7c..5687d62fb0cb 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -4,14 +4,23 @@
/* CLP common request & response block size */
#define CLP_BLK_SIZE PAGE_SIZE
+#define CLP_LPS_BASE 0
+#define CLP_LPS_PCI 2
+
struct clp_req_hdr {
u16 len;
u16 cmd;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
} __packed;
struct clp_rsp_hdr {
u16 len;
u16 rsp;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
} __packed;
/* CLP Response Codes */
@@ -25,4 +34,22 @@ struct clp_rsp_hdr {
#define CLP_RC_NODATA 0x0080 /* No data available */
#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */
+/* Store logical-processor characteristics request */
+struct clp_req_slpc {
+ struct clp_req_hdr hdr;
+} __packed;
+
+struct clp_rsp_slpc {
+ struct clp_rsp_hdr hdr;
+ u32 reserved2[4];
+ u32 lpif[8];
+ u32 reserved3[8];
+ u32 lpic[8];
+} __packed;
+
+struct clp_req_rsp_slpc {
+ struct clp_req_slpc request;
+ struct clp_rsp_slpc response;
+} __packed;
+
#endif
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
new file mode 100644
index 000000000000..d054c1b07a3c
--- /dev/null
+++ b/arch/s390/include/asm/gmap.h
@@ -0,0 +1,64 @@
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_H
+#define _ASM_S390_GMAP_H
+
+/**
+ * struct gmap_struct - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
+ * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ */
+struct gmap {
+ struct list_head list;
+ struct list_head crst_list;
+ struct mm_struct *mm;
+ struct radix_tree_root guest_to_host;
+ struct radix_tree_root host_to_guest;
+ spinlock_t guest_table_lock;
+ unsigned long *table;
+ unsigned long asce;
+ unsigned long asce_end;
+ void *private;
+ bool pfault_enabled;
+};
+
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+ struct list_head list;
+ void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
+};
+
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
+void gmap_free(struct gmap *gmap);
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
+void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
+
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+
+#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8959ebb6d2c9..6da41fab70fb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -20,6 +20,7 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
+#include <linux/seqlock.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@@ -229,17 +230,11 @@ struct kvm_s390_itdb {
__u8 data[256];
} __packed;
-struct kvm_s390_vregs {
- __vector128 vrs[32];
- __u8 reserved200[512]; /* for future vector expansion */
-} __packed;
-
struct sie_page {
struct kvm_s390_sie_block sie_block;
__u8 reserved200[1024]; /* 0x0200 */
struct kvm_s390_itdb itdb; /* 0x0600 */
- __u8 reserved700[1280]; /* 0x0700 */
- struct kvm_s390_vregs vregs; /* 0x0c00 */
+ __u8 reserved700[2304]; /* 0x0700 */
} __packed;
struct kvm_vcpu_stat {
@@ -467,7 +462,7 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
- wait_queue_head_t *wq;
+ struct swait_queue_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
@@ -558,6 +553,15 @@ struct kvm_vcpu_arch {
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
+ bool cputm_enabled;
+ /*
+ * The seqcount protects updates to cputm_start and sie_block.cputm,
+ * this way we can have non-blocking reads with consistent values.
+ * Only the owning VCPU thread (vcpu->cpu) is allowed to change these
+ * values and to start/stop/enable/disable cpu timer accounting.
+ */
+ seqcount_t cputm_seqcount;
+ __u64 cputm_start;
};
struct kvm_vm_stat {
@@ -596,15 +600,11 @@ struct s390_io_adapter {
#define S390_ARCH_FAC_MASK_SIZE_U64 \
(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
-struct kvm_s390_fac {
- /* facility list requested by guest */
- __u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
- /* facility mask supported by kvm & hosting machine */
- __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
-};
-
struct kvm_s390_cpu_model {
- struct kvm_s390_fac *fac;
+ /* facility mask supported by kvm & hosting machine */
+ __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+ /* facility list requested by guest (in dma page) */
+ __u64 *fac_list;
struct cpuid cpu_id;
unsigned short ibc;
};
@@ -623,6 +623,16 @@ struct kvm_s390_crypto_cb {
__u8 reserved80[128]; /* 0x0080 */
};
+/*
+ * sie_page2 has to be allocated as DMA because fac_list and crycb need
+ * 31bit addresses in the sie control block.
+ */
+struct sie_page2 {
+ __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
+ struct kvm_s390_crypto_cb crycb; /* 0x0800 */
+ u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
+} __packed;
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -643,6 +653,7 @@ struct kvm_arch{
int ipte_lock_count;
struct mutex ipte_mutex;
spinlock_t start_stop_lock;
+ struct sie_page2 *sie_page2;
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
u64 epoch;
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
index a52b6cca873d..d5427c78b1b3 100644
--- a/arch/s390/include/asm/livepatch.h
+++ b/arch/s390/include/asm/livepatch.h
@@ -19,7 +19,6 @@
#include <linux/module.h>
-#ifdef CONFIG_LIVEPATCH
static inline int klp_check_compiler_support(void)
{
return 0;
@@ -36,8 +35,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
{
regs->psw.addr = ip;
}
-#else
-#error Include linux/livepatch.h, not asm/livepatch.h
-#endif
#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fb1b93ea3e3f..e485817f7b1a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,17 +15,25 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ spin_lock_init(&mm->context.list_lock);
+ INIT_LIST_HEAD(&mm->context.pgtable_list);
+ INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.attach_count, 0);
mm->context.flush_mm = 0;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
- mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
#endif
- mm->context.asce_limit = STACK_TOP_MAX;
+ if (mm->context.asce_limit == 0) {
+ /* context created by exec, set asce limit to 4TB */
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ mm->context.asce_limit = STACK_TOP_MAX;
+ } else if (mm->context.asce_limit == (1UL << 31)) {
+ mm_inc_nr_pmds(mm);
+ }
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
}
@@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev,
static inline void arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm)
{
- if (oldmm->context.asce_limit < mm->context.asce_limit)
- crst_table_downgrade(mm, oldmm->context.asce_limit);
}
static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c873e682b67f..b6bfa169a002 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -9,7 +9,6 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <asm-generic/pci.h>
-#include <asm-generic/pci-dma-compat.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -45,7 +44,7 @@ struct zpci_fmb {
u64 rpcit_ops;
u64 dma_rbytes;
u64 dma_wbytes;
-} __packed __aligned(16);
+} __packed __aligned(64);
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
@@ -66,7 +65,6 @@ struct s390_domain;
/* Private data per function */
struct zpci_dev {
- struct pci_dev *pdev;
struct pci_bus *bus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
@@ -192,7 +190,7 @@ int zpci_fmb_disable_device(struct zpci_dev *);
/* Debug */
int zpci_debug_init(void);
void zpci_debug_exit(void);
-void zpci_debug_init_device(struct zpci_dev *);
+void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);
void zpci_debug_info(struct zpci_dev *, struct seq_file *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index dd78f92f1cce..e75c64cbcf08 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -49,9 +49,6 @@ struct clp_fh_list_entry {
/* List PCI functions request */
struct clp_req_list_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u64 resume_token;
u64 reserved2;
} __packed;
@@ -59,9 +56,6 @@ struct clp_req_list_pci {
/* List PCI functions response */
struct clp_rsp_list_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u64 resume_token;
u32 reserved2;
u16 max_fn;
@@ -73,9 +67,6 @@ struct clp_rsp_list_pci {
/* Query PCI function request */
struct clp_req_query_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u32 reserved2;
u64 reserved3;
@@ -84,9 +75,6 @@ struct clp_req_query_pci {
/* Query PCI function response */
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 : 64;
u16 vfn; /* virtual fn number */
u16 : 7;
u16 util_str_avail : 1; /* utility string available? */
@@ -108,21 +96,15 @@ struct clp_rsp_query_pci {
/* Query PCI function group request */
struct clp_req_query_pci_grp {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
- u32 : 24;
+ u32 reserved2 : 24;
u32 pfgid : 8; /* function group id */
- u32 reserved2;
- u64 reserved3;
+ u32 reserved3;
+ u64 reserved4;
} __packed;
/* Query PCI function group response */
struct clp_rsp_query_pci_grp {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u16 : 4;
u16 noi : 12; /* number of interrupts */
u8 version;
@@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp {
/* Set PCI function request */
struct clp_req_set_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u16 reserved2;
u8 oc; /* operation controls */
@@ -154,9 +133,6 @@ struct clp_req_set_pci {
/* Set PCI function response */
struct clp_rsp_set_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u32 reserved3;
u64 reserved4;
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 6d6556ca24aa..90240dfef76a 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -178,7 +178,6 @@
ret__; \
})
-#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
#include <asm-generic/percpu.h>
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index f897ec73dc8c..1f7ff85c5e4c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -21,7 +21,7 @@
#define PMU_F_ERR_LSDA 0x0200
#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
extern __init const struct attribute_group **cpumf_cf_event_group(void);
extern ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr,
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b7858f158b4..9b3d9b6099f2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
extern int page_table_allocate_pgste;
-int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
- unsigned long key, bool nq);
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
-
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
typedef struct { char _[n]; } addrtype;
@@ -100,12 +96,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- spin_lock_init(&mm->context.list_lock);
- INIT_LIST_HEAD(&mm->context.pgtable_list);
- INIT_LIST_HEAD(&mm->context.gmap_list);
- return (pgd_t *) crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ if (mm->context.asce_limit == (1UL << 31)) {
+ /* Forking a compat process with 2 page table levels */
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
+ }
+ return (pgd_t *) table;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ if (mm->context.asce_limit == (1UL << 31))
+ pgtable_pmd_page_dtor(virt_to_page(pgd));
+ crst_table_free(mm, (unsigned long *) pgd);
}
-#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
static inline void pmd_populate(struct mm_struct *mm,
pmd_t *pmd, pgtable_t pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 64ead8091248..2f66645587a2 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -298,15 +298,15 @@ static inline int is_module_addr(void *addr)
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
- * dy..R...I...wr
+ * dy..R...I...rw
* prot-none, clean, old 00..1...1...00
* prot-none, clean, young 01..1...1...00
* prot-none, dirty, old 10..1...1...00
* prot-none, dirty, young 11..1...1...00
- * read-only, clean, old 00..1...1...01
- * read-only, clean, young 01..1...0...01
- * read-only, dirty, old 10..1...1...01
- * read-only, dirty, young 11..1...0...01
+ * read-only, clean, old 00..1...1...10
+ * read-only, clean, young 01..1...0...10
+ * read-only, dirty, old 10..1...1...10
+ * read-only, dirty, young 11..1...0...10
* read-write, clean, old 00..1...1...11
* read-write, clean, young 01..1...0...11
* read-write, dirty, old 10..0...1...11
@@ -520,15 +520,6 @@ static inline int pmd_bad(pmd_t pmd)
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
-#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp,
- pmd_t entry, int dirty);
-
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
-
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
@@ -631,208 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return pmd;
}
-static inline pgste_t pgste_get_lock(pte_t *ptep)
-{
- unsigned long new = 0;
-#ifdef CONFIG_PGSTE
- unsigned long old;
-
- preempt_disable();
- asm(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " nihh %0,0xff7f\n" /* clear PCL bit in old */
- " oihh %1,0x0080\n" /* set PCL bit in new */
- " csg %0,%1,%2\n"
- " jl 0b\n"
- : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
- : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
-#endif
- return __pgste(new);
-}
-
-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- asm(
- " nihh %1,0xff7f\n" /* clear PCL bit */
- " stg %1,%0\n"
- : "=Q" (ptep[PTRS_PER_PTE])
- : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
- : "cc", "memory");
- preempt_enable();
-#endif
-}
-
-static inline pgste_t pgste_get(pte_t *ptep)
-{
- unsigned long pgste = 0;
-#ifdef CONFIG_PGSTE
- pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
-#endif
- return __pgste(pgste);
-}
-
-static inline void pgste_set(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
-#endif
-}
-
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
- struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
- unsigned long address, bits, skey;
-
- if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
- return pgste;
- address = pte_val(*ptep) & PAGE_MASK;
- skey = (unsigned long) page_get_storage_key(address);
- bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
- /* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
- /* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
-#endif
- return pgste;
-
-}
-
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
- struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
- unsigned long address;
- unsigned long nkey;
-
- if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
- return;
- VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
- address = pte_val(entry) & PAGE_MASK;
- /*
- * Set page access key and fetch protection bit from pgste.
- * The guest C/R information is still in the PGSTE, set real
- * key C/R to 0.
- */
- nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
- nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
- page_set_storage_key(address, nkey, 0);
-#endif
-}
-
-static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
-{
- if ((pte_val(entry) & _PAGE_PRESENT) &&
- (pte_val(entry) & _PAGE_WRITE) &&
- !(pte_val(entry) & _PAGE_INVALID)) {
- if (!MACHINE_HAS_ESOP) {
- /*
- * Without enhanced suppression-on-protection force
- * the dirty bit on for all writable ptes.
- */
- pte_val(entry) |= _PAGE_DIRTY;
- pte_val(entry) &= ~_PAGE_PROTECT;
- }
- if (!(pte_val(entry) & _PAGE_PROTECT))
- /* This pte allows write access, set user-dirty */
- pgste_val(pgste) |= PGSTE_UC_BIT;
- }
- *ptep = entry;
- return pgste;
-}
-
-/**
- * struct gmap_struct - guest address space
- * @crst_list: list of all crst tables used in the guest address space
- * @mm: pointer to the parent mm_struct
- * @guest_to_host: radix tree with guest to host address translation
- * @host_to_guest: radix tree with pointer to segment table entries
- * @guest_table_lock: spinlock to protect all entries in the guest page table
- * @table: pointer to the page directory
- * @asce: address space control element for gmap page table
- * @pfault_enabled: defines if pfaults are applicable for the guest
- */
-struct gmap {
- struct list_head list;
- struct list_head crst_list;
- struct mm_struct *mm;
- struct radix_tree_root guest_to_host;
- struct radix_tree_root host_to_guest;
- spinlock_t guest_table_lock;
- unsigned long *table;
- unsigned long asce;
- unsigned long asce_end;
- void *private;
- bool pfault_enabled;
-};
-
-/**
- * struct gmap_notifier - notify function block for page invalidation
- * @notifier_call: address of callback function
- */
-struct gmap_notifier {
- struct list_head list;
- void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
-};
-
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
-void gmap_free(struct gmap *gmap);
-void gmap_enable(struct gmap *gmap);
-void gmap_disable(struct gmap *gmap);
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
- unsigned long to, unsigned long len);
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
-unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
-void __gmap_zap(struct gmap *, unsigned long gaddr);
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
-
-
-void gmap_register_ipte_notifier(struct gmap_notifier *);
-void gmap_unregister_ipte_notifier(struct gmap_notifier *);
-int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
-
-static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- if (pgste_val(pgste) & PGSTE_IN_BIT) {
- pgste_val(pgste) &= ~PGSTE_IN_BIT;
- gmap_do_ipte_notify(mm, addr, ptep);
- }
-#endif
- return pgste;
-}
-
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified. Thus, the following
- * hook is made available.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t entry)
-{
- pgste_t pgste;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
- pgste_set_key(ptep, pgste, entry, mm);
- pgste = pgste_set_pte(ptep, pgste, entry);
- pgste_set_unlock(ptep, pgste);
- } else {
- *ptep = entry;
- }
-}
-
/*
* query functions pte_write/pte_dirty/pte_young only work if
* pte_present() is true. Undefined behaviour if not..
@@ -998,96 +787,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
} while (nr != 255);
}
-static inline void ptep_flush_direct(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- int active, count;
-
- if (pte_val(*ptep) & _PAGE_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_ipte_local(address, ptep);
- else
- __ptep_ipte(address, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
-
-static inline void ptep_flush_lazy(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- int active, count;
-
- if (pte_val(*ptep) & _PAGE_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
- pte_val(*ptep) |= _PAGE_INVALID;
- mm->context.flush_mm = 1;
- } else
- __ptep_ipte(address, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
-
/*
- * Get (and clear) the user dirty bit for a pte.
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ * 1) ptep_get_and_clear
+ * 2) set_pte_at
+ * 3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way how this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
*/
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep)
-{
- pgste_t pgste;
- pte_t pte;
- int dirty;
-
- if (!mm_has_pgste(mm))
- return 0;
- pgste = pgste_get_lock(ptep);
- dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
- pgste_val(pgste) &= ~PGSTE_UC_BIT;
- pte = *ptep;
- if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
- pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
- __ptep_ipte(addr, ptep);
- if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
- pte_val(pte) |= _PAGE_PROTECT;
- else
- pte_val(pte) |= _PAGE_INVALID;
- *ptep = pte;
- }
- pgste_set_unlock(ptep, pgste);
- return dirty;
-}
+pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte, oldpte;
- int young;
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
- }
-
- oldpte = pte = *ptep;
- ptep_flush_direct(vma->vm_mm, addr, ptep);
- young = pte_young(pte);
- pte = pte_mkold(pte);
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
+ pte_t pte = *ptep;
- return young;
+ pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
+ return pte_young(pte);
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -1097,104 +820,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}
-/*
- * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
- * both clear the TLB for the unmapped pte. The reason is that
- * ptep_get_and_clear is used in common code (e.g. change_pte_range)
- * to modify an active pte. The sequence is
- * 1) ptep_get_and_clear
- * 2) set_pte_at
- * 3) flush_tlb_range
- * On s390 the tlb needs to get flushed with the modification of the pte
- * if the pte is active. The only way how this can be implemented is to
- * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
- * is a nop.
- */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_lazy(mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set_unlock(ptep, pgste);
- }
- return pte;
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
- unsigned long address,
- pte_t *ptep)
-{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_lazy(mm, address, ptep);
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set(ptep, pgste);
- }
- return pte;
-}
-
-static inline void ptep_modify_prot_commit(struct mm_struct *mm,
- unsigned long address,
- pte_t *ptep, pte_t pte)
-{
- pgste_t pgste;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get(ptep);
- pgste_set_key(ptep, pgste, pte, mm);
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
-}
+pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_direct(vma->vm_mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (mm_has_pgste(vma->vm_mm)) {
- if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
- _PGSTE_GPS_USAGE_UNUSED)
- pte_val(pte) |= _PAGE_UNUSED;
- pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
- pgste_set_unlock(ptep, pgste);
- }
- return pte;
+ return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
}
/*
@@ -1206,80 +847,66 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
*/
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
+ unsigned long addr,
pte_t *ptep, int full)
{
- pgste_t pgste;
- pte_t pte;
-
- if (!full && mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- if (!full)
- ptep_flush_lazy(mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (!full && mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set_unlock(ptep, pgste);
+ if (full) {
+ pte_t pte = *ptep;
+ *ptep = __pte(_PAGE_INVALID);
+ return pte;
}
- return pte;
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
pte_t pte = *ptep;
- if (pte_write(pte)) {
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- ptep_flush_lazy(mm, address, ptep);
- pte = pte_wrprotect(pte);
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
- }
- return pte;
+ if (pte_write(pte))
+ ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep,
+ unsigned long addr, pte_t *ptep,
pte_t entry, int dirty)
{
- pgste_t pgste;
- pte_t oldpte;
-
- oldpte = *ptep;
- if (pte_same(oldpte, entry))
+ if (pte_same(*ptep, entry))
return 0;
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
- }
+ ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+ return 1;
+}
- ptep_flush_direct(vma->vm_mm, address, ptep);
+/*
+ * Additional functions to handle KVM guest page tables
+ */
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry);
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep , int reset);
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, bool nq);
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
- if (mm_has_pgste(vma->vm_mm)) {
- if (pte_val(oldpte) & _PAGE_INVALID)
- pgste_set_key(ptep, pgste, entry, vma->vm_mm);
- pgste = pgste_set_pte(ptep, pgste, entry);
- pgste_set_unlock(ptep, pgste);
- } else
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
+{
+ if (mm_has_pgste(mm))
+ ptep_set_pte_at(mm, addr, ptep, entry);
+ else
*ptep = entry;
- return 1;
}
/*
@@ -1476,54 +1103,51 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
: "cc" );
}
-static inline void pmdp_flush_direct(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
-{
- int active, count;
+pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
- return;
- if (!MACHINE_HAS_IDTE) {
- __pmdp_csp(pmdp);
- return;
- }
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pmdp_idte_local(address, pmdp);
- else
- __pmdp_idte(address, pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void pmdp_flush_lazy(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t entry, int dirty)
{
- int active, count;
+ VM_BUG_ON(addr & ~HPAGE_MASK);
- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
- pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
- mm->context.flush_mm = 1;
- } else if (MACHINE_HAS_IDTE)
- __pmdp_idte(address, pmdp);
- else
- __pmdp_csp(pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
+ if (pmd_val(*pmdp) == pmd_val(entry))
+ return 0;
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry);
+ return 1;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
+ pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd));
+ return pmd_young(pmd);
+}
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ VM_BUG_ON(addr & ~HPAGE_MASK);
+ return pmdp_test_and_clear_young(vma, addr, pmdp);
+}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
@@ -1539,66 +1163,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
return pmd;
}
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- pmd_t pmd;
-
- pmd = *pmdp;
- pmdp_flush_direct(vma->vm_mm, address, pmdp);
- *pmdp = pmd_mkold(pmd);
- return pmd_young(pmd);
-}
-
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = *pmdp;
-
- pmdp_flush_direct(mm, address, pmdp);
- pmd_clear(pmdp);
- return pmd;
+ return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
+ unsigned long addr,
pmd_t *pmdp, int full)
{
- pmd_t pmd = *pmdp;
-
- if (!full)
- pmdp_flush_lazy(mm, address, pmdp);
- pmd_clear(pmdp);
- return pmd;
+ if (full) {
+ pmd_t pmd = *pmdp;
+ *pmdp = __pmd(_SEGMENT_ENTRY_INVALID);
+ return pmd;
+ }
+ return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+ return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
}
#define __HAVE_ARCH_PMDP_INVALIDATE
static inline void pmdp_invalidate(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- pmdp_flush_direct(vma->vm_mm, address, pmdp);
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
- if (pmd_write(pmd)) {
- pmdp_flush_direct(mm, address, pmdp);
- set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
- }
+ if (pmd_write(pmd))
+ pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd));
}
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 1c4fe129486d..d6fd22ea270d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -184,6 +184,10 @@ struct task_struct;
struct mm_struct;
struct seq_file;
+typedef int (*dump_trace_func_t)(void *data, unsigned long address);
+void dump_trace(dump_trace_func_t func, void *data,
+ struct task_struct *task, unsigned long sp);
+
void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
@@ -203,6 +207,14 @@ unsigned long get_wchan(struct task_struct *p);
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+static inline unsigned long current_stack_pointer(void)
+{
+ unsigned long sp;
+
+ asm volatile("la %0,0(15)" : "=a" (sp));
+ return sp;
+}
+
static inline unsigned short stap(void)
{
unsigned short cpu_address;
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 4b43ee7e6776..fead491dfc28 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -31,7 +31,7 @@
* This should be totally fair - if anything is waiting, a process that wants a
* lock will go to the back of the queue. When the currently active lock is
* released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consequtive readers at the
+ * that will be woken up; if there's a bunch of consecutive readers at the
* front, then they'll all be woken up, but no other readers will be.
*/
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 69837225119e..c0f0efbb6ab5 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -101,6 +101,8 @@ extern void pfault_fini(void);
#define pfault_fini() do { } while (0)
#endif /* CONFIG_PFAULT */
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
+
extern void cmma_init(void);
extern void (*_machine_restart)(char *command);
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
index c82eb12a5b18..c988df744a70 100644
--- a/arch/s390/include/asm/xor.h
+++ b/arch/s390/include/asm/xor.h
@@ -1 +1,20 @@
-#include <asm-generic/xor.h>
+/*
+ * Optimited xor routines
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_XOR_H
+#define _ASM_S390_XOR_H
+
+extern struct xor_block_template xor_block_xc;
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+do { \
+ xor_speed(&xor_block_xc); \
+} while (0)
+
+#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc)
+
+#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h
new file mode 100644
index 000000000000..ab72d9d24373
--- /dev/null
+++ b/arch/s390/include/uapi/asm/clp.h
@@ -0,0 +1,28 @@
+/*
+ * ioctl interface for /dev/clp
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_CLP_H
+#define _ASM_CLP_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+struct clp_req {
+ unsigned int c : 1;
+ unsigned int r : 1;
+ unsigned int lps : 6;
+ unsigned int cmd : 8;
+ unsigned int : 16;
+ unsigned int reserved;
+ __u64 data_p;
+};
+
+#define CLP_IOCTL_MAGIC 'c'
+
+#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index fe84bd5fe7ce..347fe5afa419 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -154,6 +154,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_PFAULT (1UL << 5)
#define KVM_SYNC_VRS (1UL << 6)
#define KVM_SYNC_RICCB (1UL << 7)
+#define KVM_SYNC_FPRS (1UL << 8)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@@ -168,9 +169,12 @@ struct kvm_sync_regs {
__u64 pft; /* pfault token [PFAULT] */
__u64 pfs; /* pfault select [PFAULT] */
__u64 pfc; /* pfault compare [PFAULT] */
- __u64 vrs[32][2]; /* vector registers */
+ union {
+ __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */
+ __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */
+ };
__u8 reserved[512]; /* for future vector expansion */
- __u32 fpc; /* only valid with vector registers */
+ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
__u8 padding[52]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
};
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index ee69c0854c88..5dbaa72baa64 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -7,6 +7,7 @@
{ 0x9c, "DIAG (0x9c) time slice end directed" }, \
{ 0x204, "DIAG (0x204) logical-cpu utilization" }, \
{ 0x258, "DIAG (0x258) page-reference services" }, \
+ { 0x288, "DIAG (0x288) watchdog functions" }, \
{ 0x308, "DIAG (0x308) ipl functions" }, \
{ 0x500, "DIAG (0x500) KVM virtio functions" }, \
{ 0x501, "DIAG (0x501) KVM breakpoint" }
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 53bbc9e8b281..1f95cc1faeb7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <asm/idle.h>
#include <asm/vdso.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
/*
* Make sure that the compiler is new enough. We want a compiler that
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 7f768914fb4f..7f48e568ac64 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
(((unsigned long)response + rlen) >> 31)) {
lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
if (!lowbuf) {
- pr_warning("The cpcmd kernel function failed to "
- "allocate a response buffer\n");
+ pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
return -ENOMEM;
}
spin_lock_irqsave(&cpcmd_lock, flags);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c890a5589e59..aa12de72fd47 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
/* Since debugfs currently does not support uid/gid other than root, */
/* we do not allow gid/uid != 0 until we get support for that. */
if ((uid != 0) || (gid != 0))
- pr_warning("Root becomes the owner of all s390dbf files "
- "in sysfs\n");
+ pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
BUG_ON(!initialized);
mutex_lock(&debug_mutex);
@@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view,
new_level = debug_get_uint(str);
}
if(new_level < 0) {
- pr_warning("%s is not a valid level for a debug "
- "feature\n", str);
+ pr_warn("%s is not a valid level for a debug feature\n", str);
rc = -EINVAL;
} else {
debug_set_level(id, new_level);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 62973efd214a..8cb9bfdd3ea8 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1920,23 +1920,16 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
}
if (separator)
ptr += sprintf(ptr, "%c", separator);
- /*
- * Use four '%' characters below because of the
- * following two conversions:
- *
- * 1) sprintf: %%%%r -> %%r
- * 2) printk : %%r -> %r
- */
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%%%r%i", value);
+ ptr += sprintf(ptr, "%%r%i", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%%%f%i", value);
+ ptr += sprintf(ptr, "%%f%i", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%%%a%i", value);
+ ptr += sprintf(ptr, "%%a%i", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%%%c%i", value);
+ ptr += sprintf(ptr, "%%c%i", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%%%v%i", value);
+ ptr += sprintf(ptr, "%%v%i", value);
else if (operand->flags & OPERAND_PCREL)
ptr += sprintf(ptr, "%lx", (signed int) value
+ addr);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 02bd02ff648b..1b6081c0aff9 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/kdebug.h>
#include <linux/ptrace.h>
+#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/processor.h>
@@ -19,28 +20,28 @@
#include <asm/ipl.h>
/*
- * For show_trace we have tree different stack to consider:
+ * For dump_trace we have tree different stack to consider:
* - the panic stack which is used if the kernel stack has overflown
* - the asynchronous interrupt stack (cpu related)
* - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stack and can potentially
+ * The stack trace can start at any of the three stacks and can potentially
* touch all of them. The order is: panic stack, async stack, sync stack.
*/
static unsigned long
-__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+ unsigned long low, unsigned long high)
{
struct stack_frame *sf;
struct pt_regs *regs;
- unsigned long addr;
while (1) {
if (sp < low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- addr = sf->gprs[8];
- printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
/* Follow the backchain. */
while (1) {
+ if (func(data, sf->gprs[8]))
+ return sp;
low = sp;
sp = sf->back_chain;
if (!sp)
@@ -48,46 +49,58 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
if (sp <= low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- addr = sf->gprs[8];
- printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
}
/* Zero backchain detected, check for interrupt frame. */
sp = (unsigned long) (sf + 1);
if (sp <= low || sp > high - sizeof(*regs))
return sp;
regs = (struct pt_regs *) sp;
- addr = regs->psw.addr;
- printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ if (!user_mode(regs)) {
+ if (func(data, regs->psw.addr))
+ return sp;
+ }
low = sp;
sp = regs->gprs[15];
}
}
-static void show_trace(struct task_struct *task, unsigned long *stack)
+void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
+ unsigned long sp)
{
- const unsigned long frame_size =
- STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- register unsigned long __r15 asm ("15");
- unsigned long sp;
+ unsigned long frame_size;
- sp = (unsigned long) stack;
- if (!sp)
- sp = task ? task->thread.ksp : __r15;
- printk("Call Trace:\n");
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
#ifdef CONFIG_CHECK_STACK
- sp = __show_trace(sp,
+ sp = __dump_trace(func, data, sp,
S390_lowcore.panic_stack + frame_size - 4096,
S390_lowcore.panic_stack + frame_size);
#endif
- sp = __show_trace(sp,
+ sp = __dump_trace(func, data, sp,
S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
S390_lowcore.async_stack + frame_size);
if (task)
- __show_trace(sp, (unsigned long) task_stack_page(task),
- (unsigned long) task_stack_page(task) + THREAD_SIZE);
+ __dump_trace(func, data, sp,
+ (unsigned long)task_stack_page(task),
+ (unsigned long)task_stack_page(task) + THREAD_SIZE);
else
- __show_trace(sp, S390_lowcore.thread_info,
+ __dump_trace(func, data, sp,
+ S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE);
+}
+EXPORT_SYMBOL_GPL(dump_trace);
+
+static int show_address(void *data, unsigned long address)
+{
+ printk("([<%016lx>] %pSR)\n", address, (void *)address);
+ return 0;
+}
+
+static void show_trace(struct task_struct *task, unsigned long sp)
+{
+ if (!sp)
+ sp = task ? task->thread.ksp : current_stack_pointer();
+ printk("Call Trace:\n");
+ dump_trace(show_address, NULL, task, sp);
if (!task)
task = current;
debug_show_held_locks(task);
@@ -95,15 +108,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack)
void show_stack(struct task_struct *task, unsigned long *sp)
{
- register unsigned long *__r15 asm ("15");
unsigned long *stack;
int i;
- if (!sp)
- stack = task ? (unsigned long *) task->thread.ksp : __r15;
- else
- stack = sp;
-
+ stack = sp;
+ if (!stack) {
+ if (!task)
+ stack = (unsigned long *)current_stack_pointer();
+ else
+ stack = (unsigned long *)task->thread.ksp;
+ }
for (i = 0; i < 20; i++) {
if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
break;
@@ -112,7 +126,7 @@ void show_stack(struct task_struct *task, unsigned long *sp)
printk("%016lx ", *stack++);
}
printk("\n");
- show_trace(task, sp);
+ show_trace(task, (unsigned long)sp);
}
static void show_last_breaking_event(struct pt_regs *regs)
@@ -121,13 +135,9 @@ static void show_last_breaking_event(struct pt_regs *regs)
printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
}
-static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
-{
- return (regs->psw.mask & bits) / ((~bits + 1) & bits);
-}
-
void show_registers(struct pt_regs *regs)
{
+ struct psw_bits *psw = &psw_bits(regs->psw);
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
@@ -136,13 +146,9 @@ void show_registers(struct pt_regs *regs)
printk(" (%pSR)", (void *)regs->psw.addr);
printk("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
- "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
- mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
- mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
- mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
- mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
- mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
- printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+ "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
+ psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
+ printk(" RI:%x EA:%x", psw->ri, psw->eaba);
printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
printk(" %016lx %016lx %016lx %016lx\n",
@@ -160,7 +166,7 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
- show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_trace(NULL, regs->gprs[15]);
show_last_breaking_event(regs);
}
@@ -184,9 +190,8 @@ void die(struct pt_regs *regs, const char *str)
#ifdef CONFIG_SMP
printk("SMP ");
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC");
printk("\n");
notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
print_modules();
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c55576bbaa1f..a0684de5a93b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -448,7 +448,6 @@ void __init startup_init(void)
rescue_initrd();
clear_bss_section();
init_kernel_storage_key();
- lockdep_init();
lockdep_off();
setup_lowcore_early();
setup_facility_list();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index cd5a191381b9..2d47f9cfcb36 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -186,6 +186,7 @@ ENTRY(__switch_to)
stg %r5,__LC_THREAD_INFO # store thread info of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
+ /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
@@ -1199,114 +1200,12 @@ cleanup_critical:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bor %r14
- clg %r9,BASED(.Lcleanup_save_fpu_regs_done)
- jhe 5f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_fp)
- jhe 4f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
- jhe 3f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
- jhe 2f
- clg %r9,BASED(.Lcleanup_save_fpu_fpc_end)
- jhe 1f
- lg %r2,__LC_CURRENT
- aghi %r2,__TASK_thread
-0: # Store floating-point controls
- stfpc __THREAD_FPU_fpc(%r2)
-1: # Load register save area and check if VX is active
- lg %r3,__THREAD_FPU_regs(%r2)
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
- jz 4f # no VX -> store FP regs
-2: # Store vector registers (V0-V15)
- VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
-3: # Store vector registers (V16-V31)
- VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
- j 5f # -> done, set CIF_FPU flag
-4: # Store floating-point registers
- std 0,0(%r3)
- std 1,8(%r3)
- std 2,16(%r3)
- std 3,24(%r3)
- std 4,32(%r3)
- std 5,40(%r3)
- std 6,48(%r3)
- std 7,56(%r3)
- std 8,64(%r3)
- std 9,72(%r3)
- std 10,80(%r3)
- std 11,88(%r3)
- std 12,96(%r3)
- std 13,104(%r3)
- std 14,112(%r3)
- std 15,120(%r3)
-5: # Set CIF_FPU flag
- oi __LC_CPU_FLAGS+7,_CIF_FPU
- lg %r9,48(%r11) # return from save_fpu_regs
+ larl %r9,save_fpu_regs
br %r14
-.Lcleanup_save_fpu_fpc_end:
- .quad .Lsave_fpu_regs_fpc_end
-.Lcleanup_save_fpu_regs_vx_low:
- .quad .Lsave_fpu_regs_vx_low
-.Lcleanup_save_fpu_regs_vx_high:
- .quad .Lsave_fpu_regs_vx_high
-.Lcleanup_save_fpu_regs_fp:
- .quad .Lsave_fpu_regs_fp
-.Lcleanup_save_fpu_regs_done:
- .quad .Lsave_fpu_regs_done
.Lcleanup_load_fpu_regs:
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bnor %r14
- clg %r9,BASED(.Lcleanup_load_fpu_regs_done)
- jhe 1f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
- jhe 2f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
- jhe 3f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
- jhe 4f
- lg %r4,__LC_CURRENT
- aghi %r4,__TASK_thread
- lfpc __THREAD_FPU_fpc(%r4)
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
- lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
- jz 2f # -> no VX, load FP regs
-4: # Load V0 ..V15 registers
- VLM %v0,%v15,0,%r4
-3: # Load V16..V31 registers
- VLM %v16,%v31,256,%r4
- j 1f
-2: # Load floating-point registers
- ld 0,0(%r4)
- ld 1,8(%r4)
- ld 2,16(%r4)
- ld 3,24(%r4)
- ld 4,32(%r4)
- ld 5,40(%r4)
- ld 6,48(%r4)
- ld 7,56(%r4)
- ld 8,64(%r4)
- ld 9,72(%r4)
- ld 10,80(%r4)
- ld 11,88(%r4)
- ld 12,96(%r4)
- ld 13,104(%r4)
- ld 14,112(%r4)
- ld 15,120(%r4)
-1: # Clear CIF_FPU bit
- ni __LC_CPU_FLAGS+7,255-_CIF_FPU
- lg %r9,48(%r11) # return from load_fpu_regs
+ larl %r9,load_fpu_regs
br %r14
-.Lcleanup_load_fpu_regs_vx:
- .quad .Lload_fpu_regs_vx
-.Lcleanup_load_fpu_regs_vx_high:
- .quad .Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp:
- .quad .Lload_fpu_regs_fp
-.Lcleanup_load_fpu_regs_done:
- .quad .Lload_fpu_regs_done
/*
* Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index c5febe84eba6..03c2b469c472 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -16,7 +16,7 @@
__HEAD
ENTRY(startup_continue)
- tm __LC_STFLE_FAC_LIST+6,0x80 # LPP available ?
+ tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ?
jz 0f
xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
mvi __LC_LPP,0x80 # and set LPP_MAGIC
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f41d5208aaf7..c373a1d41d10 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -164,8 +164,7 @@ void do_softirq_own_stack(void)
{
unsigned long old, new;
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (old));
+ old = current_stack_pointer();
/* Check against async. stack address range. */
new = S390_lowcore.async_stack;
if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 929c147e07b4..58bf4572d457 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event)
/* Validate the counter that is assigned to this event.
* Because the counter facility can use numerous counters at the
- * same time without constraints, it is not necessary to explicity
+ * same time without constraints, it is not necessary to explicitly
* validate event groups (event->group_leader != event).
*/
err = validate_event(hwc);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3d8da1e742c2..1a43474df541 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
/*
* A non-zero guest program parameter indicates a guest
* sample.
- * Note that some early samples might be misaccounted to
- * the host.
+ * Note that some early samples or samples from guests without
+ * lpp usage would be misaccounted to the host. We use the asn
+ * value as a heuristic to detect most of these guest samples.
+ * If the value differs from the host hpp value, we assume
+ * it to be a KVM guest.
*/
- if (sfr->basic.gpp)
+ if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp)
sde_regs->in_guest = 1;
overflow = 0;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 0943b11a2f6e..c3e4099b60a5 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -222,67 +222,23 @@ static int __init service_level_perf_register(void)
}
arch_initcall(service_level_perf_register);
-/* See also arch/s390/kernel/traps.c */
-static unsigned long __store_trace(struct perf_callchain_entry *entry,
- unsigned long sp,
- unsigned long low, unsigned long high)
+static int __perf_callchain_kernel(void *data, unsigned long address)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry, sf->gprs[8]);
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry, sf->gprs[8]);
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- perf_callchain_store(entry, sf->gprs[8]);
- low = sp;
- sp = regs->gprs[15];
- }
+ struct perf_callchain_entry *entry = data;
+
+ perf_callchain_store(entry, address);
+ return 0;
}
void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
- unsigned long head, frame_size;
- struct stack_frame *head_sf;
-
if (user_mode(regs))
return;
-
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- head = regs->gprs[15];
- head_sf = (struct stack_frame *) head;
-
- if (!head_sf || !head_sf->back_chain)
- return;
-
- head = head_sf->back_chain;
- head = __store_trace(entry, head,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
- S390_lowcore.async_stack + frame_size);
-
- __store_trace(entry, head, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
+ dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]);
}
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr, char *page)
{
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 9220db5c996a..d3f9688f26b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -327,6 +327,7 @@ static void __init setup_lowcore(void)
+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
+ lc->lpp = LPP_MAGIC;
lc->machine_flags = S390_lowcore.machine_flags;
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
@@ -374,17 +375,17 @@ static void __init setup_lowcore(void)
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource __initdata *standard_resources[] = {
@@ -408,7 +409,7 @@ static void __init setup_resources(void)
for_each_memblock(memory, reg) {
res = alloc_bootmem_low(sizeof(*res));
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
res->start = reg->base;
@@ -779,6 +780,7 @@ static int __init setup_hwcaps(void)
strcpy(elf_platform, "zEC12");
break;
case 0x2964:
+ case 0x2965:
strcpy(elf_platform, "z13");
break;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3c65a8eae34d..40a6b4f9c36c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Upping and downing of CPUs */
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 8f64ebd63767..44f84b23d4e5 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -10,78 +10,39 @@
#include <linux/kallsyms.h>
#include <linux/module.h>
-static unsigned long save_context_stack(struct stack_trace *trace,
- unsigned long sp,
- unsigned long low,
- unsigned long high,
- int savesched)
+static int __save_address(void *data, unsigned long address, int nosched)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
- unsigned long addr;
+ struct stack_trace *trace = data;
- while(1) {
- if (sp < low || sp > high)
- return sp;
- sf = (struct stack_frame *)sp;
- while(1) {
- addr = sf->gprs[8];
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *)sp;
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long)(sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *)sp;
- addr = regs->psw.addr;
- if (savesched || !in_sched_functions(addr)) {
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- }
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = regs->gprs[15];
+ if (nosched && in_sched_functions(address))
+ return 0;
+ if (trace->skip > 0) {
+ trace->skip--;
+ return 0;
}
+ if (trace->nr_entries < trace->max_entries) {
+ trace->entries[trace->nr_entries++] = address;
+ return 0;
+ }
+ return 1;
}
-static void __save_stack_trace(struct stack_trace *trace, unsigned long sp)
+static int save_address(void *data, unsigned long address)
{
- unsigned long new_sp, frame_size;
+ return __save_address(data, address, 0);
+}
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- new_sp = save_context_stack(trace, sp,
- S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
- S390_lowcore.panic_stack + frame_size, 1);
- new_sp = save_context_stack(trace, new_sp,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
- S390_lowcore.async_stack + frame_size, 1);
- save_context_stack(trace, new_sp,
- S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE, 1);
+static int save_address_nosched(void *data, unsigned long address)
+{
+ return __save_address(data, address, 1);
}
void save_stack_trace(struct stack_trace *trace)
{
- register unsigned long r15 asm ("15");
unsigned long sp;
- sp = r15;
- __save_stack_trace(trace, sp);
+ sp = current_stack_pointer();
+ dump_trace(save_address, trace, NULL, sp);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
@@ -89,16 +50,12 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- unsigned long sp, low, high;
+ unsigned long sp;
sp = tsk->thread.ksp;
- if (tsk == current) {
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (sp));
- }
- low = (unsigned long) task_stack_page(tsk);
- high = (unsigned long) task_pt_regs(tsk);
- save_context_stack(trace, sp, low, high, 0);
+ if (tsk == current)
+ sp = current_stack_pointer();
+ dump_trace(save_address_nosched, trace, tsk, sp);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
@@ -109,7 +66,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
unsigned long sp;
sp = kernel_stack_pointer(regs);
- __save_stack_trace(trace, sp);
+ dump_trace(save_address, trace, NULL, sp);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 99f84ac31307..9409d32f285e 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -499,8 +499,7 @@ static void etr_reset(void)
if (etr_port0_online && etr_port1_online)
set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
} else if (etr_port0_online || etr_port1_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an ETR interface\n");
+ pr_warn("The real or virtual hardware system does not provide an ETR interface\n");
etr_port0_online = etr_port1_online = 0;
}
}
@@ -1433,7 +1432,7 @@ device_initcall(etr_init_sysfs);
/*
* Server Time Protocol (STP) code.
*/
-static int stp_online;
+static bool stp_online;
static struct stp_sstpi stp_info;
static void *stp_page;
@@ -1444,11 +1443,7 @@ static struct timer_list stp_timer;
static int __init early_parse_stp(char *p)
{
- if (strncmp(p, "off", 3) == 0)
- stp_online = 0;
- else if (strncmp(p, "on", 2) == 0)
- stp_online = 1;
- return 0;
+ return kstrtobool(p, &stp_online);
}
early_param("stp", early_parse_stp);
@@ -1464,8 +1459,7 @@ static void __init stp_reset(void)
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an STP interface\n");
+ pr_warn("The real or virtual hardware system does not provide an STP interface\n");
free_page((unsigned long) stp_page);
stp_page = NULL;
stp_online = 0;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 40b8102fdadb..64298a867589 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -37,7 +37,7 @@ static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
-static int topology_enabled = 1;
+static bool topology_enabled = true;
static DECLARE_WORK(topology_work, topology_work_fn);
/*
@@ -444,10 +444,7 @@ static const struct cpumask *cpu_book_mask(int cpu)
static int __init early_parse_topology(char *p)
{
- if (strncmp(p, "off", 3))
- return 0;
- topology_enabled = 0;
- return 0;
+ return kstrtobool(p, &topology_enabled);
}
early_param("topology", early_parse_topology);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 017eb03daee2..dd97a3e8a34a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -22,8 +22,6 @@
#include <asm/fpu/api.h>
#include "entry.h"
-int show_unhandled_signals = 1;
-
static inline void __user *get_trap_ip(struct pt_regs *regs)
{
unsigned long address;
@@ -35,21 +33,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
return (void __user *) (address - (regs->int_code >> 16));
}
-static inline void report_user_fault(struct pt_regs *regs, int signr)
-{
- if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
- return;
- if (!unhandled_signal(current, signr))
- return;
- if (!printk_ratelimit())
- return;
- printk("User process fault: interruption code %04x ilc:%d ",
- regs->int_code & 0xffff, regs->int_code >> 17);
- print_vma_addr("in ", regs->psw.addr);
- printk("\n");
- show_regs(regs);
-}
-
int is_valid_bugaddr(unsigned long addr)
{
return 1;
@@ -65,7 +48,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
info.si_code = si_code;
info.si_addr = get_trap_ip(regs);
force_sig_info(si_signo, &info, current);
- report_user_fault(regs, si_signo);
+ report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->psw.addr);
@@ -111,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap);
void default_trap_handler(struct pt_regs *regs)
{
if (user_mode(regs)) {
- report_user_fault(regs, SIGSEGV);
+ report_user_fault(regs, SIGSEGV, 0);
do_exit(SIGSEGV);
} else
die(regs, "Unknown program exception");
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 05f7de9869a9..1ea4095b67d7 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -14,6 +14,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/pgalloc.h>
+#include <asm/gmap.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index d30db40437dc..66938d283b77 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
- int write)
+ enum gacc_mode mode)
{
union alet alet;
struct ale ale;
@@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
}
}
- if (ale.fo == 1 && write)
+ if (ale.fo == 1 && mode == GACC_STORE)
return PGM_PROTECTION;
asce->val = aste.asce;
@@ -477,25 +477,28 @@ enum {
};
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
- ar_t ar, int write)
+ ar_t ar, enum gacc_mode mode)
{
int rc;
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
struct trans_exc_code_bits *tec_bits;
memset(pgm, 0, sizeof(*pgm));
tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
- tec_bits->as = psw_bits(*psw).as;
+ tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec_bits->as = psw.as;
- if (!psw_bits(*psw).t) {
+ if (!psw.t) {
asce->val = 0;
asce->r = 1;
return 0;
}
- switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+ if (mode == GACC_IFETCH)
+ psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
+
+ switch (psw.as) {
case PSW_AS_PRIMARY:
asce->val = vcpu->arch.sie_block->gcr[1];
return 0;
@@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
asce->val = vcpu->arch.sie_block->gcr[13];
return 0;
case PSW_AS_ACCREG:
- rc = ar_translation(vcpu, asce, ar, write);
+ rc = ar_translation(vcpu, asce, ar, mode);
switch (rc) {
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @gva: guest virtual address
* @gpa: points to where guest physical (absolute) address should be stored
* @asce: effective asce
- * @write: indicates if access is a write access
+ * @mode: indicates the access mode to be used
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
@@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, const union asce asce,
- int write)
+ enum gacc_mode mode)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
@@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
real_address:
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
- if (write && dat_protection)
+ if (mode == GACC_STORE && dat_protection)
return PGM_PROTECTION;
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
@@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
unsigned long *pages, unsigned long nr_pages,
- const union asce asce, int write)
+ const union asce asce, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
tec_bits->addr = ga >> PAGE_SHIFT;
- if (write && lap_enabled && is_low_address(ga)) {
+ if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
pgm->code = PGM_PROTECTION;
return pgm->code;
}
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
- rc = guest_translate(vcpu, ga, pages, asce, write);
+ rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
if (rc == PGM_PROTECTION)
@@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
}
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
- unsigned long len, int write)
+ unsigned long len, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
unsigned long _len, nr_pages, gpa, idx;
@@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
if (!len)
return 0;
- rc = get_vcpu_asce(vcpu, &asce, ar, write);
+ rc = get_vcpu_asce(vcpu, &asce, ar, mode);
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
+ rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (write)
+ if (mode == GACC_STORE)
rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
else
rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
@@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
}
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
- void *data, unsigned long len, int write)
+ void *data, unsigned long len, enum gacc_mode mode)
{
unsigned long _len, gpa;
int rc = 0;
@@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
while (len && !rc) {
gpa = kvm_s390_real_to_abs(vcpu, gra);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (write)
+ if (mode)
rc = write_guest_abs(vcpu, gpa, data, _len);
else
rc = read_guest_abs(vcpu, gpa, data, _len);
@@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* has to take care of this.
*/
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long *gpa, int write)
+ unsigned long *gpa, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
gva = kvm_s390_logical_to_effective(vcpu, gva);
tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- rc = get_vcpu_asce(vcpu, &asce, ar, write);
+ rc = get_vcpu_asce(vcpu, &asce, ar, mode);
tec->addr = gva >> PAGE_SHIFT;
if (rc)
return rc;
if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (write) {
+ if (mode == GACC_STORE) {
rc = pgm->code = PGM_PROTECTION;
return rc;
}
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, write);
+ rc = guest_translate(vcpu, gva, gpa, asce, mode);
if (rc > 0) {
if (rc == PGM_PROTECTION)
tec->b61 = 1;
@@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
* check_gva_range - test a range of guest virtual addresses for accessibility
*/
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long length, int is_write)
+ unsigned long length, enum gacc_mode mode)
{
unsigned long gpa;
unsigned long currlen;
@@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
ipte_lock(vcpu);
while (length > 0 && !rc) {
currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+ rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
gva += currlen;
length -= currlen;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index ef03726cc661..df0a79dd8159 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
+enum gacc_mode {
+ GACC_FETCH,
+ GACC_STORE,
+ GACC_IFETCH,
+};
+
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
- ar_t ar, unsigned long *gpa, int write);
+ ar_t ar, unsigned long *gpa, enum gacc_mode mode);
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long length, int is_write);
+ unsigned long length, enum gacc_mode mode);
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
- unsigned long len, int write);
+ unsigned long len, enum gacc_mode mode);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
- void *data, unsigned long len, int write);
+ void *data, unsigned long len, enum gacc_mode mode);
/**
* write_guest - copy data from kernel space to guest space
@@ -215,7 +221,7 @@ static inline __must_check
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, 1);
+ return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
}
/**
@@ -235,7 +241,27 @@ static inline __must_check
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, 0);
+ return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
+}
+
+/**
+ * read_guest_instr - copy instruction data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from the current psw address (guest space) to @data (kernel
+ * space).
+ *
+ * The behaviour of read_guest_instr is identical to read_guest, except that
+ * instruction data will be read from primary space when in home-space or
+ * address-space mode.
+ */
+static inline __must_check
+int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len)
+{
+ return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len,
+ GACC_IFETCH);
}
/**
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index d697312ce9ee..e8c6843b9600 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -17,7 +17,7 @@
/*
* Extends the address range given by *start and *stop to include the address
* range starting with estart and the length len. Takes care of overflowing
- * intervals and tries to minimize the overall intervall size.
+ * intervals and tries to minimize the overall interval size.
*/
static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
{
@@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
return;
/*
- * If the guest is not interrested in branching events, we can savely
+ * If the guest is not interested in branching events, we can safely
* limit them to the PER address range.
*/
if (!(*cr9 & PER_EVENT_BRANCH))
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index d53c10753c46..2e6b54e4d3f9 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = {
[0xeb] = kvm_s390_handle_eb,
};
-void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+ u8 ilen = 0;
- /* Use the length of the EXECUTE instruction if necessary */
- if (sie_block->icptstatus & 1) {
- ilc = (sie_block->icptstatus >> 4) & 0x6;
- if (!ilc)
- ilc = 4;
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* instruction only stored for these icptcodes */
+ ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
+ /* Use the length of the EXECUTE instruction if necessary */
+ if (sie_block->icptstatus & 1) {
+ ilen = (sie_block->icptstatus >> 4) & 0x6;
+ if (!ilen)
+ ilen = 4;
+ }
+ break;
+ case ICPT_PROGI:
+ /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
+ ilen = vcpu->arch.sie_block->pgmilc & 0x6;
+ break;
}
- sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+ return ilen;
}
static int handle_noop(struct kvm_vcpu *vcpu)
@@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
-static void __extract_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info)
+static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
{
- memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
- pgm_info->code = vcpu->arch.sie_block->iprcc;
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = vcpu->arch.sie_block->iprcc,
+ /* the PSW has already been rewound */
+ .flags = KVM_S390_PGM_FLAGS_NO_REWIND,
+ };
switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
@@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
case PGM_SPACE_SWITCH:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_ASTE_SEQUENCE:
case PGM_ASTE_VALIDITY:
case PGM_EXTENDED_AUTHORITY:
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
@@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
case PGM_SEGMENT_TRANSLATION:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
- pgm_info->op_access_id = vcpu->arch.sie_block->oai;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.op_access_id = vcpu->arch.sie_block->oai;
break;
case PGM_MONITOR:
- pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
- pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
+ pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_VECTOR_PROCESSING:
case PGM_DATA:
- pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+ pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
break;
case PGM_PROTECTION:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
default:
break;
}
if (vcpu->arch.sie_block->iprcc & PGM_PER) {
- pgm_info->per_code = vcpu->arch.sie_block->perc;
- pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
- pgm_info->per_address = vcpu->arch.sie_block->peraddr;
- pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+ pgm_info.per_code = vcpu->arch.sie_block->perc;
+ pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
+ pgm_info.per_address = vcpu->arch.sie_block->peraddr;
+ pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
}
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
/*
@@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
static int handle_prog(struct kvm_vcpu *vcpu)
{
- struct kvm_s390_pgm_info pgm_info;
psw_t psw;
int rc;
@@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
if (rc)
return rc;
- __extract_prog_irq(vcpu, &pgm_info);
- return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+ return inject_prog_on_prog_intercept(vcpu);
}
/**
@@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the source is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
- reg2, &srcaddr, 0);
+ reg2, &srcaddr, GACC_FETCH);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -311,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the destination is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
- reg1, &dstaddr, 1);
+ reg1, &dstaddr, GACC_STORE);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
if (rc != 0)
return rc;
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
return 0;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f88ca72c3a77..84efc2ba6a90 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -23,6 +23,7 @@
#include <asm/uaccess.h>
#include <asm/sclp.h>
#include <asm/isc.h>
+#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace-s390.h"
@@ -182,8 +183,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
{
- return (vcpu->arch.sie_block->cputm >> 63) &&
- cpu_timer_interrupts_enabled(vcpu);
+ if (!cpu_timer_interrupts_enabled(vcpu))
+ return 0;
+ return kvm_s390_get_cpu_timer(vcpu) >> 63;
}
static inline int is_ioirq(unsigned long irq_type)
@@ -335,23 +337,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
set_intercept_indicators_stop(vcpu);
}
-static u16 get_ilc(struct kvm_vcpu *vcpu)
-{
- switch (vcpu->arch.sie_block->icptcode) {
- case ICPT_INST:
- case ICPT_INSTPROGI:
- case ICPT_OPEREXC:
- case ICPT_PARTEXEC:
- case ICPT_IOINST:
- /* last instruction only stored for these icptcodes */
- return insn_length(vcpu->arch.sie_block->ipa >> 8);
- case ICPT_PROGI:
- return vcpu->arch.sie_block->pgmilc;
- default:
- return 0;
- }
-}
-
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -588,7 +573,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_pgm_info pgm_info;
int rc = 0, nullifying = false;
- u16 ilc = get_ilc(vcpu);
+ u16 ilen;
spin_lock(&li->lock);
pgm_info = li->irq.pgm;
@@ -596,8 +581,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
memset(&li->irq.pgm, 0, sizeof(pgm_info));
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
- pgm_info.code, ilc);
+ ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
+ VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
+ pgm_info.code, ilen);
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
@@ -681,10 +667,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
(u8 *) __LC_PER_ACCESS_ID);
}
- if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
- kvm_s390_rewind_psw(vcpu, ilc);
+ if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND))
+ kvm_s390_rewind_psw(vcpu, ilen);
- rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+ /* bit 1+2 of the target are the ilc, so we can directly use ilen */
+ rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
(u64 *) __LC_LAST_BREAK);
rc |= put_guest_lc(vcpu, pgm_info.code,
@@ -923,9 +910,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
}
+static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
+{
+ u64 now, cputm, sltime = 0;
+
+ if (ckc_interrupts_enabled(vcpu)) {
+ now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+ sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
+ /* already expired or overflow? */
+ if (!sltime || vcpu->arch.sie_block->ckc <= now)
+ return 0;
+ if (cpu_timer_interrupts_enabled(vcpu)) {
+ cputm = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (cputm >> 63)
+ return 0;
+ return min(sltime, tod_to_ns(cputm));
+ }
+ } else if (cpu_timer_interrupts_enabled(vcpu)) {
+ sltime = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (sltime >> 63)
+ return 0;
+ }
+ return sltime;
+}
+
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
{
- u64 now, sltime;
+ u64 sltime;
vcpu->stat.exit_wait_state++;
@@ -938,22 +951,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP; /* disabled wait */
}
- if (!ckc_interrupts_enabled(vcpu)) {
+ if (!ckc_interrupts_enabled(vcpu) &&
+ !cpu_timer_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
__set_cpu_idle(vcpu);
goto no_timer;
}
- now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
- sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
-
- /* underflow */
- if (vcpu->arch.sie_block->ckc < now)
+ sltime = __calculate_sltime(vcpu);
+ if (!sltime)
return 0;
__set_cpu_idle(vcpu);
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
- VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
+ VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
@@ -966,13 +977,13 @@ no_timer:
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- if (waitqueue_active(&vcpu->wq)) {
+ if (swait_active(&vcpu->wq)) {
/*
* The vcpu gave up the cpu voluntarily, mark it as a good
* yield-candidate.
*/
vcpu->preempted = true;
- wake_up_interruptible(&vcpu->wq);
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -980,18 +991,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
- u64 now, sltime;
+ u64 sltime;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
- now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
- sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
+ sltime = __calculate_sltime(vcpu);
/*
* If the monotonic clock runs faster than the tod clock we might be
* woken up too early and have to go back to sleep to avoid deadlocks.
*/
- if (vcpu->arch.sie_block->ckc > now &&
- hrtimer_forward_now(timer, ns_to_ktime(sltime)))
+ if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime)))
return HRTIMER_RESTART;
kvm_s390_vcpu_wakeup(vcpu);
return HRTIMER_NORESTART;
@@ -1059,8 +1068,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
irq->u.pgm.code, 0);
+ if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) {
+ /* auto detection if no valid ILC was given */
+ irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
+ irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
+ irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
+ }
+
if (irq->u.pgm.code == PGM_PER) {
li->irq.pgm.code |= PGM_PER;
+ li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify PER related information */
li->irq.pgm.per_address = irq->u.pgm.per_address;
li->irq.pgm.per_code = irq->u.pgm.per_code;
@@ -1069,6 +1086,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
} else if (!(irq->u.pgm.code & PGM_PER)) {
li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
irq->u.pgm.code;
+ li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify non-PER information */
li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
li->irq.pgm.mon_code = irq->u.pgm.mon_code;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4af21c771f9b..668c087513e5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,6 +30,7 @@
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
@@ -158,6 +159,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.sie_block->epoch -= *delta;
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start += *delta;
}
}
return NOTIFY_OK;
@@ -274,16 +277,17 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
unsigned long address;
struct gmap *gmap = kvm->arch.gmap;
- down_read(&gmap->mm->mmap_sem);
/* Loop over all guest pages */
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
address = gfn_to_hva_memslot(memslot, cur_gfn);
- if (gmap_test_and_clear_dirty(address, gmap))
+ if (test_and_clear_guest_dirty(gmap->mm, address))
mark_page_dirty(kvm, cur_gfn);
+ if (fatal_signal_pending(current))
+ return;
+ cond_resched();
}
- up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
@@ -352,8 +356,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
if (atomic_read(&kvm->online_vcpus)) {
r = -EBUSY;
} else if (MACHINE_HAS_VX) {
- set_kvm_facility(kvm->arch.model.fac->mask, 129);
- set_kvm_facility(kvm->arch.model.fac->list, 129);
+ set_kvm_facility(kvm->arch.model.fac_mask, 129);
+ set_kvm_facility(kvm->arch.model.fac_list, 129);
r = 0;
} else
r = -EINVAL;
@@ -367,8 +371,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
if (atomic_read(&kvm->online_vcpus)) {
r = -EBUSY;
} else if (test_facility(64)) {
- set_kvm_facility(kvm->arch.model.fac->mask, 64);
- set_kvm_facility(kvm->arch.model.fac->list, 64);
+ set_kvm_facility(kvm->arch.model.fac_mask, 64);
+ set_kvm_facility(kvm->arch.model.fac_list, 64);
r = 0;
}
mutex_unlock(&kvm->lock);
@@ -651,7 +655,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
sizeof(struct cpuid));
kvm->arch.model.ibc = proc->ibc;
- memcpy(kvm->arch.model.fac->list, proc->fac_list,
+ memcpy(kvm->arch.model.fac_list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
} else
ret = -EFAULT;
@@ -685,7 +689,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
}
memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
proc->ibc = kvm->arch.model.ibc;
- memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
+ memcpy(&proc->fac_list, kvm->arch.model.fac_list,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
ret = -EFAULT;
kfree(proc);
@@ -705,7 +710,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
}
get_cpu_id((struct cpuid *) &mach->cpuid);
mach->ibc = sclp.ibc;
- memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
+ memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
@@ -1082,16 +1087,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
cpu_id->version = 0xff;
}
-static int kvm_s390_crypto_init(struct kvm *kvm)
+static void kvm_s390_crypto_init(struct kvm *kvm)
{
if (!test_kvm_facility(kvm, 76))
- return 0;
-
- kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
- GFP_KERNEL | GFP_DMA);
- if (!kvm->arch.crypto.crycb)
- return -ENOMEM;
+ return;
+ kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
kvm_s390_set_crycb_format(kvm);
/* Enable AES/DEA protected key functions by default */
@@ -1101,8 +1102,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
-
- return 0;
}
static void sca_dispose(struct kvm *kvm)
@@ -1156,37 +1155,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_err;
- /*
- * The architectural maximum amount of facilities is 16 kbit. To store
- * this amount, 2 kbyte of memory is required. Thus we need a full
- * page to hold the guest facility list (arch.model.fac->list) and the
- * facility mask (arch.model.fac->mask). Its address size has to be
- * 31 bits and word aligned.
- */
- kvm->arch.model.fac =
- (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
- if (!kvm->arch.model.fac)
+ kvm->arch.sie_page2 =
+ (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!kvm->arch.sie_page2)
goto out_err;
/* Populate the facility mask initially. */
- memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
+ memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
- kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
+ kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
else
- kvm->arch.model.fac->mask[i] = 0UL;
+ kvm->arch.model.fac_mask[i] = 0UL;
}
/* Populate the facility list initially. */
- memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
+ kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
+ memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
- if (kvm_s390_crypto_init(kvm) < 0)
- goto out_err;
+ kvm_s390_crypto_init(kvm);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -1222,8 +1214,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
out_err:
- kfree(kvm->arch.crypto.crycb);
- free_page((unsigned long)kvm->arch.model.fac);
+ free_page((unsigned long)kvm->arch.sie_page2);
debug_unregister(kvm->arch.dbf);
sca_dispose(kvm);
KVM_EVENT(3, "creation of vm failed: %d", rc);
@@ -1269,10 +1260,9 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
- free_page((unsigned long)kvm->arch.model.fac);
sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
- kfree(kvm->arch.crypto.crycb);
+ free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
@@ -1414,8 +1404,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_PFAULT;
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
- if (test_kvm_facility(vcpu->kvm, 129))
+ /* fprs can be synchronized via vrs, even if the guest has no vx. With
+ * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+ */
+ if (MACHINE_HAS_VX)
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+ else
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1423,6 +1418,93 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ vcpu->arch.cputm_start = 0;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_enabled);
+ vcpu->arch.cputm_enabled = true;
+ __start_cpu_timer_accounting(vcpu);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
+ __stop_cpu_timer_accounting(vcpu);
+ vcpu->arch.cputm_enabled = false;
+}
+
+static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __enable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __disable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+/* set the cpu timer - may only be called from the VCPU thread itself */
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ vcpu->arch.sie_block->cputm = cputm;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+ preempt_enable();
+}
+
+/* update and get the cpu timer - can also be called from other VCPU threads */
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned int seq;
+ __u64 value;
+
+ if (unlikely(!vcpu->arch.cputm_enabled))
+ return vcpu->arch.sie_block->cputm;
+
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ do {
+ seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
+ /*
+ * If the writer would ever execute a read in the critical
+ * section, e.g. in irq context, we have a deadlock.
+ */
+ WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
+ value = vcpu->arch.sie_block->cputm;
+ /* if cputm_start is 0, accounting is being started/stopped */
+ if (likely(vcpu->arch.cputm_start))
+ value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
+ preempt_enable();
+ return value;
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Save host register state */
@@ -1430,10 +1512,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
- /* Depending on MACHINE_HAS_VX, data stored to vrs either
- * has vector register or floating point register format.
- */
- current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ if (MACHINE_HAS_VX)
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ else
+ current->thread.fpu.regs = vcpu->run->s.regs.fprs;
current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
@@ -1443,10 +1525,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __start_cpu_timer_accounting(vcpu);
+ vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ vcpu->cpu = -1;
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __stop_cpu_timer_accounting(vcpu);
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
@@ -1468,7 +1556,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
- vcpu->arch.sie_block->cputm = 0UL;
+ kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
@@ -1538,7 +1626,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
vcpu->arch.cpu_id = model->cpu_id;
vcpu->arch.sie_block->ibc = model->ibc;
- vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+ if (test_kvm_facility(vcpu->kvm, 7))
+ vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -1616,6 +1705,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+ seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
@@ -1715,7 +1805,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
- r = put_user(vcpu->arch.sie_block->cputm,
+ r = put_user(kvm_s390_get_cpu_timer(vcpu),
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CLOCK_COMP:
@@ -1753,6 +1843,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
{
int r = -EINVAL;
+ __u64 val;
switch (reg->id) {
case KVM_REG_S390_TODPR:
@@ -1764,8 +1855,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
- r = get_user(vcpu->arch.sie_block->cputm,
- (u64 __user *)reg->addr);
+ r = get_user(val, (u64 __user *)reg->addr);
+ if (!r)
+ kvm_s390_set_cpu_timer(vcpu, val);
break;
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
@@ -2158,8 +2250,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- u8 opcode;
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = PGM_ADDRESSING,
+ };
+ u8 opcode, ilen;
int rc;
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
@@ -2173,12 +2267,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
* to look up the current opcode to get the length of the instruction
* to be able to forward the PSW.
*/
- rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
- if (rc)
- return kvm_s390_inject_prog_cond(vcpu, rc);
- psw->addr = __rewind_psw(*psw, -insn_length(opcode));
-
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = read_guest_instr(vcpu, &opcode, 1);
+ ilen = insn_length(opcode);
+ if (rc < 0) {
+ return rc;
+ } else if (rc) {
+ /* Instruction-Fetching Exceptions - we can't detect the ilen.
+ * Forward by arbitrary ilc, injection will take care of
+ * nullification if necessary.
+ */
+ pgm_info = vcpu->arch.pgm;
+ ilen = 4;
+ }
+ pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
+ kvm_s390_forward_psw(vcpu, ilen);
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -2244,10 +2347,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
*/
local_irq_disable();
__kvm_guest_enter();
+ __disable_cpu_timer_accounting(vcpu);
local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
local_irq_disable();
+ __enable_cpu_timer_accounting(vcpu);
__kvm_guest_exit();
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -2271,7 +2376,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
@@ -2293,7 +2398,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
- kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
+ kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
@@ -2325,6 +2430,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
sync_regs(vcpu, kvm_run);
+ enable_cpu_timer_accounting(vcpu);
might_fault();
rc = __vcpu_run(vcpu);
@@ -2344,6 +2450,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
+ disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
if (vcpu->sigset_active)
@@ -2364,7 +2471,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
unsigned char archmode = 1;
freg_t fprs[NUM_FPRS];
unsigned int px;
- u64 clkcomp;
+ u64 clkcomp, cputm;
int rc;
px = kvm_s390_get_prefix(vcpu);
@@ -2381,12 +2488,12 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
/* manually convert vector registers if necessary */
if (MACHINE_HAS_VX) {
- convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+ convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
fprs, 128);
} else {
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
- vcpu->run->s.regs.vrs, 128);
+ vcpu->run->s.regs.fprs, 128);
}
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
@@ -2398,8 +2505,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
&vcpu->run->s.regs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
+ cputm = kvm_s390_get_cpu_timer(vcpu);
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
- &vcpu->arch.sie_block->cputm, 8);
+ &cputm, 8);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
&clkcomp, 8);
@@ -2605,7 +2713,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_FETCH);
break;
}
r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
@@ -2616,7 +2725,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
break;
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_STORE);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index df1abada1f36..8621ab00ec8e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,6 +19,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/facility.h>
+#include <asm/processor.h>
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
@@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
+static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT;
+}
+
static inline int kvm_is_ucontrol(struct kvm *kvm)
{
#ifdef CONFIG_KVM_S390_UCONTROL
@@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
/* test availability of facility in a kvm instance */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
- return __test_facility(nr, kvm->arch.model.fac->mask) &&
- __test_facility(nr, kvm->arch.model.fac->list);
+ return __test_facility(nr, kvm->arch.model.fac_mask) &&
+ __test_facility(nr, kvm->arch.model.fac_list);
}
static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
@@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm,
int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in intercept.c */
-void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu);
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+ sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen);
+}
+static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ kvm_s390_rewind_psw(vcpu, -ilen);
+}
+static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
+}
/* implemented in priv.c */
int is_valid_psw(psw_t *psw);
@@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
unsigned long kvm_s390_fac_list_mask_size(void);
extern unsigned long kvm_s390_fac_list_mask[];
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ed74e86d9b9e..0a1591d3d25d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -23,6 +23,7 @@
#include <asm/sysinfo.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/gmap.h>
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
@@ -173,7 +174,7 @@ static int handle_skey(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
@@ -184,7 +185,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
if (psw_bits(vcpu->arch.sie_block->gpsw).p)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
}
@@ -354,7 +355,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
* We need to shift the lower 32 facility bits (bit 0-31) from a u64
* into a u32 memory representation. They will remain bits 0-31.
*/
- fac = *vcpu->kvm->arch.model.fac->list >> 32;
+ fac = *vcpu->kvm->arch.model.fac_list >> 32;
rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list),
&fac, sizeof(fac));
if (rc)
@@ -759,8 +760,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Rewind PSW to repeat the ESSA instruction */
- kvm_s390_rewind_psw(vcpu, 4);
+ /* Retry the ESSA instruction */
+ kvm_s390_retry_instr(vcpu);
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
@@ -981,11 +982,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
ipte_lock(vcpu);
- ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
if (ret == PGM_PROTECTION) {
/* Write protected? Try again with read-only... */
cc = 1;
- ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa,
+ GACC_FETCH);
}
if (ret) {
if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 0e8fefe5b0ce..1d1af31e8354 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,7 @@
#
lib-y += delay.o string.o uaccess.o find.o
-obj-y += mem.o
+obj-y += mem.o xor.o
lib-$(CONFIG_SMP) += spinlock.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
new file mode 100644
index 000000000000..7d94e3ec34a9
--- /dev/null
+++ b/arch/s390/lib/xor.c
@@ -0,0 +1,134 @@
+/*
+ * Optimized xor_block operation for RAID4/5
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/raid/xor.h>
+
+static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ "3:\n"
+ : : "d" (bytes), "a" (p1), "a" (p2)
+ : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ /* Get around a gcc oddity */
+ register unsigned long *reg7 asm ("7") = p5;
+
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " xc 0(256,%1),0(%5)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " la %5,256(%5)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " ex %0,18(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ " xc 0(1,%1),0(%5)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
+ "+a" (reg7)
+ : : "0", "1", "cc", "memory");
+}
+
+struct xor_block_template xor_block_xc = {
+ .name = "xc",
+ .do_2 = xor_xc_2,
+ .do_3 = xor_xc_3,
+ .do_4 = xor_xc_4,
+ .do_5 = xor_xc_5,
+};
+EXPORT_SYMBOL(xor_block_xc);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 839592ca265c..2ae54cad2b6a 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,9 +2,11 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
-obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
+obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o
+obj-y += pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_PGSTE) += gmap.o
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index a1bf4ad8925d..02042b6b66bf 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -265,7 +265,7 @@ query_segment_type (struct dcss_segment *seg)
goto out_free;
}
if (diag_cc > 1) {
- pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc);
+ pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc);
rc = dcss_diag_translate_rc (vmrc);
goto out_free;
}
@@ -457,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
goto out_resource;
}
if (diag_cc > 1) {
- pr_warning("Loading DCSS %s failed with rc=%ld\n", name,
- end_addr);
+ pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
@@ -574,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_unlock;
}
if (atomic_read (&seg->ref_count) != 1) {
- pr_warning("DCSS %s is in use and cannot be reloaded\n",
- name);
+ pr_warn("DCSS %s is in use and cannot be reloaded\n", name);
rc = -EAGAIN;
goto out_unlock;
}
@@ -588,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared)
seg->res->flags |= IORESOURCE_READONLY;
if (request_resource(&iomem_resource, seg->res)) {
- pr_warning("DCSS %s overlaps with used memory resources "
- "and cannot be reloaded\n", name);
+ pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n",
+ name);
rc = -EBUSY;
kfree(seg->res);
goto out_del_mem;
@@ -607,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_del_res;
}
if (diag_cc > 1) {
- pr_warning("Reloading DCSS %s failed with rc=%ld\n", name,
- end_addr);
+ pr_warn("Reloading DCSS %s failed with rc=%ld\n",
+ name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
goto out_del_res;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 791a4146052c..cce577feab1e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
@@ -183,6 +184,8 @@ static void dump_fault_info(struct pt_regs *regs)
{
unsigned long asce;
+ pr_alert("Failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
pr_alert("Fault in ");
switch (regs->int_parm_long & 3) {
case 3:
@@ -218,7 +221,9 @@ static void dump_fault_info(struct pt_regs *regs)
dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
}
-static inline void report_user_fault(struct pt_regs *regs, long signr)
+int show_unhandled_signals = 1;
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
return;
@@ -230,9 +235,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
regs->int_code & 0xffff, regs->int_code >> 17);
print_vma_addr(KERN_CONT "in ", regs->psw.addr);
printk(KERN_CONT "\n");
- printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
- regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
- dump_fault_info(regs);
+ if (is_mm_fault)
+ dump_fault_info(regs);
show_regs(regs);
}
@@ -244,7 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
struct siginfo si;
- report_user_fault(regs, SIGSEGV);
+ report_user_fault(regs, SIGSEGV, 1);
si.si_signo = SIGSEGV;
si.si_code = si_code;
si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
@@ -272,8 +276,6 @@ static noinline void do_no_context(struct pt_regs *regs)
else
printk(KERN_ALERT "Unable to handle kernel paging request"
" in virtual user address space\n");
- printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
- regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
new file mode 100644
index 000000000000..69247b4dcc43
--- /dev/null
+++ b/arch/s390/mm/gmap.c
@@ -0,0 +1,774 @@
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+{
+ struct gmap *gmap;
+ struct page *page;
+ unsigned long *table;
+ unsigned long etype, atype;
+
+ if (limit < (1UL << 31)) {
+ limit = (1UL << 31) - 1;
+ atype = _ASCE_TYPE_SEGMENT;
+ etype = _SEGMENT_ENTRY_EMPTY;
+ } else if (limit < (1UL << 42)) {
+ limit = (1UL << 42) - 1;
+ atype = _ASCE_TYPE_REGION3;
+ etype = _REGION3_ENTRY_EMPTY;
+ } else if (limit < (1UL << 53)) {
+ limit = (1UL << 53) - 1;
+ atype = _ASCE_TYPE_REGION2;
+ etype = _REGION2_ENTRY_EMPTY;
+ } else {
+ limit = -1UL;
+ atype = _ASCE_TYPE_REGION1;
+ etype = _REGION1_ENTRY_EMPTY;
+ }
+ gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+ if (!gmap)
+ goto out;
+ INIT_LIST_HEAD(&gmap->crst_list);
+ INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+ INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+ spin_lock_init(&gmap->guest_table_lock);
+ gmap->mm = mm;
+ page = alloc_pages(GFP_KERNEL, 2);
+ if (!page)
+ goto out_free;
+ page->index = 0;
+ list_add(&page->lru, &gmap->crst_list);
+ table = (unsigned long *) page_to_phys(page);
+ crst_table_init(table, etype);
+ gmap->table = table;
+ gmap->asce = atype | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ gmap->asce_end = limit;
+ down_write(&mm->mmap_sem);
+ list_add(&gmap->list, &mm->context.gmap_list);
+ up_write(&mm->mmap_sem);
+ return gmap;
+
+out_free:
+ kfree(gmap);
+out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(gmap_alloc);
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, gmap->asce);
+ else
+ __tlb_flush_global();
+}
+
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ radix_tree_delete(root, index);
+ }
+ } while (nr > 0);
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_free(struct gmap *gmap)
+{
+ struct page *page, *next;
+
+ /* Flush tlb. */
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, gmap->asce);
+ else
+ __tlb_flush_global();
+
+ /* Free all segment & region tables. */
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
+ __free_pages(page, 2);
+ gmap_radix_tree_free(&gmap->guest_to_host);
+ gmap_radix_tree_free(&gmap->host_to_guest);
+ down_write(&gmap->mm->mmap_sem);
+ list_del(&gmap->list);
+ up_write(&gmap->mm->mmap_sem);
+ kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+/*
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ */
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+ unsigned long init, unsigned long gaddr)
+{
+ struct page *page;
+ unsigned long *new;
+
+ /* since we dont free the gmap table until gmap_free we can unlock */
+ page = alloc_pages(GFP_KERNEL, 2);
+ if (!page)
+ return -ENOMEM;
+ new = (unsigned long *) page_to_phys(page);
+ crst_table_init(new, init);
+ spin_lock(&gmap->mm->page_table_lock);
+ if (*table & _REGION_ENTRY_INVALID) {
+ list_add(&page->lru, &gmap->crst_list);
+ *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+ (*table & _REGION_ENTRY_TYPE_MASK);
+ page->index = gaddr;
+ page = NULL;
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ if (page)
+ __free_pages(page, 2);
+ return 0;
+}
+
+/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+ struct page *page;
+ unsigned long offset, mask;
+
+ offset = (unsigned long) entry / sizeof(unsigned long);
+ offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+ mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+ page = virt_to_page((void *)((unsigned long) entry & mask));
+ return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+ unsigned long *entry;
+ int flush = 0;
+
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+ if (entry) {
+ flush = (*entry != _SEGMENT_ENTRY_INVALID);
+ *entry = _SEGMENT_ENTRY_INVALID;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ if ((to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE)
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ if ((from | to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || from + len < from || to + len < to ||
+ from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Remove old translation */
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ /* Store new translation */
+ if (radix_tree_insert(&gmap->guest_to_host,
+ (to + off) >> PMD_SHIFT,
+ (void *) from + off))
+ break;
+ }
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ if (off >= len)
+ return 0;
+ gmap_unmap_segment(gmap, to, len);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long rc;
+
+ down_read(&gmap->mm->mmap_sem);
+ rc = __gmap_translate(gmap, gaddr);
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct gmap *gmap;
+ int flush;
+
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ }
+}
+
+/**
+ * gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ unsigned long *table;
+ spinlock_t *ptl;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ int rc;
+
+ /* Create higher level tables in the gmap page table */
+ table = gmap->table;
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+ table += (gaddr >> 53) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+ gaddr & 0xffe0000000000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+ table += (gaddr >> 42) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+ gaddr & 0xfffffc0000000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+ table += (gaddr >> 31) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+ gaddr & 0xffffffff80000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ table += (gaddr >> 20) & 0x7ff;
+ /* Walk the parent mm page table */
+ mm = gmap->mm;
+ pgd = pgd_offset(mm, vmaddr);
+ VM_BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
+ /* large pmds cannot yet be handled */
+ if (pmd_large(*pmd))
+ return -EFAULT;
+ /* Link gmap segment table entry location to page table. */
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ return rc;
+ ptl = pmd_lock(mm, pmd);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table == _SEGMENT_ENTRY_INVALID) {
+ rc = radix_tree_insert(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT, table);
+ if (!rc)
+ *table = pmd_val(*pmd);
+ } else
+ rc = 0;
+ spin_unlock(&gmap->guest_table_lock);
+ spin_unlock(ptl);
+ radix_tree_preload_end();
+ return rc;
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
+{
+ unsigned long vmaddr;
+ int rc;
+ bool unlocked;
+
+ down_read(&gmap->mm->mmap_sem);
+
+retry:
+ unlocked = false;
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ goto out_up;
+ }
+ if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ &unlocked)) {
+ rc = -EFAULT;
+ goto out_up;
+ }
+ /*
+ * In the case that fixup_user_fault unlocked the mmap_sem during
+ * faultin redo __gmap_translate to not race with a map/unmap_segment.
+ */
+ if (unlocked)
+ goto retry;
+
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (vmaddr) {
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+ if (likely(ptep))
+ ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
+ pte_unmap_unlock(ptep, ptl);
+ }
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+{
+ unsigned long gaddr, vmaddr, size;
+ struct vm_area_struct *vma;
+
+ down_read(&gmap->mm->mmap_sem);
+ for (gaddr = from; gaddr < to;
+ gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
+ continue;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Find vma in the parent mm */
+ vma = find_vma(gmap->mm, vmaddr);
+ size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+ zap_page_range(vma, vmaddr, size, NULL);
+ }
+ up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_add(&nb->list, &gmap_notifier_list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_del_init(&nb->list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists and
+ * the invalidation notification could be set. If the gmap mapping is missing
+ * for one or more pages -EFAULT is returned. If no memory could be allocated
+ * -ENOMEM is returned. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+{
+ unsigned long addr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+ bool unlocked;
+ int rc = 0;
+
+ if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
+ return -EINVAL;
+ down_read(&gmap->mm->mmap_sem);
+ while (len) {
+ unlocked = false;
+ /* Convert gmap address and connect the page tables */
+ addr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(addr)) {
+ rc = addr;
+ break;
+ }
+ /* Get the page mapped */
+ if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
+ &unlocked)) {
+ rc = -EFAULT;
+ break;
+ }
+ /* While trying to map mmap_sem got unlocked. Let us retry */
+ if (unlocked)
+ continue;
+ rc = __gmap_link(gmap, gaddr, addr);
+ if (rc)
+ break;
+ /* Walk the process page table, lock and get pte pointer */
+ ptep = get_locked_pte(gmap->mm, addr, &ptl);
+ VM_BUG_ON(!ptep);
+ /* Set notification bit in the pgste of the pte */
+ if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
+ ptep_set_notify(gmap->mm, addr, ptep);
+ gaddr += PAGE_SIZE;
+ len -= PAGE_SIZE;
+ }
+ pte_unmap_unlock(ptep, ptl);
+ }
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * ptep_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
+{
+ unsigned long offset, gaddr;
+ unsigned long *table;
+ struct gmap_notifier *nb;
+ struct gmap *gmap;
+
+ offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ offset = offset * (4096 / sizeof(pte_t));
+ spin_lock(&gmap_notifier_lock);
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ table = radix_tree_lookup(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (!table)
+ continue;
+ gaddr = __gmap_segment_gaddr(table) + offset;
+ list_for_each_entry(nb, &gmap_notifier_list, list)
+ nb->notifier_call(gmap, gaddr);
+ }
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(ptep_notify);
+
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct vm_area_struct *vma;
+ unsigned long addr;
+
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (addr = vma->vm_start;
+ addr < vma->vm_end;
+ addr += PAGE_SIZE)
+ follow_page(vma, addr, FOLL_SPLIT);
+ vma->vm_flags &= ~VM_HUGEPAGE;
+ vma->vm_flags |= VM_NOHUGEPAGE;
+ }
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct mm_struct *mm = current->mm;
+
+ /* Do we have pgstes? if yes, we are done */
+ if (mm_has_pgste(mm))
+ return 0;
+ /* Fail if the page tables are 2K */
+ if (!mm_alloc_pgste(mm))
+ return -EINVAL;
+ down_write(&mm->mmap_sem);
+ mm->context.has_pgste = 1;
+ /* split thp mappings and disable thp for future mappings */
+ thp_split_mm(mm);
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+static int __s390_enable_skey(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ /*
+ * Remove all zero page mappings,
+ * after establishing a policy to forbid zero page mappings
+ * following faults for that page will get fresh anonymous pages
+ */
+ if (is_zero_pfn(pte_pfn(*pte)))
+ ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
+ /* Clear storage key */
+ ptep_zap_key(walk->mm, addr, pte);
+ return 0;
+}
+
+int s390_enable_skey(void)
+{
+ struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc = 0;
+
+ down_write(&mm->mmap_sem);
+ if (mm_use_skey(mm))
+ goto out_up;
+
+ mm->context.use_skey = 1;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags)) {
+ mm->context.use_skey = 0;
+ rc = -ENOMEM;
+ goto out_up;
+ }
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ ptep_zap_unused(walk->mm, addr, pte, 1);
+ return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
+
+ down_write(&mm->mmap_sem);
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+ up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f81096b6940d..1b5e8983f4f3 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pmd_t *pmdp = (pmd_t *) ptep;
- pte_t pte = huge_ptep_get(ptep);
+ pmd_t old;
- pmdp_flush_direct(mm, addr, pmdp);
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
- return pte;
+ old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ return __pmd_to_pte(old);
}
pte_t *huge_pte_alloc(struct mm_struct *mm,
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 749c98407b41..f2a5c29a97e9 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr)
static void change_page_attr(unsigned long addr, int numpages,
pte_t (*set) (pte_t))
{
- pte_t *ptep, pte;
+ pte_t *ptep;
int i;
for (i = 0; i < numpages; i++) {
ptep = walk_page_table(addr);
if (WARN_ON_ONCE(!ptep))
break;
- pte = *ptep;
- pte = set(pte);
- __ptep_ipte(addr, ptep);
- *ptep = pte;
+ *ptep = set(*ptep);
addr += PAGE_SIZE;
}
+ __tlb_flush_kernel();
}
int set_memory_ro(unsigned long addr, int numpages)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
new file mode 100644
index 000000000000..f6c3de26cda8
--- /dev/null
+++ b/arch/s390/mm/pgalloc.c
@@ -0,0 +1,360 @@
+/*
+ * Page table allocation functions
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PGSTE
+
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+ {
+ .procname = "allocate_pgste",
+ .data = &page_table_allocate_pgste,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
+ .extra1 = &page_table_allocate_pgste_min,
+ .extra2 = &page_table_allocate_pgste_max,
+ },
+ { }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+ {
+ .procname = "vm",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = page_table_sysctl,
+ },
+ { }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+ return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
+#endif /* CONFIG_PGSTE */
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
+{
+ struct page *page = alloc_pages(GFP_KERNEL, 2);
+
+ if (!page)
+ return NULL;
+ return (unsigned long *) page_to_phys(page);
+}
+
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ free_pages((unsigned long) table, 2);
+}
+
+static void __crst_table_upgrade(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ set_user_asce(mm);
+ }
+ __tlb_flush_local();
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+ unsigned long *table, *pgd;
+ unsigned long entry;
+ int flush;
+
+ BUG_ON(limit > TASK_MAX_SIZE);
+ flush = 0;
+repeat:
+ table = crst_table_alloc(mm);
+ if (!table)
+ return -ENOMEM;
+ spin_lock_bh(&mm->page_table_lock);
+ if (mm->context.asce_limit < limit) {
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit <= (1UL << 31)) {
+ entry = _REGION3_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ } else {
+ entry = _REGION2_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION2;
+ }
+ crst_table_init(table, entry);
+ pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->task_size = mm->context.asce_limit;
+ table = NULL;
+ flush = 1;
+ }
+ spin_unlock_bh(&mm->page_table_lock);
+ if (table)
+ crst_table_free(mm, table);
+ if (mm->context.asce_limit < limit)
+ goto repeat;
+ if (flush)
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+ return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+ pgd_t *pgd;
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ __tlb_flush_mm(mm);
+ }
+ while (mm->context.asce_limit > limit) {
+ pgd = mm->pgd;
+ switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+ case _REGION_ENTRY_TYPE_R2:
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ break;
+ case _REGION_ENTRY_TYPE_R3:
+ mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_SEGMENT;
+ break;
+ default:
+ BUG();
+ }
+ mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ mm->task_size = mm->context.asce_limit;
+ crst_table_free(mm, (unsigned long *) pgd);
+ }
+ if (current->active_mm == mm)
+ set_user_asce(mm);
+}
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+ unsigned int old, new;
+
+ do {
+ old = atomic_read(v);
+ new = old ^ bits;
+ } while (atomic_cmpxchg(v, old, new) != old);
+ return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
+{
+ unsigned long *table;
+ struct page *page;
+ unsigned int mask, bit;
+
+ /* Try to get a fragment of a 4K page as a 2K page table */
+ if (!mm_alloc_pgste(mm)) {
+ table = NULL;
+ spin_lock_bh(&mm->context.list_lock);
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ mask = atomic_read(&page->_mapcount);
+ mask = (mask | (mask >> 4)) & 3;
+ if (mask != 3) {
+ table = (unsigned long *) page_to_phys(page);
+ bit = mask & 1; /* =1 -> second 2K */
+ if (bit)
+ table += PTRS_PER_PTE;
+ atomic_xor_bits(&page->_mapcount, 1U << bit);
+ list_del(&page->lru);
+ }
+ }
+ spin_unlock_bh(&mm->context.list_lock);
+ if (table)
+ return table;
+ }
+ /* Allocate a fresh page */
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ /* Initialize page table */
+ table = (unsigned long *) page_to_phys(page);
+ if (mm_alloc_pgste(mm)) {
+ /* Return 4K page table with PGSTEs */
+ atomic_set(&page->_mapcount, 3);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ } else {
+ /* Return the first 2K fragment of the page */
+ atomic_set(&page->_mapcount, 1);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE);
+ spin_lock_bh(&mm->context.list_lock);
+ list_add(&page->lru, &mm->context.pgtable_list);
+ spin_unlock_bh(&mm->context.list_lock);
+ }
+ return table;
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ struct page *page;
+ unsigned int bit, mask;
+
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (!mm_alloc_pgste(mm)) {
+ /* Free 2K page table fragment of a 4K page */
+ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask != 0)
+ return;
+ }
+
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ struct page *page;
+ unsigned int bit, mask;
+
+ mm = tlb->mm;
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (mm_alloc_pgste(mm)) {
+ gmap_unlink(mm, table, vmaddr);
+ table = (unsigned long *) (__pa(table) | 3);
+ tlb_remove_table(tlb, table);
+ return;
+ }
+ bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+ if (mask & 3)
+ list_add_tail(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ table = (unsigned long *) (__pa(table) | (1U << bit));
+ tlb_remove_table(tlb, table);
+}
+
+static void __tlb_remove_table(void *_table)
+{
+ unsigned int mask = (unsigned long) _table & 3;
+ void *table = (void *)((unsigned long) _table ^ mask);
+ struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+ switch (mask) {
+ case 0: /* pmd or pud */
+ free_pages((unsigned long) table, 2);
+ break;
+ case 1: /* lower 2K of a 4K page table */
+ case 2: /* higher 2K of a 4K page table */
+ if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+ break;
+ /* fallthrough */
+ case 3: /* 4K page table with pgstes */
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+ break;
+ }
+}
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely
+ * on IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->mm->context.flush_mm = 1;
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)
+ __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+ if (*batch == NULL) {
+ __tlb_flush_mm_lazy(tlb->mm);
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_flush_mmu(tlb);
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5109827883ac..4324b87f9398 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,591 +24,397 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-unsigned long *crst_table_alloc(struct mm_struct *mm)
-{
- struct page *page = alloc_pages(GFP_KERNEL, 2);
-
- if (!page)
- return NULL;
- return (unsigned long *) page_to_phys(page);
+static inline pte_t ptep_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ int active, count;
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_ipte_local(addr, ptep);
+ else
+ __ptep_ipte(addr, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-void crst_table_free(struct mm_struct *mm, unsigned long *table)
+static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- free_pages((unsigned long) table, 2);
+ int active, count;
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pte_val(*ptep) |= _PAGE_INVALID;
+ mm->context.flush_mm = 1;
+ } else
+ __ptep_ipte(addr, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-static void __crst_table_upgrade(void *arg)
+static inline pgste_t pgste_get_lock(pte_t *ptep)
{
- struct mm_struct *mm = arg;
+ unsigned long new = 0;
+#ifdef CONFIG_PGSTE
+ unsigned long old;
- if (current->active_mm == mm) {
- clear_user_asce();
- set_user_asce(mm);
- }
- __tlb_flush_local();
+ preempt_disable();
+ asm(
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
+ " csg %0,%1,%2\n"
+ " jl 0b\n"
+ : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+ : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+ return __pgste(new);
}
-int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
- unsigned long *table, *pgd;
- unsigned long entry;
- int flush;
-
- BUG_ON(limit > TASK_MAX_SIZE);
- flush = 0;
-repeat:
- table = crst_table_alloc(mm);
- if (!table)
- return -ENOMEM;
- spin_lock_bh(&mm->page_table_lock);
- if (mm->context.asce_limit < limit) {
- pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit <= (1UL << 31)) {
- entry = _REGION3_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- } else {
- entry = _REGION2_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 53;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION2;
- }
- crst_table_init(table, entry);
- pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->task_size = mm->context.asce_limit;
- table = NULL;
- flush = 1;
- }
- spin_unlock_bh(&mm->page_table_lock);
- if (table)
- crst_table_free(mm, table);
- if (mm->context.asce_limit < limit)
- goto repeat;
- if (flush)
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return 0;
+#ifdef CONFIG_PGSTE
+ asm(
+ " nihh %1,0xff7f\n" /* clear PCL bit */
+ " stg %1,%0\n"
+ : "=Q" (ptep[PTRS_PER_PTE])
+ : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+ : "cc", "memory");
+ preempt_enable();
+#endif
}
-void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_get(pte_t *ptep)
{
- pgd_t *pgd;
-
- if (current->active_mm == mm) {
- clear_user_asce();
- __tlb_flush_mm(mm);
- }
- while (mm->context.asce_limit > limit) {
- pgd = mm->pgd;
- switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
- case _REGION_ENTRY_TYPE_R2:
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- break;
- case _REGION_ENTRY_TYPE_R3:
- mm->context.asce_limit = 1UL << 31;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_SEGMENT;
- break;
- default:
- BUG();
- }
- mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->task_size = mm->context.asce_limit;
- crst_table_free(mm, (unsigned long *) pgd);
- }
- if (current->active_mm == mm)
- set_user_asce(mm);
+ unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+ return __pgste(pgste);
}
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
#ifdef CONFIG_PGSTE
+ *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
-/**
- * gmap_alloc - allocate a guest address space
- * @mm: pointer to the parent mm_struct
- * @limit: maximum address of the gmap address space
- *
- * Returns a guest address space structure.
- */
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
+ struct mm_struct *mm)
{
- struct gmap *gmap;
- struct page *page;
- unsigned long *table;
- unsigned long etype, atype;
-
- if (limit < (1UL << 31)) {
- limit = (1UL << 31) - 1;
- atype = _ASCE_TYPE_SEGMENT;
- etype = _SEGMENT_ENTRY_EMPTY;
- } else if (limit < (1UL << 42)) {
- limit = (1UL << 42) - 1;
- atype = _ASCE_TYPE_REGION3;
- etype = _REGION3_ENTRY_EMPTY;
- } else if (limit < (1UL << 53)) {
- limit = (1UL << 53) - 1;
- atype = _ASCE_TYPE_REGION2;
- etype = _REGION2_ENTRY_EMPTY;
- } else {
- limit = -1UL;
- atype = _ASCE_TYPE_REGION1;
- etype = _REGION1_ENTRY_EMPTY;
- }
- gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
- if (!gmap)
- goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
- INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
- INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
- spin_lock_init(&gmap->guest_table_lock);
- gmap->mm = mm;
- page = alloc_pages(GFP_KERNEL, 2);
- if (!page)
- goto out_free;
- page->index = 0;
- list_add(&page->lru, &gmap->crst_list);
- table = (unsigned long *) page_to_phys(page);
- crst_table_init(table, etype);
- gmap->table = table;
- gmap->asce = atype | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | __pa(table);
- gmap->asce_end = limit;
- down_write(&mm->mmap_sem);
- list_add(&gmap->list, &mm->context.gmap_list);
- up_write(&mm->mmap_sem);
- return gmap;
-
-out_free:
- kfree(gmap);
-out:
- return NULL;
+#ifdef CONFIG_PGSTE
+ unsigned long address, bits, skey;
+
+ if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
+ return pgste;
+ address = pte_val(pte) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ /* Transfer page changed & referenced bit to guest bits in pgste */
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ /* Copy page access key and fetch protection bit to pgste */
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+ return pgste;
+
}
-EXPORT_SYMBOL_GPL(gmap_alloc);
-static void gmap_flush_tlb(struct gmap *gmap)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+ struct mm_struct *mm)
{
- if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, gmap->asce);
- else
- __tlb_flush_global();
+#ifdef CONFIG_PGSTE
+ unsigned long address;
+ unsigned long nkey;
+
+ if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+ return;
+ VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+ address = pte_val(entry) & PAGE_MASK;
+ /*
+ * Set page access key and fetch protection bit from pgste.
+ * The guest C/R information is still in the PGSTE, set real
+ * key C/R to 0.
+ */
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+ page_set_storage_key(address, nkey, 0);
+#endif
}
-static void gmap_radix_tree_free(struct radix_tree_root *root)
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
- struct radix_tree_iter iter;
- unsigned long indices[16];
- unsigned long index;
- void **slot;
- int i, nr;
-
- /* A radix tree is freed by deleting all of its entries */
- index = 0;
- do {
- nr = 0;
- radix_tree_for_each_slot(slot, root, &iter, index) {
- indices[nr] = iter.index;
- if (++nr == 16)
- break;
- }
- for (i = 0; i < nr; i++) {
- index = indices[i];
- radix_tree_delete(root, index);
+#ifdef CONFIG_PGSTE
+ if ((pte_val(entry) & _PAGE_PRESENT) &&
+ (pte_val(entry) & _PAGE_WRITE) &&
+ !(pte_val(entry) & _PAGE_INVALID)) {
+ if (!MACHINE_HAS_ESOP) {
+ /*
+ * Without enhanced suppression-on-protection force
+ * the dirty bit on for all writable ptes.
+ */
+ pte_val(entry) |= _PAGE_DIRTY;
+ pte_val(entry) &= ~_PAGE_PROTECT;
}
- } while (nr > 0);
+ if (!(pte_val(entry) & _PAGE_PROTECT))
+ /* This pte allows write access, set user-dirty */
+ pgste_val(pgste) |= PGSTE_UC_BIT;
+ }
+#endif
+ *ptep = entry;
+ return pgste;
}
-/**
- * gmap_free - free a guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_free(struct gmap *gmap)
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, pgste_t pgste)
{
- struct page *page, *next;
-
- /* Flush tlb. */
- if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, gmap->asce);
- else
- __tlb_flush_global();
-
- /* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, 2);
- gmap_radix_tree_free(&gmap->guest_to_host);
- gmap_radix_tree_free(&gmap->host_to_guest);
- down_write(&gmap->mm->mmap_sem);
- list_del(&gmap->list);
- up_write(&gmap->mm->mmap_sem);
- kfree(gmap);
+#ifdef CONFIG_PGSTE
+ if (pgste_val(pgste) & PGSTE_IN_BIT) {
+ pgste_val(pgste) &= ~PGSTE_IN_BIT;
+ ptep_notify(mm, addr, ptep);
+ }
+#endif
+ return pgste;
}
-EXPORT_SYMBOL_GPL(gmap_free);
-/**
- * gmap_enable - switch primary space to the guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_enable(struct gmap *gmap)
+static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- S390_lowcore.gmap = (unsigned long) gmap;
+ pgste_t pgste = __pgste(0);
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+ }
+ return pgste;
}
-EXPORT_SYMBOL_GPL(gmap_enable);
-/**
- * gmap_disable - switch back to the standard primary address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_disable(struct gmap *gmap)
+static inline void ptep_xchg_commit(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pgste_t pgste, pte_t old, pte_t new)
{
- S390_lowcore.gmap = 0UL;
+ if (mm_has_pgste(mm)) {
+ if (pte_val(old) & _PAGE_INVALID)
+ pgste_set_key(ptep, pgste, new, mm);
+ if (pte_val(new) & _PAGE_INVALID) {
+ pgste = pgste_update_all(old, pgste, mm);
+ if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+ _PGSTE_GPS_USAGE_UNUSED)
+ pte_val(old) |= _PAGE_UNUSED;
+ }
+ pgste = pgste_set_pte(ptep, pgste, new);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = new;
+ }
}
-EXPORT_SYMBOL_GPL(gmap_disable);
-/*
- * gmap_alloc_table is assumed to be called with mmap_sem held
- */
-static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
- unsigned long init, unsigned long gaddr)
+pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
{
- struct page *page;
- unsigned long *new;
-
- /* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, 2);
- if (!page)
- return -ENOMEM;
- new = (unsigned long *) page_to_phys(page);
- crst_table_init(new, init);
- spin_lock(&gmap->mm->page_table_lock);
- if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
- *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
- (*table & _REGION_ENTRY_TYPE_MASK);
- page->index = gaddr;
- page = NULL;
- }
- spin_unlock(&gmap->mm->page_table_lock);
- if (page)
- __free_pages(page, 2);
- return 0;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_direct(mm, addr, ptep);
+ ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ return old;
}
+EXPORT_SYMBOL(ptep_xchg_direct);
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
{
- struct page *page;
- unsigned long offset, mask;
-
- offset = (unsigned long) entry / sizeof(unsigned long);
- offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
- page = virt_to_page((void *)((unsigned long) entry & mask));
- return page->index + offset;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_lazy(mm, addr, ptep);
+ ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ return old;
}
+EXPORT_SYMBOL(ptep_xchg_lazy);
-/**
- * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
- * @gmap: pointer to the guest address space structure
- * @vmaddr: address in the host process address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- unsigned long *entry;
- int flush = 0;
-
- spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
- if (entry) {
- flush = (*entry != _SEGMENT_ENTRY_INVALID);
- *entry = _SEGMENT_ENTRY_INVALID;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_lazy(mm, addr, ptep);
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_update_all(old, pgste, mm);
+ pgste_set(ptep, pgste);
}
- spin_unlock(&gmap->guest_table_lock);
- return flush;
+ return old;
}
+EXPORT_SYMBOL(ptep_modify_prot_start);
-/**
- * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
- * @gmap: pointer to the guest address space structure
- * @gaddr: address in the guest address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
- unsigned long vmaddr;
+ pgste_t pgste;
- vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get(ptep);
+ pgste_set_key(ptep, pgste, pte, mm);
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = pte;
+ }
}
+EXPORT_SYMBOL(ptep_modify_prot_commit);
-/**
- * gmap_unmap_segment - unmap segment from the guest address space
- * @gmap: pointer to the guest address space structure
- * @to: address in the guest address space
- * @len: length of the memory area to unmap
- *
- * Returns 0 if the unmap succeeded, -EINVAL if not.
- */
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
{
- unsigned long off;
- int flush;
-
- if ((to | len) & (PMD_SIZE - 1))
- return -EINVAL;
- if (len == 0 || to + len < to)
- return -EINVAL;
-
- flush = 0;
- down_write(&gmap->mm->mmap_sem);
- for (off = 0; off < len; off += PMD_SIZE)
- flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- up_write(&gmap->mm->mmap_sem);
- if (flush)
- gmap_flush_tlb(gmap);
- return 0;
+ int active, count;
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ if (!MACHINE_HAS_IDTE) {
+ __pmdp_csp(pmdp);
+ return old;
+ }
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __pmdp_idte_local(addr, pmdp);
+ else
+ __pmdp_idte(addr, pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
+}
+
+static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ int active, count;
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+ mm->context.flush_mm = 1;
+ } else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(addr, pmdp);
+ else
+ __pmdp_csp(pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-EXPORT_SYMBOL_GPL(gmap_unmap_segment);
-
-/**
- * gmap_mmap_segment - map a segment to the guest address space
- * @gmap: pointer to the guest address space structure
- * @from: source address in the parent address space
- * @to: target address in the guest address space
- * @len: length of the memory area to map
- *
- * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
- */
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
- unsigned long to, unsigned long len)
+
+pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
{
- unsigned long off;
- int flush;
-
- if ((from | to | len) & (PMD_SIZE - 1))
- return -EINVAL;
- if (len == 0 || from + len < from || to + len < to ||
- from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
- return -EINVAL;
-
- flush = 0;
- down_write(&gmap->mm->mmap_sem);
- for (off = 0; off < len; off += PMD_SIZE) {
- /* Remove old translation */
- flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- /* Store new translation */
- if (radix_tree_insert(&gmap->guest_to_host,
- (to + off) >> PMD_SHIFT,
- (void *) from + off))
- break;
- }
- up_write(&gmap->mm->mmap_sem);
- if (flush)
- gmap_flush_tlb(gmap);
- if (off >= len)
- return 0;
- gmap_unmap_segment(gmap, to, len);
- return -ENOMEM;
+ pmd_t old;
+
+ old = pmdp_flush_direct(mm, addr, pmdp);
+ *pmdp = new;
+ return old;
}
-EXPORT_SYMBOL_GPL(gmap_map_segment);
-
-/**
- * __gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_direct);
+
+pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
{
- unsigned long vmaddr;
+ pmd_t old;
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
- return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+ old = pmdp_flush_lazy(mm, addr, pmdp);
+ *pmdp = new;
+ return old;
}
-EXPORT_SYMBOL_GPL(__gmap_translate);
-
-/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_lazy);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
{
- unsigned long rc;
+ struct list_head *lh = (struct list_head *) pgtable;
- down_read(&gmap->mm->mmap_sem);
- rc = __gmap_translate(gmap, gaddr);
- up_read(&gmap->mm->mmap_sem);
- return rc;
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
}
-EXPORT_SYMBOL_GPL(gmap_translate);
-/**
- * gmap_unlink - disconnect a page table from the gmap shadow tables
- * @gmap: pointer to guest mapping meta data structure
- * @table: pointer to the host page table
- * @vmaddr: vm address associated with the host page table
- */
-static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
- unsigned long vmaddr)
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
- struct gmap *gmap;
- int flush;
+ struct list_head *lh;
+ pgtable_t pgtable;
+ pte_t *ptep;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
- list_for_each_entry(gmap, &mm->context.gmap_list, list) {
- flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
- if (flush)
- gmap_flush_tlb(gmap);
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
}
+ ptep = (pte_t *) pgtable;
+ pte_val(*ptep) = _PAGE_INVALID;
+ ptep++;
+ pte_val(*ptep) = _PAGE_INVALID;
+ return pgtable;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-/**
- * gmap_link - set up shadow page tables to connect a host to a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @vmaddr: vm address
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+#ifdef CONFIG_PGSTE
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
{
- struct mm_struct *mm;
- unsigned long *table;
- spinlock_t *ptl;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- int rc;
-
- /* Create higher level tables in the gmap page table */
- table = gmap->table;
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
- table += (gaddr >> 53) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
- gaddr & 0xffe0000000000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
- table += (gaddr >> 42) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
- gaddr & 0xfffffc0000000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
- table += (gaddr >> 31) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
- gaddr & 0xffffffff80000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- table += (gaddr >> 20) & 0x7ff;
- /* Walk the parent mm page table */
- mm = gmap->mm;
- pgd = pgd_offset(mm, vmaddr);
- VM_BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, vmaddr);
- VM_BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, vmaddr);
- VM_BUG_ON(pmd_none(*pmd));
- /* large pmds cannot yet be handled */
- if (pmd_large(*pmd))
- return -EFAULT;
- /* Link gmap segment table entry location to page table. */
- rc = radix_tree_preload(GFP_KERNEL);
- if (rc)
- return rc;
- ptl = pmd_lock(mm, pmd);
- spin_lock(&gmap->guest_table_lock);
- if (*table == _SEGMENT_ENTRY_INVALID) {
- rc = radix_tree_insert(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT, table);
- if (!rc)
- *table = pmd_val(*pmd);
- } else
- rc = 0;
- spin_unlock(&gmap->guest_table_lock);
- spin_unlock(ptl);
- radix_tree_preload_end();
- return rc;
+ pgste_t pgste;
+
+ /* the mm_has_pgste() check is done in set_pte_at() */
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+ pgste_set_key(ptep, pgste, entry, mm);
+ pgste = pgste_set_pte(ptep, pgste, entry);
+ pgste_set_unlock(ptep, pgste);
}
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- unsigned long vmaddr;
- int rc;
- bool unlocked;
-
- down_read(&gmap->mm->mmap_sem);
-
-retry:
- unlocked = false;
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = vmaddr;
- goto out_up;
- }
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
- &unlocked)) {
- rc = -EFAULT;
- goto out_up;
- }
- /*
- * In the case that fixup_user_fault unlocked the mmap_sem during
- * faultin redo __gmap_translate to not race with a map/unmap_segment.
- */
- if (unlocked)
- goto retry;
+ pgste_t pgste;
- rc = __gmap_link(gmap, gaddr, vmaddr);
-out_up:
- up_read(&gmap->mm->mmap_sem);
- return rc;
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) |= PGSTE_IN_BIT;
+ pgste_set_unlock(ptep, pgste);
}
-EXPORT_SYMBOL_GPL(gmap_fault);
-static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
if (!non_swap_entry(entry))
dec_mm_counter(mm, MM_SWAPENTS);
@@ -620,225 +426,99 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
free_swap_and_cache(entry);
}
-/*
- * this function is assumed to be called with mmap_sem held
- */
-void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int reset)
{
- unsigned long vmaddr, ptev, pgstev;
- pte_t *ptep, pte;
- spinlock_t *ptl;
+ unsigned long pgstev;
pgste_t pgste;
+ pte_t pte;
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- return;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Get pointer to the page table entry */
- ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (unlikely(!ptep))
- return;
- pte = *ptep;
- if (!pte_swap(pte))
- goto out_pte;
/* Zap unused and logically-zero pages */
pgste = pgste_get_lock(ptep);
pgstev = pgste_val(pgste);
- ptev = pte_val(pte);
- if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
- ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
- gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
- pte_clear(gmap->mm, vmaddr, ptep);
- }
+ pte = *ptep;
+ if (pte_swap(pte) &&
+ ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+ (pgstev & _PGSTE_GPS_ZERO))) {
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+ pte_clear(mm, addr, ptep);
+ }
+ if (reset)
+ pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
pgste_set_unlock(ptep, pgste);
-out_pte:
- pte_unmap_unlock(ptep, ptl);
}
-EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- unsigned long gaddr, vmaddr, size;
- struct vm_area_struct *vma;
-
- down_read(&gmap->mm->mmap_sem);
- for (gaddr = from; gaddr < to;
- gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- continue;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Find vma in the parent mm */
- vma = find_vma(gmap->mm, vmaddr);
- size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range(vma, vmaddr, size, NULL);
- }
- up_read(&gmap->mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
-static LIST_HEAD(gmap_notifier_list);
-static DEFINE_SPINLOCK(gmap_notifier_lock);
+ unsigned long ptev;
+ pgste_t pgste;
-/**
- * gmap_register_ipte_notifier - register a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_register_ipte_notifier(struct gmap_notifier *nb)
-{
- spin_lock(&gmap_notifier_lock);
- list_add(&nb->list, &gmap_notifier_list);
- spin_unlock(&gmap_notifier_lock);
+ /* Clear storage key */
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+ PGSTE_GR_BIT | PGSTE_GC_BIT);
+ ptev = pte_val(*ptep);
+ if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+ pgste_set_unlock(ptep, pgste);
}
-EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
-/**
- * gmap_unregister_ipte_notifier - remove a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
-{
- spin_lock(&gmap_notifier_lock);
- list_del_init(&nb->list);
- spin_unlock(&gmap_notifier_lock);
-}
-EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
-
-/**
- * gmap_ipte_notify - mark a range of ptes for invalidation notification
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- *
- * Returns 0 if for each page in the given range a gmap mapping exists and
- * the invalidation notification could be set. If the gmap mapping is missing
- * for one or more pages -EFAULT is returned. If no memory could be allocated
- * -ENOMEM is returned. This function establishes missing page table entries.
+/*
+ * Test and reset if a guest page is dirty
*/
-int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
- unsigned long addr;
spinlock_t *ptl;
- pte_t *ptep, entry;
pgste_t pgste;
- bool unlocked;
- int rc = 0;
-
- if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
- return -EINVAL;
- down_read(&gmap->mm->mmap_sem);
- while (len) {
- unlocked = false;
- /* Convert gmap address and connect the page tables */
- addr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(addr)) {
- rc = addr;
- break;
- }
- /* Get the page mapped */
- if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
- &unlocked)) {
- rc = -EFAULT;
- break;
- }
- /* While trying to map mmap_sem got unlocked. Let us retry */
- if (unlocked)
- continue;
- rc = __gmap_link(gmap, gaddr, addr);
- if (rc)
- break;
- /* Walk the process page table, lock and get pte pointer */
- ptep = get_locked_pte(gmap->mm, addr, &ptl);
- VM_BUG_ON(!ptep);
- /* Set notification bit in the pgste of the pte */
- entry = *ptep;
- if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
- pgste = pgste_get_lock(ptep);
- pgste_val(pgste) |= PGSTE_IN_BIT;
- pgste_set_unlock(ptep, pgste);
- gaddr += PAGE_SIZE;
- len -= PAGE_SIZE;
- }
- pte_unmap_unlock(ptep, ptl);
- }
- up_read(&gmap->mm->mmap_sem);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_ipte_notify);
-
-/**
- * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
- * @mm: pointer to the process mm_struct
- * @addr: virtual address in the process address space
- * @pte: pointer to the page table entry
- *
- * This function is assumed to be called with the page table lock held
- * for the pte to notify.
- */
-void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
-{
- unsigned long offset, gaddr;
- unsigned long *table;
- struct gmap_notifier *nb;
- struct gmap *gmap;
-
- offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- offset = offset * (4096 / sizeof(pte_t));
- spin_lock(&gmap_notifier_lock);
- list_for_each_entry(gmap, &mm->context.gmap_list, list) {
- table = radix_tree_lookup(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (!table)
- continue;
- gaddr = __gmap_segment_gaddr(table) + offset;
- list_for_each_entry(nb, &gmap_notifier_list, list)
- nb->notifier_call(gmap, gaddr);
+ pte_t *ptep;
+ pte_t pte;
+ bool dirty;
+
+ ptep = get_locked_pte(mm, addr, &ptl);
+ if (unlikely(!ptep))
+ return false;
+
+ pgste = pgste_get_lock(ptep);
+ dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+ pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pte = *ptep;
+ if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+ __ptep_ipte(addr, ptep);
+ if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_INVALID;
+ *ptep = pte;
}
- spin_unlock(&gmap_notifier_lock);
+ pgste_set_unlock(ptep, pgste);
+
+ spin_unlock(ptl);
+ return dirty;
}
-EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
- unsigned long key, bool nq)
+ unsigned char key, bool nq)
{
+ unsigned long keyul;
spinlock_t *ptl;
pgste_t old, new;
pte_t *ptep;
- bool unlocked;
down_read(&mm->mmap_sem);
-retry:
- unlocked = false;
ptep = get_locked_pte(mm, addr, &ptl);
if (unlikely(!ptep)) {
up_read(&mm->mmap_sem);
return -EFAULT;
}
- if (!(pte_val(*ptep) & _PAGE_INVALID) &&
- (pte_val(*ptep) & _PAGE_PROTECT)) {
- pte_unmap_unlock(ptep, ptl);
- /*
- * We do not really care about unlocked. We will retry either
- * way. But this allows fixup_user_fault to enable userfaultfd.
- */
- if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE,
- &unlocked)) {
- up_read(&mm->mmap_sem);
- return -EFAULT;
- }
- goto retry;
- }
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
- pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ keyul = (unsigned long) key;
+ pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+ pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long address, bits, skey;
@@ -863,13 +543,12 @@ retry:
}
EXPORT_SYMBOL(set_guest_storage_key);
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
+ unsigned char key;
spinlock_t *ptl;
pgste_t pgste;
pte_t *ptep;
- uint64_t physaddr;
- unsigned long key = 0;
down_read(&mm->mmap_sem);
ptep = get_locked_pte(mm, addr, &ptl);
@@ -880,13 +559,12 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
pgste = pgste_get_lock(ptep);
if (pte_val(*ptep) & _PAGE_INVALID) {
- key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+ key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
} else {
- physaddr = pte_val(*ptep) & PAGE_MASK;
- key = page_get_storage_key(physaddr);
+ key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
/* Reflect guest's logical view, not physical */
if (pgste_val(pgste) & PGSTE_GR_BIT)
@@ -901,471 +579,4 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
-
-static int page_table_allocate_pgste_min = 0;
-static int page_table_allocate_pgste_max = 1;
-int page_table_allocate_pgste = 0;
-EXPORT_SYMBOL(page_table_allocate_pgste);
-
-static struct ctl_table page_table_sysctl[] = {
- {
- .procname = "allocate_pgste",
- .data = &page_table_allocate_pgste,
- .maxlen = sizeof(int),
- .mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec,
- .extra1 = &page_table_allocate_pgste_min,
- .extra2 = &page_table_allocate_pgste_max,
- },
- { }
-};
-
-static struct ctl_table page_table_sysctl_dir[] = {
- {
- .procname = "vm",
- .maxlen = 0,
- .mode = 0555,
- .child = page_table_sysctl,
- },
- { }
-};
-
-static int __init page_table_register_sysctl(void)
-{
- return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
-}
-__initcall(page_table_register_sysctl);
-
-#else /* CONFIG_PGSTE */
-
-static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
- unsigned long vmaddr)
-{
-}
-
-#endif /* CONFIG_PGSTE */
-
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
- unsigned int old, new;
-
- do {
- old = atomic_read(v);
- new = old ^ bits;
- } while (atomic_cmpxchg(v, old, new) != old);
- return new;
-}
-
-/*
- * page table entry allocation/free routines.
- */
-unsigned long *page_table_alloc(struct mm_struct *mm)
-{
- unsigned long *table;
- struct page *page;
- unsigned int mask, bit;
-
- /* Try to get a fragment of a 4K page as a 2K page table */
- if (!mm_alloc_pgste(mm)) {
- table = NULL;
- spin_lock_bh(&mm->context.list_lock);
- if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- mask = atomic_read(&page->_mapcount);
- mask = (mask | (mask >> 4)) & 3;
- if (mask != 3) {
- table = (unsigned long *) page_to_phys(page);
- bit = mask & 1; /* =1 -> second 2K */
- if (bit)
- table += PTRS_PER_PTE;
- atomic_xor_bits(&page->_mapcount, 1U << bit);
- list_del(&page->lru);
- }
- }
- spin_unlock_bh(&mm->context.list_lock);
- if (table)
- return table;
- }
- /* Allocate a fresh page */
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- /* Initialize page table */
- table = (unsigned long *) page_to_phys(page);
- if (mm_alloc_pgste(mm)) {
- /* Return 4K page table with PGSTEs */
- atomic_set(&page->_mapcount, 3);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
- } else {
- /* Return the first 2K fragment of the page */
- atomic_set(&page->_mapcount, 1);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE);
- spin_lock_bh(&mm->context.list_lock);
- list_add(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- }
- return table;
-}
-
-void page_table_free(struct mm_struct *mm, unsigned long *table)
-{
- struct page *page;
- unsigned int bit, mask;
-
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (!mm_alloc_pgste(mm)) {
- /* Free 2K page table fragment of a 4K page */
- bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.list_lock);
- mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
- if (mask & 3)
- list_add(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- if (mask != 0)
- return;
- }
-
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
-}
-
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
- unsigned long vmaddr)
-{
- struct mm_struct *mm;
- struct page *page;
- unsigned int bit, mask;
-
- mm = tlb->mm;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (mm_alloc_pgste(mm)) {
- gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) (__pa(table) | 3);
- tlb_remove_table(tlb, table);
- return;
- }
- bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.list_lock);
- mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
- if (mask & 3)
- list_add_tail(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *) (__pa(table) | (1U << bit));
- tlb_remove_table(tlb, table);
-}
-
-static void __tlb_remove_table(void *_table)
-{
- unsigned int mask = (unsigned long) _table & 3;
- void *table = (void *)((unsigned long) _table ^ mask);
- struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-
- switch (mask) {
- case 0: /* pmd or pud */
- free_pages((unsigned long) table, 2);
- break;
- case 1: /* lower 2K of a 4K page table */
- case 2: /* higher 2K of a 4K page table */
- if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
- break;
- /* fallthrough */
- case 3: /* 4K page table with pgstes */
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- break;
- }
-}
-
-static void tlb_remove_table_smp_sync(void *arg)
-{
- /* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
- /*
- * This isn't an RCU grace period and hence the page-tables cannot be
- * assumed to be actually RCU-freed.
- *
- * It is however sufficient for software page-table walkers that rely
- * on IRQ disabling. See the comment near struct mmu_table_batch.
- */
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
- struct mmu_table_batch *batch;
- int i;
-
- batch = container_of(head, struct mmu_table_batch, rcu);
-
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
-
- free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- if (*batch) {
- call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
- *batch = NULL;
- }
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- tlb->mm->context.flush_mm = 1;
- if (*batch == NULL) {
- *batch = (struct mmu_table_batch *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- if (*batch == NULL) {
- __tlb_flush_mm_lazy(tlb->mm);
- tlb_remove_table_one(table);
- return;
- }
- (*batch)->nr = 0;
- }
- (*batch)->tables[(*batch)->nr++] = table;
- if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_flush_mmu(tlb);
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void thp_split_vma(struct vm_area_struct *vma)
-{
- unsigned long addr;
-
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
-}
-
-static inline void thp_split_mm(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
-
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
- thp_split_vma(vma);
- vma->vm_flags &= ~VM_HUGEPAGE;
- vma->vm_flags |= VM_NOHUGEPAGE;
- }
- mm->def_flags |= VM_NOHUGEPAGE;
-}
-#else
-static inline void thp_split_mm(struct mm_struct *mm)
-{
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-/*
- * switch on pgstes for its userspace process (for kvm)
- */
-int s390_enable_sie(void)
-{
- struct mm_struct *mm = current->mm;
-
- /* Do we have pgstes? if yes, we are done */
- if (mm_has_pgste(mm))
- return 0;
- /* Fail if the page tables are 2K */
- if (!mm_alloc_pgste(mm))
- return -EINVAL;
- down_write(&mm->mmap_sem);
- mm->context.has_pgste = 1;
- /* split thp mappings and disable thp for future mappings */
- thp_split_mm(mm);
- up_write(&mm->mmap_sem);
- return 0;
-}
-EXPORT_SYMBOL_GPL(s390_enable_sie);
-
-/*
- * Enable storage key handling from now on and initialize the storage
- * keys with the default key.
- */
-static int __s390_enable_skey(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
-{
- unsigned long ptev;
- pgste_t pgste;
-
- pgste = pgste_get_lock(pte);
- /*
- * Remove all zero page mappings,
- * after establishing a policy to forbid zero page mappings
- * following faults for that page will get fresh anonymous pages
- */
- if (is_zero_pfn(pte_pfn(*pte))) {
- ptep_flush_direct(walk->mm, addr, pte);
- pte_val(*pte) = _PAGE_INVALID;
- }
- /* Clear storage key */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
- PGSTE_GR_BIT | PGSTE_GC_BIT);
- ptev = pte_val(*pte);
- if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
- pgste_set_unlock(pte, pgste);
- return 0;
-}
-
-int s390_enable_skey(void)
-{
- struct mm_walk walk = { .pte_entry = __s390_enable_skey };
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- int rc = 0;
-
- down_write(&mm->mmap_sem);
- if (mm_use_skey(mm))
- goto out_up;
-
- mm->context.use_skey = 1;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.use_skey = 0;
- rc = -ENOMEM;
- goto out_up;
- }
- }
- mm->def_flags &= ~VM_MERGEABLE;
-
- walk.mm = mm;
- walk_page_range(0, TASK_SIZE, &walk);
-
-out_up:
- up_write(&mm->mmap_sem);
- return rc;
-}
-EXPORT_SYMBOL_GPL(s390_enable_skey);
-
-/*
- * Reset CMMA state, make all pages stable again.
- */
-static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
-{
- pgste_t pgste;
-
- pgste = pgste_get_lock(pte);
- pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
- pgste_set_unlock(pte, pgste);
- return 0;
-}
-
-void s390_reset_cmma(struct mm_struct *mm)
-{
- struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
-
- down_write(&mm->mmap_sem);
- walk.mm = mm;
- walk_page_range(0, TASK_SIZE, &walk);
- up_write(&mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(s390_reset_cmma);
-
-/*
- * Test and reset if a guest page is dirty
- */
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
-{
- pte_t *pte;
- spinlock_t *ptl;
- bool dirty = false;
-
- pte = get_locked_pte(gmap->mm, address, &ptl);
- if (unlikely(!pte))
- return false;
-
- if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
- dirty = true;
-
- spin_unlock(ptl);
- return dirty;
-}
-EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- /* No need to flush TLB
- * On s390 reference bits are in storage key and never in TLB */
- return pmdp_test_and_clear_young(vma, address, pmdp);
-}
-
-int pmdp_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp,
- pmd_t entry, int dirty)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
- entry = pmd_mkyoung(entry);
- if (dirty)
- entry = pmd_mkdirty(entry);
- if (pmd_same(*pmdp, entry))
- return 0;
- pmdp_invalidate(vma, address, pmdp);
- set_pmd_at(vma->vm_mm, address, pmdp, entry);
- return 1;
-}
-
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
-{
- struct list_head *lh = (struct list_head *) pgtable;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- if (!pmd_huge_pte(mm, pmdp))
- INIT_LIST_HEAD(lh);
- else
- list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
- pmd_huge_pte(mm, pmdp) = pgtable;
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
- struct list_head *lh;
- pgtable_t pgtable;
- pte_t *ptep;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- pgtable = pmd_huge_pte(mm, pmdp);
- lh = (struct list_head *) pgtable;
- if (list_empty(lh))
- pmd_huge_pte(mm, pmdp) = NULL;
- else {
- pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
- list_del(lh);
- }
- ptep = (pte_t *) pgtable;
- pte_val(*ptep) = _PAGE_INVALID;
- ptep++;
- pte_val(*ptep) = _PAGE_INVALID;
- return pgtable;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ef7d6c8fea66..d27fccbad7c1 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -94,16 +94,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pgd_populate(&init_mm, pg_dir, pu_dir);
}
pu_dir = pud_offset(pg_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
- !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
+ !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
pud_val(*pu_dir) = __pa(address) |
_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
(ro ? _REGION_ENTRY_PROTECT : 0);
address += PUD_SIZE;
continue;
}
-#endif
if (pud_none(*pu_dir)) {
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
@@ -111,9 +110,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pud_populate(&init_mm, pu_dir, pm_dir);
}
pm_dir = pmd_offset(pu_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
- !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
+ !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
pmd_val(*pm_dir) = __pa(address) |
_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
_SEGMENT_ENTRY_YOUNG |
@@ -121,7 +120,6 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
address += PMD_SIZE;
continue;
}
-#endif
if (pmd_none(*pm_dir)) {
pt_dir = vmem_pte_alloc(address);
if (!pt_dir)
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 1bd23017191e..496e4a7ee00e 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprofilefs.o oprofile_stats.o \
timer_int.o )
-oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y := $(DRIVER_OBJS) init.o
oprofile-y += hwsampler.o
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
deleted file mode 100644
index 1884e1759529..000000000000
--- a/arch/s390/oprofile/backtrace.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * S390 Version
- * Copyright IBM Corp. 2005
- * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
- */
-
-#include <linux/oprofile.h>
-
-#include <asm/processor.h> /* for struct stack_frame */
-
-static unsigned long
-__show_trace(unsigned int *depth, unsigned long sp,
- unsigned long low, unsigned long high)
-{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (*depth) {
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8]);
-
- /* Follow the backchain. */
- while (*depth) {
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8]);
-
- }
-
- if (*depth == 0)
- break;
-
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8]);
- low = sp;
- sp = regs->gprs[15];
- }
- return sp;
-}
-
-void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
-{
- unsigned long head, frame_size;
- struct stack_frame* head_sf;
-
- if (user_mode(regs))
- return;
-
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- head = regs->gprs[15];
- head_sf = (struct stack_frame*)head;
-
- if (!head_sf->back_chain)
- return;
-
- head = head_sf->back_chain;
-
- head = __show_trace(&depth, head,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
- S390_lowcore.async_stack + frame_size);
-
- __show_trace(&depth, head, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
-}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 9cfa2ffaa9d6..791935a65800 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -20,8 +20,6 @@
#include "../../../drivers/oprofile/oprof.h"
-extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
-
#include "hwsampler.h"
#include "op_counter.h"
@@ -456,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
+ case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
default: return -ENODEV;
}
}
@@ -494,6 +493,24 @@ static void oprofile_hwsampler_exit(void)
hwsampler_shutdown();
}
+static int __s390_backtrace(void *data, unsigned long address)
+{
+ unsigned int *depth = data;
+
+ if (*depth == 0)
+ return 1;
+ (*depth)--;
+ oprofile_add_trace(address);
+ return 0;
+}
+
+static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
+{
+ if (user_mode(regs))
+ return;
+ dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
+}
+
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
ops->backtrace = s390_backtrace;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 8f19c8f9d660..9fd59a7cfcd3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -637,11 +637,9 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
int pcibios_add_device(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
- zdev->pdev = pdev;
pdev->dev.groups = zpci_attr_groups;
zpci_map_resources(pdev);
@@ -664,8 +662,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
struct zpci_dev *zdev = to_zpci(pdev);
- zdev->pdev = pdev;
- zpci_debug_init_device(zdev);
+ zpci_debug_init_device(zdev, dev_name(&pdev->dev));
zpci_fmb_enable_device(zdev);
return pci_enable_resources(pdev, mask);
@@ -677,7 +674,6 @@ void pcibios_disable_device(struct pci_dev *pdev)
zpci_fmb_disable_device(zdev);
zpci_debug_exit_device(zdev);
- zdev->pdev = NULL;
}
#ifdef CONFIG_HIBERNATE_CALLBACKS
@@ -864,8 +860,11 @@ static inline int barsize(u8 size)
static int zpci_mem_init(void)
{
+ BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+ __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
+
zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
- 16, 0, NULL);
+ __alignof__(struct zpci_fmb), 0, NULL);
if (!zdev_fmb_cache)
goto error_fmb;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d6e411ed8b1f..21591ddb4c1f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -8,13 +8,19 @@
#define KMSG_COMPONENT "zpci"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/compat.h>
#include <linux/kernel.h>
+#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/delay.h>
#include <linux/pci.h>
+#include <linux/uaccess.h>
#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
+#include <asm/compat.h>
+#include <asm/clp.h>
+#include <uapi/asm/clp.h>
static inline void zpci_err_clp(unsigned int rsp, int rc)
{
@@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc)
}
/*
- * Call Logical Processor
- * Retry logic is handled by the caller.
+ * Call Logical Processor with c=1, lps=0 and command 1
+ * to get the bit mask of installed logical processors
*/
-static inline u8 clp_instr(void *data)
+static inline int clp_get_ilp(unsigned long *ilp)
+{
+ unsigned long mask;
+ int cc = 3;
+
+ asm volatile (
+ " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
+ : "cc");
+ *ilp = mask;
+ return cc;
+}
+
+/*
+ * Call Logical Processor with c=0, the give constant lps and an lpcb request.
+ */
+static inline int clp_req(void *data, unsigned int lps)
{
struct { u8 _[CLP_BLK_SIZE]; } *req = data;
u64 ignored;
- u8 cc;
+ int cc = 3;
asm volatile (
- " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n"
- " ipm %[cc]\n"
+ " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
+ "0: ipm %[cc]\n"
" srl %[cc],28\n"
- : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req)
- : [req] "a" (req)
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+ : [req] "a" (req), [lps] "i" (lps)
: "cc");
return cc;
}
@@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
rrb->response.hdr.len = sizeof(rrb->response);
rrb->request.pfgid = pfgid;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
clp_store_query_pci_fngrp(zdev, &rrb->response);
else {
@@ -143,7 +171,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
rrb->response.hdr.len = sizeof(rrb->response);
rrb->request.fh = fh;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
rc = clp_store_query_pci_fn(zdev, &rrb->response);
if (rc)
@@ -214,7 +242,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
retries--;
if (retries < 0)
@@ -280,7 +308,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
rrb->request.resume_token = resume_token;
/* Get PCI function handle list */
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("List PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
@@ -391,3 +419,198 @@ int clp_rescan_pci_devices_simple(void)
clp_free_block(rrb);
return rc;
}
+
+static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_base_slpc(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query(struct clp_req *req,
+ struct clp_req_rsp_query_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query_grp(struct clp_req *req,
+ struct clp_req_rsp_query_pci_grp *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 ||
+ lpcb->request.reserved4 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_pci_slpc(req, (void *) lpcb);
+ case 0x0002: /* list PCI functions */
+ return clp_pci_list(req, (void *) lpcb);
+ case 0x0003: /* query PCI function */
+ return clp_pci_query(req, (void *) lpcb);
+ case 0x0004: /* query PCI function group */
+ return clp_pci_query_grp(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_normal_command(struct clp_req *req)
+{
+ struct clp_req_hdr *lpcb;
+ void __user *uptr;
+ int rc;
+
+ rc = -EINVAL;
+ if (req->lps != 0 && req->lps != 2)
+ goto out;
+
+ rc = -ENOMEM;
+ lpcb = clp_alloc_block(GFP_KERNEL);
+ if (!lpcb)
+ goto out;
+
+ rc = -EFAULT;
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = -EINVAL;
+ if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0)
+ goto out_free;
+
+ switch (req->lps) {
+ case 0:
+ rc = clp_base_command(req, lpcb);
+ break;
+ case 2:
+ rc = clp_pci_command(req, lpcb);
+ break;
+ }
+ if (rc)
+ goto out_free;
+
+ rc = -EFAULT;
+ if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = 0;
+
+out_free:
+ clp_free_block(lpcb);
+out:
+ return rc;
+}
+
+static int clp_immediate_command(struct clp_req *req)
+{
+ void __user *uptr;
+ unsigned long ilp;
+ int exists;
+
+ if (req->cmd > 1 || clp_get_ilp(&ilp) != 0)
+ return -EINVAL;
+
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (req->cmd == 0) {
+ /* Command code 0: test for a specific processor */
+ exists = test_bit_inv(req->lps, &ilp);
+ return put_user(exists, (int __user *) uptr);
+ }
+ /* Command code 1: return bit mask of installed processors */
+ return put_user(ilp, (unsigned long __user *) uptr);
+}
+
+static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct clp_req req;
+ void __user *argp;
+
+ if (cmd != CLP_SYNC)
+ return -EINVAL;
+
+ argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+ if (copy_from_user(&req, argp, sizeof(req)))
+ return -EFAULT;
+ if (req.r != 0)
+ return -EINVAL;
+ return req.c ? clp_immediate_command(&req) : clp_normal_command(&req);
+}
+
+static int clp_misc_release(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+static const struct file_operations clp_misc_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .release = clp_misc_release,
+ .unlocked_ioctl = clp_misc_ioctl,
+ .compat_ioctl = clp_misc_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice clp_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "clp",
+ .fops = &clp_misc_fops,
+};
+
+static int __init clp_misc_init(void)
+{
+ return misc_register(&clp_misc_device);
+}
+
+device_initcall(clp_misc_init);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 4129b0a5fd78..c555de3d12d6 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -128,10 +128,9 @@ static const struct file_operations debugfs_pci_perf_fops = {
.release = single_release,
};
-void zpci_debug_init_device(struct zpci_dev *zdev)
+void zpci_debug_init_device(struct zpci_dev *zdev, const char *name)
{
- zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
- debugfs_root);
+ zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root);
if (IS_ERR(zdev->debugfs_dev))
zdev->debugfs_dev = NULL;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 4638b93c7632..a06ce8037cec 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -217,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table)
dma_free_cpu_table(table);
}
-static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+static unsigned long __dma_alloc_iommu(struct device *dev,
unsigned long start, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long boundary_size;
- boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
start, size, 0, boundary_size, 0);
}
-static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
+static unsigned long dma_alloc_iommu(struct device *dev, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long offset, flags;
int wrap = 0;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
+ offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
if (offset == -1) {
/* wrap-around */
- offset = __dma_alloc_iommu(zdev, 0, size);
+ offset = __dma_alloc_iommu(dev, 0, size);
wrap = 1;
}
@@ -251,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
return offset;
}
-static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
+static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long flags;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
@@ -293,7 +296,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
+ iommu_page_index = dma_alloc_iommu(dev, nr_pages);
if (iommu_page_index == -1) {
ret = -ENOSPC;
goto out_err;
@@ -319,7 +322,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
return dma_addr + (offset & ~PAGE_MASK);
out_free:
- dma_free_iommu(zdev, iommu_page_index, nr_pages);
+ dma_free_iommu(dev, iommu_page_index, nr_pages);
out_err:
zpci_err("map error:\n");
zpci_err_dma(ret, pa);
@@ -346,7 +349,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
atomic64_add(npages, &zdev->unmapped_pages);
iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
- dma_free_iommu(zdev, iommu_page_index, npages);
+ dma_free_iommu(dev, iommu_page_index, npages);
}
static void *s390_dma_alloc(struct device *dev, size_t size,
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b0e04751c5d5..fb2a9a560fdc 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,11 +46,14 @@ struct zpci_ccdf_avail {
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+ struct pci_dev *pdev = NULL;
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -58,6 +61,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
return;
pdev->error_state = pci_channel_io_perm_failure;
+ pci_dev_put(pdev);
}
void zpci_event_error(void *data)
@@ -69,9 +73,12 @@ void zpci_event_error(void *data)
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+ struct pci_dev *pdev = NULL;
int ret;
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
zpci_err("avail CCDF:\n");
@@ -138,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
default:
break;
}
+ if (pdev)
+ pci_dev_put(pdev);
}
void zpci_event_availability(void *data)
diff --git a/arch/score/configs/spct6600_defconfig b/arch/score/configs/spct6600_defconfig
index df1edbf507a2..b2d8802f43b4 100644
--- a/arch/score/configs/spct6600_defconfig
+++ b/arch/score/configs/spct6600_defconfig
@@ -70,7 +70,6 @@ CONFIG_NFSD=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_CRYPTO_NULL=y
diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index b48459afefdd..f3a0649ab521 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c
@@ -101,7 +101,7 @@ static void __init resource_init(void)
res->name = "System RAM";
res->start = MEMORY_START;
res->end = MEMORY_START + MEMORY_SIZE - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
request_resource(res, &code_resource);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index e13da05505dc..17a4f1593d65 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -847,14 +847,10 @@ config PCI
config PCI_DOMAINS
bool
-source "drivers/pci/pcie/Kconfig"
-
source "drivers/pci/Kconfig"
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
endmenu
menu "Executable file formats"
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index e343dbd02e41..644314f2b1ef 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -105,9 +105,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
return channel ? 15 : 14;
}
-/* generic DMA-mapping stuff */
-#include <asm-generic/pci-dma-compat.h>
-
#endif /* __KERNEL__ */
#endif /* __ASM_SH_PCI_H */
diff --git a/arch/sh/include/mach-common/mach/magicpanelr2.h b/arch/sh/include/mach-common/mach/magicpanelr2.h
index 183a2f744251..eb0cf205176f 100644
--- a/arch/sh/include/mach-common/mach/magicpanelr2.h
+++ b/arch/sh/include/mach-common/mach/magicpanelr2.h
@@ -13,7 +13,7 @@
#ifndef __ASM_SH_MAGICPANELR2_H
#define __ASM_SH_MAGICPANELR2_H
-#include <asm/gpio.h>
+#include <linux/gpio.h>
#define __IO_PREFIX mpr2
#include <asm/io_generic.h>
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index de19cfa768f2..3f1c18b28e8a 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -78,17 +78,17 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0, };
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
unsigned long memory_start;
@@ -202,7 +202,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
res->name = "System RAM";
res->start = start;
res->end = end - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
if (request_resource(&iomem_resource, res)) {
pr_err("unable to request memory_resource 0x%lx 0x%lx\n",
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index de6be008fc01..13f633add29a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -203,7 +203,7 @@ asmlinkage void start_secondary(void)
set_cpu_online(cpu, true);
per_cpu(cpu_state, cpu) = CPU_ONLINE;
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
extern struct {
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 6385f60209b6..cc948db74878 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
- pte = pte_alloc_map(mm, NULL, pmd, addr);
+ pte = pte_alloc_map(mm, pmd, addr);
}
}
diff --git a/arch/sparc/include/asm/gpio.h b/arch/sparc/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/sparc/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h
index d9c031f9910f..6e14fd179335 100644
--- a/arch/sparc/include/asm/pci.h
+++ b/arch/sparc/include/asm/pci.h
@@ -5,7 +5,4 @@
#else
#include <asm/pci_32.h>
#endif
-
-#include <asm-generic/pci-dma-compat.h>
-
#endif
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index f2d30cab5b3f..cd1f592cd347 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -696,14 +696,6 @@ tlb_fixup_done:
call __bzero
sub %o1, %o0, %o1
-#ifdef CONFIG_LOCKDEP
- /* We have this call this super early, as even prom_init can grab
- * spinlocks and thus call into the lockdep code.
- */
- call lockdep_init
- nop
-#endif
-
call prom_init
mov %l7, %o0 ! OpenPROM cif handler
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index b3a5d81b20f0..fb30e7c6a5b1 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg)
local_irq_enable();
wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* We should never reach here! */
BUG();
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 19cd08d18672..8a6151a628ce 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -134,7 +134,7 @@ void smp_callin(void)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void cpu_panic(void)
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 131eaf4ad7f5..4977800e9770 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -146,7 +146,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
- pte = pte_alloc_map(mm, NULL, pmd, addr);
+ pte = pte_alloc_map(mm, pmd, addr);
}
return pte;
}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 6f216853f272..1cfe6aab7a11 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2863,17 +2863,17 @@ void hugetlb_setup(struct pt_regs *regs)
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static inline resource_size_t compute_kern_paddr(void *addr)
@@ -2909,7 +2909,7 @@ static int __init report_memory(void)
res->name = "System RAM";
res->start = pavail[i].phys_addr;
res->end = pavail[i].phys_addr + pavail[i].reg_size - 1;
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
if (insert_resource(&iomem_resource, res) < 0) {
pr_warn("Resource insertion failed.\n");
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index de4a4fff9323..81719302b056 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -455,8 +455,6 @@ config TILE_PCI_IO
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
-
config TILE_USB
tristate "Tilera USB host adapter support"
default y
@@ -467,8 +465,6 @@ config TILE_USB
Provides USB host adapter support for the built-in EHCI and OHCI
interfaces on TILE-Gx chips.
-source "drivers/pci/hotplug/Kconfig"
-
endmenu
menu "Executable file formats"
diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
index 984fa00a8c25..3f3dfb8b150a 100644
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -374,7 +374,6 @@ CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_KGDB=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
index 71ad9f7e40c9..ef9e27eb2f50 100644
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -486,7 +486,6 @@ CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_ASYNC_RAID6_TEST=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index dfedd7ac7298..fe3de505b024 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -226,7 +226,4 @@ static inline int pcibios_assign_all_busses(void)
/* Use any cpu for PCI. */
#define cpumask_of_pcibus(bus) cpu_online_mask
-/* implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index bbb855de6569..a992238e9b58 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1632,14 +1632,14 @@ static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
/*
@@ -1673,10 +1673,15 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved)
kzalloc(sizeof(struct resource), GFP_ATOMIC);
if (!res)
return NULL;
- res->name = reserved ? "Reserved" : "System RAM";
res->start = start_pfn << PAGE_SHIFT;
res->end = (end_pfn << PAGE_SHIFT) - 1;
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ if (reserved) {
+ res->name = "Reserved";
+ } else {
+ res->name = "System RAM";
+ res->flags |= IORESOURCE_SYSRAM;
+ }
if (insert_resource(&iomem_resource, res)) {
kfree(res);
return NULL;
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 20d52a98e171..6c0abaacec33 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -208,7 +208,7 @@ void online_secondary(void)
/* Set up tile-timer clock-event device on this cpu */
setup_tile_timer();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index c034dc3fe2d4..e212c64682c5 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -77,7 +77,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
else {
if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
panic("Unexpected page size %#lx\n", sz);
- return pte_alloc_map(mm, NULL, pmd, addr);
+ return pte_alloc_map(mm, pmd, addr);
}
}
#else
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index d4e1fc41d06d..a0582b7f41d3 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -896,17 +896,15 @@ void __init pgtable_cache_init(void)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-static long __write_once initfree;
-#else
static long __write_once initfree = 1;
-#endif
+static bool __write_once set_initfree_done;
/* Select whether to free (1) or mark unusable (0) the __init pages. */
static int __init set_initfree(char *str)
{
long val;
if (kstrtol(str, 0, &val) == 0) {
+ set_initfree_done = true;
initfree = val;
pr_info("initfree: %s free init pages\n",
initfree ? "will" : "won't");
@@ -919,6 +917,11 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
unsigned long addr = (unsigned long) begin;
+ /* Prefer user request first */
+ if (!set_initfree_done) {
+ if (debug_pagealloc_enabled())
+ initfree = 0;
+ }
if (kdata_huge && !initfree) {
pr_warn("Warning: ignoring initfree=0: incompatible with kdata=huge\n");
initfree = 1;
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 9591a66aa5c5..3943e9d7d13d 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -31,7 +31,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
if (!pmd)
goto out_pmd;
- pte = pte_alloc_map(mm, NULL, pmd, proc);
+ pte = pte_alloc_map(mm, pmd, proc);
if (!pte)
goto out_pte;
diff --git a/arch/unicore32/include/asm/pci.h b/arch/unicore32/include/asm/pci.h
index 38b3f3785c3c..37e55d018de5 100644
--- a/arch/unicore32/include/asm/pci.h
+++ b/arch/unicore32/include/asm/pci.h
@@ -13,8 +13,6 @@
#define __UNICORE_PCI_H__
#ifdef __KERNEL__
-#include <asm-generic/pci-dma-compat.h>
-#include <asm-generic/pci-bridge.h>
#include <asm-generic/pci.h>
#include <mach/hardware.h> /* for PCIBIOS_MIN_* */
@@ -23,5 +21,4 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine);
#endif /* __KERNEL__ */
-
#endif
diff --git a/arch/unicore32/include/mach/hardware.h b/arch/unicore32/include/mach/hardware.h
index 9e20b5d9ed50..25146232c7cf 100644
--- a/arch/unicore32/include/mach/hardware.h
+++ b/arch/unicore32/include/mach/hardware.h
@@ -28,11 +28,6 @@
#define PCIBIOS_MIN_IO 0x4000 /* should lower than 64KB */
#define PCIBIOS_MIN_MEM io_v2p(PKUNITY_PCIMEM_BASE)
-/*
- * We override the standard dma-mask routines for bouncing.
- */
-#define HAVE_ARCH_PCI_SET_DMA_MASK
-
#define pcibios_assign_all_busses() 1
#endif /* __MACH_PUV3_HARDWARE_H__ */
diff --git a/arch/unicore32/kernel/gpio.c b/arch/unicore32/kernel/gpio.c
index cb12ec39552c..5ab23794ea17 100644
--- a/arch/unicore32/kernel/gpio.c
+++ b/arch/unicore32/kernel/gpio.c
@@ -52,7 +52,7 @@ device_initcall(puv3_gpio_leds_init);
static int puv3_gpio_get(struct gpio_chip *chip, unsigned offset)
{
- return readl(GPIO_GPLR) & GPIO_GPIO(offset);
+ return !!(readl(GPIO_GPLR) & GPIO_GPIO(offset));
}
static void puv3_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 3fa317f96122..c2bffa5614a4 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -72,13 +72,13 @@ static struct resource mem_res[] = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
},
{
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
}
};
@@ -211,7 +211,7 @@ request_standard_resources(struct meminfo *mi)
res->name = "System RAM";
res->start = mi->bank[i].start;
res->end = mi->bank[i].start + mi->bank[i].size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index afccef5529cc..2ec3d3adcefc 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -276,7 +276,7 @@ retry:
up_read(&mm->mmap_sem);
/*
- * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
+ * Handle the "normal" case first - VM_FAULT_MAJOR
*/
if (likely(!(fault &
(VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c
index 2ade20d8eab3..c572a28c76c9 100644
--- a/arch/unicore32/mm/pgd.c
+++ b/arch/unicore32/mm/pgd.c
@@ -54,7 +54,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
if (!new_pmd)
goto no_pmd;
- new_pte = pte_alloc_map(mm, NULL, new_pmd, 0);
+ new_pte = pte_alloc_map(mm, new_pmd, 0);
if (!new_pte)
goto no_pte;
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 1538562cc720..eb3abf8ac44e 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,6 +1,7 @@
-
obj-y += entry/
+obj-$(CONFIG_PERF_EVENTS) += events/
+
obj-$(CONFIG_KVM) += kvm/
# Xen paravirtualization support
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c46662f64c39..3c74b549ea9a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -303,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES
config FIX_EARLYCON_MEM
def_bool y
+config DEBUG_RODATA
+ def_bool y
+
config PGTABLE_LEVELS
int
default 4 if X86_64
@@ -1160,22 +1163,23 @@ config MICROCODE
bool "CPU microcode loading support"
default y
depends on CPU_SUP_AMD || CPU_SUP_INTEL
- depends on BLK_DEV_INITRD
select FW_LOADER
---help---
-
If you say Y here, you will be able to update the microcode on
- certain Intel and AMD processors. The Intel support is for the
- IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
- Xeon etc. The AMD support is for families 0x10 and later. You will
- obviously need the actual microcode binary data itself which is not
- shipped with the Linux kernel.
+ Intel and AMD processors. The Intel support is for the IA32 family,
+ e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
+ AMD support is for families 0x10 and later. You will obviously need
+ the actual microcode binary data itself which is not shipped with
+ the Linux kernel.
- This option selects the general module only, you need to select
- at least one vendor specific module as well.
+ The preferred method to load microcode from a detached initrd is described
+ in Documentation/x86/early-microcode.txt. For that you need to enable
+ CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
+ initrd for microcode blobs.
- To compile this driver as a module, choose M here: the module
- will be called microcode.
+ In addition, you can build-in the microcode into the kernel. For that you
+ need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
+ to the CONFIG_EXTRA_FIRMWARE config option.
config MICROCODE_INTEL
bool "Intel microcode loading support"
@@ -2431,8 +2435,6 @@ config PCI_CNB20LE_QUIRK
You should say N unless you know you need this.
-source "drivers/pci/pcie/Kconfig"
-
source "drivers/pci/Kconfig"
# x86_64 have no ISA slots, but can have ISA-style DMA.
@@ -2588,8 +2590,6 @@ config AMD_NB
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
config RAPIDIO
tristate "RapidIO support"
depends on PCI
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed97a8a2..67eec55093a5 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -74,28 +74,16 @@ config EFI_PGT_DUMP
issues with the mapping of the EFI runtime regions into that
table.
-config DEBUG_RODATA
- bool "Write protect kernel read-only data structures"
- default y
- depends on DEBUG_KERNEL
- ---help---
- Mark the kernel read-only data as write-protected in the pagetables,
- in order to catch accidental (and incorrect) writes to such const
- data. This is recommended so that we can catch kernel bugs sooner.
- If in doubt, say "Y".
-
config DEBUG_RODATA_TEST
- bool "Testcase for the DEBUG_RODATA feature"
- depends on DEBUG_RODATA
+ bool "Testcase for the marking rodata read-only"
default y
---help---
- This option enables a testcase for the DEBUG_RODATA
- feature as well as for the change_page_attr() infrastructure.
+ This option enables a testcase for the setting rodata read-only
+ as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
config DEBUG_WX
bool "Warn on W+X mappings at boot"
- depends on DEBUG_RODATA
select X86_PTDUMP_CORE
---help---
Generate a warning if any W+X mappings are found at boot.
@@ -350,16 +338,6 @@ config DEBUG_IMR_SELFTEST
If unsure say N here.
-config X86_DEBUG_STATIC_CPU_HAS
- bool "Debug alternatives"
- depends on DEBUG_KERNEL
- ---help---
- This option causes additional code to be generated which
- fails if static_cpu_has() is used before alternatives have
- run.
-
- If unsure, say N.
-
config X86_DEBUG_FPU
bool "Debug the x86 FPU code"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index ea97697e51e4..4cb404fd45ce 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -1,7 +1,7 @@
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/processor-flags.h>
struct cpu_features {
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e66a62..f72498dc90d2 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -17,7 +17,7 @@
#include "../include/asm/required-features.h"
#include "../include/asm/disabled-features.h"
-#include "../include/asm/cpufeature.h"
+#include "../include/asm/cpufeatures.h"
#include "../kernel/cpu/capflags.c"
int main(void)
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a7661c430cd9..0702d2531bc7 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -49,7 +49,6 @@ typedef unsigned int u32;
/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];
-int is_big_kernel;
#define PECOFF_RELOC_RESERVE 0x20
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 028be48c8839..265901a84f3f 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_FRAME_WARN=2048
+CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
@@ -303,7 +303,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index cb5b3ab5beec..4f404a64681b 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -300,7 +300,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 3633ad6145c5..064c7e2bd7c8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -639,16 +639,11 @@ static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
int err;
- /* key consists of keys of equal size concatenated, therefore
- * the length must be even
- */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
+ err = xts_check_key(tfm, key, keylen);
+ if (err)
+ return err;
/* first half of xts-key is for crypt */
err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 5c8b6266a394..aa76cad9d262 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1503,13 +1503,9 @@ int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
u32 *flags = &tfm->crt_flags;
int err;
- /* key consists of keys of equal size concatenated, therefore
- * the length must be even
- */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
+ err = xts_check_key(tfm, key, keylen);
+ if (err)
+ return err;
/* first half of xts-key is for crypt */
err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index fca459578c35..50e684768c55 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -329,13 +329,9 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
u32 *flags = &tfm->crt_flags;
int err;
- /* key consists of keys of equal size concatenated, therefore
- * the length must be even
- */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
+ err = xts_check_key(tfm, key, keylen);
+ if (err)
+ return err;
/* first half of xts-key is for crypt */
err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 07d2c6c86a54..27226df3f7d8 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -33,7 +33,7 @@
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0e9871693f24..0857b1a1de3b 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -30,7 +30,7 @@
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/internal.h>
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index a3fcfc97a311..cd4df9322501 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -30,7 +30,7 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/fpu/api.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 5dc37026c7ce..6f778d3daa22 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -332,16 +332,11 @@ int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
int err;
- /* key consists of keys of equal size concatenated, therefore
- * the length must be even
- */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
+ err = xts_check_key(tfm, key, keylen);
+ if (err)
+ return err;
/* first half of xts-key is for crypt */
err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 3643dd508f45..8943407e8917 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -309,16 +309,11 @@ static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
int err;
- /* key consists of keys of equal size concatenated, therefore
- * the length must be even
- */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
+ err = xts_check_key(tfm, key, keylen);
+ if (err)
+ return err;
/* first half of xts-key is for crypt */
err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index a841e9765bd6..a8a0224fa0f8 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -762,6 +762,38 @@ static int sha1_mb_async_digest(struct ahash_request *req)
return crypto_ahash_digest(mcryptd_req);
}
+static int sha1_mb_async_export(struct ahash_request *req, void *out)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_export(mcryptd_req, out);
+}
+
+static int sha1_mb_async_import(struct ahash_request *req, const void *in)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+ struct crypto_shash *child = mcryptd_ahash_child(mcryptd_tfm);
+ struct mcryptd_hash_request_ctx *rctx;
+ struct shash_desc *desc;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ rctx = ahash_request_ctx(mcryptd_req);
+ desc = &rctx->desc;
+ desc->tfm = child;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ return crypto_ahash_import(mcryptd_req, in);
+}
+
static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
{
struct mcryptd_ahash *mcryptd_tfm;
@@ -796,8 +828,11 @@ static struct ahash_alg sha1_mb_async_alg = {
.final = sha1_mb_async_final,
.finup = sha1_mb_async_finup,
.digest = sha1_mb_async_digest,
+ .export = sha1_mb_async_export,
+ .import = sha1_mb_async_import,
.halg = {
.digestsize = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_hash_ctx),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1_mb",
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
index 2ab9560b53c8..c420d89b175f 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
@@ -197,7 +197,7 @@ len_is_0:
vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
- movl 4*32(state, idx, 4), DWORD_tmp
+ movl _args_digest+4*32(state, idx, 4), DWORD_tmp
vmovdqu %xmm0, _result_digest(job_rax)
movl DWORD_tmp, _result_digest+1*16(job_rax)
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 56d8a08ee479..2ebb5e9789f3 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -277,13 +277,9 @@ int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
u32 *flags = &tfm->crt_flags;
int err;
- /* key consists of keys of equal size concatenated, therefore
- * the length must be even
- */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
+ err = xts_check_key(tfm, key, keylen);
+ if (err)
+ return err;
/* first half of xts-key is for crypt */
err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index e32206e09868..9a9e5884066c 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
-#else /* CONFIG_X86_64 */
-
-/*
- * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
- * are different from the entry_32.S versions in not changing the segment
- * registers. So only suitable for in kernel use, not when transitioning
- * from or to user space. The resulting stack frame is not a standard
- * pt_regs frame. The main use case is calling C code from assembler
- * when all the registers need to be preserved.
- */
-
- .macro SAVE_ALL
- pushl %eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- .endm
-
- .macro RESTORE_ALL
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- .endm
-
#endif /* CONFIG_X86_64 */
/*
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 03663740c866..e79d93d44ecd 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
#include <asm/traps.h>
#include <asm/vdso.h>
#include <asm/uaccess.h>
+#include <asm/cpufeature.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void)
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit();
}
+#else
+static inline void enter_from_user_mode(void) {}
#endif
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
-#ifdef CONFIG_CONTEXT_TRACKING
- /*
- * If TIF_NOHZ is set, we are required to call user_exit() before
- * doing anything that could touch RCU.
- */
- if (work & _TIF_NOHZ) {
- enter_from_user_mode();
- work &= ~_TIF_NOHZ;
- }
-#endif
-
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
@@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
- /*
- * If we stepped into a sysenter/syscall insn, it trapped in
- * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
- * If user-mode had set TF itself, then it's still clear from
- * do_debug() and we need to set it again to restore the user
- * state. If we entered on the slow path, TF was already set.
- */
- if (work & _TIF_SINGLESTEP)
- regs->flags |= X86_EFLAGS_TF;
-
#ifdef CONFIG_SECCOMP
/*
* Call seccomp_phase2 before running the other hooks so that
@@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
+ struct thread_info *ti = pt_regs_to_thread_info(regs);
u32 cached_flags;
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
lockdep_sys_exit();
- cached_flags =
- READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+ cached_flags = READ_ONCE(ti->flags);
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags);
+#ifdef CONFIG_COMPAT
+ /*
+ * Compat syscalls set TS_COMPAT. Make sure we clear it before
+ * returning to user mode. We need to clear it *after* signal
+ * handling, because syscall restart has a fixup for compat
+ * syscalls. The fixup is exercised by the ptrace_syscall_32
+ * selftest.
+ */
+ ti->status &= ~TS_COMPAT;
+#endif
+
user_enter();
}
@@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
syscall_slow_exit_work(regs, cached_flags);
-#ifdef CONFIG_COMPAT
+ local_irq_disable();
+ prepare_exit_to_usermode(regs);
+}
+
+#ifdef CONFIG_X86_64
+__visible void do_syscall_64(struct pt_regs *regs)
+{
+ struct thread_info *ti = pt_regs_to_thread_info(regs);
+ unsigned long nr = regs->orig_ax;
+
+ enter_from_user_mode();
+ local_irq_enable();
+
+ if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
+ nr = syscall_trace_enter(regs);
+
/*
- * Compat syscalls set TS_COMPAT. Make sure we clear it before
- * returning to user mode.
+ * NB: Native and x32 syscalls are dispatched from the same
+ * table. The only functional difference is the x32 bit in
+ * regs->orig_ax, which changes the behavior of some syscalls.
*/
- ti->status &= ~TS_COMPAT;
-#endif
+ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
+ regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+ regs->di, regs->si, regs->dx,
+ regs->r10, regs->r8, regs->r9);
+ }
- local_irq_disable();
- prepare_exit_to_usermode(regs);
+ syscall_return_slowpath(regs);
}
+#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
- * Does a 32-bit syscall. Called with IRQs on and does all entry and
- * exit work and returns with IRQs off. This function is extremely hot
- * in workloads that use it, and it's usually called from
+ * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
+ * all entry and exit work and returns with IRQs off. This function is
+ * extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance.
*/
-#ifdef CONFIG_X86_32
-/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
-__visible
-#else
-/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
-static
-#endif
-__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned int nr = (unsigned int)regs->orig_ax;
@@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
syscall_return_slowpath(regs);
}
-#ifdef CONFIG_X86_64
-/* Handles INT80 on 64-bit kernels */
-__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
+/* Handles int $0x80 */
+__visible void do_int80_syscall_32(struct pt_regs *regs)
{
+ enter_from_user_mode();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
-#endif
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
@@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;
- /*
- * Fetch EBP from where the vDSO stashed it.
- *
- * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
- */
+ enter_from_user_mode();
+
local_irq_enable();
+
+ /* Fetch EBP from where the vDSO stashed it. */
if (
#ifdef CONFIG_X86_64
/*
@@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
-#ifdef CONFIG_CONTEXT_TRACKING
- enter_from_user_mode();
-#endif
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bb3e376d0f33..10868aa734dc 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
@@ -287,14 +287,64 @@ need_resched:
END(resume_kernel)
#endif
- # SYSENTER call handler stub
+GLOBAL(__begin_SYSENTER_singlestep_region)
+/*
+ * All code from here through __end_SYSENTER_singlestep_region is subject
+ * to being single-stepped if a user program sets TF and executes SYSENTER.
+ * There is absolutely nothing that we can do to prevent this from happening
+ * (thanks Intel!). To keep our handling of this situation as simple as
+ * possible, we handle TF just like AC and NT, except that our #DB handler
+ * will ignore all of the single-step traps generated in this range.
+ */
+
+#ifdef CONFIG_XEN
+/*
+ * Xen doesn't set %esp to be precisely what the normal SYSENTER
+ * entry point expects, so fix it up before using the normal path.
+ */
+ENTRY(xen_sysenter_target)
+ addl $5*4, %esp /* remove xen-provided frame */
+ jmp sysenter_past_esp
+#endif
+
+/*
+ * 32-bit SYSENTER entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * if X86_FEATURE_SEP is available. This is the preferred system call
+ * entry on 32-bit systems.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO. In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old EIP (!!!), ESP, or EFLAGS.
+ *
+ * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
+ * user and/or vm86 state), we explicitly disable the SYSENTER
+ * instruction in vm86 mode by reprogramming the MSRs.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp user stack
+ * 0(%ebp) arg6
+ */
ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
- ASM_CLAC /* Clear AC after saving FLAGS */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
@@ -302,6 +352,29 @@ sysenter_past_esp:
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/*
+ * SYSENTER doesn't filter flags, so we need to clear NT, AC
+ * and TF ourselves. To save a few cycles, we can check whether
+ * either was set instead of doing an unconditional popfq.
+ * This needs to happen before enabling interrupts so that
+ * we don't get preempted with NT set.
+ *
+ * If TF is set, we will single-step all the way to here -- do_debug
+ * will ignore all the traps. (Yes, this is slow, but so is
+ * single-stepping in general. This allows us to avoid having
+ * a more complicated code to handle the case where a user program
+ * forces us to single-step through the SYSENTER entry code.)
+ *
+ * NB.: .Lsysenter_fix_flags is a label with the code under it moved
+ * out-of-line as an optimization: NT is unlikely to be set in the
+ * majority of the cases and instead of polluting the I$ unnecessarily,
+ * we're keeping that code behind a branch which will predict as
+ * not-taken and therefore its instructions won't be fetched.
+ */
+ testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
+ jnz .Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
+
+ /*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
*/
@@ -327,6 +400,15 @@ sysenter_past_esp:
popl %eax /* pt_regs->ax */
/*
+ * Restore all flags except IF. (We restore IF separately because
+ * STI gives a one-instruction window in which we won't be interrupted,
+ * whereas POPF does not.)
+ */
+ addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
+ btr $X86_EFLAGS_IF_BIT, (%esp)
+ popfl
+
+ /*
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
@@ -339,28 +421,63 @@ sysenter_past_esp:
.popsection
_ASM_EXTABLE(1b, 2b)
PTGS_TO_GS_EX
+
+.Lsysenter_fix_flags:
+ pushl $X86_EFLAGS_FIXED
+ popfl
+ jmp .Lsysenter_flags_fixed
+GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)
- # system call handler stub
+/*
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction. INT $0x80 lands here.
+ *
+ * This entry point can be used by any 32-bit perform system calls.
+ * Instances of INT $0x80 can be found inline in various programs and
+ * libraries. It is also used by the vDSO's __kernel_vsyscall
+ * fallback for hardware that doesn't support a faster entry method.
+ * Restarted 32-bit system calls also fall back to INT $0x80
+ * regardless of what instruction was originally used to do the system
+ * call. (64-bit programs can use INT $0x80 as well, but they can
+ * only run on 64-bit kernels and therefore land in
+ * entry_INT80_compat.)
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp arg6
+ */
ENTRY(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/*
- * User mode is traced as though IRQs are on. Unlike the 64-bit
- * case, INT80 is a trap gate on 32-bit kernels, so interrupts
- * are already on (unless user code is messing around with iopl).
+ * User mode is traced as though IRQs are on, and the interrupt gate
+ * turned them off.
*/
+ TRACE_IRQS_OFF
movl %esp, %eax
- call do_syscall_32_irqs_on
+ call do_int80_syscall_32
.Lsyscall_32_done:
restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
+ ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
+
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
/*
* Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -387,19 +504,6 @@ ENTRY(iret_exc )
#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
-#ifdef CONFIG_PARAVIRT
- /*
- * The kernel can't run on a non-flat stack if paravirt mode
- * is active. Rather than try to fixup the high bits of
- * ESP, bypass this code entirely. This may break DOSemu
- * and/or Wine support in a paravirt VM, although the option
- * is still available to implement the setting of the high
- * 16-bits in the INTERRUPT_RETURN paravirt-op.
- */
- cmpl $0, pv_info+PARAVIRT_enabled
- jne restore_nocheck
-#endif
-
/*
* Setup and switch to ESPFIX stack
*
@@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug)
END(spurious_interrupt_bug)
#ifdef CONFIG_XEN
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-ENTRY(xen_sysenter_target)
- addl $5*4, %esp /* remove xen-provided frame */
- jmp sysenter_past_esp
-
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
@@ -939,51 +1035,48 @@ error_code:
jmp ret_from_exception
END(page_fault)
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-.macro FIX_STACK offset ok label
- cmpw $__KERNEL_CS, 4(%esp)
- jne \ok
-\label:
- movl TSS_sysenter_sp0 + \offset(%esp), %esp
- pushfl
- pushl $__KERNEL_CS
- pushl $sysenter_past_esp
-.endm
-
ENTRY(debug)
+ /*
+ * #DB can happen at the first instruction of
+ * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
+ * happens, then we will be running on a very small stack. We
+ * need to detect this condition and switch to the thread
+ * stack before calling any C code at all.
+ *
+ * If you edit this code, keep in mind that NMIs can happen in here.
+ */
ASM_CLAC
- cmpl $entry_SYSENTER_32, (%esp)
- jne debug_stack_correct
- FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
-debug_stack_correct:
pushl $-1 # mark this as an int
SAVE_ALL
- TRACE_IRQS_OFF
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer
+
+ /* Are we currently on the SYSENTER stack? */
+ PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+ subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+ cmpl $SIZEOF_SYSENTER_stack, %ecx
+ jb .Ldebug_from_sysenter_stack
+
+ TRACE_IRQS_OFF
+ call do_debug
+ jmp ret_from_exception
+
+.Ldebug_from_sysenter_stack:
+ /* We're on the SYSENTER stack. Switch off. */
+ movl %esp, %ebp
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+ TRACE_IRQS_OFF
call do_debug
+ movl %ebp, %esp
jmp ret_from_exception
END(debug)
/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
+ * NMI is doubly nasty. It can happen on the first instruction of
+ * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
+ * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
+ * switched stacks. We handle both conditions by simply checking whether we
+ * interrupted kernel code running on the SYSENTER stack.
*/
ENTRY(nmi)
ASM_CLAC
@@ -994,41 +1087,32 @@ ENTRY(nmi)
popl %eax
je nmi_espfix_stack
#endif
- cmpl $entry_SYSENTER_32, (%esp)
- je nmi_stack_fixup
- pushl %eax
- movl %esp, %eax
- /*
- * Do not access memory above the end of our stack page,
- * it might not exist.
- */
- andl $(THREAD_SIZE-1), %eax
- cmpl $(THREAD_SIZE-20), %eax
- popl %eax
- jae nmi_stack_correct
- cmpl $entry_SYSENTER_32, 12(%esp)
- je nmi_debug_stack_check
-nmi_stack_correct:
- pushl %eax
+
+ pushl %eax # pt_regs->orig_ax
SAVE_ALL
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
+
+ /* Are we currently on the SYSENTER stack? */
+ PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+ subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+ cmpl $SIZEOF_SYSENTER_stack, %ecx
+ jb .Lnmi_from_sysenter_stack
+
+ /* Not on SYSENTER stack. */
call do_nmi
jmp restore_all_notrace
-nmi_stack_fixup:
- FIX_STACK 12, nmi_stack_correct, 1
- jmp nmi_stack_correct
-
-nmi_debug_stack_check:
- cmpw $__KERNEL_CS, 16(%esp)
- jne nmi_stack_correct
- cmpl $debug, (%esp)
- jb nmi_stack_correct
- cmpl $debug_esp_fix_insn, (%esp)
- ja nmi_stack_correct
- FIX_STACK 24, nmi_stack_correct, 1
- jmp nmi_stack_correct
+.Lnmi_from_sysenter_stack:
+ /*
+ * We're on the SYSENTER stack. Switch off. No one (not even debug)
+ * is using the thread stack right now, so it's safe for us to use it.
+ */
+ movl %esp, %ebp
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+ call do_nmi
+ movl %ebp, %esp
+ jmp restore_all_notrace
#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9d34d3cfceb6..858b555e274b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64)
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
+ * This is the only entry point used for 64-bit system calls. The
+ * hardware interface is reasonably well designed and the register to
+ * argument mapping Linux uses fits well with the registers that are
+ * available when SYSCALL is used.
+ *
+ * SYSCALL instructions can be found inlined in libc implementations as
+ * well as some other programs and libraries. There are also a handful
+ * of SYSCALL instructions in the vDSO used, for example, as a
+ * clock_gettimeofday fallback.
+ *
* 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
@@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ TRACE_IRQS_OFF
+
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
- /*
- * Re-enable interrupts.
- * We use 'rsp_scratch' as a scratch space, hence irq-off block above
- * must execute atomically in the face of possible interrupt-driven
- * task preemption. We must enable interrupts only after we're done
- * with using rsp_scratch:
- */
- ENABLE_INTERRUPTS(CLBR_NONE)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
@@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz tracesys
+ /*
+ * If we need to do entry work or if we guess we'll need to do
+ * exit work, go straight to the slow path.
+ */
+ testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz entry_SYSCALL64_slow_path
+
entry_SYSCALL_64_fastpath:
+ /*
+ * Easy case: enable interrupts and issue the syscall. If the syscall
+ * needs pt_regs, we'll call a stub that disables interrupts again
+ * and jumps to the slow path.
+ */
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max, %rax
#else
@@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
+
+ /*
+ * This call instruction is handled specially in stub_ptregs_64.
+ * It might end up jumping to the slow path. If it jumps, RAX
+ * and all argument registers are clobbered.
+ */
call *sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
movq %rax, RAX(%rsp)
1:
-/*
- * Syscall return path ending with SYSRET (fast path).
- * Has incompletely filled pt_regs.
- */
- LOCKDEP_SYS_EXIT
- /*
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
/*
- * We must check ti flags with interrupts (or at least preemption)
- * off because we must *never* return to userspace without
- * processing exit work that is enqueued if we're preempted here.
- * In particular, returning to userspace with any of the one-shot
- * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
- * very bad.
+ * If we get here, then we know that pt_regs is clean for SYSRET64.
+ * If we see that no exit work is required (which we are required
+ * to check with IRQs off), then we can go straight to SYSRET64.
*/
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
+ jnz 1f
- RESTORE_C_REGS_EXCEPT_RCX_R11
+ LOCKDEP_SYS_EXIT
+ TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
- /*
- * 64-bit SYSRET restores rip from rcx,
- * rflags from r11 (but RF and VM bits are forced to 0),
- * cs and ss are loaded from MSRs.
- * Restoration of rflags re-enables interrupts.
- *
- * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
- * descriptor is not reinitialized. This means that we should
- * avoid SYSRET with SS == NULL, which could happen if we schedule,
- * exit the kernel, and re-enter using an interrupt vector. (All
- * interrupt entries on x86_64 set SS to NULL.) We prevent that
- * from happening by reloading SS in __switch_to. (Actually
- * detecting the failure in 64-bit userspace is tricky but can be
- * done.)
- */
USERGS_SYSRET64
-GLOBAL(int_ret_from_sys_call_irqs_off)
+1:
+ /*
+ * The fast path looked good when we started, but something changed
+ * along the way and we need to switch to the slow path. Calling
+ * raise(3) will trigger this, for example. IRQs are off.
+ */
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- jmp int_ret_from_sys_call
-
- /* Do syscall entry tracing */
-tracesys:
- movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- call syscall_trace_enter_phase1
- test %rax, %rax
- jnz tracesys_phase2 /* if needed, run the slow path */
- RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
- movq ORIG_RAX(%rsp), %rax
- jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
-
-tracesys_phase2:
SAVE_EXTRA_REGS
movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- movq %rax, %rdx
- call syscall_trace_enter_phase2
-
- /*
- * Reload registers from stack in case ptrace changed them.
- * We don't reload %rax because syscall_trace_entry_phase2() returned
- * the value it wants us to use in the table lookup.
- */
- RESTORE_C_REGS_EXCEPT_RAX
- RESTORE_EXTRA_REGS
-#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max, %rax
-#else
- andl $__SYSCALL_MASK, %eax
- cmpl $__NR_syscall_max, %eax
-#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10, %rcx /* fixup for C */
- call *sys_call_table(, %rax, 8)
- movq %rax, RAX(%rsp)
-1:
- /* Use IRET because user could have changed pt_regs->foo */
+ call syscall_return_slowpath /* returns with IRQs disabled */
+ jmp return_from_SYSCALL_64
-/*
- * Syscall return path ending with IRET.
- * Has correct iret frame.
- */
-GLOBAL(int_ret_from_sys_call)
+entry_SYSCALL64_slow_path:
+ /* IRQs are off. */
SAVE_EXTRA_REGS
movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
+ call do_syscall_64 /* returns with IRQs disabled */
+
+return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -355,83 +324,45 @@ opportunistic_sysret_failed:
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
+ENTRY(stub_ptregs_64)
+ /*
+ * Syscalls marked as needing ptregs land here.
+ * If we are on the fast path, we need to save the extra regs,
+ * which we achieve by trying again on the slow path. If we are on
+ * the slow path, the extra regs are already saved.
+ *
+ * RAX stores a pointer to the C function implementing the syscall.
+ * IRQs are on.
+ */
+ cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+ jne 1f
- .macro FORK_LIKE func
-ENTRY(stub_\func)
- SAVE_EXTRA_REGS 8
- jmp sys_\func
-END(stub_\func)
- .endm
-
- FORK_LIKE clone
- FORK_LIKE fork
- FORK_LIKE vfork
-
-ENTRY(stub_execve)
- call sys_execve
-return_from_execve:
- testl %eax, %eax
- jz 1f
- /* exec failed, can use fast SYSRET code path in this case */
- ret
-1:
- /* must use IRET code path (pt_regs->cs may have changed) */
- addq $8, %rsp
- ZERO_EXTRA_REGS
- movq %rax, RAX(%rsp)
- jmp int_ret_from_sys_call
-END(stub_execve)
-/*
- * Remaining execve stubs are only 7 bytes long.
- * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
- */
- .align 8
-GLOBAL(stub_execveat)
- call sys_execveat
- jmp return_from_execve
-END(stub_execveat)
-
-#if defined(CONFIG_X86_X32_ABI)
- .align 8
-GLOBAL(stub_x32_execve)
- call compat_sys_execve
- jmp return_from_execve
-END(stub_x32_execve)
- .align 8
-GLOBAL(stub_x32_execveat)
- call compat_sys_execveat
- jmp return_from_execve
-END(stub_x32_execveat)
-#endif
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
/*
- * SAVE_EXTRA_REGS result is not normally needed:
- * sigreturn overwrites all pt_regs->GPREGS.
- * But sigreturn can fail (!), and there is no easy way to detect that.
- * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
- * we SAVE_EXTRA_REGS here.
+ * Called from fast path -- disable IRQs again, pop return address
+ * and jump to slow path
*/
- SAVE_EXTRA_REGS 8
- call sys_rt_sigreturn
-return_from_stub:
- addq $8, %rsp
- RESTORE_EXTRA_REGS
- movq %rax, RAX(%rsp)
- jmp int_ret_from_sys_call
-END(stub_rt_sigreturn)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ popq %rax
+ jmp entry_SYSCALL64_slow_path
-#ifdef CONFIG_X86_X32_ABI
-ENTRY(stub_x32_rt_sigreturn)
- SAVE_EXTRA_REGS 8
- call sys32_x32_rt_sigreturn
- jmp return_from_stub
-END(stub_x32_rt_sigreturn)
-#endif
+1:
+ /* Called from C */
+ jmp *%rax /* called from C */
+END(stub_ptregs_64)
+
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+ leaq \func(%rip), %rax
+ jmp stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
/*
* A newly forked process directly context switches into this address.
@@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn)
* rdi: prev task we switched from
*/
ENTRY(ret_from_fork)
-
LOCK ; btr $TIF_FORK, TI_flags(%r8)
pushq $0x0002
@@ -447,28 +377,32 @@ ENTRY(ret_from_fork)
call schedule_tail /* rdi: 'prev' task parameter */
- RESTORE_EXTRA_REGS
-
testb $3, CS(%rsp) /* from kernel_thread? */
+ jnz 1f
/*
- * By the time we get here, we have no idea whether our pt_regs,
- * ti flags, and ti status came from the 64-bit SYSCALL fast path,
- * the slow path, or one of the 32-bit compat paths.
- * Use IRET code path to return, since it can safely handle
- * all of the above.
+ * We came from kernel_thread. This code path is quite twisted, and
+ * someone should clean it up.
+ *
+ * copy_thread_tls stashes the function pointer in RBX and the
+ * parameter to be passed in RBP. The called function is permitted
+ * to call do_execve and thereby jump to user mode.
*/
- jnz int_ret_from_sys_call
+ movq RBP(%rsp), %rdi
+ call *RBX(%rsp)
+ movl $0, RAX(%rsp)
/*
- * We came from kernel_thread
- * nb: we depend on RESTORE_EXTRA_REGS above
+ * Fall through as though we're exiting a syscall. This makes a
+ * twisted sort of sense if we just called do_execve.
*/
- movq %rbp, %rdi
- call *%rbx
- movl $0, RAX(%rsp)
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+
+1:
+ movq %rsp, %rdi
+ call syscall_return_slowpath /* returns with IRQs disabled */
+ TRACE_IRQS_ON /* user mode is traced as IRQS on */
+ SWAPGS
+ jmp restore_regs_and_iret
END(ret_from_fork)
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 3c990eeee40b..847f2f0c31e5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -19,12 +19,21 @@
.section .entry.text, "ax"
/*
- * 32-bit SYSENTER instruction entry.
+ * 32-bit SYSENTER entry.
*
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
- * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on Intel CPUs.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO. In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
- * and does not save old rip (!!!) and rflags.
+ * and does not save old RIP (!!!), RSP, or RFLAGS.
*
* Arguments:
* eax system call number
@@ -35,10 +44,6 @@
* edi arg5
* ebp user stack
* 0(%ebp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
@@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat)
*/
pushfq /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
- ASM_CLAC /* Clear AC after saving FLAGS */
-
pushq $__USER32_CS /* pt_regs->cs */
xorq %r8,%r8
pushq %r8 /* pt_regs->ip = 0 (placeholder) */
@@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat)
cld
/*
- * Sysenter doesn't filter flags, so we need to clear NT
+ * SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
- * NT was set instead of doing an unconditional popfq.
+ * either was set instead of doing an unconditional popfq.
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
+ * If TF is set, we will single-step all the way to here -- do_debug
+ * will ignore all the traps. (Yes, this is slow, but so is
+ * single-stepping in general. This allows us to avoid having
+ * a more complicated code to handle the case where a user program
+ * forces us to single-step through the SYSENTER entry code.)
+ *
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
- testl $X86_EFLAGS_NT, EFLAGS(%rsp)
+ testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
@@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat)
pushq $X86_EFLAGS_FIXED
popfq
jmp .Lsysenter_flags_fixed
+GLOBAL(__end_entry_SYSENTER_compat)
ENDPROC(entry_SYSENTER_compat)
/*
- * 32-bit SYSCALL instruction entry.
+ * 32-bit SYSCALL entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on AMD CPUs.
+ *
+ * The SYSCALL instruction, in principle, should *only* occur in the
+ * vDSO. In practice, it appears that this really is the case.
+ * As evidence:
+ *
+ * - The calling convention for SYSCALL has changed several times without
+ * anyone noticing.
*
- * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
- * then loads new ss, cs, and rip from previously programmed MSRs.
- * rflags gets masked by a value from another MSR (so CLD and CLAC
- * are not needed). SYSCALL does not save anything on the stack
- * and does not change rsp.
+ * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
+ * user task that did SYSCALL without immediately reloading SS
+ * would randomly crash.
*
- * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * - Most programmers do not directly target AMD CPUs, and the 32-bit
+ * SYSCALL instruction does not exist on Intel CPUs. Even on AMD
+ * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
+ * because the SYSCALL instruction in legacy/native 32-bit mode (as
+ * opposed to compat mode) is sufficiently poorly designed as to be
+ * essentially unusable.
+ *
+ * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
+ * RFLAGS to R11, then loads new SS, CS, and RIP from previously
+ * programmed MSRs. RFLAGS gets masked by a value from another MSR
+ * (so CLD and CLAC are not needed). SYSCALL does not save anything on
+ * the stack and does not change RSP.
+ *
+ * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
*
@@ -236,7 +267,21 @@ sysret32_from_system_call:
END(entry_SYSCALL_compat)
/*
- * Emulated IA32 system calls via int 0x80.
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction. INT $0x80 lands here.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+ * various programs and libraries. It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method. Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
*
* Arguments:
* eax system call number
@@ -245,17 +290,8 @@ END(entry_SYSCALL_compat)
* edx arg3
* esi arg4
* edi arg5
- * ebp arg6 (note: not saved in the stack frame, should not be touched)
- *
- * Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except eax must be saved (but ptrace may violate that).
- * Arguments are zero extended. For system calls that want sign extension and
- * take long arguments a wrapper is needed. Most calls can just be called
- * directly.
- * Assumes it is only called from user space and entered with interrupts off.
+ * ebp arg6
*/
-
ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
@@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_OFF
movq %rsp, %rdi
- call do_syscall_32_irqs_off
+ call do_int80_syscall_32
.Lsyscall_32_done:
/* Go back to user mode. */
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 9a6649857106..8f895ee13a1c 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -6,17 +6,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#ifdef CONFIG_IA32_EMULATION
-#define SYM(sym, compat) compat
-#else
-#define SYM(sym, compat) sym
-#endif
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
+#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 41283d22be7a..9dbc5abb6162 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,19 +6,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index dc1040a50bdc..2e5b565adacc 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -21,7 +21,7 @@
12 common brk sys_brk
13 64 rt_sigaction sys_rt_sigaction
14 common rt_sigprocmask sys_rt_sigprocmask
-15 64 rt_sigreturn stub_rt_sigreturn
+15 64 rt_sigreturn sys_rt_sigreturn/ptregs
16 64 ioctl sys_ioctl
17 common pread64 sys_pread64
18 common pwrite64 sys_pwrite64
@@ -62,10 +62,10 @@
53 common socketpair sys_socketpair
54 64 setsockopt sys_setsockopt
55 64 getsockopt sys_getsockopt
-56 common clone stub_clone
-57 common fork stub_fork
-58 common vfork stub_vfork
-59 64 execve stub_execve
+56 common clone sys_clone/ptregs
+57 common fork sys_fork/ptregs
+58 common vfork sys_vfork/ptregs
+59 64 execve sys_execve/ptregs
60 common exit sys_exit
61 common wait4 sys_wait4
62 common kill sys_kill
@@ -178,7 +178,7 @@
169 common reboot sys_reboot
170 common sethostname sys_sethostname
171 common setdomainname sys_setdomainname
-172 common iopl sys_iopl
+172 common iopl sys_iopl/ptregs
173 common ioperm sys_ioperm
174 64 create_module
175 common init_module sys_init_module
@@ -328,7 +328,7 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
-322 64 execveat stub_execveat
+322 64 execveat sys_execveat/ptregs
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
@@ -339,14 +339,14 @@
# for native 64-bit operation.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
-513 x32 rt_sigreturn stub_x32_rt_sigreturn
+513 x32 rt_sigreturn sys32_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
515 x32 readv compat_sys_readv
516 x32 writev compat_sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
-520 x32 execve stub_x32_execve
+520 x32 execve compat_sys_execve/ptregs
521 x32 ptrace compat_sys_ptrace
522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
@@ -371,4 +371,4 @@
542 x32 getsockopt compat_sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
-545 x32 execveat stub_x32_execveat
+545 x32 execveat compat_sys_execveat/ptregs
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 0e7f8ec071e7..cd3d3015d7df 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -3,13 +3,63 @@
in="$1"
out="$2"
+syscall_macro() {
+ abi="$1"
+ nr="$2"
+ entry="$3"
+
+ # Entry can be either just a function name or "function/qualifier"
+ real_entry="${entry%%/*}"
+ qualifier="${entry:${#real_entry}}" # Strip the function name
+ qualifier="${qualifier:1}" # Strip the slash, if any
+
+ echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+}
+
+emit() {
+ abi="$1"
+ nr="$2"
+ entry="$3"
+ compat="$4"
+
+ if [ "$abi" == "64" -a -n "$compat" ]; then
+ echo "a compat entry for a 64-bit syscall makes no sense" >&2
+ exit 1
+ fi
+
+ if [ -z "$compat" ]; then
+ if [ -n "$entry" ]; then
+ syscall_macro "$abi" "$nr" "$entry"
+ fi
+ else
+ echo "#ifdef CONFIG_X86_32"
+ if [ -n "$entry" ]; then
+ syscall_macro "$abi" "$nr" "$entry"
+ fi
+ echo "#else"
+ syscall_macro "$abi" "$nr" "$compat"
+ echo "#endif"
+ fi
+}
+
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
- if [ -n "$compat" ]; then
- echo "__SYSCALL_${abi}($nr, $entry, $compat)"
- elif [ -n "$entry" ]; then
- echo "__SYSCALL_${abi}($nr, $entry, $entry)"
+ if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
+ # COMMON is the same as 64, except that we don't expect X32
+ # programs to use it. Our expectation has nothing to do with
+ # any generated code, so treat them the same.
+ emit 64 "$nr" "$entry" "$compat"
+ elif [ "$abi" == "X32" ]; then
+ # X32 is equivalent to 64 on an X32-compatible kernel.
+ echo "#ifdef CONFIG_X86_X32_ABI"
+ emit 64 "$nr" "$entry" "$compat"
+ echo "#endif"
+ elif [ "$abi" == "I386" ]; then
+ emit "$abi" "$nr" "$entry" "$compat"
+ else
+ echo "Unknown abi $abi" >&2
+ exit 1
fi
done
) > "$out"
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 0224987556ce..63a03bb91497 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "#include <asm/vdso.h>\n");
fprintf(outfile, "\n");
fprintf(outfile,
- "static unsigned char raw_data[%lu] __page_aligned_data = {",
+ "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
mapping_size);
for (j = 0; j < stripped_len; j++) {
if (j % 10 == 0)
@@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "static struct page *pages[%lu];\n\n",
- mapping_size / 4096);
-
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
- fprintf(outfile, "\t.text_mapping = {\n");
- fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
- fprintf(outfile, "\t\t.pages = pages,\n");
- fprintf(outfile, "\t},\n");
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
(unsigned long)GET_LE(&alt_sec->sh_offset));
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 08a317a9ae4b..7853b53959cd 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/mm_types.h>
-#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/vdso.h>
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 3a1d9297074b..0109ac6cb79c 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -3,7 +3,7 @@
*/
#include <asm/dwarf2.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b8f69e264ac4..10f704584922 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -20,6 +20,7 @@
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>
+#include <asm/cpufeature.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1;
void __init init_vdso_image(const struct vdso_image *image)
{
- int i;
- int npages = (image->size) / PAGE_SIZE;
-
BUG_ON(image->size % PAGE_SIZE != 0);
- for (i = 0; i < npages; i++)
- image->text_mapping.pages[i] =
- virt_to_page(image->data + i*PAGE_SIZE);
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
@@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
#endif
}
+static int vdso_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+
+ if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+ get_page(vmf->page);
+ return 0;
+}
+
+static const struct vm_special_mapping text_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+};
+
+static int vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+ long sym_offset;
+ int ret = -EFAULT;
+
+ if (!image)
+ return VM_FAULT_SIGBUS;
+
+ sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
+ image->sym_vvar_start;
+
+ /*
+ * Sanity check: a symbol offset of zero means that the page
+ * does not exist for this vdso image, not that the page is at
+ * offset zero relative to the text mapping. This should be
+ * impossible here, because sym_offset should only be zero for
+ * the page past the end of the vvar mapping.
+ */
+ if (sym_offset == 0)
+ return VM_FAULT_SIGBUS;
+
+ if (sym_offset == image->sym_vvar_page) {
+ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+ __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+ } else if (sym_offset == image->sym_hpet_page) {
+#ifdef CONFIG_HPET_TIMER
+ if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
+ ret = vm_insert_pfn_prot(
+ vma,
+ (unsigned long)vmf->virtual_address,
+ hpet_address >> PAGE_SHIFT,
+ pgprot_noncached(PAGE_READONLY));
+ }
+#endif
+ } else if (sym_offset == image->sym_pvclock_page) {
+ struct pvclock_vsyscall_time_info *pvti =
+ pvclock_pvti_cpu0_va();
+ if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+ ret = vm_insert_pfn(
+ vma,
+ (unsigned long)vmf->virtual_address,
+ __pa(pvti) >> PAGE_SHIFT);
+ }
+ }
+
+ if (ret == 0 || ret == -EBUSY)
+ return VM_FAULT_NOPAGE;
+
+ return VM_FAULT_SIGBUS;
+}
+
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
- static struct page *no_pages[] = {NULL};
- static struct vm_special_mapping vvar_mapping = {
+ static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
- .pages = no_pages,
+ .fault = vvar_fault,
};
- struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
+ current->mm->context.vdso_image = image;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &image->text_mapping);
+ &text_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
vma = _install_special_mapping(mm,
addr,
-image->sym_vvar_start,
- VM_READ|VM_MAYREAD,
+ VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+ VM_PFNMAP,
&vvar_mapping);
if (IS_ERR(vma)) {
@@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
goto up_fail;
}
- if (image->sym_vvar_page)
- ret = remap_pfn_range(vma,
- text_start + image->sym_vvar_page,
- __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
- PAGE_SIZE,
- PAGE_READONLY);
-
- if (ret)
- goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
- if (hpet_address && image->sym_hpet_page) {
- ret = io_remap_pfn_range(vma,
- text_start + image->sym_hpet_page,
- hpet_address >> PAGE_SHIFT,
- PAGE_SIZE,
- pgprot_noncached(PAGE_READONLY));
-
- if (ret)
- goto up_fail;
- }
-#endif
-
- pvti = pvclock_pvti_cpu0_va();
- if (pvti && image->sym_pvclock_page) {
- ret = remap_pfn_range(vma,
- text_start + image->sym_pvclock_page,
- __pa(pvti) >> PAGE_SHIFT,
- PAGE_SIZE,
- PAGE_READONLY);
-
- if (ret)
- goto up_fail;
- }
-
up_fail:
if (ret)
current->mm->context.vdso = NULL;
@@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
#ifdef CONFIG_NUMA
node = cpu_to_node(cpu);
#endif
- if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+ if (static_cpu_has(X86_FEATURE_RDTSCP))
write_rdtscp_aux((node << 12) | cpu);
/*
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e330416995..0fb3a104ac62 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,6 +16,8 @@
#include <asm/vgtod.h>
#include <asm/vvar.h>
+int vclocks_used __read_mostly;
+
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
void update_vsyscall_tz(void)
@@ -26,12 +28,17 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
+ int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
+ /* Mark the new vclock used. */
+ BUILD_BUG_ON(VCLOCK_MAX >= 32);
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
gtod_write_begin(vdata);
/* copy vsyscall data */
- vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
new file mode 100644
index 000000000000..fdfea1511cc0
--- /dev/null
+++ b/arch/x86/events/Makefile
@@ -0,0 +1,13 @@
+obj-y += core.o
+
+obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o
+obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
+endif
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c
index 58610539b048..049ada8d4e9c 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/events/amd/core.c
@@ -5,7 +5,7 @@
#include <linux/slab.h>
#include <asm/apicdef.h>
-#include "perf_event.h"
+#include "../perf_event.h"
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c
index 989d3c215d2b..51087c29b2c2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -14,7 +14,7 @@
#include <asm/apic.h>
-#include "perf_event.h"
+#include "../perf_event.h"
static u32 ibs_caps;
@@ -670,7 +670,7 @@ static __init int perf_event_ibs_init(void)
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
- printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+ pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
}
@@ -774,14 +774,14 @@ static int setup_ibs_ctl(int ibs_eilvt_off)
pci_read_config_dword(cpu_cfg, IBSCTL, &value);
if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
pci_dev_put(cpu_cfg);
- printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
- "IBSCTL = 0x%08x\n", value);
+ pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
+ value);
return -EINVAL;
}
} while (1);
if (!nodes) {
- printk(KERN_DEBUG "No CPU node configured for IBS\n");
+ pr_debug("No CPU node configured for IBS\n");
return -ENODEV;
}
@@ -810,7 +810,7 @@ static void force_ibs_eilvt_setup(void)
preempt_enable();
if (offset == APIC_EILVT_NR_MAX) {
- printk(KERN_DEBUG "No EILVT entry available\n");
+ pr_debug("No EILVT entry available\n");
return;
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c
index 97242a9242bd..635e5eba0caf 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -16,8 +16,8 @@
#include <linux/cpumask.h>
#include <linux/slab.h>
-#include "perf_event.h"
-#include "perf_event_amd_iommu.h"
+#include "../perf_event.h"
+#include "iommu.h"
#define COUNTER_SHIFT 16
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h
index 845d173278e3..845d173278e3 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.h
+++ b/arch/x86/events/amd/iommu.h
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c
index 8836fc9fa84b..3db9569e658c 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -538,7 +538,7 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
- printk(KERN_INFO "perf: AMD NB counters detected\n");
+ pr_info("perf: AMD NB counters detected\n");
ret = 0;
}
@@ -552,7 +552,7 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_l2;
- printk(KERN_INFO "perf: AMD L2I counters detected\n");
+ pr_info("perf: AMD L2I counters detected\n");
ret = 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c
index d276b31ca473..9b6ad08aa51a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/events/core.c
@@ -254,15 +254,16 @@ static bool check_hw_exists(void)
* We still allow the PMU driver to operate:
*/
if (bios_fail) {
- printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
- printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
+ pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
+ pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
+ reg_fail, val_fail);
}
return true;
msr_fail:
- printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
- printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+ pr_cont("Broken PMU hardware detected, using software events only.\n");
+ pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
reg, val_new);
@@ -596,6 +597,19 @@ void x86_pmu_disable_all(void)
}
}
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
static void x86_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c
index 2cad71d1b14c..b99dc9258c0f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -26,7 +26,7 @@
#include <asm-generic/sizes.h>
#include <asm/perf_event.h>
-#include "perf_event.h"
+#include "../perf_event.h"
struct bts_ctx {
struct perf_output_handle handle;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c
index fed2ab1f1065..68fa55b4d42e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/events/intel/core.c
@@ -18,7 +18,7 @@
#include <asm/hardirq.h>
#include <asm/apic.h>
-#include "perf_event.h"
+#include "../perf_event.h"
/*
* Intel PerfMon, used on Core and later.
@@ -1502,7 +1502,15 @@ static __initconst const u64 knl_hw_cache_extra_regs
};
/*
- * Use from PMIs where the LBRs are already disabled.
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function could be called consecutively. It is required to remain in
+ * disabled state if called consecutively.
+ *
+ * During consecutive calls, the same disable value will be written to related
+ * registers, so the PMU state remains unchanged. hw.state in
+ * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
+ * calls.
*/
static void __intel_pmu_disable_all(void)
{
@@ -1884,6 +1892,16 @@ again:
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
handled++;
x86_pmu.drain_pebs(regs);
+ /*
+ * There are cases where, even though, the PEBS ovfl bit is set
+ * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+ * overflow bits set for their counters. We must clear them
+ * here because they have been processed as exact samples in
+ * the drain_pebs() routine. They must not be processed again
+ * in the for_each_bit_set() loop for regular samples below.
+ */
+ status &= ~cpuc->pebs_enabled;
+ status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
}
/*
@@ -1929,7 +1947,10 @@ again:
goto again;
done:
- __intel_pmu_enable_all(0, true);
+ /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+ if (cpuc->enabled)
+ __intel_pmu_enable_all(0, true);
+
/*
* Only unmask the NMI after the overflow counters
* have been reset. This avoids spurious NMIs on
@@ -3396,6 +3417,7 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+ intel_pmu_pebs_data_source_nhm();
x86_add_quirk(intel_nehalem_quirk);
pr_cont("Nehalem events, ");
@@ -3459,6 +3481,7 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+ intel_pmu_pebs_data_source_nhm();
pr_cont("Westmere events, ");
break;
@@ -3581,7 +3604,7 @@ __init int intel_pmu_init(void)
intel_pmu_lbr_init_hsw();
x86_pmu.event_constraints = intel_bdw_event_constraints;
- x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c
index a316ca96f1b6..93cb412a5579 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -7,7 +7,7 @@
#include <linux/perf_event.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
#define MSR_IA32_PQR_ASSOC 0x0c8f
#define MSR_IA32_QM_CTR 0x0c8e
@@ -1244,15 +1244,12 @@ static struct pmu intel_cqm_pmu = {
static inline void cqm_pick_event_reader(int cpu)
{
- int phys_id = topology_physical_package_id(cpu);
- int i;
+ int reader;
- for_each_cpu(i, &cqm_cpumask) {
- if (phys_id == topology_physical_package_id(i))
- return; /* already got reader for this socket */
- }
-
- cpumask_set_cpu(cpu, &cqm_cpumask);
+ /* First online cpu in package becomes the reader */
+ reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
+ if (reader >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cqm_cpumask);
}
static void intel_cqm_cpu_starting(unsigned int cpu)
@@ -1270,24 +1267,17 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
static void intel_cqm_cpu_exit(unsigned int cpu)
{
- int phys_id = topology_physical_package_id(cpu);
- int i;
+ int target;
- /*
- * Is @cpu a designated cqm reader?
- */
+ /* Is @cpu the current cqm reader for this package ? */
if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
return;
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
+ /* Find another online reader in this package */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
- if (phys_id == topology_physical_package_id(i)) {
- cpumask_set_cpu(i, &cqm_cpumask);
- break;
- }
- }
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &cqm_cpumask);
}
static int intel_cqm_cpu_notifier(struct notifier_block *nb,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c
index 75a38b5a2e26..7946c4231169 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -89,7 +89,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c
index 10602f0a438f..ce7211a07c0b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -5,7 +5,7 @@
#include <asm/perf_event.h>
#include <asm/insn.h>
-#include "perf_event.h"
+#include "../perf_event.h"
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
@@ -51,7 +51,8 @@ union intel_x86_pebs_dse {
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
-static const u64 pebs_data_source[] = {
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -70,6 +71,14 @@ static const u64 pebs_data_source[] = {
OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
};
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+ pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
+ pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+ pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+}
+
static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -269,7 +278,7 @@ static int alloc_pebs_buffer(int cpu)
if (!x86_pmu.pebs)
return 0;
- buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
+ buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
@@ -286,7 +295,7 @@ static int alloc_pebs_buffer(int cpu)
per_cpu(insn_buffer, cpu) = ibuffer;
}
- max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+ max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
ds->pebs_index = ds->pebs_buffer_base;
@@ -722,6 +731,30 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_bdw_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+
struct event_constraint intel_skl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
@@ -1319,19 +1352,28 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
int format = x86_pmu.intel_cap.pebs_format;
switch (format) {
case 0:
- printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
+ pr_cont("PEBS fmt0%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+ /*
+ * Using >PAGE_SIZE buffers makes the WRMSR to
+ * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+ * mysteriously hang on Core2.
+ *
+ * As a workaround, we don't do this.
+ */
+ x86_pmu.pebs_buffer_size = PAGE_SIZE;
x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
break;
case 1:
- printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
+ pr_cont("PEBS fmt1%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
@@ -1351,7 +1393,7 @@ void __init intel_ds_init(void)
break;
default:
- printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
+ pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c
index 5b0c232d1ee6..548d5f774b07 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -5,7 +5,7 @@
#include <asm/hardirq.h>
-#include "perf_event.h"
+#include "../perf_event.h"
static const u64 knc_perfmon_event_map[] =
{
@@ -263,7 +263,9 @@ again:
goto again;
done:
- knc_pmu_enable_all(0);
+ /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+ if (cpuc->enabled)
+ knc_pmu_enable_all(0);
return handled;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c
index 653f88d25987..69dd11887dd1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -5,7 +5,7 @@
#include <asm/msr.h>
#include <asm/insn.h>
-#include "perf_event.h"
+#include "../perf_event.h"
enum {
LBR_FORMAT_32 = 0x00,
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c
index f2e56783af3d..0a5ede187d9c 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -13,7 +13,7 @@
#include <asm/hardirq.h>
#include <asm/apic.h>
-#include "perf_event.h"
+#include "../perf_event.h"
#define P4_CNTR_LIMIT 3
/*
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c
index 7c1a0c07b607..1f5c47ab4c65 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -1,7 +1,7 @@
#include <linux/perf_event.h>
#include <linux/types.h>
-#include "perf_event.h"
+#include "../perf_event.h"
/*
* Not sure about some of these
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c
index c0bbd1033b7c..6af7cf71d6b2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -29,8 +29,8 @@
#include <asm/io.h>
#include <asm/intel_pt.h>
-#include "perf_event.h"
-#include "intel_pt.h"
+#include "../perf_event.h"
+#include "pt.h"
static DEFINE_PER_CPU(struct pt, pt_ctx);
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h
index 336878a5d205..336878a5d205 100644
--- a/arch/x86/kernel/cpu/intel_pt.h
+++ b/arch/x86/events/intel/pt.h
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c
index 24a351ad628d..b834a3f55a01 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -44,11 +44,14 @@
* the duration of the measurement. Tools may use a function such as
* ldexp(raw_count, -32);
*/
+
+#define pr_fmt(fmt) "RAPL PMU: " fmt
+
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
/*
* RAPL energy status counters
@@ -107,7 +110,7 @@ static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
static struct kobj_attribute format_attr_##_var = \
__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+#define RAPL_CNTR_WIDTH 32
#define RAPL_EVENT_ATTR_STR(_name, v, str) \
static struct perf_pmu_events_attr event_attr_##v = { \
@@ -117,23 +120,33 @@ static struct perf_pmu_events_attr event_attr_##v = { \
};
struct rapl_pmu {
- spinlock_t lock;
- int n_active; /* number of active events */
- struct list_head active_list;
- struct pmu *pmu; /* pointer to rapl_pmu_class */
- ktime_t timer_interval; /* in ktime_t unit */
- struct hrtimer hrtimer;
+ raw_spinlock_t lock;
+ int n_active;
+ int cpu;
+ struct list_head active_list;
+ struct pmu *pmu;
+ ktime_t timer_interval;
+ struct hrtimer hrtimer;
};
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
-static struct pmu rapl_pmu_class;
+struct rapl_pmus {
+ struct pmu pmu;
+ unsigned int maxpkg;
+ struct rapl_pmu *pmus[];
+};
+
+ /* 1/2^hw_unit Joule */
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
-static int rapl_cntr_mask;
+static unsigned int rapl_cntr_mask;
+static u64 rapl_timer_ms;
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+{
+ return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+}
-static struct x86_pmu_quirk *rapl_quirks;
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
@@ -141,19 +154,10 @@ static inline u64 rapl_read_counter(struct perf_event *event)
return raw;
}
-#define rapl_add_quirk(func_) \
-do { \
- static struct x86_pmu_quirk __quirk __initdata = { \
- .func = func_, \
- }; \
- __quirk.next = rapl_quirks; \
- rapl_quirks = &__quirk; \
-} while (0)
-
static inline u64 rapl_scale(u64 v, int cfg)
{
if (cfg > NR_RAPL_DOMAINS) {
- pr_warn("invalid domain %d, failed to scale data\n", cfg);
+ pr_warn("Invalid domain %d, failed to scale data\n", cfg);
return v;
}
/*
@@ -206,27 +210,21 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu)
HRTIMER_MODE_REL_PINNED);
}
-static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
-{
- hrtimer_cancel(&pmu->hrtimer);
-}
-
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
struct perf_event *event;
unsigned long flags;
if (!pmu->n_active)
return HRTIMER_NORESTART;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
- list_for_each_entry(event, &pmu->active_list, active_entry) {
+ list_for_each_entry(event, &pmu->active_list, active_entry)
rapl_event_update(event);
- }
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
hrtimer_forward_now(hrtimer, pmu->timer_interval);
@@ -260,28 +258,28 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
static void rapl_pmu_event_start(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = event->pmu_private;
unsigned long flags;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
__rapl_pmu_event_start(pmu, event);
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
}
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
/* mark event as deactivated and stopped */
if (!(hwc->state & PERF_HES_STOPPED)) {
WARN_ON_ONCE(pmu->n_active <= 0);
pmu->n_active--;
if (pmu->n_active == 0)
- rapl_stop_hrtimer(pmu);
+ hrtimer_cancel(&pmu->hrtimer);
list_del(&event->active_entry);
@@ -299,23 +297,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
hwc->state |= PERF_HES_UPTODATE;
}
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
}
static int rapl_pmu_event_add(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (mode & PERF_EF_START)
__rapl_pmu_event_start(pmu, event);
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
return 0;
}
@@ -329,15 +327,19 @@ static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
int bit, msr, ret = 0;
+ struct rapl_pmu *pmu;
/* only look at RAPL events */
- if (event->attr.type != rapl_pmu_class.type)
+ if (event->attr.type != rapl_pmus->pmu.type)
return -ENOENT;
/* check only supported bits are set */
if (event->attr.config & ~RAPL_EVENT_MASK)
return -EINVAL;
+ if (event->cpu < 0)
+ return -EINVAL;
+
/*
* check event is known (determines counter)
*/
@@ -376,6 +378,9 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
/* must be done before validate_group */
+ pmu = cpu_to_rapl_pmu(event->cpu);
+ event->cpu = pmu->cpu;
+ event->pmu_private = pmu;
event->hw.event_base = msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -506,139 +511,62 @@ const struct attribute_group *rapl_attr_groups[] = {
NULL,
};
-static struct pmu rapl_pmu_class = {
- .attr_groups = rapl_attr_groups,
- .task_ctx_nr = perf_invalid_context, /* system-wide only */
- .event_init = rapl_pmu_event_init,
- .add = rapl_pmu_event_add, /* must have */
- .del = rapl_pmu_event_del, /* must have */
- .start = rapl_pmu_event_start,
- .stop = rapl_pmu_event_stop,
- .read = rapl_pmu_event_read,
-};
-
static void rapl_cpu_exit(int cpu)
{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
- int i, phys_id = topology_physical_package_id(cpu);
- int target = -1;
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ int target;
- /* find a new cpu on same package */
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (phys_id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
- /*
- * clear cpu from cpumask
- * if was set in cpumask and still some cpu on package,
- * then move to new cpu
- */
- if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
- cpumask_set_cpu(target, &rapl_cpu_mask);
+ /* Check if exiting cpu is used for collecting rapl events */
+ if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
+ return;
- WARN_ON(cpumask_empty(&rapl_cpu_mask));
- /*
- * migrate events and context to new cpu
- */
- if (target >= 0)
- perf_pmu_migrate_context(pmu->pmu, cpu, target);
+ pmu->cpu = -1;
+ /* Find a new cpu to collect rapl events */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
- /* cancel overflow polling timer for CPU */
- rapl_stop_hrtimer(pmu);
+ /* Migrate rapl events to the new target */
+ if (target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &rapl_cpu_mask);
+ pmu->cpu = target;
+ perf_pmu_migrate_context(pmu->pmu, cpu, target);
+ }
}
static void rapl_cpu_init(int cpu)
{
- int i, phys_id = topology_physical_package_id(cpu);
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ int target;
- /* check if phys_is is already covered */
- for_each_cpu(i, &rapl_cpu_mask) {
- if (phys_id == topology_physical_package_id(i))
- return;
- }
- /* was not found, so add it */
- cpumask_set_cpu(cpu, &rapl_cpu_mask);
-}
-
-static __init void rapl_hsw_server_quirk(void)
-{
/*
- * DRAM domain on HSW server has fixed energy unit which can be
- * different than the unit from power unit MSR.
- * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
- * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+ * Check if there is an online cpu in the package which collects rapl
+ * events already.
*/
- rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+ target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+ if (target < nr_cpu_ids)
+ return;
+
+ cpumask_set_cpu(cpu, &rapl_cpu_mask);
+ pmu->cpu = cpu;
}
static int rapl_cpu_prepare(int cpu)
{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
- int phys_id = topology_physical_package_id(cpu);
- u64 ms;
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
if (pmu)
return 0;
- if (phys_id < 0)
- return -1;
-
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
- return -1;
- spin_lock_init(&pmu->lock);
+ return -ENOMEM;
+ raw_spin_lock_init(&pmu->lock);
INIT_LIST_HEAD(&pmu->active_list);
-
- pmu->pmu = &rapl_pmu_class;
-
- /*
- * use reference of 200W for scaling the timeout
- * to avoid missing counter overflows.
- * 200W = 200 Joules/sec
- * divide interval by 2 to avoid lockstep (2 * 100)
- * if hw unit is 32, then we use 2 ms 1/200/2
- */
- if (rapl_hw_unit[0] < 32)
- ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
- else
- ms = 2;
-
- pmu->timer_interval = ms_to_ktime(ms);
-
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ pmu->cpu = -1;
rapl_hrtimer_init(pmu);
-
- /* set RAPL pmu for this cpu for now */
- per_cpu(rapl_pmu, cpu) = pmu;
- per_cpu(rapl_pmu_to_free, cpu) = NULL;
-
- return 0;
-}
-
-static void rapl_cpu_kfree(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
-
- kfree(pmu);
-
- per_cpu(rapl_pmu_to_free, cpu) = NULL;
-}
-
-static int rapl_cpu_dying(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-
- if (!pmu)
- return 0;
-
- per_cpu(rapl_pmu, cpu) = NULL;
-
- per_cpu(rapl_pmu_to_free, cpu) = pmu;
-
+ rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
return 0;
}
@@ -651,28 +579,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
case CPU_UP_PREPARE:
rapl_cpu_prepare(cpu);
break;
- case CPU_STARTING:
- rapl_cpu_init(cpu);
- break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- rapl_cpu_dying(cpu);
- break;
+
+ case CPU_DOWN_FAILED:
case CPU_ONLINE:
- case CPU_DEAD:
- rapl_cpu_kfree(cpu);
+ rapl_cpu_init(cpu);
break;
+
case CPU_DOWN_PREPARE:
rapl_cpu_exit(cpu);
break;
- default:
- break;
}
-
return NOTIFY_OK;
}
-static int rapl_check_hw_unit(void)
+static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
int i;
@@ -683,28 +603,107 @@ static int rapl_check_hw_unit(void)
for (i = 0; i < NR_RAPL_DOMAINS; i++)
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+ /*
+ * DRAM domain on HSW server and KNL has fixed energy unit which can be
+ * different than the unit from power unit MSR. See
+ * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+ * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+ */
+ if (apply_quirk)
+ rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+
+ /*
+ * Calculate the timer rate:
+ * Use reference of 200W for scaling the timeout to avoid counter
+ * overflows. 200W = 200 Joules/sec
+ * Divide interval by 2 to avoid lockstep (2 * 100)
+ * if hw unit is 32, then we use 2 ms 1/200/2
+ */
+ rapl_timer_ms = 2;
+ if (rapl_hw_unit[0] < 32) {
+ rapl_timer_ms = (1000 / (2 * 100));
+ rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+ }
+ return 0;
+}
+
+static void __init rapl_advertise(void)
+{
+ int i;
+
+ pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+ hweight32(rapl_cntr_mask), rapl_timer_ms);
+
+ for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+ if (rapl_cntr_mask & (1 << i)) {
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_domain_names[i], rapl_hw_unit[i]);
+ }
+ }
+}
+
+static int __init rapl_prepare_cpus(void)
+{
+ unsigned int cpu, pkg;
+ int ret;
+
+ for_each_online_cpu(cpu) {
+ pkg = topology_logical_package_id(cpu);
+ if (rapl_pmus->pmus[pkg])
+ continue;
+
+ ret = rapl_cpu_prepare(cpu);
+ if (ret)
+ return ret;
+ rapl_cpu_init(cpu);
+ }
+ return 0;
+}
+
+static void __init cleanup_rapl_pmus(void)
+{
+ int i;
+
+ for (i = 0; i < rapl_pmus->maxpkg; i++)
+ kfree(rapl_pmus->pmus + i);
+ kfree(rapl_pmus);
+}
+
+static int __init init_rapl_pmus(void)
+{
+ int maxpkg = topology_max_packages();
+ size_t size;
+
+ size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+ rapl_pmus = kzalloc(size, GFP_KERNEL);
+ if (!rapl_pmus)
+ return -ENOMEM;
+
+ rapl_pmus->maxpkg = maxpkg;
+ rapl_pmus->pmu.attr_groups = rapl_attr_groups;
+ rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
+ rapl_pmus->pmu.event_init = rapl_pmu_event_init;
+ rapl_pmus->pmu.add = rapl_pmu_event_add;
+ rapl_pmus->pmu.del = rapl_pmu_event_del;
+ rapl_pmus->pmu.start = rapl_pmu_event_start;
+ rapl_pmus->pmu.stop = rapl_pmu_event_stop;
+ rapl_pmus->pmu.read = rapl_pmu_event_read;
return 0;
}
-static const struct x86_cpu_id rapl_cpu_match[] = {
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
[1] = {},
};
static int __init rapl_pmu_init(void)
{
- struct rapl_pmu *pmu;
- int cpu, ret;
- struct x86_pmu_quirk *quirk;
- int i;
+ bool apply_quirk = false;
+ int ret;
- /*
- * check for Intel processor family 6
- */
if (!x86_match_cpu(rapl_cpu_match))
- return 0;
+ return -ENODEV;
- /* check supported CPU */
switch (boot_cpu_data.x86_model) {
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
@@ -712,7 +711,7 @@ static int __init rapl_pmu_init(void)
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
case 63: /* Haswell-Server */
- rapl_add_quirk(rapl_hsw_server_quirk);
+ apply_quirk = true;
rapl_cntr_mask = RAPL_IDX_SRV;
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
@@ -728,56 +727,41 @@ static int __init rapl_pmu_init(void)
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
case 87: /* Knights Landing */
- rapl_add_quirk(rapl_hsw_server_quirk);
+ apply_quirk = true;
rapl_cntr_mask = RAPL_IDX_KNL;
rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-
+ break;
default:
- /* unsupported */
- return 0;
+ return -ENODEV;
}
- ret = rapl_check_hw_unit();
+
+ ret = rapl_check_hw_unit(apply_quirk);
if (ret)
return ret;
- /* run cpu model quirks */
- for (quirk = rapl_quirks; quirk; quirk = quirk->next)
- quirk->func();
- cpu_notifier_register_begin();
+ ret = init_rapl_pmus();
+ if (ret)
+ return ret;
- for_each_online_cpu(cpu) {
- ret = rapl_cpu_prepare(cpu);
- if (ret)
- goto out;
- rapl_cpu_init(cpu);
- }
+ cpu_notifier_register_begin();
- __perf_cpu_notifier(rapl_cpu_notifier);
+ ret = rapl_prepare_cpus();
+ if (ret)
+ goto out;
- ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
- if (WARN_ON(ret)) {
- pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
- cpu_notifier_register_done();
- return -1;
- }
+ ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+ if (ret)
+ goto out;
- pmu = __this_cpu_read(rapl_pmu);
+ __perf_cpu_notifier(rapl_cpu_notifier);
+ cpu_notifier_register_done();
+ rapl_advertise();
+ return 0;
- pr_info("RAPL PMU detected,"
- " API unit is 2^-32 Joules,"
- " %d fixed counters"
- " %llu ms ovfl timer\n",
- hweight32(rapl_cntr_mask),
- ktime_to_ms(pmu->timer_interval));
- for (i = 0; i < NR_RAPL_DOMAINS; i++) {
- if (rapl_cntr_mask & (1 << i)) {
- pr_info("hw unit of domain %s 2^-%d Joules\n",
- rapl_domain_names[i], rapl_hw_unit[i]);
- }
- }
out:
+ pr_warn("Initialization failed (%d), disabled\n", ret);
+ cleanup_rapl_pmus();
cpu_notifier_register_done();
-
- return 0;
+ return ret;
}
device_initcall(rapl_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c
index 3bf41d413775..7012d18bb293 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,4 +1,4 @@
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
@@ -9,9 +9,9 @@ struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
-struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+struct pci_extra_dev *uncore_extra_pci_dev;
+static int max_packages;
-static DEFINE_RAW_SPINLOCK(uncore_box_lock);
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;
@@ -21,7 +21,7 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
-int uncore_pcibus_to_physid(struct pci_bus *bus)
+static int uncore_pcibus_to_physid(struct pci_bus *bus)
{
struct pci2phy_map *map;
int phys_id = -1;
@@ -38,6 +38,16 @@ int uncore_pcibus_to_physid(struct pci_bus *bus)
return phys_id;
}
+static void uncore_free_pcibus_map(void)
+{
+ struct pci2phy_map *map, *tmp;
+
+ list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
+ list_del(&map->list);
+ kfree(map);
+ }
+}
+
struct pci2phy_map *__find_pci2phy_map(int segment)
{
struct pci2phy_map *map, *alloc = NULL;
@@ -82,43 +92,9 @@ ssize_t uncore_event_show(struct kobject *kobj,
return sprintf(buf, "%s", event->config);
}
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
- return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
- struct intel_uncore_box *box;
-
- box = *per_cpu_ptr(pmu->box, cpu);
- if (box)
- return box;
-
- raw_spin_lock(&uncore_box_lock);
- /* Recheck in lock to handle races. */
- if (*per_cpu_ptr(pmu->box, cpu))
- goto out;
- list_for_each_entry(box, &pmu->box_list, list) {
- if (box->phys_id == topology_physical_package_id(cpu)) {
- atomic_inc(&box->refcnt);
- *per_cpu_ptr(pmu->box, cpu) = box;
- break;
- }
- }
-out:
- raw_spin_unlock(&uncore_box_lock);
-
- return *per_cpu_ptr(pmu->box, cpu);
-}
-
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
- /*
- * perf core schedules event on the basis of cpu, uncore events are
- * collected by one of the cpus inside a physical package.
- */
- return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
+ return pmu->boxes[topology_logical_package_id(cpu)];
}
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -207,7 +183,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
return config;
}
-static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+ struct perf_event *event, int idx)
{
struct hw_perf_event *hwc = &event->hw;
@@ -302,24 +279,25 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+ int node)
{
+ int i, size, numshared = type->num_shared_regs ;
struct intel_uncore_box *box;
- int i, size;
- size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
+ size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
box = kzalloc_node(size, GFP_KERNEL, node);
if (!box)
return NULL;
- for (i = 0; i < type->num_shared_regs; i++)
+ for (i = 0; i < numshared; i++)
raw_spin_lock_init(&box->shared_regs[i].lock);
uncore_pmu_init_hrtimer(box);
- atomic_set(&box->refcnt, 1);
box->cpu = -1;
- box->phys_id = -1;
+ box->pci_phys_id = -1;
+ box->pkgid = -1;
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -341,7 +319,8 @@ static bool is_uncore_event(struct perf_event *event)
}
static int
-uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
+ bool dogrp)
{
struct perf_event *event;
int n, max_count;
@@ -402,7 +381,8 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve
return &type->unconstrainted;
}
-static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+ struct perf_event *event)
{
if (box->pmu->type->ops->put_constraint)
box->pmu->type->ops->put_constraint(box, event);
@@ -582,7 +562,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags)
if (event == box->event_list[i]) {
uncore_put_event_constraint(box, event);
- while (++i < box->n_events)
+ for (++i; i < box->n_events; i++)
box->event_list[i - 1] = box->event_list[i];
--box->n_events;
@@ -676,6 +656,7 @@ static int uncore_pmu_event_init(struct perf_event *event)
if (!box || box->cpu < 0)
return -EINVAL;
event->cpu = box->cpu;
+ event->pmu_private = box;
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
@@ -760,64 +741,110 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
}
ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+ if (!ret)
+ pmu->registered = true;
return ret;
}
+static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
+{
+ if (!pmu->registered)
+ return;
+ perf_pmu_unregister(&pmu->pmu);
+ pmu->registered = false;
+}
+
+static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+{
+ struct intel_uncore_pmu *pmu = type->pmus;
+ struct intel_uncore_box *box;
+ int i, pkg;
+
+ if (pmu) {
+ pkg = topology_physical_package_id(cpu);
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (box)
+ uncore_box_exit(box);
+ }
+ }
+}
+
+static void __init uncore_exit_boxes(void *dummy)
+{
+ struct intel_uncore_type **types;
+
+ for (types = uncore_msr_uncores; *types; types++)
+ __uncore_exit_boxes(*types++, smp_processor_id());
+}
+
+static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
+{
+ int pkg;
+
+ for (pkg = 0; pkg < max_packages; pkg++)
+ kfree(pmu->boxes[pkg]);
+ kfree(pmu->boxes);
+}
+
static void __init uncore_type_exit(struct intel_uncore_type *type)
{
+ struct intel_uncore_pmu *pmu = type->pmus;
int i;
- for (i = 0; i < type->num_boxes; i++)
- free_percpu(type->pmus[i].box);
- kfree(type->pmus);
- type->pmus = NULL;
+ if (pmu) {
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ uncore_pmu_unregister(pmu);
+ uncore_free_boxes(pmu);
+ }
+ kfree(type->pmus);
+ type->pmus = NULL;
+ }
kfree(type->events_group);
type->events_group = NULL;
}
static void __init uncore_types_exit(struct intel_uncore_type **types)
{
- int i;
- for (i = 0; types[i]; i++)
- uncore_type_exit(types[i]);
+ for (; *types; types++)
+ uncore_type_exit(*types);
}
-static int __init uncore_type_init(struct intel_uncore_type *type)
+static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
{
struct intel_uncore_pmu *pmus;
struct attribute_group *attr_group;
struct attribute **attrs;
+ size_t size;
int i, j;
pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
if (!pmus)
return -ENOMEM;
- type->pmus = pmus;
+ size = max_packages * sizeof(struct intel_uncore_box *);
+ for (i = 0; i < type->num_boxes; i++) {
+ pmus[i].func_id = setid ? i : -1;
+ pmus[i].pmu_idx = i;
+ pmus[i].type = type;
+ pmus[i].boxes = kzalloc(size, GFP_KERNEL);
+ if (!pmus[i].boxes)
+ return -ENOMEM;
+ }
+
+ type->pmus = pmus;
type->unconstrainted = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
0, type->num_counters, 0, 0);
- for (i = 0; i < type->num_boxes; i++) {
- pmus[i].func_id = -1;
- pmus[i].pmu_idx = i;
- pmus[i].type = type;
- INIT_LIST_HEAD(&pmus[i].box_list);
- pmus[i].box = alloc_percpu(struct intel_uncore_box *);
- if (!pmus[i].box)
- goto fail;
- }
-
if (type->event_descs) {
- i = 0;
- while (type->event_descs[i].attr.attr.name)
- i++;
+ for (i = 0; type->event_descs[i].attr.attr.name; i++);
attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
sizeof(*attr_group), GFP_KERNEL);
if (!attr_group)
- goto fail;
+ return -ENOMEM;
attrs = (struct attribute **)(attr_group + 1);
attr_group->name = "events";
@@ -831,25 +858,19 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
type->pmu_group = &uncore_pmu_attr_group;
return 0;
-fail:
- uncore_type_exit(type);
- return -ENOMEM;
}
-static int __init uncore_types_init(struct intel_uncore_type **types)
+static int __init
+uncore_types_init(struct intel_uncore_type **types, bool setid)
{
- int i, ret;
+ int ret;
- for (i = 0; types[i]; i++) {
- ret = uncore_type_init(types[i]);
+ for (; *types; types++) {
+ ret = uncore_type_init(*types, setid);
if (ret)
- goto fail;
+ return ret;
}
return 0;
-fail:
- while (--i >= 0)
- uncore_type_exit(types[i]);
- return ret;
}
/*
@@ -857,28 +878,28 @@ fail:
*/
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- struct intel_uncore_type *type;
- int phys_id;
- bool first_box = false;
+ int phys_id, pkg, ret;
phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
+ pkg = topology_phys_to_logical_pkg(phys_id);
+ if (WARN_ON_ONCE(pkg < 0))
+ return -EINVAL;
+
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
- uncore_extra_pci_dev[phys_id][idx] = pdev;
+
+ uncore_extra_pci_dev[pkg].dev[idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
- box = uncore_alloc_box(type, NUMA_NO_NODE);
- if (!box)
- return -ENOMEM;
-
/*
* for performance monitoring unit with multiple boxes,
* each box has a different function id.
@@ -890,44 +911,60 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
* some device types. Hence PCI device idx would be 0 for all devices.
* So increment pmu pointer to point to an unused array element.
*/
- if (boot_cpu_data.x86_model == 87)
+ if (boot_cpu_data.x86_model == 87) {
while (pmu->func_id >= 0)
pmu++;
+ }
+
+ if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+ return -EINVAL;
+
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
+ if (!box)
+ return -ENOMEM;
+
if (pmu->func_id < 0)
pmu->func_id = pdev->devfn;
else
WARN_ON_ONCE(pmu->func_id != pdev->devfn);
- box->phys_id = phys_id;
+ atomic_inc(&box->refcnt);
+ box->pci_phys_id = phys_id;
+ box->pkgid = pkg;
box->pci_dev = pdev;
box->pmu = pmu;
uncore_box_init(box);
pci_set_drvdata(pdev, box);
- raw_spin_lock(&uncore_box_lock);
- if (list_empty(&pmu->box_list))
- first_box = true;
- list_add_tail(&box->list, &pmu->box_list);
- raw_spin_unlock(&uncore_box_lock);
+ pmu->boxes[pkg] = box;
+ if (atomic_inc_return(&pmu->activeboxes) > 1)
+ return 0;
- if (first_box)
- uncore_pmu_register(pmu);
- return 0;
+ /* First active box registers the pmu */
+ ret = uncore_pmu_register(pmu);
+ if (ret) {
+ pci_set_drvdata(pdev, NULL);
+ pmu->boxes[pkg] = NULL;
+ uncore_box_exit(box);
+ kfree(box);
+ }
+ return ret;
}
static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box = pci_get_drvdata(pdev);
struct intel_uncore_pmu *pmu;
- int i, cpu, phys_id;
- bool last_box = false;
+ int i, phys_id, pkg;
phys_id = uncore_pcibus_to_physid(pdev->bus);
+ pkg = topology_phys_to_logical_pkg(phys_id);
+
box = pci_get_drvdata(pdev);
if (!box) {
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
- if (uncore_extra_pci_dev[phys_id][i] == pdev) {
- uncore_extra_pci_dev[phys_id][i] = NULL;
+ if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
+ uncore_extra_pci_dev[pkg].dev[i] = NULL;
break;
}
}
@@ -936,33 +973,20 @@ static void uncore_pci_remove(struct pci_dev *pdev)
}
pmu = box->pmu;
- if (WARN_ON_ONCE(phys_id != box->phys_id))
+ if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
return;
pci_set_drvdata(pdev, NULL);
-
- raw_spin_lock(&uncore_box_lock);
- list_del(&box->list);
- if (list_empty(&pmu->box_list))
- last_box = true;
- raw_spin_unlock(&uncore_box_lock);
-
- for_each_possible_cpu(cpu) {
- if (*per_cpu_ptr(pmu->box, cpu) == box) {
- *per_cpu_ptr(pmu->box, cpu) = NULL;
- atomic_dec(&box->refcnt);
- }
- }
-
- WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
+ pmu->boxes[pkg] = NULL;
+ if (atomic_dec_return(&pmu->activeboxes) == 0)
+ uncore_pmu_unregister(pmu);
+ uncore_box_exit(box);
kfree(box);
-
- if (last_box)
- perf_pmu_unregister(&pmu->pmu);
}
static int __init uncore_pci_init(void)
{
+ size_t size;
int ret;
switch (boot_cpu_data.x86_model) {
@@ -999,25 +1023,40 @@ static int __init uncore_pci_init(void)
ret = skl_uncore_pci_init();
break;
default:
- return 0;
+ return -ENODEV;
}
if (ret)
return ret;
- ret = uncore_types_init(uncore_pci_uncores);
+ size = max_packages * sizeof(struct pci_extra_dev);
+ uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
+ if (!uncore_extra_pci_dev) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = uncore_types_init(uncore_pci_uncores, false);
if (ret)
- return ret;
+ goto errtype;
uncore_pci_driver->probe = uncore_pci_probe;
uncore_pci_driver->remove = uncore_pci_remove;
ret = pci_register_driver(uncore_pci_driver);
- if (ret == 0)
- pcidrv_registered = true;
- else
- uncore_types_exit(uncore_pci_uncores);
+ if (ret)
+ goto errtype;
+
+ pcidrv_registered = true;
+ return 0;
+errtype:
+ uncore_types_exit(uncore_pci_uncores);
+ kfree(uncore_extra_pci_dev);
+ uncore_extra_pci_dev = NULL;
+ uncore_free_pcibus_map();
+err:
+ uncore_pci_uncores = empty_uncore;
return ret;
}
@@ -1027,173 +1066,139 @@ static void __init uncore_pci_exit(void)
pcidrv_registered = false;
pci_unregister_driver(uncore_pci_driver);
uncore_types_exit(uncore_pci_uncores);
- }
-}
-
-/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
-static LIST_HEAD(boxes_to_free);
-
-static void uncore_kfree_boxes(void)
-{
- struct intel_uncore_box *box;
-
- while (!list_empty(&boxes_to_free)) {
- box = list_entry(boxes_to_free.next,
- struct intel_uncore_box, list);
- list_del(&box->list);
- kfree(box);
+ kfree(uncore_extra_pci_dev);
+ uncore_free_pcibus_map();
}
}
static void uncore_cpu_dying(int cpu)
{
- struct intel_uncore_type *type;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- box = *per_cpu_ptr(pmu->box, cpu);
- *per_cpu_ptr(pmu->box, cpu) = NULL;
- if (box && atomic_dec_and_test(&box->refcnt))
- list_add(&box->list, &boxes_to_free);
+ int i, pkg;
+
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (box && atomic_dec_return(&box->refcnt) == 0)
+ uncore_box_exit(box);
}
}
}
-static int uncore_cpu_starting(int cpu)
+static void uncore_cpu_starting(int cpu, bool init)
{
- struct intel_uncore_type *type;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box, *exist;
- int i, j, k, phys_id;
-
- phys_id = topology_physical_package_id(cpu);
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- box = *per_cpu_ptr(pmu->box, cpu);
- /* called by uncore_cpu_init? */
- if (box && box->phys_id >= 0) {
- uncore_box_init(box);
- continue;
- }
+ struct intel_uncore_box *box;
+ int i, pkg, ncpus = 1;
- for_each_online_cpu(k) {
- exist = *per_cpu_ptr(pmu->box, k);
- if (exist && exist->phys_id == phys_id) {
- atomic_inc(&exist->refcnt);
- *per_cpu_ptr(pmu->box, cpu) = exist;
- if (box) {
- list_add(&box->list,
- &boxes_to_free);
- box = NULL;
- }
- break;
- }
- }
+ if (init) {
+ /*
+ * On init we get the number of online cpus in the package
+ * and set refcount for all of them.
+ */
+ ncpus = cpumask_weight(topology_core_cpumask(cpu));
+ }
- if (box) {
- box->phys_id = phys_id;
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (!box)
+ continue;
+ /* The first cpu on a package activates the box */
+ if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
uncore_box_init(box);
- }
}
}
- return 0;
}
-static int uncore_cpu_prepare(int cpu, int phys_id)
+static int uncore_cpu_prepare(int cpu)
{
- struct intel_uncore_type *type;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- if (pmu->func_id < 0)
- pmu->func_id = j;
-
+ int i, pkg;
+
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ if (pmu->boxes[pkg])
+ continue;
+ /* First cpu of a package allocates the box */
box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
return -ENOMEM;
-
box->pmu = pmu;
- box->phys_id = phys_id;
- *per_cpu_ptr(pmu->box, cpu) = box;
+ box->pkgid = pkg;
+ pmu->boxes[pkg] = box;
}
}
return 0;
}
-static void
-uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
+static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
+ int new_cpu)
{
- struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
- int i, j;
+ int i, pkg;
- for (i = 0; uncores[i]; i++) {
- type = uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- if (old_cpu < 0)
- box = uncore_pmu_to_box(pmu, new_cpu);
- else
- box = uncore_pmu_to_box(pmu, old_cpu);
- if (!box)
- continue;
-
- if (old_cpu < 0) {
- WARN_ON_ONCE(box->cpu != -1);
- box->cpu = new_cpu;
- continue;
- }
+ pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (!box)
+ continue;
- WARN_ON_ONCE(box->cpu != old_cpu);
- if (new_cpu >= 0) {
- uncore_pmu_cancel_hrtimer(box);
- perf_pmu_migrate_context(&pmu->pmu,
- old_cpu, new_cpu);
- box->cpu = new_cpu;
- } else {
- box->cpu = -1;
- }
+ if (old_cpu < 0) {
+ WARN_ON_ONCE(box->cpu != -1);
+ box->cpu = new_cpu;
+ continue;
}
+
+ WARN_ON_ONCE(box->cpu != old_cpu);
+ box->cpu = -1;
+ if (new_cpu < 0)
+ continue;
+
+ uncore_pmu_cancel_hrtimer(box);
+ perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
+ box->cpu = new_cpu;
}
}
+static void uncore_change_context(struct intel_uncore_type **uncores,
+ int old_cpu, int new_cpu)
+{
+ for (; *uncores; uncores++)
+ uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
+}
+
static void uncore_event_exit_cpu(int cpu)
{
- int i, phys_id, target;
+ int target;
- /* if exiting cpu is used for collecting uncore events */
+ /* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
return;
- /* find a new cpu to collect uncore events */
- phys_id = topology_physical_package_id(cpu);
- target = -1;
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (phys_id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
+ /* Find a new cpu to collect uncore events */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
- /* migrate uncore events to the new cpu */
- if (target >= 0)
+ /* Migrate uncore events to the new target */
+ if (target < nr_cpu_ids)
cpumask_set_cpu(target, &uncore_cpu_mask);
+ else
+ target = -1;
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
@@ -1201,13 +1206,15 @@ static void uncore_event_exit_cpu(int cpu)
static void uncore_event_init_cpu(int cpu)
{
- int i, phys_id;
+ int target;
- phys_id = topology_physical_package_id(cpu);
- for_each_cpu(i, &uncore_cpu_mask) {
- if (phys_id == topology_physical_package_id(i))
- return;
- }
+ /*
+ * Check if there is an online cpu in the package
+ * which collects uncore events already.
+ */
+ target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+ if (target < nr_cpu_ids)
+ return;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
@@ -1220,39 +1227,25 @@ static int uncore_cpu_notifier(struct notifier_block *self,
{
unsigned int cpu = (long)hcpu;
- /* allocate/free data structure for uncore box */
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
- uncore_cpu_prepare(cpu, -1);
- break;
+ return notifier_from_errno(uncore_cpu_prepare(cpu));
+
case CPU_STARTING:
- uncore_cpu_starting(cpu);
+ uncore_cpu_starting(cpu, false);
+ case CPU_DOWN_FAILED:
+ uncore_event_init_cpu(cpu);
break;
+
case CPU_UP_CANCELED:
case CPU_DYING:
uncore_cpu_dying(cpu);
break;
- case CPU_ONLINE:
- case CPU_DEAD:
- uncore_kfree_boxes();
- break;
- default:
- break;
- }
- /* select the cpu that collects uncore events */
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_FAILED:
- case CPU_STARTING:
- uncore_event_init_cpu(cpu);
- break;
case CPU_DOWN_PREPARE:
uncore_event_exit_cpu(cpu);
break;
- default:
- break;
}
-
return NOTIFY_OK;
}
@@ -1265,9 +1258,29 @@ static struct notifier_block uncore_cpu_nb = {
.priority = CPU_PRI_PERF + 1,
};
-static void __init uncore_cpu_setup(void *dummy)
+static int __init type_pmu_register(struct intel_uncore_type *type)
{
- uncore_cpu_starting(smp_processor_id());
+ int i, ret;
+
+ for (i = 0; i < type->num_boxes; i++) {
+ ret = uncore_pmu_register(&type->pmus[i]);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int __init uncore_msr_pmus_register(void)
+{
+ struct intel_uncore_type **types = uncore_msr_uncores;
+ int ret;
+
+ for (; *types; types++) {
+ ret = type_pmu_register(*types);
+ if (ret)
+ return ret;
+ }
+ return 0;
}
static int __init uncore_cpu_init(void)
@@ -1311,71 +1324,61 @@ static int __init uncore_cpu_init(void)
knl_uncore_cpu_init();
break;
default:
- return 0;
+ return -ENODEV;
}
- ret = uncore_types_init(uncore_msr_uncores);
+ ret = uncore_types_init(uncore_msr_uncores, true);
if (ret)
- return ret;
+ goto err;
+ ret = uncore_msr_pmus_register();
+ if (ret)
+ goto err;
return 0;
+err:
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_msr_uncores = empty_uncore;
+ return ret;
}
-static int __init uncore_pmus_register(void)
+static void __init uncore_cpu_setup(void *dummy)
{
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_type *type;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- uncore_pmu_register(pmu);
- }
- }
-
- return 0;
+ uncore_cpu_starting(smp_processor_id(), true);
}
-static void __init uncore_cpumask_init(void)
-{
- int cpu;
-
- /*
- * ony invoke once from msr or pci init code
- */
- if (!cpumask_empty(&uncore_cpu_mask))
- return;
+/* Lazy to avoid allocation of a few bytes for the normal case */
+static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
- cpu_notifier_register_begin();
+static int __init uncore_cpumask_init(bool msr)
+{
+ unsigned int cpu;
for_each_online_cpu(cpu) {
- int i, phys_id = topology_physical_package_id(cpu);
+ unsigned int pkg = topology_logical_package_id(cpu);
+ int ret;
- for_each_cpu(i, &uncore_cpu_mask) {
- if (phys_id == topology_physical_package_id(i)) {
- phys_id = -1;
- break;
- }
- }
- if (phys_id < 0)
+ if (test_and_set_bit(pkg, packages))
continue;
-
- uncore_cpu_prepare(cpu, phys_id);
+ /*
+ * The first online cpu of each package allocates and takes
+ * the refcounts for all other online cpus in that package.
+ * If msrs are not enabled no allocation is required.
+ */
+ if (msr) {
+ ret = uncore_cpu_prepare(cpu);
+ if (ret)
+ return ret;
+ }
uncore_event_init_cpu(cpu);
+ smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
}
- on_each_cpu(uncore_cpu_setup, NULL, 1);
-
__register_cpu_notifier(&uncore_cpu_nb);
-
- cpu_notifier_register_done();
+ return 0;
}
-
static int __init intel_uncore_init(void)
{
- int ret;
+ int pret, cret, ret;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
@@ -1383,19 +1386,27 @@ static int __init intel_uncore_init(void)
if (cpu_has_hypervisor)
return -ENODEV;
- ret = uncore_pci_init();
- if (ret)
- goto fail;
- ret = uncore_cpu_init();
- if (ret) {
- uncore_pci_exit();
- goto fail;
- }
- uncore_cpumask_init();
+ max_packages = topology_max_packages();
+
+ pret = uncore_pci_init();
+ cret = uncore_cpu_init();
- uncore_pmus_register();
+ if (cret && pret)
+ return -ENODEV;
+
+ cpu_notifier_register_begin();
+ ret = uncore_cpumask_init(!cret);
+ if (ret)
+ goto err;
+ cpu_notifier_register_done();
return 0;
-fail:
+
+err:
+ /* Undo box->init_box() */
+ on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_pci_exit();
+ cpu_notifier_register_done();
return ret;
}
device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h
index a7086b862156..79766b9a3580 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -1,8 +1,10 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
+#include <asm/apicdef.h>
+
#include <linux/perf_event.h>
-#include "perf_event.h"
+#include "../perf_event.h"
#define UNCORE_PMU_NAME_LEN 32
#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
@@ -19,11 +21,12 @@
#define UNCORE_EXTRA_PCI_DEV 0xff
#define UNCORE_EXTRA_PCI_DEV_MAX 3
-/* support up to 8 sockets */
-#define UNCORE_SOCKET_MAX 8
-
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+struct pci_extra_dev {
+ struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
+};
+
struct intel_uncore_ops;
struct intel_uncore_pmu;
struct intel_uncore_box;
@@ -61,6 +64,7 @@ struct intel_uncore_type {
struct intel_uncore_ops {
void (*init_box)(struct intel_uncore_box *);
+ void (*exit_box)(struct intel_uncore_box *);
void (*disable_box)(struct intel_uncore_box *);
void (*enable_box)(struct intel_uncore_box *);
void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
@@ -73,13 +77,14 @@ struct intel_uncore_ops {
};
struct intel_uncore_pmu {
- struct pmu pmu;
- char name[UNCORE_PMU_NAME_LEN];
- int pmu_idx;
- int func_id;
- struct intel_uncore_type *type;
- struct intel_uncore_box ** __percpu box;
- struct list_head box_list;
+ struct pmu pmu;
+ char name[UNCORE_PMU_NAME_LEN];
+ int pmu_idx;
+ int func_id;
+ bool registered;
+ atomic_t activeboxes;
+ struct intel_uncore_type *type;
+ struct intel_uncore_box **boxes;
};
struct intel_uncore_extra_reg {
@@ -89,7 +94,8 @@ struct intel_uncore_extra_reg {
};
struct intel_uncore_box {
- int phys_id;
+ int pci_phys_id;
+ int pkgid;
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
@@ -123,7 +129,6 @@ struct pci2phy_map {
int pbus_to_physid[256];
};
-int uncore_pcibus_to_physid(struct pci_bus *bus);
struct pci2phy_map *__find_pci2phy_map(int segment);
ssize_t uncore_event_show(struct kobject *kobj,
@@ -305,14 +310,30 @@ static inline void uncore_box_init(struct intel_uncore_box *box)
}
}
+static inline void uncore_box_exit(struct intel_uncore_box *box)
+{
+ if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+ if (box->pmu->type->ops->exit_box)
+ box->pmu->type->ops->exit_box(box);
+ }
+}
+
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
- return (box->phys_id < 0);
+ return (box->pkgid < 0);
+}
+
+static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+ return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+ return event->pmu_private;
}
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
@@ -328,7 +349,7 @@ extern struct intel_uncore_type **uncore_pci_uncores;
extern struct pci_driver *uncore_pci_driver;
extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
-extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+extern struct pci_extra_dev *uncore_extra_pci_dev;
extern struct event_constraint uncore_constraint_empty;
/* perf_event_intel_uncore_snb.c */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 2749965afed0..cda569332005 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -1,5 +1,5 @@
/* Nehalem-EX/Westmere-EX uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
/* NHM-EX event control */
#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
@@ -201,6 +201,11 @@ static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
}
+static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+}
+
static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
{
unsigned msr = uncore_msr_box_ctl(box);
@@ -250,6 +255,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
#define NHMEX_UNCORE_OPS_COMMON_INIT() \
.init_box = nhmex_uncore_msr_init_box, \
+ .exit_box = nhmex_uncore_msr_exit_box, \
.disable_box = nhmex_uncore_msr_disable_box, \
.enable_box = nhmex_uncore_msr_enable_box, \
.disable_event = nhmex_uncore_msr_disable_event, \
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 2bd030ddd0db..96531d2b843f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,5 +1,5 @@
/* Nehalem/SandBridge/Haswell uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -95,6 +95,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
+static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0)
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+}
+
static struct uncore_event_desc snb_uncore_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
{ /* end: all zeroes */ },
@@ -116,6 +122,7 @@ static struct attribute_group snb_uncore_format_group = {
static struct intel_uncore_ops snb_uncore_msr_ops = {
.init_box = snb_uncore_msr_init_box,
+ .exit_box = snb_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
.read_counter = uncore_msr_read_counter,
@@ -231,6 +238,11 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
}
+static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
+{
+ iounmap(box->io_addr);
+}
+
static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
{}
@@ -301,6 +313,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
return -EINVAL;
event->cpu = box->cpu;
+ event->pmu_private = box;
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
@@ -458,6 +471,7 @@ static struct pmu snb_uncore_imc_pmu = {
static struct intel_uncore_ops snb_uncore_imc_ops = {
.init_box = snb_uncore_imc_init_box,
+ .exit_box = snb_uncore_imc_exit_box,
.enable_box = snb_uncore_imc_enable_box,
.disable_box = snb_uncore_imc_disable_box,
.disable_event = snb_uncore_imc_disable_event,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 33acb884ccf1..93f6bd9bf761 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,5 @@
/* SandyBridge-EP/IvyTown uncore support */
-#include "perf_event_intel_uncore.h"
-
+#include "uncore.h"
/* SNB-EP Box level control */
#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
@@ -987,7 +986,9 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
+ int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+ struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+
if (filter_pdev) {
pci_write_config_dword(filter_pdev, reg1->reg,
(u32)reg1->config);
@@ -2521,14 +2522,16 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
void hswep_uncore_cpu_init(void)
{
+ int pkg = topology_phys_to_logical_pkg(0);
+
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
/* Detect 6-8 core systems with only two SBOXes */
- if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
+ if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
u32 capid4;
- pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
+ pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
0x94, &capid4);
if (((capid4 >> 6) & 0x3) == 0)
hswep_uncore_sbox.num_boxes = 2;
@@ -2875,11 +2878,13 @@ static struct intel_uncore_type bdx_uncore_sbox = {
.format_group = &hswep_uncore_sbox_format_group,
};
+#define BDX_MSR_UNCORE_SBOX 3
+
static struct intel_uncore_type *bdx_msr_uncores[] = {
&bdx_uncore_ubox,
&bdx_uncore_cbox,
- &bdx_uncore_sbox,
&hswep_uncore_pcu,
+ &bdx_uncore_sbox,
NULL,
};
@@ -2888,6 +2893,10 @@ void bdx_uncore_cpu_init(void)
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
+
+ /* BDX-DE doesn't have SBOX */
+ if (boot_cpu_data.x86_model == 86)
+ uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
}
static struct intel_uncore_type bdx_uncore_ha = {
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c
index ec863b9a9f78..ec863b9a9f78 100644
--- a/arch/x86/kernel/cpu/perf_event_msr.c
+++ b/arch/x86/events/msr.c
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h
index 7bb61e32fb29..68155cafa8a1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -586,6 +586,7 @@ struct x86_pmu {
pebs_broken :1,
pebs_prec_dist :1;
int pebs_record_size;
+ int pebs_buffer_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
@@ -860,6 +861,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
extern struct event_constraint intel_hsw_pebs_event_constraints[];
+extern struct event_constraint intel_bdw_pebs_event_constraints[];
+
extern struct event_constraint intel_skl_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
@@ -904,6 +907,8 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
+void intel_pmu_pebs_data_source_nhm(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7bfc85bbb8ff..99afb665a004 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -152,12 +152,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
".popsection"
/*
- * This must be included *after* the definition of ALTERNATIVE due to
- * <asm/arch_hweight.h>
- */
-#include <asm/cpufeature.h>
-
-/*
* Alternative instructions for different CPU types or capabilities.
*
* This allows to use optimized instructions even on generic binary
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 3c56ef1ae068..5e828da2e18f 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -27,15 +27,23 @@ struct amd_l3_cache {
};
struct threshold_block {
- unsigned int block;
- unsigned int bank;
- unsigned int cpu;
- u32 address;
- u16 interrupt_enable;
- bool interrupt_capable;
- u16 threshold_limit;
- struct kobject kobj;
- struct list_head miscj;
+ unsigned int block; /* Number within bank */
+ unsigned int bank; /* MCA bank the block belongs to */
+ unsigned int cpu; /* CPU which controls MCA bank */
+ u32 address; /* MSR address for the block */
+ u16 interrupt_enable; /* Enable/Disable APIC interrupt */
+ bool interrupt_capable; /* Bank can generate an interrupt. */
+
+ u16 threshold_limit; /*
+ * Value upon which threshold
+ * interrupt is generated.
+ */
+
+ struct kobject kobj; /* sysfs object */
+ struct list_head miscj; /*
+ * List of threshold blocks
+ * within a bank.
+ */
};
struct threshold_bank {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c80f6b6f3da2..0899cfc8dfe8 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -6,7 +6,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <asm/processor.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <asm/fixmap.h>
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 259a7c1ef709..02e799fa43d1 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H
+#include <asm/cpufeatures.h>
+
#ifdef CONFIG_64BIT
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679aba703..f5063b6659eb 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -44,19 +44,22 @@
/* Exception table entry */
#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to) \
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
.pushsection "__ex_table","a" ; \
- .balign 8 ; \
+ .balign 4 ; \
.long (from) - . ; \
.long (to) - . ; \
+ .long (handler) - . ; \
.popsection
-# define _ASM_EXTABLE_EX(from,to) \
- .pushsection "__ex_table","a" ; \
- .balign 8 ; \
- .long (from) - . ; \
- .long (to) - . + 0x7ffffff0 ; \
- .popsection
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
@@ -89,19 +92,24 @@
.endm
#else
-# define _ASM_EXTABLE(from,to) \
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
" .pushsection \"__ex_table\",\"a\"\n" \
- " .balign 8\n" \
+ " .balign 4\n" \
" .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \
+ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
" .popsection\n"
-# define _ASM_EXTABLE_EX(from,to) \
- " .pushsection \"__ex_table\",\"a\"\n" \
- " .balign 8\n" \
- " .long (" #from ") - .\n" \
- " .long (" #to ") - . + 0x7ffffff0\n" \
- " .popsection\n"
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index a584e1c50918..bfb28caf97b1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -6,18 +6,17 @@
/*
* Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
+ * And yes, this might be required on UP too when we're talking
* to devices.
*/
#ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
+ X86_FEATURE_XMM2) ::: "memory", "cc")
+#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
+ X86_FEATURE_XMM2) ::: "memory", "cc")
+#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \
+ X86_FEATURE_XMM2) ::: "memory", "cc")
#else
#define mb() asm volatile("mfence":::"memory")
#define rmb() asm volatile("lfence":::"memory")
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b954d5e4..7766d1cf096e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
-static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}
-static inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
* No memory barrier is required here, because x86 cannot reorder stores past
* older loads. Same principle as spin_unlock.
*/
-static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
__clear_bit(nr, addr);
@@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
}
@@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
}
@@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
-static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
}
/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
}
@@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
-static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
+static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;
@@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
-static inline unsigned long ffz(unsigned long word)
+static __always_inline unsigned long ffz(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
{
asm("bsr %1,%0"
: "=r" (word)
@@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word)
* set bit if value is nonzero. The first (least significant) bit
* is at position 1.
*/
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
int r;
@@ -434,7 +434,7 @@ static inline int ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
-static inline int fls(int x)
+static __always_inline int fls(int x)
{
int r;
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e63aa38e85fb..61518cf79437 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
extern const int rodata_test_data;
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
#ifdef CONFIG_DEBUG_RODATA_TEST
int rodata_test(void);
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index eda81dc0f4ae..d194266acb28 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,10 +3,11 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#define VCLOCK_NONE 0 /* No vDSO clock available. */
-#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
-#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_NONE 0 /* No vDSO clock available. */
+#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
+#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
+#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX 3
struct arch_clocksource_data {
int vclock_mode;
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index ad19841eddfe..9733361fed6f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
#define ASM_X86_CMPXCHG_H
#include <linux/compiler.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
/*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c9464297..68e4e8258b84 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -1,288 +1,7 @@
-/*
- * Defines x86 CPU feature bits
- */
#ifndef _ASM_X86_CPUFEATURE_H
#define _ASM_X86_CPUFEATURE_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
-#define NCAPINTS 16 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
-
-/*
- * Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
- /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
-/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7.
- *
- * Reuse free bits when adding new feature flags!
- */
-
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-
-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
-
-/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
-
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
-
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-
-/*
- * BUG word(s)
- */
-#define X86_BUG(x) (NCAPINTS*32 + (x))
-
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#include <asm/processor.h>
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
- (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \
- cpu_has(&boot_cpu_data, bit))
+ (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
@@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
/*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
* fast paths and boot_cpu_has() otherwise!
*/
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
-
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
*/
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
- /*
- * Catch too early usage of this before alternatives
- * have run.
- */
- asm_volatile_goto("1: jmp %l[t_warn]\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* 1: do replace */
- " .byte 2b - 1b\n" /* source len */
- " .byte 0\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- /* skipping size check since replacement size = 0 */
- : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
- asm_volatile_goto("1: jmp %l[t_no]\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 2b - 1b\n" /* source len */
- " .byte 0\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- /* skipping size check since replacement size = 0 */
- : : "i" (bit) : : t_no);
- return true;
- t_no:
- return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
- t_warn:
- warn_pre_alternatives();
- return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
- u8 flag;
- /* Open-coded due to __stringify() in ALTERNATIVE() */
- asm volatile("1: movb $0,%0\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 3f - .\n"
- " .word %P1\n" /* feature bit */
- " .byte 2b - 1b\n" /* source len */
- " .byte 4f - 3f\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "3: movb $1,%0\n"
- "4:\n"
- ".previous\n"
- : "=qm" (flag) : "i" (bit));
- return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit) \
-( \
- __builtin_constant_p(boot_cpu_has(bit)) ? \
- boot_cpu_has(bit) : \
- __builtin_constant_p(bit) ? \
- __static_cpu_has(bit) : \
- boot_cpu_has(bit) \
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
{
-#ifdef CC_HAVE_ASM_GOTO
- asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+ asm_volatile_goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
@@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
- : : t_dynamic, t_no);
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+ t_yes:
return true;
t_no:
return false;
- t_dynamic:
- return __static_cpu_has_safe(bit);
-#else
- u8 flag;
- /* Open-coded due to __stringify() in ALTERNATIVE() */
- asm volatile("1: movb $2,%0\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 3f - .\n" /* repl offset */
- " .word %P2\n" /* always replace */
- " .byte 2b - 1b\n" /* source len */
- " .byte 4f - 3f\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "3: movb $0,%0\n"
- "4:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 5f - .\n" /* repl offset */
- " .word %P1\n" /* feature bit */
- " .byte 4b - 3b\n" /* src len */
- " .byte 6f - 5f\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "5: movb $1,%0\n"
- "6:\n"
- ".previous\n"
- : "=qm" (flag)
- : "i" (bit), "i" (X86_FEATURE_ALWAYS));
- return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
}
-#define static_cpu_has_safe(bit) \
+#define static_cpu_has(bit) \
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
- _static_cpu_has_safe(bit) \
+ _static_cpu_has(bit) \
)
#else
/*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
*/
#define static_cpu_has(bit) boot_cpu_has(bit)
-#define static_cpu_has_safe(bit) boot_cpu_has(bit)
#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
@@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
#define static_cpu_has_bug(bit) static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
#define MAX_CPU_FEATURES (NCAPINTS * 32)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 000000000000..074b7604bd51
--- /dev/null
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -0,0 +1,300 @@
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include <asm/required-features.h>
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include <asm/disabled-features.h>
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS 16 /* N 32-bit words worth of info */
+#define NBUGINTS 1 /* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name. If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
+ /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+
+#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
+
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x) (NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+
+#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 278441f39856..eb5deb42484d 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -98,4 +98,27 @@ struct desc_ptr {
#endif /* !__ASSEMBLY__ */
+/* Access rights as returned by LAR */
+#define AR_TYPE_RODATA (0 * (1 << 9))
+#define AR_TYPE_RWDATA (1 * (1 << 9))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9))
+#define AR_TYPE_XOCODE (4 * (1 << 9))
+#define AR_TYPE_XRCODE (5 * (1 << 9))
+#define AR_TYPE_XOCODE_CONF (6 * (1 << 9))
+#define AR_TYPE_XRCODE_CONF (7 * (1 << 9))
+#define AR_TYPE_MASK (7 * (1 << 9))
+
+#define AR_DPL0 (0 * (1 << 13))
+#define AR_DPL3 (3 * (1 << 13))
+#define AR_DPL_MASK (3 * (1 << 13))
+
+#define AR_A (1 << 8) /* "Accessed" */
+#define AR_S (1 << 12) /* If clear, "System" segment */
+#define AR_P (1 << 15) /* "Present" */
+#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */
+#define AR_L (1 << 21) /* "Long mode" for code segments */
+#define AR_DB (1 << 22) /* D/B, effect depends on type */
+#define AR_G (1 << 23) /* "Granularity" (limit in pages) */
+
#endif /* _ASM_X86_DESC_DEFS_H */
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 535192f6bfad..3c69fed215c5 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len)
/* Use early IO mappings for DMI because it's initialized early */
#define dmi_early_remap early_ioremap
#define dmi_early_unmap early_iounmap
-#define dmi_remap ioremap
+#define dmi_remap ioremap_cache
#define dmi_unmap iounmap
#endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0010c78c4998..08b1f2f6ea50 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -25,6 +25,8 @@
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
+#define MAX_CMDLINE_ADDRESS UINT_MAX
+
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1514753fd435..15340e36ddcb 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -256,7 +256,7 @@ extern int force_personality32;
instruction set this CPU supports. This could be done in user space,
but it's not easy, and we've already done it here. */
-#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
+#define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX])
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 6d7d0e52ed5a..8554f960e21b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve);
extern int fixmaps_set;
extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
+#define kmap_prot PAGE_KERNEL
extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440df63f1..a2124343edf5 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -17,6 +17,7 @@
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
+#include <asm/cpufeature.h>
/*
* High level FPU state handling functions:
@@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
*/
static __always_inline __pure bool use_eager_fpu(void)
{
- return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+ return static_cpu_has(X86_FEATURE_EAGER_FPU);
}
static __always_inline __pure bool use_xsaveopt(void)
{
- return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+ return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
- return static_cpu_has_safe(X86_FEATURE_XSAVE);
+ return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
- return static_cpu_has_safe(X86_FEATURE_FXSR);
+ return static_cpu_has(X86_FEATURE_FXSR);
}
/*
@@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
* pending. Clear the x87 state here by setting it to fixed values.
* "m" is a random variable that should be in L1.
*/
- if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
@@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
- fpu.preload = new_fpu->fpstate_active &&
+ fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
+ new_fpu->fpstate_active &&
(use_eager_fpu() || new_fpu->counter > 5);
if (old_fpu->fpregs_active) {
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index af30fdeb140d..f23cd8c80b1c 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -20,16 +20,15 @@
/* Supported features which support lazy state saving */
#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
- XFEATURE_MASK_SSE)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR | \
+ XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
XFEATURE_MASK_Hi16_ZMM)
+/* Supported features which require eager state saving */
+#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
+
/* All currently supported features */
#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 793179cf8e21..6e4d170726b7 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -1,23 +1,44 @@
-#ifdef __ASSEMBLY__
+#ifndef _ASM_X86_FRAME_H
+#define _ASM_X86_FRAME_H
#include <asm/asm.h>
-/* The annotation hides the frame from the unwinder and makes it look
- like a ordinary ebp save/restore. This avoids some special cases for
- frame pointer later */
+/*
+ * These are stack frame creation macros. They should be used by every
+ * callable non-leaf asm function to make kernel stack traces more reliable.
+ */
+
#ifdef CONFIG_FRAME_POINTER
- .macro FRAME
- __ASM_SIZE(push,) %__ASM_REG(bp)
- __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
- .endm
- .macro ENDFRAME
- __ASM_SIZE(pop,) %__ASM_REG(bp)
- .endm
-#else
- .macro FRAME
- .endm
- .macro ENDFRAME
- .endm
-#endif
-
-#endif /* __ASSEMBLY__ */
+
+#ifdef __ASSEMBLY__
+
+.macro FRAME_BEGIN
+ push %_ASM_BP
+ _ASM_MOV %_ASM_SP, %_ASM_BP
+.endm
+
+.macro FRAME_END
+ pop %_ASM_BP
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define FRAME_BEGIN \
+ "push %" _ASM_BP "\n" \
+ _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
+
+#define FRAME_END "pop %" _ASM_BP "\n"
+
+#endif /* __ASSEMBLY__ */
+
+#define FRAME_OFFSET __ASM_SEL(4, 8)
+
+#else /* !CONFIG_FRAME_POINTER */
+
+#define FRAME_BEGIN
+#define FRAME_END
+#define FRAME_OFFSET 0
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_FRAME_H */
diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/x86/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
index cd2ce4068441..ebea2c9d2cdc 100644
--- a/arch/x86/include/asm/imr.h
+++ b/arch/x86/include/asm/imr.h
@@ -53,7 +53,7 @@
#define IMR_MASK (IMR_ALIGN - 1)
int imr_add_range(phys_addr_t base, size_t size,
- unsigned int rmask, unsigned int wmask, bool lock);
+ unsigned int rmask, unsigned int wmask);
int imr_remove_range(phys_addr_t base, size_t size);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index cfc9a0d2d07c..a4fe16e42b7b 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void)
cpu_relax();
}
-static inline void
-__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
-{
- /*
- * Subtle. In the case of the 'never do double writes' workaround
- * we have to lock out interrupts to be safe. As we don't care
- * of the value read we use an atomic rmw access to avoid costly
- * cli/sti. Otherwise we use an even cheaper single atomic write
- * to the APIC.
- */
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- __xapic_wait_icr_idle();
-
- /*
- * No need to touch the target chip field
- */
- cfg = __prepare_ICR(shortcut, vector, dest);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
/*
* This is used to send an IPI with no shorthand notation (the destination is
* specified in bits 56 to 63 of the ICR).
*/
-static inline void
- __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
-{
- unsigned long cfg;
-
- /*
- * Wait for idle.
- */
- if (unlikely(vector == NMI_VECTOR))
- safe_apic_wait_icr_idle();
- else
- __xapic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- native_apic_mem_write(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector, dest);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
extern void default_send_IPI_single(int cpu, int vector);
extern void default_send_IPI_single_phys(int cpu, int vector);
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 78162f8e248b..d0afb05c84fc 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -1,7 +1,7 @@
#ifndef _ASM_IRQ_WORK_H
#define _ASM_IRQ_WORK_H
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
static inline bool arch_irq_work_has_interrupt(void)
{
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44adbb819041..01c8b501cb6d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -32,6 +32,7 @@
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#include <asm/asm.h>
+#include <asm/kvm_page_track.h>
#define KVM_MAX_VCPUS 255
#define KVM_SOFT_MAX_VCPUS 160
@@ -214,6 +215,14 @@ struct kvm_mmu_memory_cache {
void *objects[KVM_NR_MEM_OBJS];
};
+/*
+ * the pages used as guest page table on soft mmu are tracked by
+ * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
+ * by indirect shadow page can not be more than 15 bits.
+ *
+ * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ */
union kvm_mmu_page_role {
unsigned word;
struct {
@@ -276,7 +285,7 @@ struct kvm_mmu_page {
#endif
/* Number of writes since the last time traversal visited this page. */
- int write_flooding_count;
+ atomic_t write_flooding_count;
};
struct kvm_pio_request {
@@ -338,12 +347,8 @@ struct kvm_mmu {
struct rsvd_bits_validate guest_rsvd_check;
- /*
- * Bitmap: bit set = last pte in walk
- * index[0:1]: level (zero-based)
- * index[2]: pte.ps
- */
- u8 last_pte_bitmap;
+ /* Can have large pages at levels 2..last_nonleaf_level-1. */
+ u8 last_nonleaf_level;
bool nx;
@@ -498,7 +503,6 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_header_cache;
struct fpu guest_fpu;
- bool eager_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -644,12 +648,13 @@ struct kvm_vcpu_arch {
};
struct kvm_lpage_info {
- int write_count;
+ int disallow_lpage;
};
struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
+ unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
};
/*
@@ -694,6 +699,8 @@ struct kvm_arch {
*/
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
+ struct kvm_page_track_notifier_node mmu_sp_tracker;
+ struct kvm_page_track_notifier_head track_notifier_head;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
@@ -754,6 +761,8 @@ struct kvm_arch {
bool irqchip_split;
u8 nr_reserved_ioapic_pins;
+
+ bool disabled_lapic_found;
};
struct kvm_vm_stat {
@@ -988,6 +997,8 @@ void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
void kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_init_vm(struct kvm *kvm);
+void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);
@@ -1127,8 +1138,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
new file mode 100644
index 000000000000..c2b8d24a235c
--- /dev/null
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -0,0 +1,61 @@
+#ifndef _ASM_X86_KVM_PAGE_TRACK_H
+#define _ASM_X86_KVM_PAGE_TRACK_H
+
+enum kvm_page_track_mode {
+ KVM_PAGE_TRACK_WRITE,
+ KVM_PAGE_TRACK_MAX,
+};
+
+/*
+ * The notifier represented by @kvm_page_track_notifier_node is linked into
+ * the head which will be notified when guest is triggering the track event.
+ *
+ * Write access on the head is protected by kvm->mmu_lock, read access
+ * is protected by track_srcu.
+ */
+struct kvm_page_track_notifier_head {
+ struct srcu_struct track_srcu;
+ struct hlist_head track_notifier_list;
+};
+
+struct kvm_page_track_notifier_node {
+ struct hlist_node node;
+
+ /*
+ * It is called when guest is writing the write-tracked page
+ * and write emulation is finished at that time.
+ *
+ * @vcpu: the vcpu where the write access happened.
+ * @gpa: the physical address written by guest.
+ * @new: the data was written to the address.
+ * @bytes: the written length.
+ */
+ void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+ int bytes);
+};
+
+void kvm_page_track_init(struct kvm *kvm);
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont);
+int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages);
+
+void kvm_slot_page_track_add_page(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ enum kvm_page_track_mode mode);
+void kvm_slot_page_track_remove_page(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ enum kvm_page_track_mode mode);
+bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
+ enum kvm_page_track_mode mode);
+
+void
+kvm_page_track_register_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
+void
+kvm_page_track_unregister_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+ int bytes);
+#endif
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c1adf33fdd0d..bc62e7cbf1b1 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
-#ifdef CONFIG_DEBUG_RODATA
#define KVM_HYPERCALL \
ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
-#else
-/* On AMD processors, vmcall will generate a trap that we will
- * then rewrite to the appropriate instruction.
- */
-#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
-#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index e795f5274217..7e68f9558552 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -25,7 +25,6 @@
#include <linux/module.h>
#include <linux/ftrace.h>
-#ifdef CONFIG_LIVEPATCH
static inline int klp_check_compiler_support(void)
{
#ifndef CC_USING_FENTRY
@@ -40,8 +39,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
{
regs->ip = ip;
}
-#else
-#error Include linux/livepatch.h, not asm/livepatch.h
-#endif
#endif /* _ASM_X86_LIVEPATCH_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2ea4527e462f..92b6f651fa4f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,8 +40,20 @@
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
/* AMD-specific bits */
-#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */
+#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
+
+/*
+ * McaX field if set indicates a given bank supports MCA extensions:
+ * - Deferred error interrupt type is specifiable by bank.
+ * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
+ * But should not be used to determine MSR numbers.
+ * - TCC bit is present in MCx_STATUS.
+ */
+#define MCI_CONFIG_MCAX 0x1
+#define MCI_IPID_MCATYPE 0xFFFF0000
+#define MCI_IPID_HWID 0xFFF
/*
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
@@ -91,6 +103,16 @@
#define MCE_LOG_LEN 32
#define MCE_LOG_SIGNATURE "MACHINECHECK"
+/* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
+#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
+#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
@@ -113,6 +135,7 @@ struct mca_config {
bool ignore_ce;
bool disabled;
bool ser;
+ bool recovery;
bool bios_cmci_threshold;
u8 banks;
s8 bootlog;
@@ -287,4 +310,49 @@ struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected,
struct cper_sec_mem_err *mem_err);
+/*
+ * Enumerate new IP types and HWID values in AMD processors which support
+ * Scalable MCA.
+ */
+#ifdef CONFIG_X86_MCE_AMD
+enum amd_ip_types {
+ SMCA_F17H_CORE = 0, /* Core errors */
+ SMCA_DF, /* Data Fabric */
+ SMCA_UMC, /* Unified Memory Controller */
+ SMCA_PB, /* Parameter Block */
+ SMCA_PSP, /* Platform Security Processor */
+ SMCA_SMU, /* System Management Unit */
+ N_AMD_IP_TYPES
+};
+
+struct amd_hwid {
+ const char *name;
+ unsigned int hwid;
+};
+
+extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
+
+enum amd_core_mca_blocks {
+ SMCA_LS = 0, /* Load Store */
+ SMCA_IF, /* Instruction Fetch */
+ SMCA_L2_CACHE, /* L2 cache */
+ SMCA_DE, /* Decoder unit */
+ RES, /* Reserved */
+ SMCA_EX, /* Execution unit */
+ SMCA_FP, /* Floating Point */
+ SMCA_L3_CACHE, /* L3 cache */
+ N_CORE_MCA_BLOCKS
+};
+
+extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+
+enum amd_df_mca_blocks {
+ SMCA_CS = 0, /* Coherent Slave */
+ SMCA_PIE, /* Power management, Interrupts, etc */
+ N_DF_BLOCKS
+};
+
+extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+#endif
+
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 1e1b07a5a738..9d3a96c4da78 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -3,6 +3,7 @@
#include <asm/cpu.h>
#include <linux/earlycpio.h>
+#include <linux/initrd.h>
#define native_rdmsr(msr, val1, val2) \
do { \
@@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { }
static inline bool
get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
#endif
+
+static inline unsigned long get_initrd_start(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ return initrd_start;
+#else
+ return 0;
+#endif
+}
+
+static inline unsigned long get_initrd_start_addr(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+#ifdef CONFIG_X86_32
+ unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+
+ return (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+ return get_initrd_start();
+#endif
+#else /* CONFIG_BLK_DEV_INITRD */
+ return 0;
+#endif
+}
+
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 8559b0102ea1..603417f8dd6c 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -40,7 +40,6 @@ struct extended_sigtable {
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
-#define DWSIZE (sizeof(u32))
#define get_totalsize(mc) \
(((struct microcode_intel *)mc)->hdr.datasize ? \
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 55234d5e7160..1ea0baef1175 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,7 +19,8 @@ typedef struct {
#endif
struct mutex lock;
- void __user *vdso;
+ void __user *vdso; /* vdso base address */
+ const struct vdso_image *vdso_image; /* vdso image in use */
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b05402ef3b84..2da46ac16e37 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1,7 +1,12 @@
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
-/* CPU model specific register (MSR) numbers */
+/*
+ * CPU model specific register (MSR) numbers.
+ *
+ * Do not add new entries to this file unless the definitions are shared
+ * between multiple compilation units.
+ */
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
@@ -230,10 +235,10 @@
#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
/* IA32_HWP_CAPABILITIES */
-#define HWP_HIGHEST_PERF(x) (x & 0xff)
-#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8)
-#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16)
-#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24)
+#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */
#define HWP_MIN_PERF(x) (x & 0xff)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b5e5aa..0deeb2d26df7 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -3,6 +3,8 @@
#include <linux/sched.h>
+#include <asm/cpufeature.h>
+
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 462594320d39..9ab7507ca1c2 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -20,6 +20,9 @@ struct pci_sysdata {
#ifdef CONFIG_X86_64
void *iommu; /* IOMMU private data */
#endif
+#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+ void *fwnode; /* IRQ domain for MSI assignment */
+#endif
};
extern int pci_routeirq;
@@ -32,6 +35,7 @@ extern int noioapicreroute;
static inline int pci_domain_nr(struct pci_bus *bus)
{
struct pci_sysdata *sd = bus->sysdata;
+
return sd->domain;
}
@@ -41,6 +45,17 @@ static inline int pci_proc_domain(struct pci_bus *bus)
}
#endif
+#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+static inline void *_pci_root_bus_fwnode(struct pci_bus *bus)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+
+ return sd->fwnode;
+}
+
+#define pci_root_bus_fwnode _pci_root_bus_fwnode
+#endif
+
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
or architectures with incomplete PCI setup by the loader */
@@ -105,9 +120,6 @@ void native_restore_msi_irqs(struct pci_dev *dev);
#include <asm/pci_64.h>
#endif
-/* implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
/* generic pci stuff */
#include <asm-generic/pci.h>
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7bcb861a04e5..5a2ed3ed2f26 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -165,6 +165,7 @@ struct x86_pmu_capability {
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
+#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
/*
* IBS cpuid feature detection
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index c57fd1ea9689..bf8b35d2035a 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -137,6 +137,11 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
arch_wb_cache_pmem(addr, size);
}
+static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+{
+ clflush_cache_range((void __force *) addr, size);
+}
+
static inline bool __arch_has_wmb_pmem(void)
{
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 20c11d1aa4cc..983738ac014c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -13,7 +13,7 @@ struct vm86;
#include <asm/types.h>
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
@@ -24,7 +24,6 @@ struct vm86;
#include <asm/fpu/types.h>
#include <linux/personality.h>
-#include <linux/cpumask.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/math64.h>
@@ -129,6 +128,8 @@ struct cpuinfo_x86 {
u16 booted_cores;
/* Physical processor id: */
u16 phys_proc_id;
+ /* Logical processor id: */
+ u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
/* Compute unit id */
@@ -298,10 +299,13 @@ struct tss_struct {
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+#ifdef CONFIG_X86_32
/*
- * Space for the temporary SYSENTER stack:
+ * Space for the temporary SYSENTER stack.
*/
+ unsigned long SYSENTER_stack_canary;
unsigned long SYSENTER_stack[64];
+#endif
} ____cacheline_aligned;
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a4a77286cb1d..9b9b30b19441 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -7,12 +7,23 @@
void syscall_init(void);
+#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
-void entry_SYSCALL_compat(void);
+#endif
+
+#ifdef CONFIG_X86_32
void entry_INT80_32(void);
-void entry_INT80_compat(void);
void entry_SYSENTER_32(void);
+void __begin_SYSENTER_singlestep_region(void);
+void __end_SYSENTER_singlestep_region(void);
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
void entry_SYSENTER_compat(void);
+void __end_entry_SYSENTER_compat(void);
+void entry_SYSCALL_compat(void);
+void entry_INT80_compat(void);
+#endif
void x86_configure_nx(void);
void x86_report_nx(void);
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 0a5242428659..13b6cdd0af57 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -7,7 +7,7 @@
extern char __brk_base[], __brk_limit[];
extern struct exception_table_entry __stop___ex_table[];
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
#endif
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 89db46752a8f..452c88b8ad06 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,7 +13,6 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665ebd17bb..db333300bd4b 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -15,7 +15,7 @@
#include <linux/stringify.h>
#include <asm/nops.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
/* "Raw" instruction opcodes */
#define __ASM_CLAC .byte 0x0f,0x01,0xca
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index dfcf0727623b..20a3de5cb3b0 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,6 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
-#include <asm/cpufeature.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index ff8b9a17dc4b..ca6ba3607705 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -78,6 +78,19 @@ int strcmp(const char *cs, const char *ct);
#define memset(s, c, n) __memset(s, c, n)
#endif
+/**
+ * memcpy_mcsafe - copy memory with indication if a machine check happened
+ *
+ * @dst: destination address
+ * @src: source address
+ * @cnt: number of bytes to copy
+ *
+ * Low level memory copy function that catches machine checks
+ *
+ * Return true for success, false for fail
+ */
+bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b551028740..82866697fcf1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -49,7 +49,7 @@
*/
#ifndef __ASSEMBLY__
struct task_struct;
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <linux/atomic.h>
struct thread_info {
@@ -134,10 +134,13 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
-/* work to do in syscall_trace_enter() */
+/*
+ * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
+ * enter_from_user_mode()
+ */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
/* work to do on any return to user space */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6df2029405a3..c24b4224d439 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,8 +5,57 @@
#include <linux/sched.h>
#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/special_insns.h>
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+{
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+ * mappings, we don't want the compiler to reorder any subsequent
+ * memory accesses before the TLB flush.
+ *
+ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR 0
+#define INVPCID_TYPE_SINGLE_CTXT 1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
+#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+ unsigned long addr)
+{
+ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void)
{
unsigned long flags;
+ if (static_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
+ */
+ invpcid_flush_all();
+ return;
+ }
+
/*
* Read-modify-write to CR4 - protect it from preemption and
* from interrupts. (Use the raw variant because this code can
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0fb46482dfde..7f991bd5031b 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -119,12 +119,23 @@ static inline void setup_node_to_cpumask_map(void) { }
extern const struct cpumask *cpu_coregroup_mask(int cpu);
+#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#ifdef ENABLE_TOPO_DEFINES
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
+
+extern unsigned int __max_logical_packages;
+#define topology_max_packages() (__max_logical_packages)
+int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+extern int topology_phys_to_logical_pkg(unsigned int pkg);
+#else
+#define topology_max_packages() (1)
+static inline int
+topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 6d7c5479bcea..174c4212780a 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void)
return rdtsc();
}
+extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
+
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a4a30e4b2d34..c0f27d7ea7ff 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -90,12 +90,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
likely(!__range_not_ok(addr, size, user_addr_max()))
/*
- * The exception table consists of pairs of addresses relative to the
- * exception table enty itself: the first is the address of an
- * instruction that is allowed to fault, and the second is the address
- * at which the program should continue. No registers are modified,
- * so it is entirely up to the continuation code to figure out what to
- * do.
+ * The exception table consists of triples of addresses relative to the
+ * exception table entry itself. The first address is of an instruction
+ * that is allowed to fault, the second is the target at which the program
+ * should continue. The third is a handler function to deal with the fault
+ * caused by the instruction in the first field.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -104,13 +103,14 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
*/
struct exception_table_entry {
- int insn, fixup;
+ int insn, fixup, handler;
};
/* This is not the generic standard exception_table_entry format */
#define ARCH_HAS_SORT_EXTABLE
#define ARCH_HAS_SEARCH_EXTABLE
-extern int fixup_exception(struct pt_regs *regs);
+extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern bool ex_has_fault_handler(unsigned long ip);
extern int early_fixup_exception(unsigned long *ip);
/*
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b89c34c4019b..307698688fa1 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,7 +8,7 @@
#include <linux/errno.h>
#include <linux/lockdep.h>
#include <asm/alternative.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/page.h>
/*
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index deabaf9759b6..43dc55be524e 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -13,9 +13,6 @@ struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
- /* text_mapping.pages is big enough for data/size page pointers */
- struct vm_special_mapping text_mapping;
-
unsigned long alt, alt_len;
long sym_vvar_start; /* Negative offset to the vvar area */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index f556c4843aa1..e728699db774 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
};
extern struct vsyscall_gtod_data vsyscall_gtod_data;
+extern int vclocks_used;
+static inline bool vclock_was_used(int vclock)
+{
+ return READ_ONCE(vclocks_used) & (1 << vclock);
+}
+
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
{
unsigned ret;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 7956412d09bd..9b1a91834ac8 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -226,7 +226,9 @@
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
/* Declare the various hypercall operations. */
-#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d485232f1e9f..62d4111c1c54 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -256,7 +256,7 @@ struct sigcontext_64 {
__u16 cs;
__u16 gs;
__u16 fs;
- __u16 __pad0;
+ __u16 ss;
__u64 err;
__u64 trapno;
__u64 oldmask;
@@ -341,9 +341,37 @@ struct sigcontext {
__u64 rip;
__u64 eflags; /* RFLAGS */
__u16 cs;
+
+ /*
+ * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+ * Linux saved and restored fs and gs in these slots. This
+ * was counterproductive, as fsbase and gsbase were never
+ * saved, so arch_prctl was presumably unreliable.
+ *
+ * These slots should never be reused without extreme caution:
+ *
+ * - Some DOSEMU versions stash fs and gs in these slots manually,
+ * thus overwriting anything the kernel expects to be preserved
+ * in these slots.
+ *
+ * - If these slots are ever needed for any other purpose,
+ * there is some risk that very old 64-bit binaries could get
+ * confused. I doubt that many such binaries still work,
+ * though, since the same patch in 2.5.64 also removed the
+ * 64-bit set_thread_area syscall, so it appears that there
+ * is no TLS API beyond modify_ldt that works in both pre-
+ * and post-2.5.64 kernels.
+ *
+ * If the kernel ever adds explicit fs, gs, fsbase, and gsbase
+ * save/restore, it will most likely need to be opt-in and use
+ * different context slots.
+ */
__u16 gs;
__u16 fs;
- __u16 __pad0;
+ union {
+ __u16 ss; /* If UC_SIGCONTEXT_SS */
+ __u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
+ };
__u64 err;
__u64 trapno;
__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f2..e3d1ec90616e 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
@@ -1,11 +1,54 @@
#ifndef _ASM_X86_UCONTEXT_H
#define _ASM_X86_UCONTEXT_H
-#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
- * information in the memory layout pointed
- * by the fpstate pointer in the ucontext's
- * sigcontext struct (uc_mcontext).
- */
+/*
+ * Indicates the presence of extended state information in the memory
+ * layout pointed by the fpstate pointer in the ucontext's sigcontext
+ * struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE 0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext. All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ * saved CS is not 64-bit)
+ * new SS = saved SS (will fail IRET and signal if invalid)
+ * else
+ * new SS = a flat 32-bit data segment
+ *
+ * This behavior serves three purposes:
+ *
+ * - Legacy programs that construct a 64-bit sigcontext from scratch
+ * with zero or garbage in the SS slot (e.g. old CRIU) and call
+ * sigreturn will still work.
+ *
+ * - Old DOSEMU versions sometimes catch a signal from a segmented
+ * context, delete the old SS segment (with modify_ldt), and change
+ * the saved CS to a 64-bit segment. These DOSEMU versions expect
+ * sigreturn to send them back to 64-bit mode without killing them,
+ * despite the fact that the SS selector when the signal was raised is
+ * no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel
+ * will fix up SS for these DOSEMU versions.
+ *
+ * - Old and new programs that catch a signal and return without
+ * modifying the saved context will end up in exactly the state they
+ * started in, even if they were running in a segmented context when
+ * the signal was raised.. Old kernels would lose track of the
+ * previous SS value.
+ */
+#define UC_SIGCONTEXT_SS 0x2
+#define UC_STRICT_RESTORE_SS 0x4
+#endif
#include <asm-generic/ucontext.h>
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e85f713641d..0a2bb1f62e72 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -227,19 +227,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
return 0;
}
-static int gart_fix_e820 __initdata = 1;
+static bool gart_fix_e820 __initdata = true;
static int __init parse_gart_mem(char *p)
{
- if (!p)
- return -EINVAL;
-
- if (!strncmp(p, "off", 3))
- gart_fix_e820 = 0;
- else if (!strncmp(p, "on", 2))
- gart_fix_e820 = 1;
-
- return 0;
+ return kstrtobool(p, &gart_fix_e820);
}
early_param("gart_fix_e820", parse_gart_mem);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8a5cddac7d44..531b9611c51d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2078,6 +2078,20 @@ int generic_processor_info(int apicid, int version)
cpu = cpumask_next_zero(-1, cpu_present_mask);
/*
+ * This can happen on physical hotplug. The sanity check at boot time
+ * is done from native_smp_prepare_cpus() after num_possible_cpus() is
+ * established.
+ */
+ if (topology_update_package_map(apicid, cpu) < 0) {
+ int thiscpu = max + disabled_cpus;
+
+ pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+ thiscpu, apicid);
+ disabled_cpus++;
+ return -ENOSPC;
+ }
+
+ /*
* Validate version
*/
if (version == 0x0) {
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 9968f30cca3e..76f89e2b245a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -53,7 +53,7 @@ void flat_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+static void _flat_send_IPI_mask(unsigned long mask, int vector)
{
unsigned long flags;
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c6ec49..ab5c2c685a3c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
unsigned long value;
unsigned int id = (x >> 24) & 0xff;
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index eb45fc9b6124..28bde88b0085 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -18,6 +18,66 @@
#include <asm/proto.h>
#include <asm/ipi.h>
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * of the value read we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ __xapic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
+{
+ unsigned long cfg;
+
+ /*
+ * Wait for idle.
+ */
+ if (unlikely(vector == NMI_VECTOR))
+ safe_apic_wait_icr_idle();
+ else
+ __xapic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ native_apic_mem_write(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
void default_send_IPI_single_phys(int cpu, int vector)
{
unsigned long flags;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 84a7524b202c..5c042466f274 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -59,7 +59,6 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce39025f467..ecdc1d217dc0 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -7,7 +7,7 @@
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
@@ -52,6 +52,11 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
offsetofend(struct tss_struct, SYSENTER_stack));
+ /* Offset from cpu_tss to SYSENTER_stack */
+ OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+ /* Size of SYSENTER_stack */
+ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f2edafb5f24e..d875f97d4e0b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -4,17 +4,11 @@
#include <asm/ia32.h>
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 58031303e304..0d373d7affc8 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o
-
-ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
-endif
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o
-
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
- perf_event_intel_uncore_snb.o \
- perf_event_intel_uncore_snbep.o \
- perf_event_intel_uncore_nhmex.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o
-endif
-
-
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_MICROCODE) += microcode/
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
@@ -64,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a08936..97c59fd60702 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
void (*f_vide)(void);
u64 d, d2;
- printk(KERN_INFO "AMD K6 stepping B detected - ");
+ pr_info("AMD K6 stepping B detected - ");
/*
* It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
if (d > 20*K6_BUG_LOOP)
- printk(KERN_CONT
- "system stability may be impaired when more than 32 MB are used.\n");
+ pr_cont("system stability may be impaired when more than 32 MB are used.\n");
else
- printk(KERN_CONT "probably OK (after B9730xxxx).\n");
+ pr_cont("probably OK (after B9730xxxx).\n");
}
/* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
- printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+ pr_info("Enabling old style K6 write allocation for %d Mb\n",
mbytes);
}
return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
- printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+ pr_info("Enabling new style K6 write allocation for %d Mb\n",
mbytes);
}
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
*/
if (c->x86_model >= 6 && c->x86_model <= 10) {
if (!cpu_has(c, X86_FEATURE_XMM)) {
- printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+ pr_info("Enabling disabled K7/SSE Support.\n");
msr_clear_bit(MSR_K7_HWCR, 15);
set_cpu_cap(c, X86_FEATURE_XMM);
}
@@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
- printk(KERN_INFO
- "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
- l, ((l & 0x000fffff)|0x20000000));
+ pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+ l, ((l & 0x000fffff)|0x20000000));
wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
}
}
@@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
unsigned long pfn = tseg >> PAGE_SHIFT;
- printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+ pr_debug("tseg: %010llx\n", tseg);
if (pfn_range_is_mapped(pfn, pfn + 1))
set_memory_4k((unsigned long)__va(tseg), 1);
}
@@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_K7_HWCR, val);
if (!(val & BIT(24)))
- printk(KERN_WARNING FW_BUG "TSC doesn't count "
- "with P0 frequency!\n");
+ pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
}
}
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 04f0fe5af83e..a972ac4c7e7d 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -15,7 +15,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#if !defined(CONFIG_SMP)
- printk(KERN_INFO "CPU: ");
+ pr_info("CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
alternative_instructions();
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ae20be6e483c..1661d8ec9280 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
@@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c)
rdmsr(MSR_VIA_FCR, lo, hi);
lo |= ACE_FCR; /* enable ACE unit */
wrmsr(MSR_VIA_FCR, lo, hi);
- printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+ pr_info("CPU: Enabled ACE h/w crypto\n");
}
/* enable RNG unit, if present and disabled */
@@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c)
rdmsr(MSR_VIA_RNG, lo, hi);
lo |= RNG_ENABLE; /* enable RNG unit */
wrmsr(MSR_VIA_RNG, lo, hi);
- printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+ pr_info("CPU: Enabled h/w RNG\n");
}
/* store Centaur Extended Feature Flags as
@@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
name = "C6";
fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
fcr_clr = DPDC;
- printk(KERN_NOTICE "Disabling bugged TSC.\n");
+ pr_notice("Disabling bugged TSC.\n");
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
@@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c)
newlo = (lo|fcr_set) & (~fcr_clr);
if (newlo != lo) {
- printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+ pr_info("Centaur FCR was 0x%X now 0x%X\n",
lo, newlo);
wrmsr(MSR_IDT_FCR1, newlo, hi);
} else {
- printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+ pr_info("Centaur FCR is 0x%X\n", lo);
}
/* Emulate MTRRs using Centaur's MCR. */
set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de8f60a..249461f95851 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);
+static int __init x86_noinvpcid_setup(char *s)
+{
+ /* noinvpcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_INVPCID))
+ return 0;
+
+ setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+ pr_info("noinvpcid: INVPCID feature disabled\n");
+ return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;
@@ -228,7 +244,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
lo |= 0x200000;
wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
- printk(KERN_NOTICE "CPU serial number disabled.\n");
+ pr_notice("CPU serial number disabled.\n");
clear_cpu_cap(c, X86_FEATURE_PN);
/* Disabling the serial number may affect the cpuid level */
@@ -329,9 +345,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
if (!warn)
continue;
- printk(KERN_WARNING
- "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
- x86_cap_flag(df->feature), df->level);
+ pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+ x86_cap_flag(df->feature), df->level);
}
}
@@ -510,7 +525,7 @@ void detect_ht(struct cpuinfo_x86 *c)
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
- printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
+ pr_info_once("CPU0: Hyper-Threading is disabled\n");
goto out;
}
@@ -531,10 +546,10 @@ void detect_ht(struct cpuinfo_x86 *c)
out:
if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
+ pr_info("CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ pr_info("CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
printed = 1;
}
#endif
@@ -559,9 +574,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c)
}
}
- printk_once(KERN_ERR
- "CPU: vendor_id '%s' unknown, using generic init.\n" \
- "CPU: Your system may be unstable.\n", v);
+ pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
+ "CPU: Your system may be unstable.\n", v);
c->x86_vendor = X86_VENDOR_UNKNOWN;
this_cpu = &default_cpu;
@@ -760,7 +774,7 @@ void __init early_cpu_init(void)
int count = 0;
#ifdef CONFIG_PROCESSOR_SELECT
- printk(KERN_INFO "KERNEL supported cpus:\n");
+ pr_info("KERNEL supported cpus:\n");
#endif
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
@@ -778,7 +792,7 @@ void __init early_cpu_init(void)
for (j = 0; j < 2; j++) {
if (!cpudev->c_ident[j])
continue;
- printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
+ pr_info(" %s %s\n", cpudev->c_vendor,
cpudev->c_ident[j]);
}
}
@@ -803,6 +817,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+#else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
+#endif
}
static void generic_identify(struct cpuinfo_x86 *c)
@@ -977,6 +1016,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
+ /* The boot/hotplug time assigment got cleared, restore it */
+ c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
}
/*
@@ -1061,7 +1102,7 @@ static void __print_cpu_msr(void)
for (index = index_min; index < index_max; index++) {
if (rdmsrl_safe(index, &val))
continue;
- printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+ pr_info(" MSR%08x: %016llx\n", index, val);
}
}
}
@@ -1100,19 +1141,19 @@ void print_cpu_info(struct cpuinfo_x86 *c)
}
if (vendor && !strstr(c->x86_model_id, vendor))
- printk(KERN_CONT "%s ", vendor);
+ pr_cont("%s ", vendor);
if (c->x86_model_id[0])
- printk(KERN_CONT "%s", c->x86_model_id);
+ pr_cont("%s", c->x86_model_id);
else
- printk(KERN_CONT "%d86", c->x86);
+ pr_cont("%d86", c->x86);
- printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
+ pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+ pr_cont(", stepping: 0x%x)\n", c->x86_mask);
else
- printk(KERN_CONT ")\n");
+ pr_cont(")\n");
print_cpu_msr(c);
}
@@ -1438,7 +1479,7 @@ void cpu_init(void)
show_ucode_info_early();
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
cpu_has_tsc ||
@@ -1475,20 +1516,6 @@ void cpu_init(void)
}
#endif
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
- WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
-{
- return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
static void bsp_resume(void)
{
if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index aaf152e79637..6adef9cac23e 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
#include <linux/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
+#include <asm/cpufeature.h>
#include "cpu.h"
@@ -103,7 +104,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c)
local_irq_restore(flags);
if (ccr5 & 2) { /* possible wrong calibration done */
- printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+ pr_info("Recalibrating delay loop with SLOP bit reset\n");
calibrate_delay();
c->loops_per_jiffy = loops_per_jiffy;
}
@@ -115,7 +116,7 @@ static void set_cx86_reorder(void)
{
u8 ccr3;
- printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+ pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
@@ -128,7 +129,7 @@ static void set_cx86_reorder(void)
static void set_cx86_memwb(void)
{
- printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+ pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
@@ -268,7 +269,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
* VSA1 we work around however.
*/
- printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+ pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
isa_dma_bridge_buggy = 2;
/* We do this before the PCI layer is running. However we
@@ -426,7 +427,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c)
if (dir0 == 5 || dir0 == 3) {
unsigned char ccr3;
unsigned long flags;
- printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+ pr_info("Enabling CPUID on Cyrix processor.\n");
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
/* enable MAPEN */
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index d820d8eae96b..73d391ae452f 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -56,7 +56,7 @@ detect_hypervisor_vendor(void)
}
if (max_pri)
- printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
+ pr_info("Hypervisor detected: %s\n", x86_hyper->name);
}
void init_hypervisor(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 565648bc1a0a..1f7fdb91a818 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/bugs.h>
@@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
c->microcode < 0x20e) {
- printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+ pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
}
@@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
- printk(KERN_INFO "Disabled fast string operations\n");
+ pr_info("Disabled fast string operations\n");
setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
setup_clear_cpu_cap(X86_FEATURE_ERMS);
}
@@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
pr_info("Disabling PGE capability bit\n");
setup_clear_cpu_cap(X86_FEATURE_PGE);
}
+
+ if (c->cpuid_level >= 0x00000001) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+ /*
+ * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+ * apicids which are reserved per package. Store the resulting
+ * shift value for the package management code.
+ */
+ if (edx & (1U << 28))
+ c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+ }
}
#ifdef CONFIG_X86_32
@@ -176,7 +189,7 @@ int ppro_with_ram_bug(void)
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask < 8) {
- printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+ pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
return 0;
@@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_F00F);
if (!f00f_workaround_enabled) {
- printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+ pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
f00f_workaround_enabled = 1;
}
}
@@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* Forcefully enable PAE if kernel parameter "forcepae" is present.
*/
if (forcepae) {
- printk(KERN_WARNING "PAE forced!\n");
+ pr_warn("PAE forced!\n");
set_cpu_cap(c, X86_FEATURE_PAE);
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0b6c52388cf4..de6626c18e42 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
#include <linux/sysfs.h>
#include <linux/pci.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
@@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
err = amd_set_l3_disable_slot(nb, cpu, slot, val);
if (err) {
if (err == -EEXIST)
- pr_warning("L3 slot %d in use/index already disabled!\n",
+ pr_warn("L3 slot %d in use/index already disabled!\n",
slot);
return err;
}
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d487ea..fbb5e90557a5 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
#include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4cfba4371a71..517619ea6498 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -115,7 +115,7 @@ static int raise_local(void)
int cpu = m->extcpu;
if (m->inject_flags & MCJ_EXCEPTION) {
- printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+ pr_info("Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
case MCJ_CTX_IRQ:
/*
@@ -128,15 +128,15 @@ static int raise_local(void)
raise_exception(m, NULL);
break;
default:
- printk(KERN_INFO "Invalid MCE context\n");
+ pr_info("Invalid MCE context\n");
ret = -EINVAL;
}
- printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+ pr_info("MCE exception done on CPU %d\n", cpu);
} else if (m->status) {
- printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+ pr_info("Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
mce_notify_irq();
- printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+ pr_info("Machine check poll done on CPU %d\n", cpu);
} else
m->finished = 0;
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
start = jiffies;
while (!cpumask_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
- printk(KERN_ERR
- "Timeout waiting for mce inject %lx\n",
+ pr_err("Timeout waiting for mce inject %lx\n",
*cpumask_bits(mce_inject_cpumask));
break;
}
@@ -241,7 +240,7 @@ static int inject_init(void)
{
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
- printk(KERN_INFO "Machine check injector initialized\n");
+ pr_info("Machine check injector initialized\n");
register_mce_write_callback(mce_write);
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
"mce_notify");
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c222071..5119766d9889 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/debugfs.h>
#include <asm/mce.h>
+#include <asm/uaccess.h>
#include "mce-internal.h"
@@ -29,7 +30,7 @@
* panic situations)
*/
-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
@@ -48,6 +49,7 @@ static struct severity {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
+#define KERNEL_RECOV .context = IN_KERNEL_RECOV
#define SER .ser = SER_REQUIRED
#define NOSER .ser = NO_SER
#define EXCP .excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@ static struct severity {
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
+ PANIC, "In kernel and no restart IP",
+ EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+ ),
+ MCESEV(
DEFERRED, "Deferred error",
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
@@ -123,6 +129,11 @@ static struct severity {
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
),
MCESEV(
+ AR, "Action required: data load in error recoverable area of kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ KERNEL_RECOV
+ ),
+ MCESEV(
AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
@@ -170,6 +181,9 @@ static struct severity {
) /* always matches. keep at end */
};
+#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
+ (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@ static struct severity {
*/
static int error_context(struct mce *m)
{
- return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+ if ((m->cs & 3) == 3)
+ return IN_USER;
+ if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
+ return IN_KERNEL_RECOV;
+ return IN_KERNEL;
}
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a006f4cd792b..f0c921b03e42 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
}
}
+static int do_memory_failure(struct mce *m)
+{
+ int flags = MF_ACTION_REQUIRED;
+ int ret;
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
+ if (!(m->mcgstatus & MCG_STATUS_RIPV))
+ flags |= MF_MUST_KILL;
+ ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+ if (ret)
+ pr_err("Memory error not recovered");
+ return ret;
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
- u64 recover_paddr = ~0ull;
- int flags = MF_ACTION_REQUIRED;
int lmce = 0;
/* If this CPU is offline, just bail out. */
@@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
/*
- * At insane "tolerant" levels we take no action. Otherwise
- * we only die if we have no other choice. For less serious
- * issues we try to recover, or limit damage to the current
- * process.
+ * If tolerant is at an insane level we drop requests to kill
+ * processes and continue even when there is no way out.
*/
- if (cfg->tolerant < 3) {
- if (no_way_out)
- mce_panic("Fatal machine check on current CPU", &m, msg);
- if (worst == MCE_AR_SEVERITY) {
- recover_paddr = m.addr;
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
- flags |= MF_MUST_KILL;
- } else if (kill_it) {
- force_sig(SIGBUS, current);
- }
- }
+ if (cfg->tolerant == 3)
+ kill_it = 0;
+ else if (no_way_out)
+ mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst > 0)
mce_report_event(regs);
@@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
out:
sync_core();
- if (recover_paddr == ~0ull)
- goto done;
+ if (worst != MCE_AR_SEVERITY && !kill_it)
+ goto out_ist;
- pr_err("Uncorrected hardware memory error in user-access at %llx",
- recover_paddr);
- /*
- * We must call memory_failure() here even if the current process is
- * doomed. We still need to mark the page as poisoned and alert any
- * other users of the page.
- */
- ist_begin_non_atomic(regs);
- local_irq_enable();
- if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
- pr_err("Memory error not recovered");
- force_sig(SIGBUS, current);
+ /* Fault was in user mode and we need to take some action */
+ if ((m.cs & 3) == 3) {
+ ist_begin_non_atomic(regs);
+ local_irq_enable();
+
+ if (kill_it || do_memory_failure(&m))
+ force_sig(SIGBUS, current);
+ local_irq_disable();
+ ist_end_non_atomic();
+ } else {
+ if (!fixup_exception(regs, X86_TRAP_MC))
+ mce_panic("Failed kernel mode recovery", &m, NULL);
}
- local_irq_disable();
- ist_end_non_atomic();
-done:
+
+out_ist:
ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1576,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
+ /*
+ * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+ * whether this processor will actually generate recoverable
+ * machine checks. Check to see if this is an E7 model Xeon.
+ * We can't do a model number check because E5 and E7 use the
+ * same model number. E5 doesn't support recovery, E7 does.
+ */
+ if (mca_cfg.recovery || (mca_cfg.ser &&
+ !strncmp(c->x86_model_id,
+ "Intel(R) Xeon(R) CPU E7-", 24)))
+ set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
@@ -1617,10 +1630,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
case X86_VENDOR_AMD: {
u32 ebx = cpuid_ebx(0x80000007);
- mce_amd_feature_init(c);
mce_flags.overflow_recov = !!(ebx & BIT(0));
mce_flags.succor = !!(ebx & BIT(1));
mce_flags.smca = !!(ebx & BIT(3));
+ mce_amd_feature_init(c);
break;
}
@@ -2028,6 +2041,8 @@ static int __init mcheck_enable(char *str)
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
+ else if (!strcmp(str, "recovery"))
+ cfg->recovery = true;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index e99b15077e94..9d656fd436ef 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,5 +1,5 @@
/*
- * (c) 2005-2015 Advanced Micro Devices, Inc.
+ * (c) 2005-2016 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -28,7 +28,7 @@
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
-#define NR_BLOCKS 9
+#define NR_BLOCKS 5
#define THRESHOLD_MAX 0xFFF
#define INT_TYPE_APIC 0x00020000
#define MASK_VALID_HI 0x80000000
@@ -49,6 +49,19 @@
#define DEF_LVT_OFF 0x2
#define DEF_INT_TYPE_APIC 0x2
+/* Scalable MCA: */
+
+/* Threshold LVT offset is at MSR0xC0000410[15:12] */
+#define SMCA_THR_LVT_OFF 0xF000
+
+/*
+ * OS is required to set the MCAX bit to acknowledge that it is now using the
+ * new MSR ranges and new registers under each bank. It also means that the OS
+ * will configure deferred errors in the new MCx_CONFIG register. If the bit is
+ * not set, uncorrectable errors will cause a system panic.
+ */
+#define SMCA_MCAX_EN_OFF 0x1
+
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -58,6 +71,35 @@ static const char * const th_names[] = {
"execution_unit",
};
+/* Define HWID to IP type mappings for Scalable MCA */
+struct amd_hwid amd_hwids[] = {
+ [SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
+ [SMCA_DF] = { "data_fabric", 0x2E },
+ [SMCA_UMC] = { "umc", 0x96 },
+ [SMCA_PB] = { "param_block", 0x5 },
+ [SMCA_PSP] = { "psp", 0xFF },
+ [SMCA_SMU] = { "smu", 0x1 },
+};
+EXPORT_SYMBOL_GPL(amd_hwids);
+
+const char * const amd_core_mcablock_names[] = {
+ [SMCA_LS] = "load_store",
+ [SMCA_IF] = "insn_fetch",
+ [SMCA_L2_CACHE] = "l2_cache",
+ [SMCA_DE] = "decode_unit",
+ [RES] = "",
+ [SMCA_EX] = "execution_unit",
+ [SMCA_FP] = "floating_point",
+ [SMCA_L3_CACHE] = "l3_cache",
+};
+EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+
+const char * const amd_df_mcablock_names[] = {
+ [SMCA_CS] = "coherent_slave",
+ [SMCA_PIE] = "pie",
+};
+EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
@@ -84,6 +126,13 @@ struct thresh_restart {
static inline bool is_shared_bank(int bank)
{
+ /*
+ * Scalable MCA provides for only one core to have access to the MSRs of
+ * a shared bank.
+ */
+ if (mce_flags.smca)
+ return false;
+
/* Bank 4 is for northbridge reporting and is thus shared */
return (bank == 4);
}
@@ -135,6 +184,14 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
}
if (apic != msr) {
+ /*
+ * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+ * the BIOS provides the value. The original field where LVT offset
+ * was set is reserved. Return early here:
+ */
+ if (mce_flags.smca)
+ return 0;
+
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
"for bank %d, block %d (MSR%08X=0x%x%08x)\n",
b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -144,10 +201,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
return 1;
};
-/*
- * Called via smp_call_function_single(), must be called with correct
- * cpu affinity.
- */
+/* Reprogram MCx_MISC MSR behind this threshold bank. */
static void threshold_restart_bank(void *_tr)
{
struct thresh_restart *tr = _tr;
@@ -247,27 +301,116 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+ unsigned int bank, unsigned int block)
+{
+ u32 addr = 0, offset = 0;
+
+ if (mce_flags.smca) {
+ if (!block) {
+ addr = MSR_AMD64_SMCA_MCx_MISC(bank);
+ } else {
+ /*
+ * For SMCA enabled processors, BLKPTR field of the
+ * first MISC register (MCx_MISC0) indicates presence of
+ * additional MISC register set (MISC1-4).
+ */
+ u32 low, high;
+
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return addr;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return addr;
+
+ if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ (low & MASK_BLKPTR_LO))
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ }
+ return addr;
+ }
+
+ /* Fall back to method we used for older processors: */
+ switch (block) {
+ case 0:
+ addr = MSR_IA32_MCx_MISC(bank);
+ break;
+ case 1:
+ offset = ((low & MASK_BLKPTR_LO) >> 21);
+ if (offset)
+ addr = MCG_XBLK_ADDR + offset;
+ break;
+ default:
+ addr = ++current_addr;
+ }
+ return addr;
+}
+
+static int
+prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+ int offset, u32 misc_high)
+{
+ unsigned int cpu = smp_processor_id();
+ struct threshold_block b;
+ int new;
+
+ if (!block)
+ per_cpu(bank_map, cpu) |= (1 << bank);
+
+ memset(&b, 0, sizeof(b));
+ b.cpu = cpu;
+ b.bank = bank;
+ b.block = block;
+ b.address = addr;
+ b.interrupt_capable = lvt_interrupt_supported(bank, misc_high);
+
+ if (!b.interrupt_capable)
+ goto done;
+
+ b.interrupt_enable = 1;
+
+ if (mce_flags.smca) {
+ u32 smca_low, smca_high;
+ u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+ if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+ smca_high |= SMCA_MCAX_EN_OFF;
+ wrmsr(smca_addr, smca_low, smca_high);
+ }
+
+ /* Gather LVT offset for thresholding: */
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+ goto out;
+
+ new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+ } else {
+ new = (misc_high & MASK_LVTOFF_HI) >> 20;
+ }
+
+ offset = setup_APIC_mce_threshold(offset, new);
+
+ if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
+ mce_threshold_vector = amd_threshold_interrupt;
+
+done:
+ mce_threshold_block_init(&b, offset);
+
+out:
+ return offset;
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
- struct threshold_block b;
- unsigned int cpu = smp_processor_id();
u32 low = 0, high = 0, address = 0;
unsigned int bank, block;
- int offset = -1, new;
+ int offset = -1;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
- if (block == 0)
- address = MSR_IA32_MCx_MISC(bank);
- else if (block == 1) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- break;
-
- address += MCG_XBLK_ADDR;
- } else
- ++address;
+ address = get_block_address(address, low, high, bank, block);
+ if (!address)
+ break;
if (rdmsr_safe(address, &low, &high))
break;
@@ -279,29 +422,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
(high & MASK_LOCKED_HI))
continue;
- if (!block)
- per_cpu(bank_map, cpu) |= (1 << bank);
-
- memset(&b, 0, sizeof(b));
- b.cpu = cpu;
- b.bank = bank;
- b.block = block;
- b.address = address;
- b.interrupt_capable = lvt_interrupt_supported(bank, high);
-
- if (!b.interrupt_capable)
- goto init;
-
- b.interrupt_enable = 1;
- new = (high & MASK_LVTOFF_HI) >> 20;
- offset = setup_APIC_mce_threshold(offset, new);
-
- if ((offset == new) &&
- (mce_threshold_vector != amd_threshold_interrupt))
- mce_threshold_vector = amd_threshold_interrupt;
-
-init:
- mce_threshold_block_init(&b, offset);
+ offset = prepare_threshold_block(bank, block, address, offset, high);
}
}
@@ -394,16 +515,9 @@ static void amd_threshold_interrupt(void)
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
for (block = 0; block < NR_BLOCKS; ++block) {
- if (block == 0) {
- address = MSR_IA32_MCx_MISC(bank);
- } else if (block == 1) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- break;
- address += MCG_XBLK_ADDR;
- } else {
- ++address;
- }
+ address = get_block_address(address, low, high, bank, block);
+ if (!address)
+ break;
if (rdmsr_safe(address, &low, &high))
break;
@@ -623,16 +737,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- if (!block) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- return 0;
- address += MCG_XBLK_ADDR;
- } else {
- ++address;
- }
+ address = get_block_address(address, low, high, bank, ++block);
+ if (!address)
+ return 0;
- err = allocate_threshold_blocks(cpu, bank, ++block, address);
+ err = allocate_threshold_blocks(cpu, bank, block, address);
if (err)
goto out_free;
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 12402e10aeff..2a0717bf8033 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
- printk(KERN_EMERG
- "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
- smp_processor_id(), loaddr, lotype);
+ pr_emerg("CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
+ smp_processor_id(), loaddr, lotype);
if (lotype & (1<<5)) {
- printk(KERN_EMERG
- "CPU#%d: Possible thermal failure (CPU on fire ?).\n",
- smp_processor_id());
+ pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+ smp_processor_id());
}
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
/* Read registers before enabling: */
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
- printk(KERN_INFO
- "Intel old style machine check architecture supported.\n");
+ pr_info("Intel old style machine check architecture supported.\n");
/* Enable MCE: */
cr4_set_bits(X86_CR4_MCE);
- printk(KERN_INFO
- "Intel old style machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
+ pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 2c5aaf8c2e2f..0b445c2ff735 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level)
/* if we just entered the thermal event */
if (new_event) {
if (event == THERMAL_THROTTLING_EVENT)
- printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+ pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
this_cpu,
level == CORE_LEVEL ? "Core" : "Package",
state->count);
@@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level)
}
if (old_event) {
if (event == THERMAL_THROTTLING_EVENT)
- printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
- this_cpu,
+ pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
level == CORE_LEVEL ? "Core" : "Package");
return 1;
}
@@ -417,8 +416,8 @@ static void intel_thermal_interrupt(void)
static void unexpected_thermal_interrupt(void)
{
- printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
- smp_processor_id());
+ pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
+ smp_processor_id());
}
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -499,7 +498,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
if (system_state == SYSTEM_BOOTING)
- printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+ pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
return;
}
@@ -557,8 +556,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
- tm2 ? "TM2" : "TM1");
+ pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
+ tm2 ? "TM2" : "TM1");
/* enable thermal throttle processing */
atomic_set(&therm_throt_en, 1);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980186ee..fcf9ae9384f4 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -12,8 +12,8 @@
static void default_threshold_interrupt(void)
{
- printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
- THRESHOLD_APIC_VECTOR);
+ pr_err("Unexpected threshold interrupt at vector %x\n",
+ THRESHOLD_APIC_VECTOR);
}
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 01dd8702880b..c6a722e1d011 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
ist_enter(regs);
- printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+ pr_emerg("CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
ist_exit(regs);
@@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
cr4_set_bits(X86_CR4_MCE);
- printk(KERN_INFO
- "Winchip machine check reporting enabled on CPU#0.\n");
+ pr_info("Winchip machine check reporting enabled on CPU#0.\n");
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 2233f8a76615..8581963894c7 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -431,10 +431,6 @@ int __init save_microcode_in_initrd_amd(void)
else
container = cont_va;
- if (ucode_new_rev)
- pr_info("microcode: updated early to new patch_level=0x%08x\n",
- ucode_new_rev);
-
eax = cpuid_eax(0x00000001);
eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
@@ -469,8 +465,7 @@ void reload_ucode_amd(void)
if (mc && rev < mc->hdr.patch_id) {
if (!__apply_microcode_amd(mc)) {
ucode_new_rev = mc->hdr.patch_id;
- pr_info("microcode: reload patch_level=0x%08x\n",
- ucode_new_rev);
+ pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
}
}
}
@@ -793,15 +788,13 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
return -EINVAL;
}
- patch->data = kzalloc(patch_size, GFP_KERNEL);
+ patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL);
if (!patch->data) {
pr_err("Patch data allocation failure.\n");
kfree(patch);
return -EINVAL;
}
- /* All looks ok, copy patch... */
- memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
INIT_LIST_HEAD(&patch->plist);
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;
@@ -953,10 +946,14 @@ struct microcode_ops * __init init_amd_microcode(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+ pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
return NULL;
}
+ if (ucode_new_rev)
+ pr_info_once("microcode updated early to new patch_level=0x%08x\n",
+ ucode_new_rev);
+
return &microcode_amd_ops;
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index faec7120c508..ac360bfbbdb6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -43,16 +43,8 @@
#define MICROCODE_VERSION "2.01"
static struct microcode_ops *microcode_ops;
-
static bool dis_ucode_ldr;
-static int __init disable_loader(char *str)
-{
- dis_ucode_ldr = true;
- return 1;
-}
-__setup("dis_ucode_ldr", disable_loader);
-
/*
* Synchronization.
*
@@ -81,15 +73,16 @@ struct cpu_info_ctx {
static bool __init check_loader_disabled_bsp(void)
{
+ static const char *__dis_opt_str = "dis_ucode_ldr";
+
#ifdef CONFIG_X86_32
const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
- const char *opt = "dis_ucode_ldr";
- const char *option = (const char *)__pa_nodebug(opt);
+ const char *option = (const char *)__pa_nodebug(__dis_opt_str);
bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
#else /* CONFIG_X86_64 */
const char *cmdline = boot_command_line;
- const char *option = "dis_ucode_ldr";
+ const char *option = __dis_opt_str;
bool *res = &dis_ucode_ldr;
#endif
@@ -479,7 +472,7 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
enum ucode_state ustate;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- if (uci && uci->valid)
+ if (uci->valid)
return UCODE_OK;
if (collect_cpu_info(cpu))
@@ -630,7 +623,7 @@ int __init microcode_init(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
int error;
- if (paravirt_enabled() || dis_ucode_ldr)
+ if (dis_ucode_ldr)
return -EINVAL;
if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ee81c544ee0d..cbb3cf09b065 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -39,9 +39,15 @@
#include <asm/setup.h>
#include <asm/msr.h>
-static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+/*
+ * Temporary microcode blobs pointers storage. We note here the pointers to
+ * microcode blobs we've got from whatever storage (detached initrd, builtin).
+ * Later on, we put those into final storage mc_saved_data.mc_saved.
+ */
+static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT];
+
static struct mc_saved_data {
- unsigned int mc_saved_count;
+ unsigned int num_saved;
struct microcode_intel **mc_saved;
} mc_saved_data;
@@ -78,53 +84,50 @@ load_microcode_early(struct microcode_intel **saved,
}
static inline void
-copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
- unsigned long off, int num_saved)
+copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs,
+ unsigned long off, int num_saved)
{
int i;
for (i = 0; i < num_saved; i++)
- mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
+ mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off);
}
#ifdef CONFIG_X86_32
static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
- struct mc_saved_data *mc_saved_data)
+microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs)
{
int i;
struct microcode_intel ***mc_saved;
- mc_saved = (struct microcode_intel ***)
- __pa_nodebug(&mc_saved_data->mc_saved);
- for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+ mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved);
+
+ for (i = 0; i < mcs->num_saved; i++) {
struct microcode_intel *p;
- p = *(struct microcode_intel **)
- __pa_nodebug(mc_saved_data->mc_saved + i);
+ p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i);
mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
}
}
#endif
static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
- unsigned long initrd_start, struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
+ unsigned long offset, struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int count = mc_saved_data->mc_saved_count;
+ unsigned int count = mcs->num_saved;
- if (!mc_saved_data->mc_saved) {
- copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
+ if (!mcs->mc_saved) {
+ copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count);
return load_microcode_early(mc_saved_tmp, count, uci);
} else {
#ifdef CONFIG_X86_32
- microcode_phys(mc_saved_tmp, mc_saved_data);
+ microcode_phys(mc_saved_tmp, mcs);
return load_microcode_early(mc_saved_tmp, count, uci);
#else
- return load_microcode_early(mc_saved_data->mc_saved,
- count, uci);
+ return load_microcode_early(mcs->mc_saved, count, uci);
#endif
}
}
@@ -175,25 +178,25 @@ matching_model_microcode(struct microcode_header_intel *mc_header,
}
static int
-save_microcode(struct mc_saved_data *mc_saved_data,
+save_microcode(struct mc_saved_data *mcs,
struct microcode_intel **mc_saved_src,
- unsigned int mc_saved_count)
+ unsigned int num_saved)
{
int i, j;
struct microcode_intel **saved_ptr;
int ret;
- if (!mc_saved_count)
+ if (!num_saved)
return -EINVAL;
/*
* Copy new microcode data.
*/
- saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+ saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL);
if (!saved_ptr)
return -ENOMEM;
- for (i = 0; i < mc_saved_count; i++) {
+ for (i = 0; i < num_saved; i++) {
struct microcode_header_intel *mc_hdr;
struct microcode_intel *mc;
unsigned long size;
@@ -207,20 +210,18 @@ save_microcode(struct mc_saved_data *mc_saved_data,
mc_hdr = &mc->hdr;
size = get_totalsize(mc_hdr);
- saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+ saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL);
if (!saved_ptr[i]) {
ret = -ENOMEM;
goto err;
}
-
- memcpy(saved_ptr[i], mc, size);
}
/*
* Point to newly saved microcode.
*/
- mc_saved_data->mc_saved = saved_ptr;
- mc_saved_data->mc_saved_count = mc_saved_count;
+ mcs->mc_saved = saved_ptr;
+ mcs->num_saved = num_saved;
return 0;
@@ -284,22 +285,20 @@ static unsigned int _save_mc(struct microcode_intel **mc_saved,
* BSP can stay in the platform.
*/
static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
- void *data, size_t size,
- struct mc_saved_data *mc_saved_data,
- unsigned long *mc_saved_in_initrd,
+get_matching_model_microcode(unsigned long start, void *data, size_t size,
+ struct mc_saved_data *mcs, unsigned long *mc_ptrs,
struct ucode_cpu_info *uci)
{
- u8 *ucode_ptr = data;
- unsigned int leftover = size;
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ struct microcode_header_intel *mc_header;
+ unsigned int num_saved = mcs->num_saved;
enum ucode_state state = UCODE_OK;
+ unsigned int leftover = size;
+ u8 *ucode_ptr = data;
unsigned int mc_size;
- struct microcode_header_intel *mc_header;
- struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
int i;
- while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+ while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) {
if (leftover < sizeof(mc_header))
break;
@@ -318,32 +317,31 @@ get_matching_model_microcode(int cpu, unsigned long start,
* the platform, we need to find and save microcode patches
* with the same family and model as the BSP.
*/
- if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
- UCODE_OK) {
+ if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) {
ucode_ptr += mc_size;
continue;
}
- mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
+ num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved);
ucode_ptr += mc_size;
}
if (leftover) {
state = UCODE_ERROR;
- goto out;
+ return state;
}
- if (mc_saved_count == 0) {
+ if (!num_saved) {
state = UCODE_NFOUND;
- goto out;
+ return state;
}
- for (i = 0; i < mc_saved_count; i++)
- mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+ for (i = 0; i < num_saved; i++)
+ mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+ mcs->num_saved = num_saved;
- mc_saved_data->mc_saved_count = mc_saved_count;
-out:
return state;
}
@@ -373,7 +371,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
csig.pf = 1 << ((val[1] >> 18) & 7);
}
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ native_wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
@@ -396,11 +394,11 @@ static void show_saved_mc(void)
unsigned int sig, pf, rev, total_size, data_size, date;
struct ucode_cpu_info uci;
- if (mc_saved_data.mc_saved_count == 0) {
+ if (!mc_saved_data.num_saved) {
pr_debug("no microcode data saved.\n");
return;
}
- pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+ pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved);
collect_cpu_info_early(&uci);
@@ -409,7 +407,7 @@ static void show_saved_mc(void)
rev = uci.cpu_sig.rev;
pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
- for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+ for (i = 0; i < mc_saved_data.num_saved; i++) {
struct microcode_header_intel *mc_saved_header;
struct extended_sigtable *ext_header;
int ext_sigcount;
@@ -465,7 +463,7 @@ int save_mc_for_early(u8 *mc)
{
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
unsigned int mc_saved_count_init;
- unsigned int mc_saved_count;
+ unsigned int num_saved;
struct microcode_intel **mc_saved;
int ret = 0;
int i;
@@ -476,23 +474,23 @@ int save_mc_for_early(u8 *mc)
*/
mutex_lock(&x86_cpu_microcode_mutex);
- mc_saved_count_init = mc_saved_data.mc_saved_count;
- mc_saved_count = mc_saved_data.mc_saved_count;
+ mc_saved_count_init = mc_saved_data.num_saved;
+ num_saved = mc_saved_data.num_saved;
mc_saved = mc_saved_data.mc_saved;
- if (mc_saved && mc_saved_count)
+ if (mc_saved && num_saved)
memcpy(mc_saved_tmp, mc_saved,
- mc_saved_count * sizeof(struct microcode_intel *));
+ num_saved * sizeof(struct microcode_intel *));
/*
* Save the microcode patch mc in mc_save_tmp structure if it's a newer
* version.
*/
- mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
+ num_saved = _save_mc(mc_saved_tmp, mc, num_saved);
/*
* Save the mc_save_tmp in global mc_saved_data.
*/
- ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+ ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved);
if (ret) {
pr_err("Cannot save microcode patch.\n");
goto out;
@@ -536,7 +534,7 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
static __init enum ucode_state
-scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
unsigned long start, unsigned long size,
struct ucode_cpu_info *uci)
{
@@ -551,14 +549,18 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
cd.data = NULL;
cd.size = 0;
- cd = find_cpio_data(p, (void *)start, size, &offset);
- if (!cd.data) {
+ /* try built-in microcode if no initrd */
+ if (!size) {
if (!load_builtin_intel_microcode(&cd))
return UCODE_ERROR;
+ } else {
+ cd = find_cpio_data(p, (void *)start, size, &offset);
+ if (!cd.data)
+ return UCODE_ERROR;
}
- return get_matching_model_microcode(0, start, cd.data, cd.size,
- mc_saved_data, initrd, uci);
+ return get_matching_model_microcode(start, cd.data, cd.size,
+ mcs, mc_ptrs, uci);
}
/*
@@ -567,14 +569,11 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
static void
print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
{
- int cpu = smp_processor_id();
-
- pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
- cpu,
- uci->cpu_sig.rev,
- date & 0xffff,
- date >> 24,
- (date >> 16) & 0xff);
+ pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+ uci->cpu_sig.rev,
+ date & 0xffff,
+ date >> 24,
+ (date >> 16) & 0xff);
}
#ifdef CONFIG_X86_32
@@ -603,19 +602,19 @@ void show_ucode_info_early(void)
*/
static void print_ucode(struct ucode_cpu_info *uci)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
int *delay_ucode_info_p;
int *current_mc_date_p;
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return;
delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
*delay_ucode_info_p = 1;
- *current_mc_date_p = mc_intel->hdr.date;
+ *current_mc_date_p = mc->hdr.date;
}
#else
@@ -630,37 +629,35 @@ static inline void flush_tlb_early(void)
static inline void print_ucode(struct ucode_cpu_info *uci)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return;
- print_ucode_info(uci, mc_intel->hdr.date);
+ print_ucode_info(uci, mc->hdr.date);
}
#endif
static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
unsigned int val[2];
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return 0;
/* write microcode via MSR 0x79 */
- native_wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) mc_intel->bits,
- (unsigned long) mc_intel->bits >> 16 >> 16);
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+ native_wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
/* get the current revision from MSR 0x8B */
native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (val[1] != mc_intel->hdr.rev)
+ if (val[1] != mc->hdr.rev)
return -1;
#ifdef CONFIG_X86_64
@@ -672,25 +669,26 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (early)
print_ucode(uci);
else
- print_ucode_info(uci, mc_intel->hdr.date);
+ print_ucode_info(uci, mc->hdr.date);
return 0;
}
/*
* This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data.
*/
int __init save_microcode_in_initrd_intel(void)
{
- unsigned int count = mc_saved_data.mc_saved_count;
+ unsigned int count = mc_saved_data.num_saved;
struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
int ret = 0;
- if (count == 0)
+ if (!count)
return ret;
- copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
+ copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count);
+
ret = save_microcode(&mc_saved_data, mc_saved, count);
if (ret)
pr_err("Cannot save microcode patches from initrd.\n");
@@ -701,8 +699,7 @@ int __init save_microcode_in_initrd_intel(void)
}
static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
- unsigned long *initrd,
+_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
unsigned long start, unsigned long size)
{
struct ucode_cpu_info uci;
@@ -710,11 +707,11 @@ _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
collect_cpu_info_early(&uci);
- ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+ ret = scan_microcode(mcs, mc_ptrs, start, size, &uci);
if (ret != UCODE_OK)
return;
- ret = load_microcode(mc_saved_data, initrd, start, &uci);
+ ret = load_microcode(mcs, mc_ptrs, start, &uci);
if (ret != UCODE_OK)
return;
@@ -728,53 +725,49 @@ void __init load_ucode_intel_bsp(void)
struct boot_params *p;
p = (struct boot_params *)__pa_nodebug(&boot_params);
- start = p->hdr.ramdisk_image;
size = p->hdr.ramdisk_size;
- _load_ucode_intel_bsp(
- (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
- (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
- start, size);
+ /*
+ * Set start only if we have an initrd image. We cannot use initrd_start
+ * because it is not set that early yet.
+ */
+ start = (size ? p->hdr.ramdisk_image : 0);
+
+ _load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+ (unsigned long *)__pa_nodebug(&mc_tmp_ptrs),
+ start, size);
#else
- start = boot_params.hdr.ramdisk_image + PAGE_OFFSET;
size = boot_params.hdr.ramdisk_size;
+ start = (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0);
- _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
+ _load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size);
#endif
}
void load_ucode_intel_ap(void)
{
- struct mc_saved_data *mc_saved_data_p;
+ unsigned long *mcs_tmp_p;
+ struct mc_saved_data *mcs_p;
struct ucode_cpu_info uci;
- unsigned long *mc_saved_in_initrd_p;
- unsigned long initrd_start_addr;
enum ucode_state ret;
#ifdef CONFIG_X86_32
- unsigned long *initrd_start_p;
- mc_saved_in_initrd_p =
- (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
- mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
- initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
- initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
+ mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs);
+ mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
#else
- mc_saved_data_p = &mc_saved_data;
- mc_saved_in_initrd_p = mc_saved_in_initrd;
- initrd_start_addr = initrd_start;
+ mcs_tmp_p = mc_tmp_ptrs;
+ mcs_p = &mc_saved_data;
#endif
/*
* If there is no valid ucode previously saved in memory, no need to
* update ucode on this AP.
*/
- if (mc_saved_data_p->mc_saved_count == 0)
+ if (!mcs_p->num_saved)
return;
collect_cpu_info_early(&uci);
- ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
- initrd_start_addr, &uci);
-
+ ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci);
if (ret != UCODE_OK)
return;
@@ -786,13 +779,13 @@ void reload_ucode_intel(void)
struct ucode_cpu_info uci;
enum ucode_state ret;
- if (!mc_saved_data.mc_saved_count)
+ if (!mc_saved_data.num_saved)
return;
collect_cpu_info_early(&uci);
ret = load_microcode_early(mc_saved_data.mc_saved,
- mc_saved_data.mc_saved_count, &uci);
+ mc_saved_data.num_saved, &uci);
if (ret != UCODE_OK)
return;
@@ -825,7 +818,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
* return 0 - no update found
* return 1 - found update
*/
-static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
+static int get_matching_mc(struct microcode_intel *mc, int cpu)
{
struct cpu_signature cpu_sig;
unsigned int csig, cpf, crev;
@@ -836,39 +829,36 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
cpf = cpu_sig.pf;
crev = cpu_sig.rev;
- return has_newer_microcode(mc_intel, csig, cpf, crev);
+ return has_newer_microcode(mc, csig, cpf, crev);
}
static int apply_microcode_intel(int cpu)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
struct ucode_cpu_info *uci;
+ struct cpuinfo_x86 *c;
unsigned int val[2];
- int cpu_num = raw_smp_processor_id();
- struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-
- uci = ucode_cpu_info + cpu;
- mc_intel = uci->mc;
/* We should bind the task to the CPU */
- BUG_ON(cpu_num != cpu);
+ if (WARN_ON(raw_smp_processor_id() != cpu))
+ return -1;
- if (mc_intel == NULL)
+ uci = ucode_cpu_info + cpu;
+ mc = uci->mc;
+ if (!mc)
return 0;
/*
* Microcode on this CPU could be updated earlier. Only apply the
- * microcode patch in mc_intel when it is newer than the one on this
+ * microcode patch in mc when it is newer than the one on this
* CPU.
*/
- if (get_matching_mc(mc_intel, cpu) == 0)
+ if (!get_matching_mc(mc, cpu))
return 0;
/* write microcode via MSR 0x79 */
- wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) mc_intel->bits,
- (unsigned long) mc_intel->bits >> 16 >> 16);
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+ wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
@@ -876,16 +866,19 @@ static int apply_microcode_intel(int cpu)
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (val[1] != mc_intel->hdr.rev) {
+ if (val[1] != mc->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
- cpu_num, mc_intel->hdr.rev);
+ cpu, mc->hdr.rev);
return -1;
}
+
pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
- cpu_num, val[1],
- mc_intel->hdr.date & 0xffff,
- mc_intel->hdr.date >> 24,
- (mc_intel->hdr.date >> 16) & 0xff);
+ cpu, val[1],
+ mc->hdr.date & 0xffff,
+ mc->hdr.date >> 24,
+ (mc->hdr.date >> 16) & 0xff);
+
+ c = &cpu_data(cpu);
uci->cpu_sig.rev = val[1];
c->microcode = val[1];
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index b96896bcbdaf..2ce1a7dc45b7 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -49,7 +49,7 @@ int microcode_sanity_check(void *mc, int print_err)
unsigned long total_size, data_size, ext_table_size;
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
- int sum, orig_sum, ext_sigcount = 0, i;
+ u32 sum, orig_sum, ext_sigcount = 0, i;
struct extended_signature *ext_sig;
total_size = get_totalsize(mc_header);
@@ -57,69 +57,85 @@ int microcode_sanity_check(void *mc, int print_err)
if (data_size + MC_HEADER_SIZE > total_size) {
if (print_err)
- pr_err("error! Bad data size in microcode data file\n");
+ pr_err("Error: bad microcode data file size.\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
if (print_err)
- pr_err("error! Unknown microcode update format\n");
+ pr_err("Error: invalid/unknown microcode update format.\n");
return -EINVAL;
}
+
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
+ u32 ext_table_sum = 0;
+ u32 *ext_tablep;
+
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
if (print_err)
- pr_err("error! Small exttable size in microcode data file\n");
+ pr_err("Error: truncated extended signature table.\n");
return -EINVAL;
}
+
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
if (print_err)
- pr_err("error! Bad exttable size in microcode data file\n");
+ pr_err("Error: extended signature table size mismatch.\n");
return -EFAULT;
}
+
ext_sigcount = ext_header->count;
- }
- /* check extended table checksum */
- if (ext_table_size) {
- int ext_table_sum = 0;
- int *ext_tablep = (int *)ext_header;
+ /*
+ * Check extended table checksum: the sum of all dwords that
+ * comprise a valid table must be 0.
+ */
+ ext_tablep = (u32 *)ext_header;
- i = ext_table_size / DWSIZE;
+ i = ext_table_size / sizeof(u32);
while (i--)
ext_table_sum += ext_tablep[i];
+
if (ext_table_sum) {
if (print_err)
- pr_warn("aborting, bad extended signature table checksum\n");
+ pr_warn("Bad extended signature table checksum, aborting.\n");
return -EINVAL;
}
}
- /* calculate the checksum */
+ /*
+ * Calculate the checksum of update data and header. The checksum of
+ * valid update data and header including the extended signature table
+ * must be 0.
+ */
orig_sum = 0;
- i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+ i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
while (i--)
- orig_sum += ((int *)mc)[i];
+ orig_sum += ((u32 *)mc)[i];
+
if (orig_sum) {
if (print_err)
- pr_err("aborting, bad checksum\n");
+ pr_err("Bad microcode data checksum, aborting.\n");
return -EINVAL;
}
+
if (!ext_table_size)
return 0;
- /* check extended signature checksum */
+
+ /*
+ * Check extended signature checksum: 0 => valid.
+ */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
EXT_SIGNATURE_SIZE * i;
- sum = orig_sum
- - (mc_header->sig + mc_header->pf + mc_header->cksum)
- + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+
+ sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
if (print_err)
- pr_err("aborting, bad checksum\n");
+ pr_err("Bad extended signature checksum, aborting.\n");
return -EINVAL;
}
}
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710a5b23..6988c74409a8 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
IN=$1
@@ -49,8 +49,8 @@ dump_array()
trap 'rm "$OUT"' EXIT
(
- echo "#ifndef _ASM_X86_CPUFEATURE_H"
- echo "#include <asm/cpufeature.h>"
+ echo "#ifndef _ASM_X86_CPUFEATURES_H"
+ echo "#include <asm/cpufeatures.h>"
echo "#endif"
echo ""
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 20e242ea1bc4..4e7c6933691c 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -161,8 +161,8 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
- printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
- ms_hyperv.features, ms_hyperv.hints);
+ pr_info("HyperV: features 0x%x, hints 0x%x\n",
+ ms_hyperv.features, ms_hyperv.hints);
#ifdef CONFIG_X86_LOCAL_APIC
if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
@@ -174,8 +174,8 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
lapic_timer_frequency = hv_lapic_frequency;
- printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
- lapic_timer_frequency);
+ pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+ lapic_timer_frequency);
}
#endif
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index 316fe3e60a97..3d689937fc1b 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
*/
if (type != MTRR_TYPE_WRCOMB &&
(centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
- pr_warning("mtrr: only write-combining%s supported\n",
+ pr_warn("mtrr: only write-combining%s supported\n",
centaur_mcr_type ? " and uncacheable are" : " is");
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 0d98503c2245..31e951ce6dff 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -57,9 +57,9 @@ static int __initdata nr_range;
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
-#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
+#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
-#define BIOS_BUG_MSG KERN_WARNING \
+#define BIOS_BUG_MSG \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
static int __init
@@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
base, base + size);
}
if (debug_print) {
- printk(KERN_DEBUG "After WB checking\n");
+ pr_debug("After WB checking\n");
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
range[i].start, range[i].end);
}
@@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
(mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
/* Var MTRR contains UC entry below 1M? Skip it: */
- printk(BIOS_BUG_MSG, i);
+ pr_warn(BIOS_BUG_MSG, i);
if (base + size <= (1<<(20-PAGE_SHIFT)))
continue;
size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
extra_remove_base + extra_remove_size);
if (debug_print) {
- printk(KERN_DEBUG "After UC checking\n");
+ pr_debug("After UC checking\n");
for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
- printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
range[i].start, range[i].end);
}
}
@@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
/* sort the ranges */
nr_range = clean_sort_range(range, RANGE_NUM);
if (debug_print) {
- printk(KERN_DEBUG "After sorting\n");
+ pr_debug("After sorting\n");
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
range[i].start, range[i].end);
}
@@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void)
start_base = to_size_factor(start_base, &start_factor),
type = range_state[i].type;
- printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+ pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
i, start_base, start_factor,
size_base, size_factor,
(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
@@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits)
return 0;
/* Print original var MTRRs at first, for debugging: */
- printk(KERN_DEBUG "original variable MTRRs\n");
+ pr_debug("original variable MTRRs\n");
print_out_mtrr_range_state();
memset(range, 0, sizeof(range));
@@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits)
x_remove_base, x_remove_size);
range_sums = sum_ranges(range, nr_range);
- printk(KERN_INFO "total RAM covered: %ldM\n",
+ pr_info("total RAM covered: %ldM\n",
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
@@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits)
if (!result[i].bad) {
set_var_mtrr_all(address_bits);
- printk(KERN_DEBUG "New variable MTRRs\n");
+ pr_debug("New variable MTRRs\n");
print_out_mtrr_range_state();
return 1;
}
- printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
- "will find optimal one\n");
+ pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
}
i = 0;
@@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits)
x_remove_base, x_remove_size, i);
if (debug_print) {
mtrr_print_out_one_result(i);
- printk(KERN_INFO "\n");
+ pr_info("\n");
}
i++;
@@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits)
index_good = mtrr_search_optimal_index();
if (index_good != -1) {
- printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+ pr_info("Found optimal setting for mtrr clean up\n");
i = index_good;
mtrr_print_out_one_result(i);
@@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits)
gran_size <<= 10;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
set_var_mtrr_all(address_bits);
- printk(KERN_DEBUG "New variable MTRRs\n");
+ pr_debug("New variable MTRRs\n");
print_out_mtrr_range_state();
return 1;
} else {
@@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits)
mtrr_print_out_one_result(i);
}
- printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
- printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+ pr_info("mtrr_cleanup: can not find optimal value\n");
+ pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");
return 0;
}
@@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
if (!highest_pfn) {
- printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
+ pr_info("CPU MTRRs all blank - virtualized system.\n");
return 0;
}
@@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
end_pfn);
if (total_trim_size) {
- pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
+ pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
+ total_trim_size >> 20);
if (!changed_by_mtrr_cleanup)
WARN_ON(1);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index c870af161008..fcbcb2f678ca 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
rdmsr(MSR_K8_SYSCFG, lo, hi);
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
- printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
+ pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
" not cleared by BIOS, clearing this bit\n",
smp_processor_id());
lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
@@ -501,14 +501,14 @@ void __init mtrr_state_warn(void)
if (!mask)
return;
if (mask & MTRR_CHANGE_MASK_FIXED)
- pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+ pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_VARIABLE)
- pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+ pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_DEFTYPE)
- pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+ pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
- printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
- printk(KERN_INFO "mtrr: corrected configuration.\n");
+ pr_info("mtrr: probably your BIOS does not setup all CPUs.\n");
+ pr_info("mtrr: corrected configuration.\n");
}
/*
@@ -519,8 +519,7 @@ void __init mtrr_state_warn(void)
void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
{
if (wrmsr_safe(msr, a, b) < 0) {
- printk(KERN_ERR
- "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+ pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
smp_processor_id(), msr, a, b);
}
}
@@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
tmp |= ~((1ULL<<(hi - 1)) - 1);
if (tmp != mask) {
- printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+ pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
mask = tmp;
}
@@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
- pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+ pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
}
if (!(base + size < 0x70000 || base > 0x7003F) &&
(type == MTRR_TYPE_WRCOMB
|| type == MTRR_TYPE_WRBACK)) {
- pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+ pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
return -EINVAL;
}
}
@@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
lbase = lbase >> 1, last = last >> 1)
;
if (lbase != last) {
- pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
+ pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
return -EINVAL;
}
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 5c3d149ee91c..10f8d4796240 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
#include <linux/smp.h>
#include <linux/syscore_ops.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
@@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return error;
if (type >= MTRR_NUM_TYPES) {
- pr_warning("mtrr: type: %u invalid\n", type);
+ pr_warn("mtrr: type: %u invalid\n", type);
return -EINVAL;
}
/* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- pr_warning("mtrr: your processor doesn't support write-combining\n");
+ pr_warn("mtrr: your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
- pr_warning("mtrr: zero sized request\n");
+ pr_warn("mtrr: zero sized request\n");
return -EINVAL;
}
if ((base | (base + size - 1)) >>
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
- pr_warning("mtrr: base or size exceeds the MTRR width\n");
+ pr_warn("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} else if (types_compatible(type, ltype))
continue;
}
- pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+ pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
" 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
goto out;
@@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
- pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+ pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+ pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
@@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg >= max) {
- pr_warning("mtrr: register: %d too big\n", reg);
+ pr_warn("mtrr: register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
- pr_warning("mtrr: MTRR %d not used\n", reg);
+ pr_warn("mtrr: MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
- pr_warning("mtrr: reg: %d has count=0\n", reg);
+ pr_warn("mtrr: reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 819d94982e07..f6f50c4ceaec 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
if (!rdrand_long(&tmp)) {
clear_cpu_cap(c, X86_FEATURE_RDRAND);
- printk_once(KERN_WARNING "rdrand: disabled\n");
+ pr_warn_once("rdrand: disabled\n");
return;
}
}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 4c60eaf0571c..cd531355e838 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
if (!printed) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ pr_info("CPU: Physical Processor ID: %d\n",
c->phys_proc_id);
if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ pr_info("CPU: Processor Core ID: %d\n",
c->cpu_core_id);
printed = 1;
}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 252da7aceca6..34178564be2a 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
#include "cpu.h"
@@ -33,7 +33,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
if (max >= 0x80860001) {
cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
if (cpu_rev != 0x02000000) {
- printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+ pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
(cpu_rev >> 24) & 0xff,
(cpu_rev >> 16) & 0xff,
(cpu_rev >> 8) & 0xff,
@@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c)
if (max >= 0x80860002) {
cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
if (cpu_rev == 0x02000000) {
- printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+ pr_info("CPU: Processor revision %08X, %u MHz\n",
new_cpu_rev, cpu_freq);
}
- printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+ pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
(cms_rev1 >> 24) & 0xff,
(cms_rev1 >> 16) & 0xff,
(cms_rev1 >> 8) & 0xff,
@@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
(void *)&cpu_info[56],
(void *)&cpu_info[60]);
cpu_info[64] = '\0';
- printk(KERN_INFO "CPU: %s\n", cpu_info);
+ pr_info("CPU: %s\n", cpu_info);
}
/* Unhide possibly hidden capability flags */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 628a059a9a06..364e58346897 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void)
tsc_hz = eax | (((uint64_t)ebx) << 32);
do_div(tsc_hz, 1000);
BUG_ON(tsc_hz >> 32);
- printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+ pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
(unsigned long) tsc_hz / 1000,
(unsigned long) tsc_hz % 1000);
@@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void)
if (ebx != UINT_MAX)
x86_platform.calibrate_tsc = vmware_get_tsc_khz;
else
- printk(KERN_WARNING
- "Failed to get TSC freq from the hypervisor\n");
+ pr_warn("Failed to get TSC freq from the hypervisor\n");
}
/*
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 58f34319b29a..9ef978d69c22 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -57,10 +57,9 @@ struct crash_elf_data {
struct kimage *image;
/*
* Total number of ram ranges we have after various adjustments for
- * GART, crash reserved region etc.
+ * crash reserved region, etc.
*/
unsigned int max_nr_ranges;
- unsigned long gart_start, gart_end;
/* Pointer to elf header */
void *ehdr;
@@ -201,17 +200,6 @@ static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
return 0;
}
-static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
-{
- struct crash_elf_data *ced = arg;
-
- ced->gart_start = start;
- ced->gart_end = end;
-
- /* Not expecting more than 1 gart aperture */
- return 1;
-}
-
/* Gather all the required information to prepare elf headers for ram regions */
static void fill_up_crash_elf_data(struct crash_elf_data *ced,
@@ -226,22 +214,6 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced,
ced->max_nr_ranges = nr_ranges;
- /*
- * We don't create ELF headers for GART aperture as an attempt
- * to dump this memory in second kernel leads to hang/crash.
- * If gart aperture is present, one needs to exclude that region
- * and that could lead to need of extra phdr.
- */
- walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
- ced, get_gart_ranges_callback);
-
- /*
- * If we have gart region, excluding that could potentially split
- * a memory range, resulting in extra header. Account for that.
- */
- if (ced->gart_end)
- ced->max_nr_ranges++;
-
/* Exclusion of crash region could split memory ranges */
ced->max_nr_ranges++;
@@ -350,13 +322,6 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced,
return ret;
}
- /* Exclude GART region */
- if (ced->gart_end) {
- ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
- if (ret)
- return ret;
- }
-
return ret;
}
@@ -599,12 +564,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
/* Add ACPI tables */
cmd.type = E820_ACPI;
flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+ walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
memmap_entry_callback);
/* Add ACPI Non-volatile Storage */
cmd.type = E820_NVS;
- walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+ walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
memmap_entry_callback);
/* Add crashk_low_res region */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 0d1ff4b407d4..8efa57a5f29e 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -267,9 +267,8 @@ int __die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_SMP
printk("SMP ");
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC ");
#ifdef CONFIG_KASAN
printk("KASAN");
#endif
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 569c1e4f96fe..621b501f8935 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -24,6 +24,7 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
+#include <asm/cpufeature.h>
/*
* The e820 map is the map that gets modified e.g. with command line parameters
@@ -925,6 +926,41 @@ static const char *e820_type_to_string(int e820_type)
}
}
+static unsigned long e820_type_to_iomem_type(int e820_type)
+{
+ switch (e820_type) {
+ case E820_RESERVED_KERN:
+ case E820_RAM:
+ return IORESOURCE_SYSTEM_RAM;
+ case E820_ACPI:
+ case E820_NVS:
+ case E820_UNUSABLE:
+ case E820_PRAM:
+ case E820_PMEM:
+ default:
+ return IORESOURCE_MEM;
+ }
+}
+
+static unsigned long e820_type_to_iores_desc(int e820_type)
+{
+ switch (e820_type) {
+ case E820_ACPI:
+ return IORES_DESC_ACPI_TABLES;
+ case E820_NVS:
+ return IORES_DESC_ACPI_NV_STORAGE;
+ case E820_PMEM:
+ return IORES_DESC_PERSISTENT_MEMORY;
+ case E820_PRAM:
+ return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
+ case E820_RESERVED_KERN:
+ case E820_RAM:
+ case E820_UNUSABLE:
+ default:
+ return IORES_DESC_NONE;
+ }
+}
+
static bool do_mark_busy(u32 type, struct resource *res)
{
/* this is the legacy bios/dos rom-shadow + mmio region */
@@ -967,7 +1003,8 @@ void __init e820_reserve_resources(void)
res->start = e820.map[i].addr;
res->end = end;
- res->flags = IORESOURCE_MEM;
+ res->flags = e820_type_to_iomem_type(e820.map[i].type);
+ res->desc = e820_type_to_iores_desc(e820.map[i].type);
/*
* don't register the region that could be conflicted with
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index d25097c3fc1d..0b1b9abd4d5f 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -114,6 +114,10 @@ void __kernel_fpu_begin(void)
kernel_fpu_disable();
if (fpu->fpregs_active) {
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
copy_fpregs_to_fpstate(fpu);
} else {
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
@@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu)
preempt_disable();
if (fpu->fpregs_active) {
- if (!copy_fpregs_to_fpstate(fpu))
- fpregs_deactivate(fpu);
+ if (!copy_fpregs_to_fpstate(fpu)) {
+ if (use_eager_fpu())
+ copy_kernel_to_fpregs(&fpu->state);
+ else
+ fpregs_deactivate(fpu);
+ }
}
preempt_enable();
}
@@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state)
}
EXPORT_SYMBOL_GPL(fpstate_init);
-/*
- * Copy the current task's FPU state to a new task's FPU context.
- *
- * In both the 'eager' and the 'lazy' case we save hardware registers
- * directly to the destination buffer.
- */
-static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
+ dst_fpu->counter = 0;
+ dst_fpu->fpregs_active = 0;
+ dst_fpu->last_cpu = -1;
+
+ if (!src_fpu->fpstate_active || !cpu_has_fpu)
+ return 0;
+
WARN_ON_FPU(src_fpu != &current->thread.fpu);
/*
@@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
/*
* Save current FPU registers directly into the child
* FPU context, without any memory-to-memory copying.
- *
- * If the FPU context got destroyed in the process (FNSAVE
- * done on old CPUs) then copy it back into the source
- * context and mark the current task for lazy restore.
+ * In lazy mode, if the FPU context isn't loaded into
+ * fpregs, CR0.TS will be set and do_device_not_available
+ * will load the FPU context.
*
* We have to do all this with preemption disabled,
* mostly because of the FNSAVE case, because in that
@@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
- fpregs_deactivate(src_fpu);
+
+ if (use_eager_fpu())
+ copy_kernel_to_fpregs(&src_fpu->state);
+ else
+ fpregs_deactivate(src_fpu);
}
preempt_enable();
-}
-
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
-{
- dst_fpu->counter = 0;
- dst_fpu->fpregs_active = 0;
- dst_fpu->last_cpu = -1;
-
- if (src_fpu->fpstate_active && cpu_has_fpu)
- fpu_copy(dst_fpu, src_fpu);
return 0;
}
@@ -409,8 +411,10 @@ static inline void copy_init_fpstate_to_fpregs(void)
{
if (use_xsave())
copy_kernel_to_xregs(&init_fpstate.xsave, -1);
- else
+ else if (static_cpu_has(X86_FEATURE_FXSR))
copy_kernel_to_fxregs(&init_fpstate.fxsave);
+ else
+ copy_kernel_to_fregs(&init_fpstate.fsave);
}
/*
@@ -423,7 +427,7 @@ void fpu__clear(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
- if (!use_eager_fpu()) {
+ if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
/* FPU state will be reallocated lazily at the first use. */
fpu__drop(fpu);
} else {
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6d9f0a7ef4c8..54c86fffbf9f 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -78,13 +78,15 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
write_cr0(cr0);
- asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
- : "+m" (fsw), "+m" (fcw));
+ if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
- if (fsw == 0 && (fcw & 0x103f) == 0x003f)
- set_cpu_cap(c, X86_FEATURE_FPU);
- else
- clear_cpu_cap(c, X86_FEATURE_FPU);
+ if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+ set_cpu_cap(c, X86_FEATURE_FPU);
+ else
+ clear_cpu_cap(c, X86_FEATURE_FPU);
+ }
#ifndef CONFIG_MATH_EMULATION
if (!cpu_has_fpu) {
@@ -132,7 +134,7 @@ static void __init fpu__init_system_generic(void)
* Set up the legacy init FPU context. (xstate init might overwrite this
* with a more modern format, if the CPU supports it.)
*/
- fpstate_init_fxstate(&init_fpstate.fxsave);
+ fpstate_init(&init_fpstate);
fpu__init_system_mxcsr();
}
@@ -260,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* not only saved the restores along the way, but we also have the
* FPU ready to be used for the original task.
*
- * 'eager' switching is used on modern CPUs, there we switch the FPU
+ * 'lazy' is deprecated because it's almost never a performance win
+ * and it's much more complicated than 'eager'.
+ *
+ * 'eager' switching is by default on all CPUs, there we switch the FPU
* state during every context switch, regardless of whether the task
* has used FPU instructions in that time slice or not. This is done
* because modern FPU context saving instructions are able to optimize
@@ -271,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* to use 'eager' restores, if we detect that a task is using the FPU
* frequently. See the fpu->counter logic in fpu/internal.h for that. ]
*/
-static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
+static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
/*
* Find supported xfeatures based on cpu features and command-line input.
@@ -300,12 +305,6 @@ u64 __init fpu__get_supported_xfeatures_mask(void)
static void __init fpu__clear_eager_fpu_features(void)
{
setup_clear_cpu_cap(X86_FEATURE_MPX);
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- setup_clear_cpu_cap(X86_FEATURE_AVX2);
- setup_clear_cpu_cap(X86_FEATURE_AVX512F);
- setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
- setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
- setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
}
/*
@@ -348,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
- /*
- * No need to check "eagerfpu=auto" again, since it is the
- * initial default.
- */
if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
eagerfpu = DISABLE;
fpu__clear_eager_fpu_features();
- } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) {
- eagerfpu = ENABLE;
}
if (cmdline_find_option_bool(boot_command_line, "no387"))
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d425cda5ae6d..6e8354f5a593 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 29408d6d6626..702547ce33c9 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end)
static unsigned long text_ip_addr(unsigned long ip)
{
/*
- * On x86_64, kernel text mappings are mapped read-only with
- * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
- * of the kernel text mapping to modify the kernel text.
+ * On x86_64, kernel text mappings are mapped read-only, so we use
+ * the kernel identity mapping instead of the kernel text mapping
+ * to modify the kernel text.
*
* For 32bit kernels, these mappings are same and we can use
* kernel identity mapping to modify code.
@@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { }
#endif
/* Defined as markers to the end of the ftrace default trampolines */
-extern void ftrace_caller_end(void);
extern void ftrace_regs_caller_end(void);
-extern void ftrace_return(void);
+extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
@@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_caller_end;
+ end_offset = (unsigned long)ftrace_epilogue;
op_offset = (unsigned long)ftrace_caller_op_ptr;
}
@@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/*
* Allocate enough size to store the ftrace_caller code,
- * the jmp to ftrace_return, as well as the address of
+ * the jmp to ftrace_epilogue, as well as the address of
* the ftrace_ops this trampoline is used for.
*/
trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
@@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = (unsigned long)trampoline + size;
- /* The trampoline ends with a jmp to ftrace_return */
- jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
+ /* The trampoline ends with a jmp to ftrace_epilogue */
+ jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2c0f3407bd1f..1f4422d5c8d0 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -40,13 +40,8 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
- unsigned long i;
-
- for (i = 0; i < PTRS_PER_PGD-1; i++)
- early_level4_pgt[i].pgd = 0;
-
+ memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
next_early_pgt = 0;
-
write_cr3(__pa_nodebug(early_level4_pgt));
}
@@ -54,7 +49,6 @@ static void __init reset_early_page_tables(void)
int __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
- unsigned long i;
pgdval_t pgd, *pgd_p;
pudval_t pud, *pud_p;
pmdval_t pmd, *pmd_p;
@@ -81,8 +75,7 @@ again:
}
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
- for (i = 0; i < PTRS_PER_PUD; i++)
- pud_p[i] = 0;
+ memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pud_p += pud_index(address);
@@ -97,8 +90,7 @@ again:
}
pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
- for (i = 0; i < PTRS_PER_PMD; i++)
- pmd_p[i] = 0;
+ memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pmd = (physaddr & PMD_MASK) + early_pmd_flags;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae24b6d2..54cdbd2003fe 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,7 +19,7 @@
#include <asm/setup.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include <asm/bootparam.h>
@@ -389,6 +389,12 @@ default_entry:
/* Make changes effective */
wrmsr
+ /*
+ * And make sure that all the mappings we set up have NX set from
+ * the beginning.
+ */
+ orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
+
enable_paging:
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e860390..22fbf9df61bb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,7 +38,6 @@
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
-L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
@@ -76,9 +75,7 @@ startup_64:
subq $_text - __START_KERNEL_map, %rbp
/* Is the address not 2M aligned? */
- movq %rbp, %rax
- andl $~PMD_PAGE_MASK, %eax
- testl %eax, %eax
+ testl $~PMD_PAGE_MASK, %ebp
jnz bad_address
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5cd5d0..be0ebbb6d1d1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/io.h>
+#include <asm/cpufeature.h>
#include <asm/irqdomain.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 44256a62702b..ed15cd486d06 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
-#ifdef CONFIG_DEBUG_RODATA
char opc[BREAK_INSTR_SIZE];
-#endif /* CONFIG_DEBUG_RODATA */
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
return err;
err = probe_kernel_write((char *)bpt->bpt_addr,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
-#ifdef CONFIG_DEBUG_RODATA
if (!err)
return err;
/*
@@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
return -EINVAL;
bpt->type = BP_POKE_BREAKPOINT;
-#endif /* CONFIG_DEBUG_RODATA */
+
return err;
}
int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
-#ifdef CONFIG_DEBUG_RODATA
int err;
char opc[BREAK_INSTR_SIZE];
@@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
goto knl_write;
return err;
+
knl_write:
-#endif /* CONFIG_DEBUG_RODATA */
return probe_kernel_write((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1deffe6cc873..0f05deeff5ce 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -988,7 +988,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- if (fixup_exception(regs))
+ if (fixup_exception(regs, trapnr))
return 1;
/*
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 87e1762e2bca..ed48a9f465f8 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -168,12 +168,14 @@ GLOBAL(ftrace_call)
restore_mcount_regs
/*
- * The copied trampoline must call ftrace_return as it
+ * The copied trampoline must call ftrace_epilogue as it
* still may need to call the function graph tracer.
+ *
+ * The code up to this label is copied into trampolines so
+ * think twice before adding any new code or changing the
+ * layout here.
*/
-GLOBAL(ftrace_caller_end)
-
-GLOBAL(ftrace_return)
+GLOBAL(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
@@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call)
popfq
/*
- * As this jmp to ftrace_return can be a short jump
+ * As this jmp to ftrace_epilogue can be a short jump
* it must not be copied into the trampoline.
* The trampoline will add the code to jump
* to the return.
*/
GLOBAL(ftrace_regs_caller_end)
- jmp ftrace_return
+ jmp ftrace_epilogue
END(ftrace_regs_caller)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 30ca7607cbbb..97340f2c437c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
processor.cpuflag = CPU_ENABLED;
processor.cpufeature = (boot_cpu_data.x86 << 8) |
(boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
- processor.featureflag = boot_cpu_data.x86_capability[0];
+ processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
processor.reserved[0] = 0;
processor.reserved[1] = 0;
for (i = 0; i < 2; i++) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616f93f1..7f3550acde1b 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,7 @@
#include <linux/uaccess.h>
#include <linux/gfp.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8a2cdd736fa4..04b132a767f1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -30,6 +30,7 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
+#include <asm/cache.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@@ -69,7 +70,7 @@ struct nmi_stats {
static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
-static int ignore_nmis;
+static int ignore_nmis __read_mostly;
int unknown_nmi_panic;
/*
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 14415aff1813..92f70147a9a6 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -13,11 +13,11 @@ static int found(u64 start, u64 end, void *data)
static __init int register_e820_pmem(void)
{
- char *pmem = "Persistent Memory (legacy)";
struct platform_device *pdev;
int rc;
- rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found);
+ rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+ IORESOURCE_MEM, 0, -1, NULL, found);
if (rc <= 0)
return 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9f7c21c22477..2915d54e9dd5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
+#ifdef CONFIG_X86_32
+ .SYSENTER_stack_canary = STACK_END_MAGIC,
+#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
@@ -418,9 +421,9 @@ static void mwait_idle(void)
if (!current_set_polling_and_test()) {
trace_cpu_idle_rcuidle(1, smp_processor_id());
if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
- smp_mb(); /* quirk */
+ mb(); /* quirk */
clflush((void *)&current_thread_info()->flags);
- smp_mb(); /* quirk */
+ mb(); /* quirk */
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3d80e6d42a2..aa52c1009475 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -152,21 +152,21 @@ static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource bss_resource = {
.name = "Kernel bss",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cb6282c3638f..548ddf7d6fd2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,7 +61,38 @@
regs->seg = GET_SEG(seg) | 3; \
} while (0)
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+#ifdef CONFIG_X86_64
+/*
+ * If regs->ss will cause an IRET fault, change it. Otherwise leave it
+ * alone. Using this generally makes no sense unless
+ * user_64bit_mode(regs) would return true.
+ */
+static void force_valid_ss(struct pt_regs *regs)
+{
+ u32 ar;
+ asm volatile ("lar %[old_ss], %[ar]\n\t"
+ "jz 1f\n\t" /* If invalid: */
+ "xorl %[ar], %[ar]\n\t" /* set ar = 0 */
+ "1:"
+ : [ar] "=r" (ar)
+ : [old_ss] "rm" ((u16)regs->ss));
+
+ /*
+ * For a valid 64-bit user context, we need DPL 3, type
+ * read-write data or read-write exp-down data, and S and P
+ * set. We can't use VERW because VERW doesn't check the
+ * P bit.
+ */
+ ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+ if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+ ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+ regs->ss = __USER_DS;
+}
+#endif
+
+static int restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *sc,
+ unsigned long uc_flags)
{
unsigned long buf_val;
void __user *buf;
@@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
COPY(r15);
#endif /* CONFIG_X86_64 */
-#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+ user_64bit_mode(regs)))
+ force_valid_ss(regs);
+#endif
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
+ put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate);
@@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return 0;
}
#else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ if (cpu_has_xsave)
+ flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+ else
+ flags = UC_SIGCONTEXT_SS;
+
+ if (likely(user_64bit_mode(regs)))
+ flags |= UC_STRICT_RESTORE_SS;
+
+ return flags;
+}
+
static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
@@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
@@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame;
- /* Set up the CS register to run signal handlers in 64-bit mode,
- even if the handler happens to be interrupting 32-bit code. */
+ /*
+ * Set up the CS and SS registers to run signal handlers in
+ * 64-bit mode, even if the handler happens to be interrupting
+ * 32-bit or 16-bit code.
+ *
+ * SS is subtle. In 64-bit mode, we don't need any particular
+ * SS descriptor, but we do need SS to be valid. It's possible
+ * that the old SS is entirely bogus -- this can happen if the
+ * signal we're trying to deliver is #GP or #SS caused by a bad
+ * SS value. We also have a compatbility issue here: DOSEMU
+ * relies on the contents of the SS register indicating the
+ * SS value at the time of the signal, even though that code in
+ * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
+ * avoids relying on sigreturn to restore SS; instead it uses
+ * a trampoline.) So we do our best: if the old SS was valid,
+ * we keep it. Otherwise we replace it.
+ */
regs->cs = __USER_CS;
+ if (unlikely(regs->ss != __USER_DS))
+ force_valid_ss(regs);
+
return 0;
}
#endif /* CONFIG_X86_32 */
@@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
@@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->sc))
+ /*
+ * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+ * Save a few cycles by skipping the __get_user.
+ */
+ if (restore_sigcontext(regs, &frame->sc, 0))
goto badframe;
return regs->ax;
@@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
@@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
-#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_64
+ if (is_ia32_task())
+ return __NR_ia32_restart_syscall;
+#endif
+#ifdef CONFIG_X86_X32_ABI
+ return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#else
return __NR_restart_syscall;
-#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
- return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
- __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
-#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+#endif
}
/*
@@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 24d57f77b3c1..643dbdccf4bc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
+/* Logical package management. We might want to allocate that dynamically */
+static int *physical_to_logical_pkg __read_mostly;
+static unsigned long *physical_package_map __read_mostly;;
+static unsigned long *logical_package_map __read_mostly;
+static unsigned int max_physical_pkg_id __read_mostly;
+unsigned int __max_logical_packages __read_mostly;
+EXPORT_SYMBOL(__max_logical_packages);
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -248,7 +256,98 @@ static void notrace start_secondary(void *unused)
x86_cpuinit.setup_percpu_clockev();
wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+{
+ unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+ /* Called from early boot ? */
+ if (!physical_package_map)
+ return 0;
+
+ if (pkg >= max_physical_pkg_id)
+ return -EINVAL;
+
+ /* Set the logical package id */
+ if (test_and_set_bit(pkg, physical_package_map))
+ goto found;
+
+ if (pkg < __max_logical_packages) {
+ set_bit(pkg, logical_package_map);
+ physical_to_logical_pkg[pkg] = pkg;
+ goto found;
+ }
+ new = find_first_zero_bit(logical_package_map, __max_logical_packages);
+ if (new >= __max_logical_packages) {
+ physical_to_logical_pkg[pkg] = -1;
+ pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+ apicid, pkg);
+ return -ENOSPC;
+ }
+ set_bit(new, logical_package_map);
+ pr_info("APIC(%x) Converting physical %u to logical package %u\n",
+ apicid, pkg, new);
+ physical_to_logical_pkg[pkg] = new;
+
+found:
+ cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+ return 0;
+}
+
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+ if (phys_pkg >= max_physical_pkg_id)
+ return -1;
+ return physical_to_logical_pkg[phys_pkg];
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+static void __init smp_init_package_map(void)
+{
+ unsigned int ncpus, cpu;
+ size_t size;
+
+ /*
+ * Today neither Intel nor AMD support heterogenous systems. That
+ * might change in the future....
+ */
+ ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
+ __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+
+ /*
+ * Possibly larger than what we need as the number of apic ids per
+ * package can be smaller than the actual used apic ids.
+ */
+ max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
+ size = max_physical_pkg_id * sizeof(unsigned int);
+ physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
+ memset(physical_to_logical_pkg, 0xff, size);
+ size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
+ physical_package_map = kzalloc(size, GFP_KERNEL);
+ size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
+ logical_package_map = kzalloc(size, GFP_KERNEL);
+
+ pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+ for_each_present_cpu(cpu) {
+ unsigned int apicid = apic->cpu_present_to_apicid(cpu);
+
+ if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
+ continue;
+ if (!topology_update_package_map(apicid, cpu))
+ continue;
+ pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
+ per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
+ set_cpu_possible(cpu, false);
+ set_cpu_present(cpu, false);
+ }
}
void __init smp_store_boot_cpu_info(void)
@@ -258,6 +357,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
+ smp_init_package_map();
}
/*
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 91a4496db434..e72a07f20b05 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -135,7 +135,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
if (!pmd)
return -1;
- pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
+ pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
if (!pte)
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 3f92ce07e525..27538f183c3b 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -142,7 +142,6 @@ static int test_NX(void)
* by the error message
*/
-#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
if (rodata_test_data != 0xC3) {
printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
@@ -151,7 +150,6 @@ static int test_NX(void)
printk(KERN_ERR "test_nx: .rodata section is executable\n");
ret = -ENODEV;
}
-#endif
#if 0
/* Test 4: Check if the .data section of a module is executable */
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 5ecbfe5099da..cb4a01b41e27 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -76,5 +76,5 @@ int rodata_test(void)
}
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ade185a46b1d..06cbe25861f1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
-static inline void conditional_sti(struct pt_regs *regs)
+static inline void cond_local_irq_enable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
- preempt_count_inc();
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void conditional_cli(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_disable();
-}
-
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void cond_local_irq_disable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
- preempt_count_dec();
}
void ist_enter(struct pt_regs *regs)
@@ -199,7 +185,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
}
if (!user_mode(regs)) {
- if (!fixup_exception(regs)) {
+ if (!fixup_exception(regs, trapnr)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
@@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
-#ifdef CONFIG_X86_64
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
printk_ratelimit()) {
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
@@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
print_vma_addr(" in ", regs->ip);
pr_cont("\n");
}
-#endif
force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
}
@@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
@@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs))
die("bounds", regs, error_code);
@@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
struct task_struct *tsk;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
local_irq_enable();
@@ -453,7 +437,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
tsk = current;
if (!user_mode(regs)) {
- if (fixup_exception(regs))
+ if (fixup_exception(regs, X86_TRAP_GP))
return;
tsk->thread.error_code = error_code;
@@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
ist_exit(regs);
@@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
NOKPROBE_SYMBOL(fixup_bad_iret);
#endif
+static bool is_sysenter_singlestep(struct pt_regs *regs)
+{
+ /*
+ * We don't try for precision here. If we're anywhere in the region of
+ * code that can be single-stepped in the SYSENTER entry path, then
+ * assume that this is a useless single-step trap due to SYSENTER
+ * being invoked with TF set. (We don't know in advance exactly
+ * which instructions will be hit because BTF could plausibly
+ * be set.)
+ */
+#ifdef CONFIG_X86_32
+ return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
+ (unsigned long)__end_SYSENTER_singlestep_region -
+ (unsigned long)__begin_SYSENTER_singlestep_region;
+#elif defined(CONFIG_IA32_EMULATION)
+ return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
+ (unsigned long)__end_entry_SYSENTER_compat -
+ (unsigned long)entry_SYSENTER_compat;
+#else
+ return false;
+#endif
+}
+
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
@@ -605,11 +614,42 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
ist_enter(regs);
get_debugreg(dr6, 6);
+ /*
+ * The Intel SDM says:
+ *
+ * Certain debug exceptions may clear bits 0-3. The remaining
+ * contents of the DR6 register are never cleared by the
+ * processor. To avoid confusion in identifying debug
+ * exceptions, debug handlers should clear the register before
+ * returning to the interrupted task.
+ *
+ * Keep it simple: clear DR6 immediately.
+ */
+ set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
/*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." Clear TIF_BLOCKSTEP to keep
+ * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+ */
+ clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
+
+ if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
+ is_sysenter_singlestep(regs))) {
+ dr6 &= ~DR_STEP;
+ if (!dr6)
+ goto exit;
+ /*
+ * else we might have gotten a single-step trap and hit a
+ * watchpoint at the same time, in which case we should fall
+ * through and handle the watchpoint.
+ */
+ }
+
+ /*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
@@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
if (!dr6 && user_mode(regs))
user_icebp = 1;
- /* Catch kmemcheck conditions first of all! */
+ /* Catch kmemcheck conditions! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
goto exit;
- /* DR6 may or may not be cleared by the CPU */
- set_debugreg(0, 6);
-
- /*
- * The processor cleared BTF, so don't mark that we need it set.
- */
- clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
-
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
@@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
goto exit;
}
- /*
- * Single-stepping through system calls: ignore any exceptions in
- * kernel space, but re-enable TF when returning to user mode.
- *
- * We already checked v86 mode above, so we can check for kernel mode
- * by just checking the CPL of CS.
- */
- if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
+ /*
+ * Historical junk that used to handle SYSENTER single-stepping.
+ * This should be unreachable now. If we survive for a while
+ * without anyone hitting this warning, we'll turn this into
+ * an oops.
+ */
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
@@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
+#if defined(CONFIG_X86_32)
+ /*
+ * This is the most likely code path that involves non-trivial use
+ * of the SYSENTER stack. Check that we haven't overrun it.
+ */
+ WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+ "Overran or corrupted SYSENTER stack\n");
+#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -696,10 +738,10 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs)) {
- if (!fixup_exception(regs)) {
+ if (!fixup_exception(regs, trapnr)) {
task->thread.error_code = error_code;
task->thread.trap_nr = trapnr;
die(str, regs, error_code);
@@ -743,20 +785,19 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
}
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION
- if (read_cr0() & X86_CR0_EM) {
+ if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
struct math_emu_info info = { };
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
info.regs = regs;
math_emulate(&info);
@@ -765,7 +806,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif
fpu__restore(&current->thread.fpu); /* interrupts still off */
#ifdef CONFIG_X86_32
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
#endif
}
NOKPROBE_SYMBOL(do_device_not_available);
@@ -868,7 +909,7 @@ void __init trap_init(void)
#endif
#ifdef CONFIG_X86_32
- set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+ set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3d743da828d3..56380440d862 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -43,6 +43,11 @@ static DEFINE_STATIC_KEY_FALSE(__use_tsc);
int tsc_clocksource_reliable;
+static u32 art_to_tsc_numerator;
+static u32 art_to_tsc_denominator;
+static u64 art_to_tsc_offset;
+struct clocksource *art_related_clocksource;
+
/*
* Use a ring-buffer like data structure, where a writer advances the head by
* writing a new data entry and a reader advances the tail when it observes a
@@ -964,6 +969,37 @@ core_initcall(cpufreq_tsc);
#endif /* CONFIG_CPU_FREQ */
+#define ART_CPUID_LEAF (0x15)
+#define ART_MIN_DENOMINATOR (1)
+
+
+/*
+ * If ART is present detect the numerator:denominator to convert to TSC
+ */
+static void detect_art(void)
+{
+ unsigned int unused[2];
+
+ if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
+ return;
+
+ cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
+ &art_to_tsc_numerator, unused, unused+1);
+
+ /* Don't enable ART in a VM, non-stop TSC required */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
+ !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
+ art_to_tsc_denominator < ART_MIN_DENOMINATOR)
+ return;
+
+ if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset))
+ return;
+
+ /* Make this sticky over multiple CPU init calls */
+ setup_force_cpu_cap(X86_FEATURE_ART);
+}
+
+
/* clocksource code */
static struct clocksource clocksource_tsc;
@@ -1071,6 +1107,25 @@ int unsynchronized_tsc(void)
return 0;
}
+/*
+ * Convert ART to TSC given numerator/denominator found in detect_art()
+ */
+struct system_counterval_t convert_art_to_tsc(cycle_t art)
+{
+ u64 tmp, res, rem;
+
+ rem = do_div(art, art_to_tsc_denominator);
+
+ res = art * art_to_tsc_numerator;
+ tmp = rem * art_to_tsc_numerator;
+
+ do_div(tmp, art_to_tsc_denominator);
+ res += tmp + art_to_tsc_offset;
+
+ return (struct system_counterval_t) {.cs = art_related_clocksource,
+ .cycles = res};
+}
+EXPORT_SYMBOL(convert_art_to_tsc);
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
@@ -1142,6 +1197,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
(unsigned long)tsc_khz % 1000);
out:
+ if (boot_cpu_has(X86_FEATURE_ART))
+ art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
}
@@ -1235,6 +1292,8 @@ void __init tsc_init(void)
mark_tsc_unstable("TSCs unsynchronized");
check_system_tsc_reliable();
+
+ detect_art();
}
#ifdef CONFIG_SMP
@@ -1246,14 +1305,14 @@ void __init tsc_init(void)
*/
unsigned long calibrate_delay_is_known(void)
{
- int i, cpu = smp_processor_id();
+ int sibling, cpu = smp_processor_id();
if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
return 0;
- for_each_online_cpu(i)
- if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
- return cpu_data(i).loops_per_jiffy;
+ sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ if (sibling < nr_cpu_ids)
+ return cpu_data(sibling).loops_per_jiffy;
return 0;
}
#endif
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35ee4bc..014ea59aa153 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -30,7 +30,7 @@
* appropriately. Either display a message or halt.
*/
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
verify_cpu:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b8546518..3dce1ca0a653 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
- if (static_cpu_has_safe(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP))
tsk->thread.sysenter_cs = 0;
load_sp0(tss, &tsk->thread);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf11f562..5af9958cbdb6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -41,29 +41,28 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
/*
- * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
- * we retain large page mappings for boundaries spanning kernel text, rodata
- * and data sections.
+ * On 64-bit, align RODATA to 2MB so we retain large page mappings for
+ * boundaries spanning kernel text, rodata and data sections.
*
* However, kernel identity mappings will have different RWX permissions
* to the pages mapping to text and to the pages padding (which are freed) the
* text section. Hence kernel identity mappings will be broken to smaller
* pages. For 64-bit, kernel text and kernel identity mappings are different,
- * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
- * as well as retain 2MB large page mappings for kernel text.
+ * so we can enable protection checks as well as retain 2MB large page
+ * mappings for kernel text.
*/
-#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
+#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
-#define X64_ALIGN_DEBUG_RODATA_END \
+#define X64_ALIGN_RODATA_END \
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
#else
-#define X64_ALIGN_DEBUG_RODATA_BEGIN
-#define X64_ALIGN_DEBUG_RODATA_END
+#define X64_ALIGN_RODATA_BEGIN
+#define X64_ALIGN_RODATA_END
#endif
@@ -112,13 +111,11 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090
-#if defined(CONFIG_DEBUG_RODATA)
/* .text should occupy whole number of pages */
. = ALIGN(PAGE_SIZE);
-#endif
- X64_ALIGN_DEBUG_RODATA_BEGIN
+ X64_ALIGN_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
- X64_ALIGN_DEBUG_RODATA_END
+ X64_ALIGN_RODATA_END
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -195,6 +192,17 @@ SECTIONS
:init
#endif
+ /*
+ * Section for code used exclusively before alternatives are run. All
+ * references to such code must be patched out by alternatives, normally
+ * by using X86_FEATURE_ALWAYS CPU feature bit.
+ *
+ * See static_cpu_has() for an example.
+ */
+ .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+ *(.altinstr_aux)
+ }
+
INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a0695be19864..cd05942bc918 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -37,6 +37,8 @@ EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);
+EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index a1ff508bb423..464fa477afbf 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -13,9 +13,10 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
- hyperv.o
+ hyperv.o page_track.o
kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
+
kvm-intel-y += vmx.o pmu_intel.o
kvm-amd-y += svm.o pmu_amd.o
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index 9dc091acd5fb..308b8597c691 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -51,11 +51,9 @@ struct kvm_assigned_dev_kernel {
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
int assigned_dev_id)
{
- struct list_head *ptr;
struct kvm_assigned_dev_kernel *match;
- list_for_each(ptr, head) {
- match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+ list_for_each_entry(match, head, list) {
if (match->assigned_dev_id == assigned_dev_id)
return match;
}
@@ -373,14 +371,10 @@ static void kvm_free_assigned_device(struct kvm *kvm,
void kvm_free_all_assigned_devices(struct kvm *kvm)
{
- struct list_head *ptr, *ptr2;
- struct kvm_assigned_dev_kernel *assigned_dev;
-
- list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
- assigned_dev = list_entry(ptr,
- struct kvm_assigned_dev_kernel,
- list);
+ struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
+ list_for_each_entry_safe(assigned_dev, tmp,
+ &kvm->arch.assigned_dev_head, list) {
kvm_free_assigned_device(kvm, assigned_dev);
}
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6525e926f566..0029644bf09c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}
+bool kvm_mpx_supported(void)
+{
+ return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
+ && kvm_x86_ops->mpx_supported());
+}
+EXPORT_SYMBOL_GPL(kvm_mpx_supported);
+
u64 kvm_supported_xcr0(void)
{
u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
- if (!kvm_x86_ops->mpx_supported())
+ if (!kvm_mpx_supported())
xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
return xcr0;
@@ -97,8 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
- if (vcpu->arch.eager_fpu)
+ if (use_eager_fpu())
kvm_x86_ops->fpu_activate(vcpu);
/*
@@ -295,7 +301,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
- unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
+ unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
/* cpuid 1.edx */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c8eda1498121..66a6581724ad 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -5,6 +5,7 @@
#include <asm/cpu.h>
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
+bool kvm_mpx_supported(void);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
@@ -135,14 +136,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
-static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_MPX));
-}
-
static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index c58ba67175ac..5ff3485acb60 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1043,6 +1043,27 @@ bool kvm_hv_hypercall_enabled(struct kvm *kvm)
return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
}
+static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
+{
+ bool longmode;
+
+ longmode = is_64_bit_mode(vcpu);
+ if (longmode)
+ kvm_register_write(vcpu, VCPU_REGS_RAX, result);
+ else {
+ kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
+ }
+}
+
+static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
+ return 1;
+}
+
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
u64 param, ingpa, outgpa, ret;
@@ -1055,7 +1076,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
*/
if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
- return 0;
+ return 1;
}
longmode = is_64_bit_mode(vcpu);
@@ -1083,22 +1104,33 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+ /* Hypercall continuation is not supported yet */
+ if (rep_cnt || rep_idx) {
+ res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ goto set_result;
+ }
+
switch (code) {
- case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+ case HVCALL_NOTIFY_LONG_SPIN_WAIT:
kvm_vcpu_on_spin(vcpu);
break;
+ case HVCALL_POST_MESSAGE:
+ case HVCALL_SIGNAL_EVENT:
+ vcpu->run->exit_reason = KVM_EXIT_HYPERV;
+ vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
+ vcpu->run->hyperv.u.hcall.input = param;
+ vcpu->run->hyperv.u.hcall.params[0] = ingpa;
+ vcpu->run->hyperv.u.hcall.params[1] = outgpa;
+ vcpu->arch.complete_userspace_io =
+ kvm_hv_hypercall_complete_userspace;
+ return 0;
default:
res = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
}
+set_result:
ret = res | (((u64)rep_done & 0xfff) << 32);
- if (longmode) {
- kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
- } else {
- kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
- kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
- }
-
+ kvm_hv_hypercall_set_result(vcpu, ret);
return 1;
}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b0ea42b78ccd..a4bf5b45d65a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -51,32 +51,9 @@
#define RW_STATE_WORD0 3
#define RW_STATE_WORD1 4
-/* Compute with 96 bit intermediate result: (a*b)/c */
-static u64 muldiv64(u64 a, u32 b, u32 c)
+static void pit_set_gate(struct kvm_pit *pit, int channel, u32 val)
{
- union {
- u64 ll;
- struct {
- u32 low, high;
- } l;
- } u, res;
- u64 rl, rh;
-
- u.ll = a;
- rl = (u64)u.l.low * (u64)b;
- rh = (u64)u.l.high * (u64)b;
- rh += (rl >> 32);
- res.l.high = div64_u64(rh, c);
- res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
- return res.ll;
-}
-
-static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
-{
- struct kvm_kpit_channel_state *c =
- &kvm->arch.vpit->pit_state.channels[channel];
-
- WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+ struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
switch (c->mode) {
default:
@@ -97,18 +74,16 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
c->gate = val;
}
-static int pit_get_gate(struct kvm *kvm, int channel)
+static int pit_get_gate(struct kvm_pit *pit, int channel)
{
- WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
-
- return kvm->arch.vpit->pit_state.channels[channel].gate;
+ return pit->pit_state.channels[channel].gate;
}
-static s64 __kpit_elapsed(struct kvm *kvm)
+static s64 __kpit_elapsed(struct kvm_pit *pit)
{
s64 elapsed;
ktime_t remaining;
- struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+ struct kvm_kpit_state *ps = &pit->pit_state;
if (!ps->period)
return 0;
@@ -128,26 +103,23 @@ static s64 __kpit_elapsed(struct kvm *kvm)
return elapsed;
}
-static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
+static s64 kpit_elapsed(struct kvm_pit *pit, struct kvm_kpit_channel_state *c,
int channel)
{
if (channel == 0)
- return __kpit_elapsed(kvm);
+ return __kpit_elapsed(pit);
return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
}
-static int pit_get_count(struct kvm *kvm, int channel)
+static int pit_get_count(struct kvm_pit *pit, int channel)
{
- struct kvm_kpit_channel_state *c =
- &kvm->arch.vpit->pit_state.channels[channel];
+ struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
s64 d, t;
int counter;
- WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
-
- t = kpit_elapsed(kvm, c, channel);
- d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+ t = kpit_elapsed(pit, c, channel);
+ d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) {
case 0:
@@ -167,17 +139,14 @@ static int pit_get_count(struct kvm *kvm, int channel)
return counter;
}
-static int pit_get_out(struct kvm *kvm, int channel)
+static int pit_get_out(struct kvm_pit *pit, int channel)
{
- struct kvm_kpit_channel_state *c =
- &kvm->arch.vpit->pit_state.channels[channel];
+ struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
s64 d, t;
int out;
- WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
-
- t = kpit_elapsed(kvm, c, channel);
- d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+ t = kpit_elapsed(pit, c, channel);
+ d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) {
default:
@@ -202,29 +171,23 @@ static int pit_get_out(struct kvm *kvm, int channel)
return out;
}
-static void pit_latch_count(struct kvm *kvm, int channel)
+static void pit_latch_count(struct kvm_pit *pit, int channel)
{
- struct kvm_kpit_channel_state *c =
- &kvm->arch.vpit->pit_state.channels[channel];
-
- WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+ struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
if (!c->count_latched) {
- c->latched_count = pit_get_count(kvm, channel);
+ c->latched_count = pit_get_count(pit, channel);
c->count_latched = c->rw_mode;
}
}
-static void pit_latch_status(struct kvm *kvm, int channel)
+static void pit_latch_status(struct kvm_pit *pit, int channel)
{
- struct kvm_kpit_channel_state *c =
- &kvm->arch.vpit->pit_state.channels[channel];
-
- WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+ struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
if (!c->status_latched) {
/* TODO: Return NULL COUNT (bit 6). */
- c->status = ((pit_get_out(kvm, channel) << 7) |
+ c->status = ((pit_get_out(pit, channel) << 7) |
(c->rw_mode << 4) |
(c->mode << 1) |
c->bcd);
@@ -232,26 +195,24 @@ static void pit_latch_status(struct kvm *kvm, int channel)
}
}
+static inline struct kvm_pit *pit_state_to_pit(struct kvm_kpit_state *ps)
+{
+ return container_of(ps, struct kvm_pit, pit_state);
+}
+
static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
{
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
irq_ack_notifier);
- int value;
-
- spin_lock(&ps->inject_lock);
- value = atomic_dec_return(&ps->pending);
- if (value < 0)
- /* spurious acks can be generated if, for example, the
- * PIC is being reset. Handle it gracefully here
- */
- atomic_inc(&ps->pending);
- else if (value > 0)
- /* in this case, we had multiple outstanding pit interrupts
- * that we needed to inject. Reinject
- */
- queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
- ps->irq_ack = 1;
- spin_unlock(&ps->inject_lock);
+ struct kvm_pit *pit = pit_state_to_pit(ps);
+
+ atomic_set(&ps->irq_ack, 1);
+ /* irq_ack should be set before pending is read. Order accesses with
+ * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work.
+ */
+ smp_mb();
+ if (atomic_dec_if_positive(&ps->pending) > 0)
+ queue_kthread_work(&pit->worker, &pit->expired);
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -282,45 +243,36 @@ static void pit_do_work(struct kthread_work *work)
struct kvm_vcpu *vcpu;
int i;
struct kvm_kpit_state *ps = &pit->pit_state;
- int inject = 0;
- /* Try to inject pending interrupts when
- * last one has been acked.
+ if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0))
+ return;
+
+ kvm_set_irq(kvm, pit->irq_source_id, 0, 1, false);
+ kvm_set_irq(kvm, pit->irq_source_id, 0, 0, false);
+
+ /*
+ * Provides NMI watchdog support via Virtual Wire mode.
+ * The route is: PIT -> LVT0 in NMI mode.
+ *
+ * Note: Our Virtual Wire implementation does not follow
+ * the MP specification. We propagate a PIT interrupt to all
+ * VCPUs and only when LVT0 is in NMI mode. The interrupt can
+ * also be simultaneously delivered through PIC and IOAPIC.
*/
- spin_lock(&ps->inject_lock);
- if (ps->irq_ack) {
- ps->irq_ack = 0;
- inject = 1;
- }
- spin_unlock(&ps->inject_lock);
- if (inject) {
- kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
- kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
-
- /*
- * Provides NMI watchdog support via Virtual Wire mode.
- * The route is: PIT -> PIC -> LVT0 in NMI mode.
- *
- * Note: Our Virtual Wire implementation is simplified, only
- * propagating PIT interrupts to all VCPUs when they have set
- * LVT0 to NMI delivery. Other PIC interrupts are just sent to
- * VCPU0, and only if its LVT0 is in EXTINT mode.
- */
- if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_apic_nmi_wd_deliver(vcpu);
- }
+ if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_apic_nmi_wd_deliver(vcpu);
}
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
- struct kvm_pit *pt = ps->kvm->arch.vpit;
+ struct kvm_pit *pt = pit_state_to_pit(ps);
- if (ps->reinject || !atomic_read(&ps->pending)) {
+ if (atomic_read(&ps->reinject))
atomic_inc(&ps->pending);
- queue_kthread_work(&pt->worker, &pt->expired);
- }
+
+ queue_kthread_work(&pt->worker, &pt->expired);
if (ps->is_periodic) {
hrtimer_add_expires_ns(&ps->timer, ps->period);
@@ -329,30 +281,54 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
-static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
+static inline void kvm_pit_reset_reinject(struct kvm_pit *pit)
{
- struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+ atomic_set(&pit->pit_state.pending, 0);
+ atomic_set(&pit->pit_state.irq_ack, 1);
+}
+
+void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject)
+{
+ struct kvm_kpit_state *ps = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+
+ if (atomic_read(&ps->reinject) == reinject)
+ return;
+
+ if (reinject) {
+ /* The initial state is preserved while ps->reinject == 0. */
+ kvm_pit_reset_reinject(pit);
+ kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
+ kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+ } else {
+ kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
+ kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+ }
+
+ atomic_set(&ps->reinject, reinject);
+}
+
+static void create_pit_timer(struct kvm_pit *pit, u32 val, int is_period)
+{
+ struct kvm_kpit_state *ps = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
s64 interval;
if (!ioapic_in_kernel(kvm) ||
ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
return;
- interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
+ interval = mul_u64_u32_div(val, NSEC_PER_SEC, KVM_PIT_FREQ);
pr_debug("create pit timer, interval is %llu nsec\n", interval);
/* TODO The new value only affected after the retriggered */
hrtimer_cancel(&ps->timer);
- flush_kthread_work(&ps->pit->expired);
+ flush_kthread_work(&pit->expired);
ps->period = interval;
ps->is_periodic = is_period;
- ps->timer.function = pit_timer_fn;
- ps->kvm = ps->pit->kvm;
-
- atomic_set(&ps->pending, 0);
- ps->irq_ack = 1;
+ kvm_pit_reset_reinject(pit);
/*
* Do not allow the guest to program periodic timers with small
@@ -375,11 +351,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
HRTIMER_MODE_ABS);
}
-static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+static void pit_load_count(struct kvm_pit *pit, int channel, u32 val)
{
- struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
-
- WARN_ON(!mutex_is_locked(&ps->lock));
+ struct kvm_kpit_state *ps = &pit->pit_state;
pr_debug("load_count val is %d, channel is %d\n", val, channel);
@@ -404,29 +378,33 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
case 1:
/* FIXME: enhance mode 4 precision */
case 4:
- create_pit_timer(kvm, val, 0);
+ create_pit_timer(pit, val, 0);
break;
case 2:
case 3:
- create_pit_timer(kvm, val, 1);
+ create_pit_timer(pit, val, 1);
break;
default:
- destroy_pit_timer(kvm->arch.vpit);
+ destroy_pit_timer(pit);
}
}
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start)
+void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
+ int hpet_legacy_start)
{
u8 saved_mode;
+
+ WARN_ON_ONCE(!mutex_is_locked(&pit->pit_state.lock));
+
if (hpet_legacy_start) {
/* save existing mode for later reenablement */
WARN_ON(channel != 0);
- saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
- kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
- pit_load_count(kvm, channel, val);
- kvm->arch.vpit->pit_state.channels[0].mode = saved_mode;
+ saved_mode = pit->pit_state.channels[0].mode;
+ pit->pit_state.channels[0].mode = 0xff; /* disable timer */
+ pit_load_count(pit, channel, val);
+ pit->pit_state.channels[0].mode = saved_mode;
} else {
- pit_load_count(kvm, channel, val);
+ pit_load_count(pit, channel, val);
}
}
@@ -452,7 +430,6 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
{
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
- struct kvm *kvm = pit->kvm;
int channel, access;
struct kvm_kpit_channel_state *s;
u32 val = *(u32 *) data;
@@ -476,9 +453,9 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
s = &pit_state->channels[channel];
if (val & (2 << channel)) {
if (!(val & 0x20))
- pit_latch_count(kvm, channel);
+ pit_latch_count(pit, channel);
if (!(val & 0x10))
- pit_latch_status(kvm, channel);
+ pit_latch_status(pit, channel);
}
}
} else {
@@ -486,7 +463,7 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
s = &pit_state->channels[channel];
access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
if (access == 0) {
- pit_latch_count(kvm, channel);
+ pit_latch_count(pit, channel);
} else {
s->rw_mode = access;
s->read_state = access;
@@ -503,17 +480,17 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
switch (s->write_state) {
default:
case RW_STATE_LSB:
- pit_load_count(kvm, addr, val);
+ pit_load_count(pit, addr, val);
break;
case RW_STATE_MSB:
- pit_load_count(kvm, addr, val << 8);
+ pit_load_count(pit, addr, val << 8);
break;
case RW_STATE_WORD0:
s->write_latch = val;
s->write_state = RW_STATE_WORD1;
break;
case RW_STATE_WORD1:
- pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ pit_load_count(pit, addr, s->write_latch | (val << 8));
s->write_state = RW_STATE_WORD0;
break;
}
@@ -529,7 +506,6 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu,
{
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
- struct kvm *kvm = pit->kvm;
int ret, count;
struct kvm_kpit_channel_state *s;
if (!pit_in_range(addr))
@@ -566,20 +542,20 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu,
switch (s->read_state) {
default:
case RW_STATE_LSB:
- count = pit_get_count(kvm, addr);
+ count = pit_get_count(pit, addr);
ret = count & 0xff;
break;
case RW_STATE_MSB:
- count = pit_get_count(kvm, addr);
+ count = pit_get_count(pit, addr);
ret = (count >> 8) & 0xff;
break;
case RW_STATE_WORD0:
- count = pit_get_count(kvm, addr);
+ count = pit_get_count(pit, addr);
ret = count & 0xff;
s->read_state = RW_STATE_WORD1;
break;
case RW_STATE_WORD1:
- count = pit_get_count(kvm, addr);
+ count = pit_get_count(pit, addr);
ret = (count >> 8) & 0xff;
s->read_state = RW_STATE_WORD0;
break;
@@ -600,14 +576,13 @@ static int speaker_ioport_write(struct kvm_vcpu *vcpu,
{
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
- struct kvm *kvm = pit->kvm;
u32 val = *(u32 *) data;
if (addr != KVM_SPEAKER_BASE_ADDRESS)
return -EOPNOTSUPP;
mutex_lock(&pit_state->lock);
pit_state->speaker_data_on = (val >> 1) & 1;
- pit_set_gate(kvm, 2, val & 1);
+ pit_set_gate(pit, 2, val & 1);
mutex_unlock(&pit_state->lock);
return 0;
}
@@ -618,7 +593,6 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu,
{
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
- struct kvm *kvm = pit->kvm;
unsigned int refresh_clock;
int ret;
if (addr != KVM_SPEAKER_BASE_ADDRESS)
@@ -628,8 +602,8 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu,
refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
mutex_lock(&pit_state->lock);
- ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
- (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(pit, 2) |
+ (pit_get_out(pit, 2) << 5) | (refresh_clock << 4));
if (len > sizeof(ret))
len = sizeof(ret);
memcpy(data, (char *)&ret, len);
@@ -637,33 +611,28 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu,
return 0;
}
-void kvm_pit_reset(struct kvm_pit *pit)
+static void kvm_pit_reset(struct kvm_pit *pit)
{
int i;
struct kvm_kpit_channel_state *c;
- mutex_lock(&pit->pit_state.lock);
pit->pit_state.flags = 0;
for (i = 0; i < 3; i++) {
c = &pit->pit_state.channels[i];
c->mode = 0xff;
c->gate = (i != 2);
- pit_load_count(pit->kvm, i, 0);
+ pit_load_count(pit, i, 0);
}
- mutex_unlock(&pit->pit_state.lock);
- atomic_set(&pit->pit_state.pending, 0);
- pit->pit_state.irq_ack = 1;
+ kvm_pit_reset_reinject(pit);
}
static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
{
struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
- if (!mask) {
- atomic_set(&pit->pit_state.pending, 0);
- pit->pit_state.irq_ack = 1;
- }
+ if (!mask)
+ kvm_pit_reset_reinject(pit);
}
static const struct kvm_io_device_ops pit_dev_ops = {
@@ -690,14 +659,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
return NULL;
pit->irq_source_id = kvm_request_irq_source_id(kvm);
- if (pit->irq_source_id < 0) {
- kfree(pit);
- return NULL;
- }
+ if (pit->irq_source_id < 0)
+ goto fail_request;
mutex_init(&pit->pit_state.lock);
- mutex_lock(&pit->pit_state.lock);
- spin_lock_init(&pit->pit_state.inject_lock);
pid = get_pid(task_tgid(current));
pid_nr = pid_vnr(pid);
@@ -706,36 +671,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
init_kthread_worker(&pit->worker);
pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
"kvm-pit/%d", pid_nr);
- if (IS_ERR(pit->worker_task)) {
- mutex_unlock(&pit->pit_state.lock);
- kvm_free_irq_source_id(kvm, pit->irq_source_id);
- kfree(pit);
- return NULL;
- }
+ if (IS_ERR(pit->worker_task))
+ goto fail_kthread;
+
init_kthread_work(&pit->expired, pit_do_work);
- kvm->arch.vpit = pit;
pit->kvm = kvm;
pit_state = &pit->pit_state;
- pit_state->pit = pit;
hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ pit_state->timer.function = pit_timer_fn;
+
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
- kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
- pit_state->reinject = true;
- mutex_unlock(&pit->pit_state.lock);
+ pit->mask_notifier.func = pit_mask_notifer;
kvm_pit_reset(pit);
- pit->mask_notifier.func = pit_mask_notifer;
- kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+ kvm_pit_set_reinject(pit, true);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
KVM_PIT_MEM_LENGTH, &pit->dev);
if (ret < 0)
- goto fail;
+ goto fail_register_pit;
if (flags & KVM_PIT_SPEAKER_DUMMY) {
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
@@ -743,42 +702,35 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
KVM_SPEAKER_BASE_ADDRESS, 4,
&pit->speaker_dev);
if (ret < 0)
- goto fail_unregister;
+ goto fail_register_speaker;
}
return pit;
-fail_unregister:
+fail_register_speaker:
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
-
-fail:
- kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
- kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
- kvm_free_irq_source_id(kvm, pit->irq_source_id);
+fail_register_pit:
+ kvm_pit_set_reinject(pit, false);
kthread_stop(pit->worker_task);
+fail_kthread:
+ kvm_free_irq_source_id(kvm, pit->irq_source_id);
+fail_request:
kfree(pit);
return NULL;
}
void kvm_free_pit(struct kvm *kvm)
{
- struct hrtimer *timer;
-
- if (kvm->arch.vpit) {
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev);
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
- &kvm->arch.vpit->speaker_dev);
- kvm_unregister_irq_mask_notifier(kvm, 0,
- &kvm->arch.vpit->mask_notifier);
- kvm_unregister_irq_ack_notifier(kvm,
- &kvm->arch.vpit->pit_state.irq_ack_notifier);
- mutex_lock(&kvm->arch.vpit->pit_state.lock);
- timer = &kvm->arch.vpit->pit_state.timer;
- hrtimer_cancel(timer);
- flush_kthread_work(&kvm->arch.vpit->expired);
- kthread_stop(kvm->arch.vpit->worker_task);
- kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
- mutex_unlock(&kvm->arch.vpit->pit_state.lock);
- kfree(kvm->arch.vpit);
+ struct kvm_pit *pit = kvm->arch.vpit;
+
+ if (pit) {
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
+ kvm_pit_set_reinject(pit, false);
+ hrtimer_cancel(&pit->pit_state.timer);
+ flush_kthread_work(&pit->expired);
+ kthread_stop(pit->worker_task);
+ kvm_free_irq_source_id(kvm, pit->irq_source_id);
+ kfree(pit);
}
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index c84990b42b5b..2f5af0798326 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -22,19 +22,18 @@ struct kvm_kpit_channel_state {
};
struct kvm_kpit_state {
+ /* All members before "struct mutex lock" are protected by the lock. */
struct kvm_kpit_channel_state channels[3];
u32 flags;
bool is_periodic;
s64 period; /* unit: ns */
struct hrtimer timer;
- atomic_t pending; /* accumulated triggered timers */
- bool reinject;
- struct kvm *kvm;
u32 speaker_data_on;
+
struct mutex lock;
- struct kvm_pit *pit;
- spinlock_t inject_lock;
- unsigned long irq_ack;
+ atomic_t reinject;
+ atomic_t pending; /* accumulated triggered timers */
+ atomic_t irq_ack;
struct kvm_irq_ack_notifier irq_ack_notifier;
};
@@ -57,9 +56,11 @@ struct kvm_pit {
#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
#define KVM_PIT_CHANNEL_MASK 0x3
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);
-void kvm_pit_reset(struct kvm_pit *pit);
+
+void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
+ int hpet_legacy_start);
+void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject);
#endif
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 1facfd60b04a..9db47090ead0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
ioapic->rtc_status.pending_eoi = 0;
- bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
+ bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS);
}
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
@@ -117,16 +117,16 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
return;
new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
- old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+ old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
if (new_val == old_val)
return;
if (new_val) {
- __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+ __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
ioapic->rtc_status.pending_eoi++;
} else {
- __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+ __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
ioapic->rtc_status.pending_eoi--;
rtc_status_pending_eoi_check_valid(ioapic);
}
@@ -156,7 +156,8 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
{
- if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
+ if (test_and_clear_bit(vcpu->vcpu_id,
+ ioapic->rtc_status.dest_map.map)) {
--ioapic->rtc_status.pending_eoi;
rtc_status_pending_eoi_check_valid(ioapic);
}
@@ -236,10 +237,17 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
{
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+ struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
union kvm_ioapic_redirect_entry *e;
int index;
spin_lock(&ioapic->lock);
+
+ /* Make sure we see any missing RTC EOI */
+ if (test_bit(vcpu->vcpu_id, dest_map->map))
+ __set_bit(dest_map->vectors[vcpu->vcpu_id],
+ ioapic_handled_vectors);
+
for (index = 0; index < IOAPIC_NUM_PINS; index++) {
e = &ioapic->redirtbl[index];
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
@@ -346,7 +354,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
*/
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
- ioapic->rtc_status.dest_map);
+ &ioapic->rtc_status.dest_map);
ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
} else
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
@@ -407,8 +415,14 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{
- int i;
+ struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
struct kvm_lapic *apic = vcpu->arch.apic;
+ int i;
+
+ /* RTC special handling */
+ if (test_bit(vcpu->vcpu_id, dest_map->map) &&
+ vector == dest_map->vectors[vcpu->vcpu_id])
+ rtc_irq_eoi(ioapic, vcpu);
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
@@ -416,8 +430,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
if (ent->fields.vector != vector)
continue;
- if (i == RTC_GSI)
- rtc_irq_eoi(ioapic, vcpu);
/*
* We are dropping lock while calling ack notifiers because ack
* notifier callbacks for assigned devices call into IOAPIC
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 2d16dc251d81..7d2692a49657 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -40,9 +40,21 @@ struct kvm_vcpu;
#define RTC_GSI -1U
#endif
+struct dest_map {
+ /* vcpu bitmap where IRQ has been sent */
+ DECLARE_BITMAP(map, KVM_MAX_VCPUS);
+
+ /*
+ * Vector sent to a given vcpu, only valid when
+ * the vcpu's bit in map is set
+ */
+ u8 vectors[KVM_MAX_VCPUS];
+};
+
+
struct rtc_status {
int pending_eoi;
- DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+ struct dest_map dest_map;
};
union kvm_ioapic_redirect_entry {
@@ -118,7 +130,8 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
int level, bool line_status);
void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, unsigned long *dest_map);
+ struct kvm_lapic_irq *irq,
+ struct dest_map *dest_map);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 3982b479bb5f..95fcc7b13866 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -33,7 +33,10 @@
*/
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return apic_has_pending_timer(vcpu);
+ if (lapic_in_kernel(vcpu))
+ return apic_has_pending_timer(vcpu);
+
+ return 0;
}
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
@@ -137,8 +140,8 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
- kvm_inject_apic_timer_irqs(vcpu);
- /* TODO: PIT, RTC etc. */
+ if (lapic_in_kernel(vcpu))
+ kvm_inject_apic_timer_irqs(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ae5c78f2337d..61ebdc13a29a 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -109,14 +109,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return ret;
}
-static inline int lapic_in_kernel(struct kvm_vcpu *vcpu)
-{
- /* Same as irqchip_in_kernel(vcpu->kvm), but with less
- * pointer chasing and no unnecessary memory barriers.
- */
- return vcpu->arch.apic != NULL;
-}
-
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8fc89efb5250..54ead79e444b 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -34,6 +34,7 @@
#include "lapic.h"
#include "hyperv.h"
+#include "x86.h"
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
@@ -53,10 +54,12 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
}
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, unsigned long *dest_map)
+ struct kvm_lapic_irq *irq, struct dest_map *dest_map)
{
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
+ unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int dest_vcpus = 0;
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_lowest_prio_delivery(irq)) {
@@ -67,6 +70,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
return r;
+ memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_apic_present(vcpu))
continue;
@@ -80,13 +85,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
} else if (kvm_lapic_enabled(vcpu)) {
- if (!lowest)
- lowest = vcpu;
- else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
- lowest = vcpu;
+ if (!kvm_vector_hashing_enabled()) {
+ if (!lowest)
+ lowest = vcpu;
+ else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+ lowest = vcpu;
+ } else {
+ __set_bit(i, dest_vcpu_bitmap);
+ dest_vcpus++;
+ }
}
}
+ if (dest_vcpus != 0) {
+ int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
+ dest_vcpu_bitmap, KVM_MAX_VCPUS);
+
+ lowest = kvm_get_vcpu(kvm, idx);
+ }
+
if (lowest)
r = kvm_apic_set_irq(lowest, irq, dest_map);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 36591faed13b..443d2a57ad3d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -281,7 +281,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
struct kvm_cpuid_entry2 *feat;
u32 v = APIC_VERSION;
- if (!kvm_vcpu_has_lapic(vcpu))
+ if (!lapic_in_kernel(vcpu))
return;
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
@@ -475,26 +475,20 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
- int highest_irr;
-
/* This may race with setting of irr in __apic_accept_irq() and
* value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
* will cause vmexit immediately and the value will be recalculated
* on the next vmentry.
*/
- if (!kvm_vcpu_has_lapic(vcpu))
- return 0;
- highest_irr = apic_find_highest_irr(vcpu->arch.apic);
-
- return highest_irr;
+ return apic_find_highest_irr(vcpu->arch.apic);
}
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode,
- unsigned long *dest_map);
+ struct dest_map *dest_map);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
- unsigned long *dest_map)
+ struct dest_map *dest_map)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -675,8 +669,33 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
}
}
+int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
+ const unsigned long *bitmap, u32 bitmap_size)
+{
+ u32 mod;
+ int i, idx = -1;
+
+ mod = vector % dest_vcpus;
+
+ for (i = 0; i <= mod; i++) {
+ idx = find_next_bit(bitmap, bitmap_size, idx + 1);
+ BUG_ON(idx == bitmap_size);
+ }
+
+ return idx;
+}
+
+static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
+{
+ if (!kvm->arch.disabled_lapic_found) {
+ kvm->arch.disabled_lapic_found = true;
+ printk(KERN_INFO
+ "Disabled LAPIC found during irq injection\n");
+ }
+}
+
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
+ struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
{
struct kvm_apic_map *map;
unsigned long bitmap = 1;
@@ -727,21 +746,42 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
dst = map->logical_map[cid];
- if (kvm_lowest_prio_delivery(irq)) {
+ if (!kvm_lowest_prio_delivery(irq))
+ goto set_irq;
+
+ if (!kvm_vector_hashing_enabled()) {
int l = -1;
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
if (l < 0)
l = i;
- else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+ else if (kvm_apic_compare_prio(dst[i]->vcpu,
+ dst[l]->vcpu) < 0)
l = i;
}
-
bitmap = (l >= 0) ? 1 << l : 0;
+ } else {
+ int idx;
+ unsigned int dest_vcpus;
+
+ dest_vcpus = hweight16(bitmap);
+ if (dest_vcpus == 0)
+ goto out;
+
+ idx = kvm_vector_to_index(irq->vector,
+ dest_vcpus, &bitmap, 16);
+
+ if (!dst[idx]) {
+ kvm_apic_disabled_lapic_found(kvm);
+ goto out;
+ }
+
+ bitmap = (idx >= 0) ? 1 << idx : 0;
}
}
+set_irq:
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
@@ -754,6 +794,20 @@ out:
return ret;
}
+/*
+ * This routine tries to handler interrupts in posted mode, here is how
+ * it deals with different cases:
+ * - For single-destination interrupts, handle it in posted mode
+ * - Else if vector hashing is enabled and it is a lowest-priority
+ * interrupt, handle it in posted mode and use the following mechanism
+ * to find the destinaiton vCPU.
+ * 1. For lowest-priority interrupts, store all the possible
+ * destination vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find
+ * the right destination vCPU in the array for the lowest-priority
+ * interrupt.
+ * - Otherwise, use remapped mode to inject the interrupt.
+ */
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu)
{
@@ -795,16 +849,37 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
if (cid >= ARRAY_SIZE(map->logical_map))
goto out;
- for_each_set_bit(i, &bitmap, 16) {
- dst = map->logical_map[cid][i];
- if (++r == 2)
+ if (kvm_vector_hashing_enabled() &&
+ kvm_lowest_prio_delivery(irq)) {
+ int idx;
+ unsigned int dest_vcpus;
+
+ dest_vcpus = hweight16(bitmap);
+ if (dest_vcpus == 0)
goto out;
- }
- if (dst && kvm_apic_present(dst->vcpu))
+ idx = kvm_vector_to_index(irq->vector, dest_vcpus,
+ &bitmap, 16);
+
+ dst = map->logical_map[cid][idx];
+ if (!dst) {
+ kvm_apic_disabled_lapic_found(kvm);
+ goto out;
+ }
+
*dest_vcpu = dst->vcpu;
- else
- goto out;
+ } else {
+ for_each_set_bit(i, &bitmap, 16) {
+ dst = map->logical_map[cid][i];
+ if (++r == 2)
+ goto out;
+ }
+
+ if (dst && kvm_apic_present(dst->vcpu))
+ *dest_vcpu = dst->vcpu;
+ else
+ goto out;
+ }
}
ret = true;
@@ -819,7 +894,7 @@ out:
*/
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode,
- unsigned long *dest_map)
+ struct dest_map *dest_map)
{
int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -839,8 +914,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
result = 1;
- if (dest_map)
- __set_bit(vcpu->vcpu_id, dest_map);
+ if (dest_map) {
+ __set_bit(vcpu->vcpu_id, dest_map->map);
+ dest_map->vectors[vcpu->vcpu_id] = vector;
+ }
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
if (trig_mode)
@@ -1195,7 +1272,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
static void apic_timer_expired(struct kvm_lapic *apic)
{
struct kvm_vcpu *vcpu = apic->vcpu;
- wait_queue_head_t *q = &vcpu->wq;
+ struct swait_queue_head *q = &vcpu->wq;
struct kvm_timer *ktimer = &apic->lapic_timer;
if (atomic_read(&apic->lapic_timer.pending))
@@ -1204,8 +1281,8 @@ static void apic_timer_expired(struct kvm_lapic *apic)
atomic_inc(&apic->lapic_timer.pending);
kvm_set_pending_timer(vcpu);
- if (waitqueue_active(q))
- wake_up_interruptible(q);
+ if (swait_active(q))
+ swake_up(q);
if (apic_lvtt_tscdeadline(apic))
ktimer->expired_tscdeadline = ktimer->tscdeadline;
@@ -1239,7 +1316,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
- if (!kvm_vcpu_has_lapic(vcpu))
+ if (!lapic_in_kernel(vcpu))
return;
if (apic->lapic_timer.expired_tscdeadline == 0)
@@ -1515,8 +1592,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_has_lapic(vcpu))
- apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
+ apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
@@ -1566,7 +1642,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+ if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
apic_lvtt_period(apic))
return 0;
@@ -1577,7 +1653,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+ if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
apic_lvtt_period(apic))
return;
@@ -1590,9 +1666,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!kvm_vcpu_has_lapic(vcpu))
- return;
-
apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
| (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
}
@@ -1601,9 +1674,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
u64 tpr;
- if (!kvm_vcpu_has_lapic(vcpu))
- return 0;
-
tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
return (tpr & 0xf0) >> 4;
@@ -1728,8 +1798,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
- apic_lvt_enabled(apic, APIC_LVTT))
+ if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
return atomic_read(&apic->lapic_timer.pending);
return 0;
@@ -1826,7 +1895,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
int highest_irr;
- if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
+ if (!apic_enabled(apic))
return -1;
apic_update_ppr(apic);
@@ -1854,9 +1923,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!kvm_vcpu_has_lapic(vcpu))
- return;
-
if (atomic_read(&apic->lapic_timer.pending) > 0) {
kvm_apic_local_deliver(apic, APIC_LVTT);
if (apic_lvtt_tscdeadline(apic))
@@ -1932,7 +1998,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
struct hrtimer *timer;
- if (!kvm_vcpu_has_lapic(vcpu))
+ if (!lapic_in_kernel(vcpu))
return;
timer = &vcpu->arch.apic->lapic_timer.timer;
@@ -2105,7 +2171,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!kvm_vcpu_has_lapic(vcpu))
+ if (!lapic_in_kernel(vcpu))
return 1;
/* if this is ICR write vector before command */
@@ -2119,7 +2185,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 low, high = 0;
- if (!kvm_vcpu_has_lapic(vcpu))
+ if (!lapic_in_kernel(vcpu))
return 1;
if (apic_reg_read(apic, reg, 4, &low))
@@ -2151,7 +2217,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
u8 sipi_vector;
unsigned long pe;
- if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
+ if (!lapic_in_kernel(vcpu) || !apic->pending_events)
return;
/*
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 41bdb35b4b67..f71183e502ee 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -42,6 +42,9 @@ struct kvm_lapic {
unsigned long pending_events;
unsigned int sipi_vector;
};
+
+struct dest_map;
+
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
@@ -60,11 +63,11 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
void __kvm_apic_update_irr(u32 *pir, void *regs);
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
- unsigned long *dest_map);
+ struct dest_map *dest_map);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
+ struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -103,7 +106,7 @@ static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
extern struct static_key kvm_no_apic_vcpu;
-static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu)
+static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
{
if (static_key_false(&kvm_no_apic_vcpu))
return vcpu->arch.apic;
@@ -130,7 +133,7 @@ static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic);
+ return lapic_in_kernel(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic);
}
static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
@@ -150,7 +153,7 @@ static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu)
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events;
+ return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events;
}
static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
@@ -161,7 +164,7 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
static inline int kvm_apic_id(struct kvm_lapic *apic)
@@ -175,4 +178,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu);
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
+int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
+ const unsigned long *bitmap, u32 bitmap_size);
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 95a955de5964..c512f095cdac 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -41,6 +41,7 @@
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/vmx.h>
+#include <asm/kvm_page_track.h>
/*
* When setting this variable to true it enables Two-Dimensional-Paging
@@ -776,62 +777,85 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
return &slot->arch.lpage_info[level - 2][idx];
}
+static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
+ gfn_t gfn, int count)
+{
+ struct kvm_lpage_info *linfo;
+ int i;
+
+ for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+ linfo = lpage_info_slot(gfn, slot, i);
+ linfo->disallow_lpage += count;
+ WARN_ON(linfo->disallow_lpage < 0);
+ }
+}
+
+void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ update_gfn_disallow_lpage_count(slot, gfn, 1);
+}
+
+void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ update_gfn_disallow_lpage_count(slot, gfn, -1);
+}
+
static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
- struct kvm_lpage_info *linfo;
gfn_t gfn;
- int i;
+ kvm->arch.indirect_shadow_pages++;
gfn = sp->gfn;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
- for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
- linfo = lpage_info_slot(gfn, slot, i);
- linfo->write_count += 1;
- }
- kvm->arch.indirect_shadow_pages++;
+
+ /* the non-leaf shadow pages are keeping readonly. */
+ if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ return kvm_slot_page_track_add_page(kvm, slot, gfn,
+ KVM_PAGE_TRACK_WRITE);
+
+ kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
- struct kvm_lpage_info *linfo;
gfn_t gfn;
- int i;
+ kvm->arch.indirect_shadow_pages--;
gfn = sp->gfn;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
- for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
- linfo = lpage_info_slot(gfn, slot, i);
- linfo->write_count -= 1;
- WARN_ON(linfo->write_count < 0);
- }
- kvm->arch.indirect_shadow_pages--;
+ if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ return kvm_slot_page_track_remove_page(kvm, slot, gfn,
+ KVM_PAGE_TRACK_WRITE);
+
+ kvm_mmu_gfn_allow_lpage(slot, gfn);
}
-static int __has_wrprotected_page(gfn_t gfn, int level,
- struct kvm_memory_slot *slot)
+static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
+ struct kvm_memory_slot *slot)
{
struct kvm_lpage_info *linfo;
if (slot) {
linfo = lpage_info_slot(gfn, slot, level);
- return linfo->write_count;
+ return !!linfo->disallow_lpage;
}
- return 1;
+ return true;
}
-static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
+static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
+ int level)
{
struct kvm_memory_slot *slot;
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- return __has_wrprotected_page(gfn, level, slot);
+ return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
}
static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
@@ -897,7 +921,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
- if (__has_wrprotected_page(large_gfn, level, slot))
+ if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot))
break;
return level - 1;
@@ -1323,23 +1347,29 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
+ struct kvm_memory_slot *slot, u64 gfn)
{
- struct kvm_memory_slot *slot;
struct kvm_rmap_head *rmap_head;
int i;
bool write_protected = false;
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-
for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
rmap_head = __gfn_to_rmap(gfn, i, slot);
- write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true);
+ write_protected |= __rmap_write_protect(kvm, rmap_head, true);
}
return write_protected;
}
+static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+{
+ struct kvm_memory_slot *slot;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);
+}
+
static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
@@ -1754,7 +1784,7 @@ static void mark_unsync(u64 *spte)
static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
- return 1;
+ return 0;
}
static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
@@ -1840,13 +1870,16 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
return nr_unsync_leaf;
}
+#define INVALID_INDEX (-1)
+
static int mmu_unsync_walk(struct kvm_mmu_page *sp,
struct kvm_mmu_pages *pvec)
{
+ pvec->nr = 0;
if (!sp->unsync_children)
return 0;
- mmu_pages_add(pvec, sp, 0);
+ mmu_pages_add(pvec, sp, INVALID_INDEX);
return __mmu_unsync_walk(sp, pvec);
}
@@ -1883,37 +1916,35 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if ((_sp)->role.direct || (_sp)->role.invalid) {} else
/* @sp->gfn should be write-protected at the call site */
-static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- struct list_head *invalid_list, bool clear_unsync)
+static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ struct list_head *invalid_list)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
- return 1;
+ return false;
}
- if (clear_unsync)
- kvm_unlink_unsync_page(vcpu->kvm, sp);
-
- if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+ if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
- return 1;
+ return false;
}
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- return 0;
+ return true;
}
-static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp)
+static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
+ struct list_head *invalid_list,
+ bool remote_flush, bool local_flush)
{
- LIST_HEAD(invalid_list);
- int ret;
-
- ret = __kvm_sync_page(vcpu, sp, &invalid_list, false);
- if (ret)
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ if (!list_empty(invalid_list)) {
+ kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
+ return;
+ }
- return ret;
+ if (remote_flush)
+ kvm_flush_remote_tlbs(vcpu->kvm);
+ else if (local_flush)
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
#ifdef CONFIG_KVM_MMU_AUDIT
@@ -1923,46 +1954,38 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif
-static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
- return __kvm_sync_page(vcpu, sp, invalid_list, true);
+ kvm_unlink_unsync_page(vcpu->kvm, sp);
+ return __kvm_sync_page(vcpu, sp, invalid_list);
}
/* @gfn should be write-protected at the call site */
-static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
+ struct list_head *invalid_list)
{
struct kvm_mmu_page *s;
- LIST_HEAD(invalid_list);
- bool flush = false;
+ bool ret = false;
for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
if (!s->unsync)
continue;
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
- kvm_unlink_unsync_page(vcpu->kvm, s);
- if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
- (vcpu->arch.mmu.sync_page(vcpu, s))) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
- continue;
- }
- flush = true;
+ ret |= kvm_sync_page(vcpu, s, invalid_list);
}
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- if (flush)
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return ret;
}
struct mmu_page_path {
- struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
- unsigned int idx[PT64_ROOT_LEVEL-1];
+ struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
+ unsigned int idx[PT64_ROOT_LEVEL];
};
#define for_each_sp(pvec, sp, parents, i) \
- for (i = mmu_pages_next(&pvec, &parents, -1), \
- sp = pvec.page[i].sp; \
+ for (i = mmu_pages_first(&pvec, &parents); \
i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
i = mmu_pages_next(&pvec, &parents, i))
@@ -1974,19 +1997,43 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
for (n = i+1; n < pvec->nr; n++) {
struct kvm_mmu_page *sp = pvec->page[n].sp;
+ unsigned idx = pvec->page[n].idx;
+ int level = sp->role.level;
- if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
- parents->idx[0] = pvec->page[n].idx;
- return n;
- }
+ parents->idx[level-1] = idx;
+ if (level == PT_PAGE_TABLE_LEVEL)
+ break;
- parents->parent[sp->role.level-2] = sp;
- parents->idx[sp->role.level-1] = pvec->page[n].idx;
+ parents->parent[level-2] = sp;
}
return n;
}
+static int mmu_pages_first(struct kvm_mmu_pages *pvec,
+ struct mmu_page_path *parents)
+{
+ struct kvm_mmu_page *sp;
+ int level;
+
+ if (pvec->nr == 0)
+ return 0;
+
+ WARN_ON(pvec->page[0].idx != INVALID_INDEX);
+
+ sp = pvec->page[0].sp;
+ level = sp->role.level;
+ WARN_ON(level == PT_PAGE_TABLE_LEVEL);
+
+ parents->parent[level-2] = sp;
+
+ /* Also set up a sentinel. Further entries in pvec are all
+ * children of sp, so this element is never overwritten.
+ */
+ parents->parent[level-1] = NULL;
+ return mmu_pages_next(pvec, parents, 0);
+}
+
static void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
struct kvm_mmu_page *sp;
@@ -1994,22 +2041,14 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
do {
unsigned int idx = parents->idx[level];
-
sp = parents->parent[level];
if (!sp)
return;
+ WARN_ON(idx == INVALID_INDEX);
clear_unsync_child_bit(sp, idx);
level++;
- } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
-}
-
-static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
- struct mmu_page_path *parents,
- struct kvm_mmu_pages *pvec)
-{
- parents->parent[parent->role.level-1] = NULL;
- pvec->nr = 0;
+ } while (!sp->unsync_children);
}
static void mmu_sync_children(struct kvm_vcpu *vcpu,
@@ -2020,30 +2059,36 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
struct mmu_page_path parents;
struct kvm_mmu_pages pages;
LIST_HEAD(invalid_list);
+ bool flush = false;
- kvm_mmu_pages_init(parent, &parents, &pages);
while (mmu_unsync_walk(parent, &pages)) {
bool protected = false;
for_each_sp(pages, sp, parents, i)
protected |= rmap_write_protect(vcpu, sp->gfn);
- if (protected)
+ if (protected) {
kvm_flush_remote_tlbs(vcpu->kvm);
+ flush = false;
+ }
for_each_sp(pages, sp, parents, i) {
- kvm_sync_page(vcpu, sp, &invalid_list);
+ flush |= kvm_sync_page(vcpu, sp, &invalid_list);
mmu_pages_clear_parents(&parents);
}
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- cond_resched_lock(&vcpu->kvm->mmu_lock);
- kvm_mmu_pages_init(parent, &parents, &pages);
+ if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+ cond_resched_lock(&vcpu->kvm->mmu_lock);
+ flush = false;
+ }
}
+
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
}
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
{
- sp->write_flooding_count = 0;
+ atomic_set(&sp->write_flooding_count, 0);
}
static void clear_sp_write_flooding_count(u64 *spte)
@@ -2069,6 +2114,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
unsigned quadrant;
struct kvm_mmu_page *sp;
bool need_sync = false;
+ bool flush = false;
+ LIST_HEAD(invalid_list);
role = vcpu->arch.mmu.base_role;
role.level = level;
@@ -2092,8 +2139,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (sp->role.word != role.word)
continue;
- if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
- break;
+ if (sp->unsync) {
+ /* The page is good, but __kvm_sync_page might still end
+ * up zapping it. If so, break in order to rebuild it.
+ */
+ if (!__kvm_sync_page(vcpu, sp, &invalid_list))
+ break;
+
+ WARN_ON(!list_empty(&invalid_list));
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
if (sp->unsync_children)
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -2112,16 +2167,24 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
hlist_add_head(&sp->hash_link,
&vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
if (!direct) {
- if (rmap_write_protect(vcpu, gfn))
+ /*
+ * we should do write protection before syncing pages
+ * otherwise the content of the synced shadow page may
+ * be inconsistent with guest page table.
+ */
+ account_shadowed(vcpu->kvm, sp);
+ if (level == PT_PAGE_TABLE_LEVEL &&
+ rmap_write_protect(vcpu, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
- if (level > PT_PAGE_TABLE_LEVEL && need_sync)
- kvm_sync_pages(vcpu, gfn);
- account_shadowed(vcpu->kvm, sp);
+ if (level > PT_PAGE_TABLE_LEVEL && need_sync)
+ flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
+
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
return sp;
}
@@ -2269,7 +2332,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
if (parent->role.level == PT_PAGE_TABLE_LEVEL)
return 0;
- kvm_mmu_pages_init(parent, &parents, &pages);
while (mmu_unsync_walk(parent, &pages)) {
struct kvm_mmu_page *sp;
@@ -2278,7 +2340,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
mmu_pages_clear_parents(&parents);
zapped++;
}
- kvm_mmu_pages_init(parent, &parents, &pages);
}
return zapped;
@@ -2354,8 +2415,8 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
if (list_empty(&kvm->arch.active_mmu_pages))
return false;
- sp = list_entry(kvm->arch.active_mmu_pages.prev,
- struct kvm_mmu_page, link);
+ sp = list_last_entry(&kvm->arch.active_mmu_pages,
+ struct kvm_mmu_page, link);
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
return true;
@@ -2408,7 +2469,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
-static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
trace_kvm_mmu_unsync_page(sp);
++vcpu->kvm->stat.mmu_unsync;
@@ -2417,37 +2478,26 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
kvm_mmu_mark_parents_unsync(sp);
}
-static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+ bool can_unsync)
{
- struct kvm_mmu_page *s;
-
- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
- if (s->unsync)
- continue;
- WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
- __kvm_unsync_page(vcpu, s);
- }
-}
+ struct kvm_mmu_page *sp;
-static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
- bool can_unsync)
-{
- struct kvm_mmu_page *s;
- bool need_unsync = false;
+ if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+ return true;
- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (!can_unsync)
- return 1;
+ return true;
- if (s->role.level != PT_PAGE_TABLE_LEVEL)
- return 1;
+ if (sp->unsync)
+ continue;
- if (!s->unsync)
- need_unsync = true;
+ WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
+ kvm_unsync_page(vcpu, sp);
}
- if (need_unsync)
- kvm_unsync_pages(vcpu, gfn);
- return 0;
+
+ return false;
}
static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
@@ -2503,7 +2553,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* be fixed if guest refault.
*/
if (level > PT_PAGE_TABLE_LEVEL &&
- has_wrprotected_page(vcpu, gfn, level))
+ mmu_gfn_lpage_is_disallowed(vcpu, gfn, level))
goto done;
spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
@@ -2768,7 +2818,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
level == PT_PAGE_TABLE_LEVEL &&
PageTransCompound(pfn_to_page(pfn)) &&
- !has_wrprotected_page(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
+ !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
/*
* mmu_notifier_retry was successful and we hold the
@@ -2796,20 +2846,16 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
kvm_pfn_t pfn, unsigned access, int *ret_val)
{
- bool ret = true;
-
/* The pfn is invalid, report the error! */
if (unlikely(is_error_pfn(pfn))) {
*ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
- goto exit;
+ return true;
}
if (unlikely(is_noslot_pfn(pfn)))
vcpu_cache_mmio_info(vcpu, gva, gfn, access);
- ret = false;
-exit:
- return ret;
+ return false;
}
static bool page_fault_can_be_fast(u32 error_code)
@@ -3273,7 +3319,7 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
}
-static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
if (direct)
return vcpu_match_mmio_gpa(vcpu, addr);
@@ -3332,7 +3378,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
u64 spte;
bool reserved;
- if (quickly_check_mmio_pf(vcpu, addr, direct))
+ if (mmio_info_in_cache(vcpu, addr, direct))
return RET_MMIO_PF_EMULATE;
reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
@@ -3362,20 +3408,53 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
}
EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
+static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
+ u32 error_code, gfn_t gfn)
+{
+ if (unlikely(error_code & PFERR_RSVD_MASK))
+ return false;
+
+ if (!(error_code & PFERR_PRESENT_MASK) ||
+ !(error_code & PFERR_WRITE_MASK))
+ return false;
+
+ /*
+ * guest is writing the page which is write tracked which can
+ * not be fixed by page fault handler.
+ */
+ if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+ return true;
+
+ return false;
+}
+
+static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
+{
+ struct kvm_shadow_walk_iterator iterator;
+ u64 spte;
+
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return;
+
+ walk_shadow_page_lockless_begin(vcpu);
+ for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
+ clear_sp_write_flooding_count(iterator.sptep);
+ if (!is_shadow_present_pte(spte))
+ break;
+ }
+ walk_shadow_page_lockless_end(vcpu);
+}
+
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code, bool prefault)
{
- gfn_t gfn;
+ gfn_t gfn = gva >> PAGE_SHIFT;
int r;
pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
- if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, gva, true);
-
- if (likely(r != RET_MMIO_PF_INVALID))
- return r;
- }
+ if (page_fault_handle_page_track(vcpu, error_code, gfn))
+ return 1;
r = mmu_topup_memory_caches(vcpu);
if (r)
@@ -3383,7 +3462,6 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
- gfn = gva >> PAGE_SHIFT;
return nonpaging_map(vcpu, gva & PAGE_MASK,
error_code, gfn, prefault);
@@ -3460,12 +3538,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
- if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, gpa, true);
-
- if (likely(r != RET_MMIO_PF_INVALID))
- return r;
- }
+ if (page_fault_handle_page_track(vcpu, error_code, gfn))
+ return 1;
r = mmu_topup_memory_caches(vcpu);
if (r)
@@ -3558,13 +3632,24 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
return false;
}
-static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+static inline bool is_last_gpte(struct kvm_mmu *mmu,
+ unsigned level, unsigned gpte)
{
- unsigned index;
+ /*
+ * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
+ * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
+ * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
+ */
+ gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
- index = level - 1;
- index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
- return mmu->last_pte_bitmap & (1 << index);
+ /*
+ * The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
+ * If it is clear, there are no large pages at this level, so clear
+ * PT_PAGE_SIZE_MASK in gpte if that is the case.
+ */
+ gpte &= level - mmu->last_nonleaf_level;
+
+ return gpte & PT_PAGE_SIZE_MASK;
}
#define PTTYPE_EPT 18 /* arbitrary */
@@ -3721,13 +3806,15 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
+ bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+
/*
* Passing "true" to the last argument is okay; it adds a check
* on bit 8 of the SPTEs which KVM doesn't use anyway.
*/
__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
boot_cpu_data.x86_phys_bits,
- context->shadow_root_level, context->nx,
+ context->shadow_root_level, uses_nx,
guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
true);
}
@@ -3836,22 +3923,13 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
}
}
-static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
- u8 map;
- unsigned level, root_level = mmu->root_level;
- const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */
-
- if (root_level == PT32E_ROOT_LEVEL)
- --root_level;
- /* PT_PAGE_TABLE_LEVEL always terminates */
- map = 1 | (1 << ps_set_index);
- for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
- if (level <= PT_PDPE_LEVEL
- && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
- map |= 1 << (ps_set_index | (level - 1));
- }
- mmu->last_pte_bitmap = map;
+ unsigned root_level = mmu->root_level;
+
+ mmu->last_nonleaf_level = root_level;
+ if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu))
+ mmu->last_nonleaf_level++;
}
static void paging64_init_context_common(struct kvm_vcpu *vcpu,
@@ -3863,7 +3941,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
- update_last_pte_bitmap(vcpu, context);
+ update_last_nonleaf_level(vcpu, context);
MMU_WARN_ON(!is_pae(vcpu));
context->page_fault = paging64_page_fault;
@@ -3890,7 +3968,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
- update_last_pte_bitmap(vcpu, context);
+ update_last_nonleaf_level(vcpu, context);
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
@@ -3948,7 +4026,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, context, false);
- update_last_pte_bitmap(vcpu, context);
+ update_last_nonleaf_level(vcpu, context);
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
@@ -4054,7 +4132,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, g_context, false);
- update_last_pte_bitmap(vcpu, g_context);
+ update_last_nonleaf_level(vcpu, g_context);
}
static void init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -4125,18 +4203,6 @@ static bool need_remote_flush(u64 old, u64 new)
return (old & ~new & PT64_PERM_MASK) != 0;
}
-static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
- bool remote_flush, bool local_flush)
-{
- if (zap_page)
- return;
-
- if (remote_flush)
- kvm_flush_remote_tlbs(vcpu->kvm);
- else if (local_flush)
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-}
-
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
const u8 *new, int *bytes)
{
@@ -4186,7 +4252,8 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
if (sp->role.level == PT_PAGE_TABLE_LEVEL)
return false;
- return ++sp->write_flooding_count >= 3;
+ atomic_inc(&sp->write_flooding_count);
+ return atomic_read(&sp->write_flooding_count) >= 3;
}
/*
@@ -4248,15 +4315,15 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
return spte;
}
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes)
+static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+ const u8 *new, int bytes)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
u64 entry, gentry, *spte;
int npte;
- bool remote_flush, local_flush, zap_page;
+ bool remote_flush, local_flush;
union kvm_mmu_page_role mask = { };
mask.cr0_wp = 1;
@@ -4273,7 +4340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
return;
- zap_page = remote_flush = local_flush = false;
+ remote_flush = local_flush = false;
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
@@ -4293,8 +4360,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
- zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
++vcpu->kvm->stat.mmu_flooded;
continue;
}
@@ -4316,8 +4382,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++spte;
}
}
- mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush);
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
spin_unlock(&vcpu->kvm->mmu_lock);
}
@@ -4354,32 +4419,34 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
-static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
-{
- if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
- return vcpu_match_mmio_gpa(vcpu, addr);
-
- return vcpu_match_mmio_gva(vcpu, addr);
-}
-
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
void *insn, int insn_len)
{
int r, emulation_type = EMULTYPE_RETRY;
enum emulation_result er;
+ bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
+
+ if (unlikely(error_code & PFERR_RSVD_MASK)) {
+ r = handle_mmio_page_fault(vcpu, cr2, direct);
+ if (r == RET_MMIO_PF_EMULATE) {
+ emulation_type = 0;
+ goto emulate;
+ }
+ if (r == RET_MMIO_PF_RETRY)
+ return 1;
+ if (r < 0)
+ return r;
+ }
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
if (r < 0)
- goto out;
-
- if (!r) {
- r = 1;
- goto out;
- }
+ return r;
+ if (!r)
+ return 1;
- if (is_mmio_page_fault(vcpu, cr2))
+ if (mmio_info_in_cache(vcpu, cr2, direct))
emulation_type = 0;
-
+emulate:
er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
switch (er) {
@@ -4393,8 +4460,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
default:
BUG();
}
-out:
- return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -4463,6 +4528,21 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
init_kvm_mmu(vcpu);
}
+void kvm_mmu_init_vm(struct kvm *kvm)
+{
+ struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+
+ node->track_write = kvm_mmu_pte_write;
+ kvm_page_track_register_notifier(kvm, node);
+}
+
+void kvm_mmu_uninit_vm(struct kvm *kvm)
+{
+ struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+
+ kvm_page_track_unregister_notifier(kvm, node);
+}
+
/* The return value indicates if tlb flush on all vcpus is needed. */
typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 55ffb7b0f95e..58fe98a0a526 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -174,4 +174,9 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
+
+void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
+void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
+bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
+ struct kvm_memory_slot *slot, u64 gfn);
#endif
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
new file mode 100644
index 000000000000..11f76436f74f
--- /dev/null
+++ b/arch/x86/kvm/page_track.c
@@ -0,0 +1,222 @@
+/*
+ * Support KVM gust page tracking
+ *
+ * This feature allows us to track page access in guest. Currently, only
+ * write access is tracked.
+ *
+ * Copyright(C) 2015 Intel Corporation.
+ *
+ * Author:
+ * Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_page_track.h>
+
+#include "mmu.h"
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ int i;
+
+ for (i = 0; i < KVM_PAGE_TRACK_MAX; i++)
+ if (!dont || free->arch.gfn_track[i] !=
+ dont->arch.gfn_track[i]) {
+ kvfree(free->arch.gfn_track[i]);
+ free->arch.gfn_track[i] = NULL;
+ }
+}
+
+int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ int i;
+
+ for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
+ slot->arch.gfn_track[i] = kvm_kvzalloc(npages *
+ sizeof(*slot->arch.gfn_track[i]));
+ if (!slot->arch.gfn_track[i])
+ goto track_free;
+ }
+
+ return 0;
+
+track_free:
+ kvm_page_track_free_memslot(slot, NULL);
+ return -ENOMEM;
+}
+
+static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
+{
+ if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
+ return false;
+
+ return true;
+}
+
+static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
+ enum kvm_page_track_mode mode, short count)
+{
+ int index, val;
+
+ index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+
+ val = slot->arch.gfn_track[mode][index];
+
+ if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
+ return;
+
+ slot->arch.gfn_track[mode][index] += count;
+}
+
+/*
+ * add guest page to the tracking pool so that corresponding access on that
+ * page will be intercepted.
+ *
+ * It should be called under the protection both of mmu-lock and kvm->srcu
+ * or kvm->slots_lock.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @slot: the @gfn belongs to.
+ * @gfn: the guest page.
+ * @mode: tracking mode, currently only write track is supported.
+ */
+void kvm_slot_page_track_add_page(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ enum kvm_page_track_mode mode)
+{
+
+ if (WARN_ON(!page_track_mode_is_valid(mode)))
+ return;
+
+ update_gfn_track(slot, gfn, mode, 1);
+
+ /*
+ * new track stops large page mapping for the
+ * tracked page.
+ */
+ kvm_mmu_gfn_disallow_lpage(slot, gfn);
+
+ if (mode == KVM_PAGE_TRACK_WRITE)
+ if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
+ kvm_flush_remote_tlbs(kvm);
+}
+
+/*
+ * remove the guest page from the tracking pool which stops the interception
+ * of corresponding access on that page. It is the opposed operation of
+ * kvm_slot_page_track_add_page().
+ *
+ * It should be called under the protection both of mmu-lock and kvm->srcu
+ * or kvm->slots_lock.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @slot: the @gfn belongs to.
+ * @gfn: the guest page.
+ * @mode: tracking mode, currently only write track is supported.
+ */
+void kvm_slot_page_track_remove_page(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ enum kvm_page_track_mode mode)
+{
+ if (WARN_ON(!page_track_mode_is_valid(mode)))
+ return;
+
+ update_gfn_track(slot, gfn, mode, -1);
+
+ /*
+ * allow large page mapping for the tracked page
+ * after the tracker is gone.
+ */
+ kvm_mmu_gfn_allow_lpage(slot, gfn);
+}
+
+/*
+ * check if the corresponding access on the specified guest page is tracked.
+ */
+bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
+ enum kvm_page_track_mode mode)
+{
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ int index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+
+ if (WARN_ON(!page_track_mode_is_valid(mode)))
+ return false;
+
+ return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
+}
+
+void kvm_page_track_init(struct kvm *kvm)
+{
+ struct kvm_page_track_notifier_head *head;
+
+ head = &kvm->arch.track_notifier_head;
+ init_srcu_struct(&head->track_srcu);
+ INIT_HLIST_HEAD(&head->track_notifier_list);
+}
+
+/*
+ * register the notifier so that event interception for the tracked guest
+ * pages can be received.
+ */
+void
+kvm_page_track_register_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n)
+{
+ struct kvm_page_track_notifier_head *head;
+
+ head = &kvm->arch.track_notifier_head;
+
+ spin_lock(&kvm->mmu_lock);
+ hlist_add_head_rcu(&n->node, &head->track_notifier_list);
+ spin_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * stop receiving the event interception. It is the opposed operation of
+ * kvm_page_track_register_notifier().
+ */
+void
+kvm_page_track_unregister_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n)
+{
+ struct kvm_page_track_notifier_head *head;
+
+ head = &kvm->arch.track_notifier_head;
+
+ spin_lock(&kvm->mmu_lock);
+ hlist_del_rcu(&n->node);
+ spin_unlock(&kvm->mmu_lock);
+ synchronize_srcu(&head->track_srcu);
+}
+
+/*
+ * Notify the node that write access is intercepted and write emulation is
+ * finished at this time.
+ *
+ * The node should figure out if the written page is the one that node is
+ * interested in by itself.
+ */
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+ int bytes)
+{
+ struct kvm_page_track_notifier_head *head;
+ struct kvm_page_track_notifier_node *n;
+ int idx;
+
+ head = &vcpu->kvm->arch.track_notifier_head;
+
+ if (hlist_empty(&head->track_notifier_list))
+ return;
+
+ idx = srcu_read_lock(&head->track_srcu);
+ hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ if (n->track_write)
+ n->track_write(vcpu, gpa, new, bytes);
+ srcu_read_unlock(&head->track_srcu, idx);
+}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 2ce4f05e81d3..e159a8185ad9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -189,8 +189,11 @@ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
ACC_USER_MASK;
#else
- access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
- access &= ~(gpte >> PT64_NX_SHIFT);
+ BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
+ BUILD_BUG_ON(ACC_EXEC_MASK != 1);
+ access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
+ /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */
+ access ^= (gpte >> PT64_NX_SHIFT);
#endif
return access;
@@ -702,24 +705,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
- if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu));
- if (likely(r != RET_MMIO_PF_INVALID))
- return r;
-
- /*
- * page fault with PFEC.RSVD = 1 is caused by shadow
- * page fault, should not be used to walk guest page
- * table.
- */
- error_code &= ~PFERR_RSVD_MASK;
- };
-
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
/*
+ * If PFEC.RSVD is set, this is a shadow page fault.
+ * The bit needs to be cleared before walking guest page tables.
+ */
+ error_code &= ~PFERR_RSVD_MASK;
+
+ /*
* Look up the guest pte for the faulting address.
*/
r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
@@ -735,6 +731,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
return 0;
}
+ if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) {
+ shadow_page_table_clear_flood(vcpu, addr);
+ return 1;
+ }
+
vcpu->arch.write_fault_to_shadow_pgtable = false;
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
@@ -945,7 +946,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
sizeof(pt_element_t)))
- return -EINVAL;
+ return 0;
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
vcpu->kvm->tlbs_dirty++;
@@ -977,7 +978,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
host_writable);
}
- return !nr_present;
+ return nr_present;
}
#undef pt_element_t
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 31aa2c85dc97..06ce377dcbc9 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -257,7 +257,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.apic)
+ if (lapic_in_kernel(vcpu))
kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c13a64b7d789..95070386d599 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1858,8 +1858,7 @@ static int halt_interception(struct vcpu_svm *svm)
static int vmmcall_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- kvm_emulate_hypercall(&svm->vcpu);
- return 1;
+ return kvm_emulate_hypercall(&svm->vcpu);
}
static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index ad9f6a23f139..2f1ea2f61e1f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -996,11 +996,13 @@ TRACE_EVENT(kvm_enter_smm,
* Tracepoint for VT-d posted-interrupts.
*/
TRACE_EVENT(kvm_pi_irte_update,
- TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
- unsigned int gvec, u64 pi_desc_addr, bool set),
- TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
+ TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
+ unsigned int gsi, unsigned int gvec,
+ u64 pi_desc_addr, bool set),
+ TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set),
TP_STRUCT__entry(
+ __field( unsigned int, host_irq )
__field( unsigned int, vcpu_id )
__field( unsigned int, gsi )
__field( unsigned int, gvec )
@@ -1009,6 +1011,7 @@ TRACE_EVENT(kvm_pi_irte_update,
),
TP_fast_assign(
+ __entry->host_irq = host_irq;
__entry->vcpu_id = vcpu_id;
__entry->gsi = gsi;
__entry->gvec = gvec;
@@ -1016,9 +1019,10 @@ TRACE_EVENT(kvm_pi_irte_update,
__entry->set = set;
),
- TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
+ TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
"gvec: 0x%x, pi_desc_addr: 0x%llx",
__entry->set ? "enabled and being updated" : "disabled",
+ __entry->host_irq,
__entry->vcpu_id,
__entry->gsi,
__entry->gvec,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0ff453749a90..5e45c2731a5d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -863,7 +863,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
-static bool vmx_mpx_supported(void);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -963,25 +962,36 @@ static const u32 vmx_msr_index[] = {
MSR_EFER, MSR_TSC_AUX, MSR_STAR,
};
-static inline bool is_page_fault(u32 intr_info)
+static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
+}
+
+static inline bool is_debug(u32 intr_info)
+{
+ return is_exception_n(intr_info, DB_VECTOR);
+}
+
+static inline bool is_breakpoint(u32 intr_info)
+{
+ return is_exception_n(intr_info, BP_VECTOR);
+}
+
+static inline bool is_page_fault(u32 intr_info)
+{
+ return is_exception_n(intr_info, PF_VECTOR);
}
static inline bool is_no_device(u32 intr_info)
{
- return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
- INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+ return is_exception_n(intr_info, NM_VECTOR);
}
static inline bool is_invalid_opcode(u32 intr_info)
{
- return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
- INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+ return is_exception_n(intr_info, UD_VECTOR);
}
static inline bool is_external_interrupt(u32 intr_info)
@@ -1813,6 +1823,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
return;
}
break;
+ case MSR_IA32_PEBS_ENABLE:
+ /* PEBS needs a quiescent period after being disabled (to write
+ * a record). Disabling PEBS through VMX MSR swapping doesn't
+ * provide that period, so a CPU could write host's record into
+ * guest's memory.
+ */
+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
for (i = 0; i < m->nr; ++i)
@@ -1850,26 +1867,31 @@ static void reload_tss(void)
static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
{
- u64 guest_efer;
- u64 ignore_bits;
+ u64 guest_efer = vmx->vcpu.arch.efer;
+ u64 ignore_bits = 0;
- guest_efer = vmx->vcpu.arch.efer;
+ if (!enable_ept) {
+ /*
+ * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
+ * host CPUID is more efficient than testing guest CPUID
+ * or CR4. Host SMEP is anyway a requirement for guest SMEP.
+ */
+ if (boot_cpu_has(X86_FEATURE_SMEP))
+ guest_efer |= EFER_NX;
+ else if (!(guest_efer & EFER_NX))
+ ignore_bits |= EFER_NX;
+ }
/*
- * NX is emulated; LMA and LME handled by hardware; SCE meaningless
- * outside long mode
+ * LMA and LME handled by hardware; SCE meaningless outside long mode.
*/
- ignore_bits = EFER_NX | EFER_SCE;
+ ignore_bits |= EFER_SCE;
#ifdef CONFIG_X86_64
ignore_bits |= EFER_LMA | EFER_LME;
/* SCE is meaningful only in long mode on Intel */
if (guest_efer & EFER_LMA)
ignore_bits &= ~(u64)EFER_SCE;
#endif
- guest_efer &= ~ignore_bits;
- guest_efer |= host_efer & ignore_bits;
- vmx->guest_msrs[efer_offset].data = guest_efer;
- vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
clear_atomic_switch_msr(vmx, MSR_EFER);
@@ -1880,16 +1902,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
*/
if (cpu_has_load_ia32_efer ||
(enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
- guest_efer = vmx->vcpu.arch.efer;
if (!(guest_efer & EFER_LMA))
guest_efer &= ~EFER_LME;
if (guest_efer != host_efer)
add_atomic_switch_msr(vmx, MSR_EFER,
guest_efer, host_efer);
return false;
- }
+ } else {
+ guest_efer &= ~ignore_bits;
+ guest_efer |= host_efer & ignore_bits;
- return true;
+ vmx->guest_msrs[efer_offset].data = guest_efer;
+ vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+ return true;
+ }
}
static unsigned long segment_base(u16 selector)
@@ -2588,7 +2615,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
- if (vmx_mpx_supported())
+ if (kvm_mpx_supported())
vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* We support free control of debug control saving. */
@@ -2609,7 +2636,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
VM_ENTRY_LOAD_IA32_PAT;
vmx->nested.nested_vmx_entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
- if (vmx_mpx_supported())
+ if (kvm_mpx_supported())
vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* We support free control of debug control loading. */
@@ -2853,7 +2880,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
- if (!vmx_mpx_supported())
+ if (!kvm_mpx_supported())
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@@ -2930,7 +2957,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
- if (!vmx_mpx_supported())
+ if (!kvm_mpx_supported())
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
@@ -3403,7 +3430,7 @@ static void init_vmcs_shadow_fields(void)
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
switch (shadow_read_write_fields[i]) {
case GUEST_BNDCFGS:
- if (!vmx_mpx_supported())
+ if (!kvm_mpx_supported())
continue;
break;
default:
@@ -5612,11 +5639,8 @@ static int handle_dr(struct kvm_vcpu *vcpu)
}
if (vcpu->guest_debug == 0) {
- u32 cpu_based_vm_exec_control;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_MOV_DR_EXITING);
/*
* No more DR vmexits; force a reload of the debug registers
@@ -5653,8 +5677,6 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
- u32 cpu_based_vm_exec_control;
-
get_debugreg(vcpu->arch.db[0], 0);
get_debugreg(vcpu->arch.db[1], 1);
get_debugreg(vcpu->arch.db[2], 2);
@@ -5663,10 +5685,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
}
static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
@@ -5751,8 +5770,7 @@ static int handle_halt(struct kvm_vcpu *vcpu)
static int handle_vmcall(struct kvm_vcpu *vcpu)
{
- kvm_emulate_hypercall(vcpu);
- return 1;
+ return kvm_emulate_hypercall(vcpu);
}
static int handle_invd(struct kvm_vcpu *vcpu)
@@ -6439,8 +6457,8 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
/* Recycle the least recently used VMCS. */
- item = list_entry(vmx->nested.vmcs02_pool.prev,
- struct vmcs02_list, list);
+ item = list_last_entry(&vmx->nested.vmcs02_pool,
+ struct vmcs02_list, list);
item->vmptr = vmx->nested.current_vmptr;
list_move(&item->list, &vmx->nested.vmcs02_pool);
return &item->vmcs02;
@@ -7756,6 +7774,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
else if (is_no_device(intr_info) &&
!(vmcs12->guest_cr0 & X86_CR0_TS))
return false;
+ else if (is_debug(intr_info) &&
+ vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ return false;
+ else if (is_breakpoint(intr_info) &&
+ vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ return false;
return vmcs12->exception_bitmap &
(1u << (intr_info & INTR_INFO_VECTOR_MASK));
case EXIT_REASON_EXTERNAL_INTERRUPT:
@@ -10260,7 +10285,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
- if (vmx_mpx_supported())
+ if (kvm_mpx_supported())
vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
if (nested_cpu_has_xsaves(vmcs12))
vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
@@ -10768,13 +10793,26 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
*/
kvm_set_msi_irq(e, &irq);
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+ /*
+ * Make sure the IRTE is in remapped mode if
+ * we don't handle it in posted mode.
+ */
+ ret = irq_set_vcpu_affinity(host_irq, NULL);
+ if (ret < 0) {
+ printk(KERN_INFO
+ "failed to back to remapped mode, irq: %u\n",
+ host_irq);
+ goto out;
+ }
+
continue;
+ }
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
- trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi,
+ trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
vcpu_info.vector, vcpu_info.pi_desc_addr, set);
if (set)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eaf6ee8c28b8..7236bd3a4c3d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -123,6 +123,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
unsigned int __read_mostly lapic_timer_advance_ns = 0;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+static bool __read_mostly vector_hashing = true;
+module_param(vector_hashing, bool, S_IRUGO);
+
static bool __read_mostly backwards_tsc_observed = false;
#define KVM_NR_SHARED_MSRS 16
@@ -1196,17 +1199,11 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
- uint32_t quotient, remainder;
-
- /* Don't try to replace with do_div(), this one calculates
- * "(dividend << 32) / divisor" */
- __asm__ ( "divl %4"
- : "=a" (quotient), "=d" (remainder)
- : "0" (0), "1" (dividend), "r" (divisor) );
- return quotient;
+ do_shl32_div32(dividend, divisor);
+ return dividend;
}
-static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
+static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
s8 *pshift, u32 *pmultiplier)
{
uint64_t scaled64;
@@ -1214,8 +1211,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
uint64_t tps64;
uint32_t tps32;
- tps64 = base_khz * 1000LL;
- scaled64 = scaled_khz * 1000LL;
+ tps64 = base_hz;
+ scaled64 = scaled_hz;
while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
tps64 >>= 1;
shift--;
@@ -1233,8 +1230,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
*pshift = shift;
*pmultiplier = div_frac(scaled64, tps32);
- pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
- __func__, base_khz, scaled_khz, shift, *pmultiplier);
+ pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
+ __func__, base_hz, scaled_hz, shift, *pmultiplier);
}
#ifdef CONFIG_X86_64
@@ -1293,23 +1290,23 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
return 0;
}
-static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
+static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
{
u32 thresh_lo, thresh_hi;
int use_scaling = 0;
/* tsc_khz can be zero if TSC calibration fails */
- if (this_tsc_khz == 0) {
+ if (user_tsc_khz == 0) {
/* set tsc_scaling_ratio to a safe value */
vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
return -1;
}
/* Compute a scale to convert nanoseconds in TSC cycles */
- kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
+ kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
&vcpu->arch.virtual_tsc_shift,
&vcpu->arch.virtual_tsc_mult);
- vcpu->arch.virtual_tsc_khz = this_tsc_khz;
+ vcpu->arch.virtual_tsc_khz = user_tsc_khz;
/*
* Compute the variation in TSC rate which is acceptable
@@ -1319,11 +1316,11 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
*/
thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
- if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
- pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
+ if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
+ pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
use_scaling = 1;
}
- return set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
+ return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
}
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
@@ -1716,7 +1713,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
static int kvm_guest_time_update(struct kvm_vcpu *v)
{
- unsigned long flags, this_tsc_khz, tgt_tsc_khz;
+ unsigned long flags, tgt_tsc_khz;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
@@ -1742,8 +1739,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
- this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
- if (unlikely(this_tsc_khz == 0)) {
+ tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
+ if (unlikely(tgt_tsc_khz == 0)) {
local_irq_restore(flags);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
return 1;
@@ -1778,13 +1775,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
if (!vcpu->pv_time_enabled)
return 0;
- if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
- tgt_tsc_khz = kvm_has_tsc_control ?
- vcpu->virtual_tsc_khz : this_tsc_khz;
- kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz,
+ if (kvm_has_tsc_control)
+ tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
+
+ if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
+ kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
&vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul);
- vcpu->hw_tsc_khz = this_tsc_khz;
+ vcpu->hw_tsc_khz = tgt_tsc_khz;
}
/* With all the info we got, fill in the values */
@@ -2987,7 +2985,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
- kvm_vcpu_has_lapic(vcpu))
+ lapic_in_kernel(vcpu))
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
@@ -3000,7 +2998,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
else
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
- if (kvm_vcpu_has_lapic(vcpu)) {
+ if (lapic_in_kernel(vcpu)) {
if (events->smi.latched_init)
set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
else
@@ -3240,7 +3238,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
switch (ioctl) {
case KVM_GET_LAPIC: {
r = -EINVAL;
- if (!vcpu->arch.apic)
+ if (!lapic_in_kernel(vcpu))
goto out;
u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
@@ -3258,7 +3256,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_LAPIC: {
r = -EINVAL;
- if (!vcpu->arch.apic)
+ if (!lapic_in_kernel(vcpu))
goto out;
u.lapic = memdup_user(argp, sizeof(*u.lapic));
if (IS_ERR(u.lapic))
@@ -3605,20 +3603,26 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
{
- mutex_lock(&kvm->arch.vpit->pit_state.lock);
- memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
- mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
+
+ BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
+
+ mutex_lock(&kps->lock);
+ memcpy(ps, &kps->channels, sizeof(*ps));
+ mutex_unlock(&kps->lock);
return 0;
}
static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
{
int i;
- mutex_lock(&kvm->arch.vpit->pit_state.lock);
- memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
+ struct kvm_pit *pit = kvm->arch.vpit;
+
+ mutex_lock(&pit->pit_state.lock);
+ memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
for (i = 0; i < 3; i++)
- kvm_pit_load_count(kvm, i, ps->channels[i].count, 0);
- mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
+ mutex_unlock(&pit->pit_state.lock);
return 0;
}
@@ -3638,29 +3642,39 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
int start = 0;
int i;
u32 prev_legacy, cur_legacy;
- mutex_lock(&kvm->arch.vpit->pit_state.lock);
- prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
+ struct kvm_pit *pit = kvm->arch.vpit;
+
+ mutex_lock(&pit->pit_state.lock);
+ prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
if (!prev_legacy && cur_legacy)
start = 1;
- memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
- sizeof(kvm->arch.vpit->pit_state.channels));
- kvm->arch.vpit->pit_state.flags = ps->flags;
+ memcpy(&pit->pit_state.channels, &ps->channels,
+ sizeof(pit->pit_state.channels));
+ pit->pit_state.flags = ps->flags;
for (i = 0; i < 3; i++)
- kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count,
+ kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
start && i == 0);
- mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ mutex_unlock(&pit->pit_state.lock);
return 0;
}
static int kvm_vm_ioctl_reinject(struct kvm *kvm,
struct kvm_reinject_control *control)
{
- if (!kvm->arch.vpit)
+ struct kvm_pit *pit = kvm->arch.vpit;
+
+ if (!pit)
return -ENXIO;
- mutex_lock(&kvm->arch.vpit->pit_state.lock);
- kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
- mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+
+ /* pit->pit_state.lock was overloaded to prevent userspace from getting
+ * an inconsistent state after running multiple KVM_REINJECT_CONTROL
+ * ioctls in parallel. Use a separate lock if that ioctl isn't rare.
+ */
+ mutex_lock(&pit->pit_state.lock);
+ kvm_pit_set_reinject(pit, control->pit_reinject);
+ mutex_unlock(&pit->pit_state.lock);
+
return 0;
}
@@ -4093,7 +4107,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
do {
n = min(len, 8);
- if (!(vcpu->arch.apic &&
+ if (!(lapic_in_kernel(vcpu) &&
!kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
&& kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
@@ -4113,7 +4127,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
do {
n = min(len, 8);
- if (!(vcpu->arch.apic &&
+ if (!(lapic_in_kernel(vcpu) &&
!kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
@@ -4346,7 +4360,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
if (ret < 0)
return 0;
- kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+ kvm_page_track_write(vcpu, gpa, val, bytes);
return 1;
}
@@ -4604,7 +4618,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CMPXCHG_FAILED;
kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
- kvm_mmu_pte_write(vcpu, gpa, new, bytes);
+ kvm_page_track_write(vcpu, gpa, new, bytes);
return X86EMUL_CONTINUE;
@@ -6010,7 +6024,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
if (!kvm_x86_ops->update_cr8_intercept)
return;
- if (!vcpu->arch.apic)
+ if (!lapic_in_kernel(vcpu))
return;
if (vcpu->arch.apicv_active)
@@ -7038,7 +7052,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- if (!kvm_vcpu_has_lapic(vcpu) &&
+ if (!lapic_in_kernel(vcpu) &&
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
return -EINVAL;
@@ -7314,7 +7328,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
* Every 255 times fpu_counter rolls over to 0; a guest that uses
* the FPU in bursts will revert to loading it on demand.
*/
- if (!vcpu->arch.eager_fpu) {
+ if (!use_eager_fpu()) {
if (++vcpu->fpu_counter < 5)
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
}
@@ -7593,6 +7607,7 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
}
struct static_key kvm_no_apic_vcpu __read_mostly;
+EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
@@ -7724,6 +7739,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
+ kvm_page_track_init(kvm);
+ kvm_mmu_init_vm(kvm);
+
return 0;
}
@@ -7850,6 +7868,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+ kvm_mmu_uninit_vm(kvm);
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -7871,6 +7890,8 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
free->arch.lpage_info[i - 1] = NULL;
}
}
+
+ kvm_page_track_free_memslot(free, dont);
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
@@ -7879,6 +7900,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+ struct kvm_lpage_info *linfo;
unsigned long ugfn;
int lpages;
int level = i + 1;
@@ -7893,15 +7915,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
if (i == 0)
continue;
- slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
- sizeof(*slot->arch.lpage_info[i - 1]));
- if (!slot->arch.lpage_info[i - 1])
+ linfo = kvm_kvzalloc(lpages * sizeof(*linfo));
+ if (!linfo)
goto out_free;
+ slot->arch.lpage_info[i - 1] = linfo;
+
if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
- slot->arch.lpage_info[i - 1][0].write_count = 1;
+ linfo[0].disallow_lpage = 1;
if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
- slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
+ linfo[lpages - 1].disallow_lpage = 1;
ugfn = slot->userspace_addr >> PAGE_SHIFT;
/*
* If the gfn and userspace address are not aligned wrt each
@@ -7913,10 +7936,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long j;
for (j = 0; j < lpages; ++j)
- slot->arch.lpage_info[i - 1][j].write_count = 1;
+ linfo[j].disallow_lpage = 1;
}
}
+ if (kvm_page_track_create_memslot(slot, npages))
+ goto out_free;
+
return 0;
out_free:
@@ -8370,6 +8396,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
}
+bool kvm_vector_hashing_enabled(void)
+{
+ return vector_hashing;
+}
+EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f2afa5fe48a6..007940faa5c6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
+bool kvm_vector_hashing_enabled(void);
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
@@ -192,4 +193,19 @@ extern unsigned int min_timer_period_us;
extern unsigned int lapic_timer_advance_ns;
extern struct static_key kvm_no_apic_vcpu;
+
+/* Same "calling convention" as do_div:
+ * - divide (n << 32) by base
+ * - put result in n
+ * - return remainder
+ */
+#define do_shl32_div32(n, base) \
+ ({ \
+ u32 __quot, __rem; \
+ asm("divl %2" : "=a" (__quot), "=d" (__rem) \
+ : "rm" (base), "0" (0), "1" ((u32) n)); \
+ n = __quot; \
+ __rem; \
+ })
+
#endif
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4ba229ac3f4f..fd57d3ae7e16 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1520,12 +1520,6 @@ __init void lguest_init(void)
*/
reserve_top_address(lguest_data.reserve_mem);
- /*
- * If we don't initialize the lock dependency checker now, it crashes
- * atomic_notifier_chain_register, then paravirt_disable_iospace.
- */
- lockdep_init();
-
/* Hook in our special panic hypercall code. */
atomic_notifier_chain_register(&panic_notifier_list, &paniced);
@@ -1535,7 +1529,7 @@ __init void lguest_init(void)
*/
cpu_detect(&new_cpu_data);
/* head.S usually sets up the first capability word, so do it here. */
- new_cpu_data.x86_capability[0] = cpuid_edx(1);
+ new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
/* Math is always hard! */
set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a2fe51b00cce..65be7cfaf947 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,5 @@
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 422db000d727..5cc78bf57232 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -21,12 +21,16 @@ static inline int myisspace(u8 c)
* @option: option string to look for
*
* Returns the position of that @option (starts counting with 1)
- * or 0 on not found.
+ * or 0 on not found. @option will only be found if it is found
+ * as an entire word in @cmdline. For instance, if @option="car"
+ * then a cmdline which contains "cart" will not match.
*/
-int cmdline_find_option_bool(const char *cmdline, const char *option)
+static int
+__cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
+ const char *option)
{
char c;
- int len, pos = 0, wstart = 0;
+ int pos = 0, wstart = 0;
const char *opptr = NULL;
enum {
st_wordstart = 0, /* Start of word/after whitespace */
@@ -37,11 +41,11 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
if (!cmdline)
return -1; /* No command line */
- len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE);
- if (!len)
- return 0;
-
- while (len--) {
+ /*
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
+ */
+ while (pos < max_cmdline_size) {
c = *(char *)cmdline++;
pos++;
@@ -58,18 +62,35 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
/* fall through */
case st_wordcmp:
- if (!*opptr)
+ if (!*opptr) {
+ /*
+ * We matched all the way to the end of the
+ * option we were looking for. If the
+ * command-line has a space _or_ ends, then
+ * we matched!
+ */
if (!c || myisspace(c))
return wstart;
- else
- state = st_wordskip;
- else if (!c)
+ /*
+ * We hit the end of the option, but _not_
+ * the end of a word on the cmdline. Not
+ * a match.
+ */
+ } else if (!c) {
+ /*
+ * Hit the NULL terminator on the end of
+ * cmdline.
+ */
return 0;
- else if (c != *opptr++)
- state = st_wordskip;
- else if (!len) /* last word and is matching */
- return wstart;
- break;
+ } else if (c == *opptr++) {
+ /*
+ * We are currently matching, so continue
+ * to the next character on the cmdline.
+ */
+ break;
+ }
+ state = st_wordskip;
+ /* fall through */
case st_wordskip:
if (!c)
@@ -82,3 +103,8 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
return 0; /* Buffer overrun */
}
+
+int cmdline_find_option_bool(const char *cmdline, const char *option)
+{
+ return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+}
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 009f98216b7e..24ef1c2104d4 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,7 +1,7 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 27f89c79a44b..2b0ef26da0bd 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -10,7 +10,7 @@
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e912b2f6d36e..2f07c291dcc8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -102,7 +102,7 @@ static void delay_mwaitx(unsigned long __loops)
* Use cpu_tss as a cacheline-aligned, seldomly
* accessed per-cpu variable as the monitor target.
*/
- __monitorx(this_cpu_ptr(&cpu_tss), 0, 0);
+ __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
/*
* AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 16698bba87de..cbb8ee5830ff 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,7 +1,7 @@
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
@@ -177,3 +177,120 @@ ENTRY(memcpy_orig)
.Lend:
retq
ENDPROC(memcpy_orig)
+
+#ifndef CONFIG_UML
+/*
+ * memcpy_mcsafe - memory copy with machine check exception handling
+ * Note that we only catch machine checks when reading the source addresses.
+ * Writes to target are posted and don't generate machine checks.
+ */
+ENTRY(memcpy_mcsafe)
+ cmpl $8, %edx
+ /* Less than 8 bytes? Go to byte copy loop */
+ jb .L_no_whole_words
+
+ /* Check for bad alignment of source */
+ testl $7, %esi
+ /* Already aligned */
+ jz .L_8byte_aligned
+
+ /* Copy one byte at a time until source is 8-byte aligned */
+ movl %esi, %ecx
+ andl $7, %ecx
+ subl $8, %ecx
+ negl %ecx
+ subl %ecx, %edx
+.L_copy_leading_bytes:
+ movb (%rsi), %al
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_copy_leading_bytes
+
+.L_8byte_aligned:
+ /* Figure out how many whole cache lines (64-bytes) to copy */
+ movl %edx, %ecx
+ andl $63, %edx
+ shrl $6, %ecx
+ jz .L_no_whole_cache_lines
+
+ /* Loop copying whole cache lines */
+.L_cache_w0: movq (%rsi), %r8
+.L_cache_w1: movq 1*8(%rsi), %r9
+.L_cache_w2: movq 2*8(%rsi), %r10
+.L_cache_w3: movq 3*8(%rsi), %r11
+ movq %r8, (%rdi)
+ movq %r9, 1*8(%rdi)
+ movq %r10, 2*8(%rdi)
+ movq %r11, 3*8(%rdi)
+.L_cache_w4: movq 4*8(%rsi), %r8
+.L_cache_w5: movq 5*8(%rsi), %r9
+.L_cache_w6: movq 6*8(%rsi), %r10
+.L_cache_w7: movq 7*8(%rsi), %r11
+ movq %r8, 4*8(%rdi)
+ movq %r9, 5*8(%rdi)
+ movq %r10, 6*8(%rdi)
+ movq %r11, 7*8(%rdi)
+ leaq 64(%rsi), %rsi
+ leaq 64(%rdi), %rdi
+ decl %ecx
+ jnz .L_cache_w0
+
+ /* Are there any trailing 8-byte words? */
+.L_no_whole_cache_lines:
+ movl %edx, %ecx
+ andl $7, %edx
+ shrl $3, %ecx
+ jz .L_no_whole_words
+
+ /* Copy trailing words */
+.L_copy_trailing_words:
+ movq (%rsi), %r8
+ mov %r8, (%rdi)
+ leaq 8(%rsi), %rsi
+ leaq 8(%rdi), %rdi
+ decl %ecx
+ jnz .L_copy_trailing_words
+
+ /* Any trailing bytes? */
+.L_no_whole_words:
+ andl %edx, %edx
+ jz .L_done_memcpy_trap
+
+ /* Copy trailing bytes */
+ movl %edx, %ecx
+.L_copy_trailing_bytes:
+ movb (%rsi), %al
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_copy_trailing_bytes
+
+ /* Copy successful. Return true */
+.L_done_memcpy_trap:
+ xorq %rax, %rax
+ ret
+ENDPROC(memcpy_mcsafe)
+
+ .section .fixup, "ax"
+ /* Return false for any failure */
+.L_memcpy_mcsafe_fail:
+ mov $1, %rax
+ ret
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+#endif
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index ca2afdd6d98e..90ce01bee00c 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -6,7 +6,7 @@
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#undef memmove
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2661fad05827..c9c81227ea37 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -1,7 +1,7 @@
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
.weak memset
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 4a6f1d9b5106..99bfb192803f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
-#ifdef CONFIG_X86_64
static inline bool is_hypervisor_range(int idx)
{
+#ifdef CONFIG_X86_64
/*
* ffff800000000000 - ffff87ffffffffff is reserved for
* the hypervisor.
*/
- return paravirt_enabled() &&
- (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
- (idx < pgd_index(__PAGE_OFFSET));
-}
+ return (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+ (idx < pgd_index(__PAGE_OFFSET));
#else
-static inline bool is_hypervisor_range(int idx) { return false; }
+ return false;
#endif
+}
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e9c326..9dd7e4b7fcde 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -3,6 +3,9 @@
#include <linux/sort.h>
#include <asm/uaccess.h>
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+ struct pt_regs *, int);
+
static inline unsigned long
ex_insn_addr(const struct exception_table_entry *x)
{
@@ -13,11 +16,56 @@ ex_fixup_addr(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
}
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
+{
+ return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
-int fixup_exception(struct pt_regs *regs)
+bool ex_handler_default(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
- const struct exception_table_entry *fixup;
- unsigned long new_ip;
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_default);
+
+bool ex_handler_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+ return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fault);
+
+bool ex_handler_ext(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ /* Special hack for uaccess_err */
+ current_thread_info()->uaccess_err = 1;
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_ext);
+
+bool ex_has_fault_handler(unsigned long ip)
+{
+ const struct exception_table_entry *e;
+ ex_handler_t handler;
+
+ e = search_exception_tables(ip);
+ if (!e)
+ return false;
+ handler = ex_fixup_handler(e);
+
+ return handler == ex_handler_fault;
+}
+
+int fixup_exception(struct pt_regs *regs, int trapnr)
+{
+ const struct exception_table_entry *e;
+ ex_handler_t handler;
#ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -33,42 +81,34 @@ int fixup_exception(struct pt_regs *regs)
}
#endif
- fixup = search_exception_tables(regs->ip);
- if (fixup) {
- new_ip = ex_fixup_addr(fixup);
-
- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
- /* Special hack for uaccess_err */
- current_thread_info()->uaccess_err = 1;
- new_ip -= 0x7ffffff0;
- }
- regs->ip = new_ip;
- return 1;
- }
+ e = search_exception_tables(regs->ip);
+ if (!e)
+ return 0;
- return 0;
+ handler = ex_fixup_handler(e);
+ return handler(e, regs, trapnr);
}
/* Restricted version used during very early boot */
int __init early_fixup_exception(unsigned long *ip)
{
- const struct exception_table_entry *fixup;
+ const struct exception_table_entry *e;
unsigned long new_ip;
+ ex_handler_t handler;
- fixup = search_exception_tables(*ip);
- if (fixup) {
- new_ip = ex_fixup_addr(fixup);
+ e = search_exception_tables(*ip);
+ if (!e)
+ return 0;
- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
- /* uaccess handling not supported during early boot */
- return 0;
- }
+ new_ip = ex_fixup_addr(e);
+ handler = ex_fixup_handler(e);
- *ip = new_ip;
- return 1;
- }
+ /* special handling not supported during early boot */
+ if (handler != ex_handler_default)
+ return 0;
- return 0;
+ *ip = new_ip;
+ return 1;
}
/*
@@ -133,6 +173,8 @@ void sort_extable(struct exception_table_entry *start,
i += 4;
p->fixup += i;
i += 4;
+ p->handler += i;
+ i += 4;
}
sort(start, finish - start, sizeof(struct exception_table_entry),
@@ -145,6 +187,8 @@ void sort_extable(struct exception_table_entry *start,
i += 4;
p->fixup -= i;
i += 4;
+ p->handler -= i;
+ i += 4;
}
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e830c71a1323..03898aea6e0f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -663,7 +663,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
int sig;
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs)) {
+ if (fixup_exception(regs, X86_TRAP_PF)) {
/*
* Any interrupt that takes a fault gets the fixup. This makes
* the below recursive fault logic only apply to a faults from
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index d8a798d8bf50..f8d0b5e8bdfd 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -131,7 +131,7 @@ static inline void get_head_page_multiple(struct page *page, int nr)
{
VM_BUG_ON_PAGE(page != compound_head(page), page);
VM_BUG_ON_PAGE(page_count(page) == 0, page);
- atomic_add(nr, &page->_count);
+ page_ref_add(page, nr);
SetPageReferenced(page);
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 493f54172b4a..9d56f271d519 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -150,13 +150,14 @@ static int page_size_mask;
static void __init probe_page_size_mask(void)
{
-#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
+#if !defined(CONFIG_KMEMCHECK)
/*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
+ * use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
- if (cpu_has_pse)
+ if (cpu_has_pse && !debug_pagealloc_enabled())
page_size_mask |= 1 << PG_LEVEL_2M;
#endif
@@ -666,21 +667,22 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
* mark them not present - any buggy init-section access will
* create a kernel page fault:
*/
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n",
- begin, end - 1);
- set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
-#else
- /*
- * We just marked the kernel text read only above, now that
- * we are going to free part of that, we need to make that
- * writeable and non-executable first.
- */
- set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
- set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
+ if (debug_pagealloc_enabled()) {
+ pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
+ begin, end - 1);
+ set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+ } else {
+ /*
+ * We just marked the kernel text read only above, now that
+ * we are going to free part of that, we need to make that
+ * writeable and non-executable first.
+ */
+ set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
+ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
- free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what);
-#endif
+ free_reserved_area((void *)begin, (void *)end,
+ POISON_FREE_INITMEM, what);
+ }
}
void free_initmem(void)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cb4ef3de61f9..bd7a9b9e2e14 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -388,7 +388,6 @@ repeat:
}
pte_t *kmap_pte;
-pgprot_t kmap_prot;
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
@@ -405,8 +404,6 @@ static void __init kmap_init(void)
*/
kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
- kmap_prot = PAGE_KERNEL;
}
#ifdef CONFIG_HIGHMEM
@@ -871,7 +868,6 @@ static noinline int do_test_wp_bit(void)
return flag;
}
-#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
@@ -960,5 +956,3 @@ void mark_rodata_ro(void)
if (__supported_pte_mask & _PAGE_NX)
debug_checkwx();
}
-#endif
-
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5488d21123bd..214afda97911 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
#include <asm/numa.h>
#include <asm/cacheflush.h>
#include <asm/init.h>
+#include <asm/uv/uv.h>
#include <asm/setup.h>
#include "mm_internal.h"
@@ -1074,7 +1075,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
}
-#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
@@ -1166,8 +1166,6 @@ void mark_rodata_ro(void)
debug_checkwx();
}
-#endif
-
int kern_addr_valid(unsigned long addr)
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1206,26 +1204,13 @@ int kern_addr_valid(unsigned long addr)
static unsigned long probe_memory_block_size(void)
{
- /* start from 2g */
- unsigned long bz = 1UL<<31;
-
- if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) {
- pr_info("Using 2GB memory block size for large-memory system\n");
- return 2UL * 1024 * 1024 * 1024;
- }
-
- /* less than 64g installed */
- if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
- return MIN_MEMORY_BLOCK_SIZE;
+ unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
- /* get the tail size */
- while (bz > MIN_MEMORY_BLOCK_SIZE) {
- if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
- break;
- bz >>= 1;
- }
+ /* if system is UV or has 64GB of RAM or more, use large blocks */
+ if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
+ bz = 2UL << 30; /* 2GB */
- printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+ pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
return bz;
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d470cf219a2d..1b1110fa0057 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -120,11 +120,22 @@ void __init kasan_init(void)
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
(void *)KASAN_SHADOW_END);
- memset(kasan_zero_page, 0, PAGE_SIZE);
-
load_cr3(init_level4_pgt);
__flush_tlb_all();
- init_task.kasan_depth = 0;
+ /*
+ * kasan_zero_page has been used as early shadow memory, thus it may
+ * contain some garbage. Now we can clear and write protect it, since
+ * after the TLB flush no one should write to it.
+ */
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+ set_pte(&kasan_zero_pte[i], pte);
+ }
+ /* Flush TLBs again to be sure that write protection applied. */
+ __flush_tlb_all();
+
+ init_task.kasan_depth = 0;
pr_info("KernelAddressSanitizer initialized\n");
}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34ed632..ddb2244b06a1 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -33,7 +33,7 @@
struct kmmio_fault_page {
struct list_head list;
struct kmmio_fault_page *release_next;
- unsigned long page; /* location of the fault page */
+ unsigned long addr; /* the requested address */
pteval_t old_presence; /* page presence prior to arming */
bool armed;
@@ -70,9 +70,16 @@ unsigned int kmmio_count;
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
{
- return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+
+ return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
}
/* Accessed per-cpu */
@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
}
/* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{
struct list_head *head;
struct kmmio_fault_page *f;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
- page &= PAGE_MASK;
- head = kmmio_page_list(page);
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+ head = kmmio_page_list(addr);
list_for_each_entry_rcu(f, head, list) {
- if (f->page == page)
+ if (f->addr == addr)
return f;
}
return NULL;
@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
unsigned int level;
- pte_t *pte = lookup_address(f->page, &level);
+ pte_t *pte = lookup_address(f->addr, &level);
if (!pte) {
- pr_err("no pte for page 0x%08lx\n", f->page);
+ pr_err("no pte for addr 0x%08lx\n", f->addr);
return -1;
}
@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
return -1;
}
- __flush_tlb_one(f->page);
+ __flush_tlb_one(f->addr);
return 0;
}
@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
int ret;
WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) {
- pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
- f->page, f->count, !!f->old_presence);
+ pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+ f->addr, f->count, !!f->old_presence);
}
ret = clear_page_presence(f, true);
- WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
- f->page);
+ WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+ f->addr);
f->armed = true;
return ret;
}
@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
int ret = clear_page_presence(f, false);
WARN_ONCE(ret < 0,
- KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+ KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
f->armed = false;
}
@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
struct kmmio_context *ctx;
struct kmmio_fault_page *faultpage;
int ret = 0; /* default to fault not handled */
+ unsigned long page_base = addr;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+ if (!pte)
+ return -EINVAL;
+ page_base &= page_level_mask(l);
/*
* Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
preempt_disable();
rcu_read_lock();
- faultpage = get_kmmio_fault_page(addr);
+ faultpage = get_kmmio_fault_page(page_base);
if (!faultpage) {
/*
* Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) {
- if (addr == ctx->addr) {
+ if (page_base == ctx->addr) {
/*
* A second fault on the same page means some other
* condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx->active++;
ctx->fpage = faultpage;
- ctx->probe = get_kmmio_probe(addr);
+ ctx->probe = get_kmmio_probe(page_base);
ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
- ctx->addr = addr;
+ ctx->addr = page_base;
if (ctx->probe && ctx->probe->pre_handler)
ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@ out:
}
/* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
{
struct kmmio_fault_page *f;
- page &= PAGE_MASK;
- f = get_kmmio_fault_page(page);
+ f = get_kmmio_fault_page(addr);
if (f) {
if (!f->count)
arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page)
return -1;
f->count = 1;
- f->page = page;
+ f->addr = addr;
if (arm_kmmio_fault_page(f)) {
kfree(f);
return -1;
}
- list_add_rcu(&f->list, kmmio_page_list(f->page));
+ list_add_rcu(&f->list, kmmio_page_list(f->addr));
return 0;
}
/* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
struct kmmio_fault_page **release_list)
{
struct kmmio_fault_page *f;
- page &= PAGE_MASK;
- f = get_kmmio_fault_page(page);
+ f = get_kmmio_fault_page(addr);
if (!f)
return;
@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p)
int ret = 0;
unsigned long size = 0;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+ unsigned int l;
+ pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
if (get_kmmio_probe(p->addr)) {
ret = -EEXIST;
goto out;
}
+
+ pte = lookup_address(p->addr, &l);
+ if (!pte) {
+ ret = -EINVAL;
+ goto out;
+ }
+
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
if (add_kmmio_fault_page(p->addr + size))
pr_err("Unable to set page fault.\n");
- size += PAGE_SIZE;
+ size += page_level_size(l);
}
out:
spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
+ unsigned int l;
+ pte_t *pte;
+
+ pte = lookup_address(p->addr, &l);
+ if (!pte)
+ return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
release_kmmio_fault_page(p->addr + size, &release_list);
- size += PAGE_SIZE;
+ size += page_level_size(l);
}
list_del_rcu(&p->list);
kmmio_count--;
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 72bb52f93c3d..d2dc0438d654 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
}
/*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
- if (mmap_is_ia32())
- return TASK_UNMAPPED_BASE;
- else
- return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
- mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+ mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
if (mmap_is_legacy()) {
mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index d04f8094bc23..f70c1ff46125 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
return true;
}
+/*
+ * Mark all currently memblock-reserved physical memory (which covers the
+ * kernel's own memory ranges) as hot-unswappable.
+ */
static void __init numa_clear_kernel_node_hotplug(void)
{
- int i, nid;
- nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
- phys_addr_t start, end;
- struct memblock_region *r;
+ nodemask_t reserved_nodemask = NODE_MASK_NONE;
+ struct memblock_region *mb_region;
+ int i;
/*
+ * We have to do some preprocessing of memblock regions, to
+ * make them suitable for reservation.
+ *
* At this time, all memory regions reserved by memblock are
- * used by the kernel. Set the nid in memblock.reserved will
- * mark out all the nodes the kernel resides in.
+ * used by the kernel, but those regions are not split up
+ * along node boundaries yet, and don't necessarily have their
+ * node ID set yet either.
+ *
+ * So iterate over all memory known to the x86 architecture,
+ * and use those ranges to set the nid in memblock.reserved.
+ * This will split up the memblock regions along node
+ * boundaries and will set the node IDs as well.
*/
for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = &numa_meminfo.blk[i];
+ struct numa_memblk *mb = numa_meminfo.blk + i;
+ int ret;
- memblock_set_node(mb->start, mb->end - mb->start,
- &memblock.reserved, mb->nid);
+ ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
+ WARN_ON_ONCE(ret);
}
/*
- * Mark all kernel nodes.
+ * Now go over all reserved memblock regions, to construct a
+ * node mask of all kernel reserved memory areas.
*
- * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
- * may not include all the memblock.reserved memory ranges because
- * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+ * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+ * numa_meminfo might not include all memblock.reserved
+ * memory ranges, because quirks such as trim_snb_memory()
+ * reserve specific pages for Sandy Bridge graphics. ]
*/
- for_each_memblock(reserved, r)
- if (r->nid != MAX_NUMNODES)
- node_set(r->nid, numa_kernel_nodes);
+ for_each_memblock(reserved, mb_region) {
+ if (mb_region->nid != MAX_NUMNODES)
+ node_set(mb_region->nid, reserved_nodemask);
+ }
- /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+ /*
+ * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
+ * belonging to the reserved node mask.
+ *
+ * Note that this will include memory regions that reside
+ * on nodes that contain kernel memory - entire nodes
+ * become hot-unpluggable:
+ */
for (i = 0; i < numa_meminfo.nr_blks; i++) {
- nid = numa_meminfo.blk[i].nid;
- if (!node_isset(nid, numa_kernel_nodes))
- continue;
+ struct numa_memblk *mb = numa_meminfo.blk + i;
- start = numa_meminfo.blk[i].start;
- end = numa_meminfo.blk[i].end;
+ if (!node_isset(mb->nid, reserved_nodemask))
+ continue;
- memblock_clear_hotplug(start, end - start);
+ memblock_clear_hotplug(mb->start, mb->end - mb->start);
}
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9cf96d82147a..4d0b26253042 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -106,12 +106,6 @@ static inline unsigned long highmap_end_pfn(void)
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-# define debug_pagealloc 1
-#else
-# define debug_pagealloc 0
-#endif
-
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -283,7 +277,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
/*
* Once the kernel maps the text as RO (kernel_set_to_readonly is set),
* kernel text mappings for the large page aligned text, rodata sections
@@ -714,10 +708,10 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
{
struct page *base;
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_unlock(&cpa_lock);
base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_lock(&cpa_lock);
if (!base)
return -ENOMEM;
@@ -1128,8 +1122,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
/*
* Ignore all non primary paths.
*/
- if (!primary)
+ if (!primary) {
+ cpa->numpages = 1;
return 0;
+ }
/*
* Ignore the NULL PTE for kernel identity mapping, as it is expected
@@ -1337,10 +1333,10 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
cpa->numpages = 1;
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_lock(&cpa_lock);
ret = __change_page_attr(cpa, checkalias);
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_unlock(&cpa_lock);
if (ret)
return ret;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f4ae536b0914..04e2e7144bee 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return -EINVAL;
}
- *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
@@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
/* Set prot based on lookup */
pcm = lookup_memtype(pfn_t_to_phys(pfn));
- *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 92e2eacb3321..8bea84724a7d 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -4,6 +4,7 @@
#include <asm/pgtable.h>
#include <asm/proto.h>
+#include <asm/cpufeature.h>
static int disable_nx;
@@ -31,9 +32,8 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void)
{
- if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
- __supported_pte_mask |= _PAGE_NX;
- else
+ /* If disable_nx is set, clear NX on all new mappings going forward. */
+ if (disable_nx)
__supported_pte_mask &= ~_PAGE_NX;
}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 50d86c0e9ba4..660a83c8287b 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -24,7 +24,6 @@
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>
-#include <asm/cpufeature.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index d34b5118b4e8..381a43c40bf7 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -12,7 +12,6 @@
#include <linux/dmi.h>
#include <linux/slab.h>
-#include <asm-generic/pci-bridge.h>
#include <asm/acpi.h>
#include <asm/segment.h>
#include <asm/io.h>
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index e58565556703..b7de1929714b 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -297,14 +297,14 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r
*
* The standard boot ROM sequence for an x86 machine uses the BIOS
* to select an initial video card for boot display. This boot video
- * card will have it's BIOS copied to C0000 in system RAM.
+ * card will have its BIOS copied to 0xC0000 in system RAM.
* IORESOURCE_ROM_SHADOW is used to associate the boot video
* card with this copy. On laptops this copy has to be used since
* the main ROM may be compressed or combined with another image.
* See pci_map_rom() for use of this flag. Before marking the device
* with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set
- * by either arch cde or vga-arbitration, if so only apply the fixup to this
- * already determined primary video card.
+ * by either arch code or vga-arbitration; if so only apply the fixup to this
+ * already-determined primary video card.
*/
static void pci_fixup_video(struct pci_dev *pdev)
@@ -312,6 +312,7 @@ static void pci_fixup_video(struct pci_dev *pdev)
struct pci_dev *bridge;
struct pci_bus *bus;
u16 config;
+ struct resource *res;
/* Is VGA routed to us? */
bus = pdev->bus;
@@ -336,8 +337,18 @@ static void pci_fixup_video(struct pci_dev *pdev)
if (!vga_default_device() || pdev == vga_default_device()) {
pci_read_config_word(pdev, PCI_COMMAND, &config);
if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
- pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
- dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n");
+ res = &pdev->resource[PCI_ROM_RESOURCE];
+
+ pci_disable_rom(pdev);
+ if (res->parent)
+ release_resource(res);
+
+ res->start = 0xC0000;
+ res->end = res->start + 0x20000 - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
+ IORESOURCE_PCI_FIXED;
+ dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n",
+ res);
}
}
}
@@ -540,3 +551,10 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev)
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
+
+static void pci_bdwep_bar(struct pci_dev *dev)
+{
+ dev->non_compliant_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar);
diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c
index d57e48016f15..7792aba266df 100644
--- a/arch/x86/pci/vmd.c
+++ b/arch/x86/pci/vmd.c
@@ -503,6 +503,18 @@ static struct pci_ops vmd_ops = {
.write = vmd_pci_write,
};
+static void vmd_attach_resources(struct vmd_dev *vmd)
+{
+ vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
+ vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
+}
+
+static void vmd_detach_resources(struct vmd_dev *vmd)
+{
+ vmd->dev->resource[VMD_MEMBAR1].child = NULL;
+ vmd->dev->resource[VMD_MEMBAR2].child = NULL;
+}
+
/*
* VMD domains start at 0x1000 to not clash with ACPI _SEG domains.
*/
@@ -527,11 +539,28 @@ static int vmd_enable_domain(struct vmd_dev *vmd)
res = &vmd->dev->resource[VMD_CFGBAR];
vmd->resources[0] = (struct resource) {
.name = "VMD CFGBAR",
- .start = res->start,
+ .start = 0,
.end = (resource_size(res) >> 20) - 1,
.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
};
+ /*
+ * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
+ * put 32-bit resources in the window.
+ *
+ * There's no hardware reason why a 64-bit window *couldn't*
+ * contain a 32-bit resource, but pbus_size_mem() computes the
+ * bridge window size assuming a 64-bit window will contain no
+ * 32-bit resources. __pci_assign_resource() enforces that
+ * artificial restriction to make sure everything will fit.
+ *
+ * The only way we could use a 64-bit non-prefechable MEMBAR is
+ * if its address is <4GB so that we can convert it to a 32-bit
+ * resource. To be visible to the host OS, all VMD endpoints must
+ * be initially configured by platform BIOS, which includes setting
+ * up these resources. We can assume the device is configured
+ * according to the platform needs.
+ */
res = &vmd->dev->resource[VMD_MEMBAR1];
upper_bits = upper_32_bits(res->end);
flags = res->flags & ~IORESOURCE_SIZEALIGN;
@@ -542,6 +571,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd)
.start = res->start,
.end = res->end,
.flags = flags,
+ .parent = res,
};
res = &vmd->dev->resource[VMD_MEMBAR2];
@@ -554,6 +584,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd)
.start = res->start + 0x2000,
.end = res->end,
.flags = flags,
+ .parent = res,
};
sd->domain = vmd_find_free_domain();
@@ -578,6 +609,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd)
return -ENODEV;
}
+ vmd_attach_resources(vmd);
vmd_setup_dma_ops(vmd);
dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
pci_rescan_bus(vmd->bus);
@@ -674,6 +706,7 @@ static void vmd_remove(struct pci_dev *dev)
{
struct vmd_dev *vmd = pci_get_drvdata(dev);
+ vmd_detach_resources(vmd);
pci_set_drvdata(dev, NULL);
sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
pci_stop_root_bus(vmd->bus);
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 2d66db8f80f9..ed30e79347e8 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -131,6 +131,27 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
EXPORT_SYMBOL_GPL(efi_query_variable_store);
/*
+ * Helper function for efi_reserve_boot_services() to figure out if we
+ * can free regions in efi_free_boot_services().
+ *
+ * Use this function to ensure we do not free regions owned by somebody
+ * else. We must only reserve (and then free) regions:
+ *
+ * - Not within any part of the kernel
+ * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc)
+ */
+static bool can_free_region(u64 start, u64 size)
+{
+ if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
+ return false;
+
+ if (!e820_all_mapped(start, start+size, E820_RAM))
+ return false;
+
+ return true;
+}
+
+/*
* The UEFI specification makes it clear that the operating system is free to do
* whatever it wants with boot services code after ExitBootServices() has been
* called. Ignoring this recommendation a significant bunch of EFI implementations
@@ -147,26 +168,50 @@ void __init efi_reserve_boot_services(void)
efi_memory_desc_t *md = p;
u64 start = md->phys_addr;
u64 size = md->num_pages << EFI_PAGE_SHIFT;
+ bool already_reserved;
if (md->type != EFI_BOOT_SERVICES_CODE &&
md->type != EFI_BOOT_SERVICES_DATA)
continue;
- /* Only reserve where possible:
- * - Not within any already allocated areas
- * - Not over any memory area (really needed, if above?)
- * - Not within any part of the kernel
- * - Not the bios reserved area
- */
- if ((start + size > __pa_symbol(_text)
- && start <= __pa_symbol(_end)) ||
- !e820_all_mapped(start, start+size, E820_RAM) ||
- memblock_is_region_reserved(start, size)) {
- /* Could not reserve, skip it */
- md->num_pages = 0;
- memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n",
- start, start+size-1);
- } else
+
+ already_reserved = memblock_is_region_reserved(start, size);
+
+ /*
+ * Because the following memblock_reserve() is paired
+ * with free_bootmem_late() for this region in
+ * efi_free_boot_services(), we must be extremely
+ * careful not to reserve, and subsequently free,
+ * critical regions of memory (like the kernel image) or
+ * those regions that somebody else has already
+ * reserved.
+ *
+ * A good example of a critical region that must not be
+ * freed is page zero (first 4Kb of memory), which may
+ * contain boot services code/data but is marked
+ * E820_RESERVED by trim_bios_range().
+ */
+ if (!already_reserved) {
memblock_reserve(start, size);
+
+ /*
+ * If we are the first to reserve the region, no
+ * one else cares about it. We own it and can
+ * free it later.
+ */
+ if (can_free_region(start, size))
+ continue;
+ }
+
+ /*
+ * We don't own the region. We must not free it.
+ *
+ * Setting this bit for a boot services region really
+ * doesn't make sense as far as the firmware is
+ * concerned, but it does provide us with a way to tag
+ * those regions that must not be paired with
+ * free_bootmem_late().
+ */
+ md->attribute |= EFI_MEMORY_RUNTIME;
}
}
@@ -183,8 +228,8 @@ void __init efi_free_boot_services(void)
md->type != EFI_BOOT_SERVICES_DATA)
continue;
- /* Could not reserve boot area */
- if (!size)
+ /* Do not free, someone else owns it: */
+ if (md->attribute & EFI_MEMORY_RUNTIME)
continue;
free_bootmem_late(start, size);
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 76b6632d3143..1865c196f136 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -21,7 +21,7 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
@@ -35,6 +35,11 @@
#define BIOS_SIGNATURE_COREBOOT 0x500
#define BIOS_REGION_SIZE 0x10000
+/*
+ * This driver is not modular, but to keep back compatibility
+ * with existing use cases, continuing with module_param is
+ * the easiest way forward.
+ */
static bool force = 0;
module_param(force, bool, 0444);
/* FIXME: Award bios is not automatically detected as Alix platform */
@@ -192,9 +197,4 @@ static int __init alix_init(void)
return 0;
}
-
-module_init(alix_init);
-
-MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>");
-MODULE_DESCRIPTION("PCEngines ALIX System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(alix_init);
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index aa733fba2471..4fcdb91318a0 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -19,7 +19,6 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
@@ -120,9 +119,4 @@ static int __init geos_init(void)
return 0;
}
-
-module_init(geos_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(geos_init);
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 927e38c0089f..a2f6b982a729 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -20,7 +20,6 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
@@ -146,9 +145,4 @@ static int __init net5501_init(void)
return 0;
}
-
-module_init(net5501_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Soekris net5501 System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(net5501_init);
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index 23381d2174ae..1eb47b6298c2 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -52,10 +52,7 @@ static unsigned long __init mfld_calibrate_tsc(void)
/* mark tsc clocksource as reliable */
set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
- if (fast_calibrate)
- return fast_calibrate;
-
- return 0;
+ return fast_calibrate;
}
static void __init penwell_arch_setup(void)
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c
index aaca91753d32..bd1adc621781 100644
--- a/arch/x86/platform/intel-mid/mrfl.c
+++ b/arch/x86/platform/intel-mid/mrfl.c
@@ -81,10 +81,7 @@ static unsigned long __init tangier_calibrate_tsc(void)
/* mark tsc clocksource as reliable */
set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
- if (fast_calibrate)
- return fast_calibrate;
-
- return 0;
+ return fast_calibrate;
}
static void __init tangier_arch_setup(void)
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index bfadcd0f4944..17d6d2296e4d 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -1,5 +1,5 @@
/**
- * imr.c
+ * imr.c -- Intel Isolated Memory Region driver
*
* Copyright(c) 2013 Intel Corporation.
* Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -31,7 +31,6 @@
#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/module.h>
#include <linux/types.h>
struct imr_device {
@@ -135,11 +134,9 @@ static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
* @idev: pointer to imr_device structure.
* @imr_id: IMR entry to write.
* @imr: IMR structure representing address and access masks.
- * @lock: indicates if the IMR lock bit should be applied.
* @return: 0 on success or error code passed from mbi_iosf on failure.
*/
-static int imr_write(struct imr_device *idev, u32 imr_id,
- struct imr_regs *imr, bool lock)
+static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
{
unsigned long flags;
u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
@@ -163,15 +160,6 @@ static int imr_write(struct imr_device *idev, u32 imr_id,
if (ret)
goto failed;
- /* Lock bit must be set separately to addr_lo address bits. */
- if (lock) {
- imr->addr_lo |= IMR_LOCK;
- ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE,
- reg - IMR_NUM_REGS, imr->addr_lo);
- if (ret)
- goto failed;
- }
-
local_irq_restore(flags);
return 0;
failed:
@@ -270,17 +258,6 @@ static int imr_debugfs_register(struct imr_device *idev)
}
/**
- * imr_debugfs_unregister - unregister debugfs hooks.
- *
- * @idev: pointer to imr_device structure.
- * @return:
- */
-static void imr_debugfs_unregister(struct imr_device *idev)
-{
- debugfs_remove(idev->file);
-}
-
-/**
* imr_check_params - check passed address range IMR alignment and non-zero size
*
* @base: base address of intended IMR.
@@ -334,11 +311,10 @@ static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr)
* @size: physical size of region in bytes must be aligned to 1KiB.
* @read_mask: read access mask.
* @write_mask: write access mask.
- * @lock: indicates whether or not to permanently lock this region.
* @return: zero on success or negative value indicating error.
*/
int imr_add_range(phys_addr_t base, size_t size,
- unsigned int rmask, unsigned int wmask, bool lock)
+ unsigned int rmask, unsigned int wmask)
{
phys_addr_t end;
unsigned int i;
@@ -411,7 +387,7 @@ int imr_add_range(phys_addr_t base, size_t size,
imr.rmask = rmask;
imr.wmask = wmask;
- ret = imr_write(idev, reg, &imr, lock);
+ ret = imr_write(idev, reg, &imr);
if (ret < 0) {
/*
* In the highly unlikely event iosf_mbi_write failed
@@ -422,7 +398,7 @@ int imr_add_range(phys_addr_t base, size_t size,
imr.addr_hi = 0;
imr.rmask = IMR_READ_ACCESS_ALL;
imr.wmask = IMR_WRITE_ACCESS_ALL;
- imr_write(idev, reg, &imr, false);
+ imr_write(idev, reg, &imr);
}
failed:
mutex_unlock(&idev->lock);
@@ -518,7 +494,7 @@ static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
imr.rmask = IMR_READ_ACCESS_ALL;
imr.wmask = IMR_WRITE_ACCESS_ALL;
- ret = imr_write(idev, reg, &imr, false);
+ ret = imr_write(idev, reg, &imr);
failed:
mutex_unlock(&idev->lock);
@@ -599,7 +575,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
* We don't round up @size since it is already PAGE_SIZE aligned.
* See vmlinux.lds.S for details.
*/
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
if (ret < 0) {
pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
size / 1024, start, end);
@@ -614,7 +590,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
{ X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
{}
};
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
/**
* imr_init - entry point for IMR driver.
@@ -640,22 +615,4 @@ static int __init imr_init(void)
imr_fixup_memmap(idev);
return 0;
}
-
-/**
- * imr_exit - exit point for IMR code.
- *
- * Deregisters debugfs, leave IMR state as-is.
- *
- * return:
- */
-static void __exit imr_exit(void)
-{
- imr_debugfs_unregister(&imr_dev);
-}
-
-module_init(imr_init);
-module_exit(imr_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_init);
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 278e4da4222f..f5bad40936ac 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -1,5 +1,5 @@
/**
- * imr_selftest.c
+ * imr_selftest.c -- Intel Isolated Memory Region self-test driver
*
* Copyright(c) 2013 Intel Corporation.
* Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -15,7 +15,6 @@
#include <asm/imr.h>
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/module.h>
#include <linux/types.h>
#define SELFTEST KBUILD_MODNAME ": "
@@ -61,30 +60,30 @@ static void __init imr_self_test(void)
int ret;
/* Test zero zero. */
- ret = imr_add_range(0, 0, 0, 0, false);
+ ret = imr_add_range(0, 0, 0, 0);
imr_self_test_result(ret < 0, "zero sized IMR\n");
/* Test exact overlap. */
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
/* Test overlap with base inside of existing. */
base += size - IMR_ALIGN;
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
/* Test overlap with end inside of existing. */
base -= size + IMR_ALIGN * 2;
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
/* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
- IMR_WRITE_ACCESS_ALL, false);
+ IMR_WRITE_ACCESS_ALL);
imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
/* Test that a 1 KiB IMR @ zero with CPU only will work. */
- ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU);
imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
if (ret >= 0) {
ret = imr_remove_range(0, IMR_ALIGN);
@@ -93,8 +92,7 @@ static void __init imr_self_test(void)
/* Test 2 KiB works. */
size = IMR_ALIGN * 2;
- ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
- IMR_WRITE_ACCESS_ALL, false);
+ ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL);
imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
if (ret >= 0) {
ret = imr_remove_range(0, size);
@@ -106,7 +104,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
{ X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
{}
};
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
/**
* imr_self_test_init - entry point for IMR driver.
@@ -125,13 +122,4 @@ static int __init imr_self_test_init(void)
*
* return:
*/
-static void __exit imr_self_test_exit(void)
-{
-}
-
-module_init(imr_self_test_init);
-module_exit(imr_self_test_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_self_test_init);
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 174781a404ff..00c319048d52 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -3,7 +3,7 @@
#include <asm/asm.h>
#include <asm/segment.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cmpxchg.h>
#include <asm/nops.h>
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 439c0994b696..bfce503dffae 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,11 +25,11 @@
#define old_mmap sys_old_mmap
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b74ea6c2c0e7..f306413d3eb6 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -35,14 +35,11 @@
#define stub_execveat sys_execveat
#define stub_rt_sigreturn sys_rt_sigreturn
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ce7e3607a870..470564bbd08e 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -9,14 +9,12 @@
#include <asm/types.h>
#ifdef __i386__
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
#else
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_64.h>
};
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d09e4c9d7cc5..2c261082eadf 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1654,7 +1654,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
cpu_detect(&new_cpu_data);
set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
new_cpu_data.wp_works_ok = 1;
- new_cpu_data.x86_capability[0] = cpuid_edx(1);
+ new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
#endif
if (xen_start_info->mod_start) {
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 724a08740a04..9466354d3e49 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -11,7 +11,7 @@
#include "pmu.h"
/* x86_pmu.handle_irq definition */
-#include "../kernel/cpu/perf_event.h"
+#include "../events/perf_event.h"
#define XENPMU_IRQ_PROCESSING 1
struct xenpmu {
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3f4ebf0261f2..3c6d17fd423a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -112,7 +112,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu)
xen_pvh_secondary_vcpu_init(cpu);
#endif
cpu_bringup();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void xen_smp_intr_free(unsigned int cpu)
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e9df1567d778..7e9464b0fc00 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -413,8 +413,6 @@ config FORCE_MAX_ZONEORDER
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
config PLATFORM_WANT_DEFAULT_MEM
def_bool n
diff --git a/arch/xtensa/include/asm/gpio.h b/arch/xtensa/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/xtensa/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index e438a00fbd63..5d6bd932ba4e 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -55,9 +55,6 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
#endif /* __KERNEL__ */
-/* Implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
/* Generic PCI */
#include <asm-generic/pci.h>
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 4d02e38514f5..fc4ad21a5ed4 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -157,7 +157,7 @@ void secondary_start_kernel(void)
complete(&cpu_running);
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void mx_cpu_start(void *p)
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index c9784c1b18d8..7f4a1fdb1502 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -146,7 +146,7 @@ good_area:
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
if (flags & VM_FAULT_MAJOR)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
- else if (flags & VM_FAULT_MINOR)
+ else
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
return;
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 70cb408bc20d..c54505dcf4db 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -28,10 +28,6 @@
#include <linux/tty.h>
#include <linux/tty_flip.h>
-#ifdef SERIAL_INLINE
-#define _INLINE_ inline
-#endif
-
#define SERIAL_MAX_NUM_LINES 1
#define SERIAL_TIMER_VALUE (HZ / 10)
diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c
index 87678961a8c8..5f4bd71971d6 100644
--- a/arch/xtensa/platforms/xt2000/setup.c
+++ b/arch/xtensa/platforms/xt2000/setup.c
@@ -113,7 +113,7 @@ void platform_heartbeat(void)
}
//#define RS_TABLE_SIZE 2
-//#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST)
+//#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF|UPF_SKIP_TEST)
#define _SERIAL_PORT(_base,_irq) \
{ \