From afc2f792cdcb67f4257f0e68d10ee4a7b7eae57a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 5 May 2013 20:03:40 +0800 Subject: KVM: add missing misc_deregister() on error in kvm_init() Add the missing misc_deregister() before return from kvm_init() in the debugfs init error handling case. Signed-off-by: Wei Yongjun Signed-off-by: Gleb Natapov --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 302681c4aa44..b547a1ceecbc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3181,6 +3181,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, out_undebugfs: unregister_syscore_ops(&kvm_syscore_ops); + misc_deregister(&kvm_dev); out_unreg: kvm_async_pf_deinit(); out_free: -- cgit v1.2.3 From f1ed0450a5fac7067590317cbf027f566b6ccbca Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 28 Apr 2013 14:00:41 +0200 Subject: KVM: x86: Remove support for reporting coalesced APIC IRQs Since the arrival of posted interrupt support we can no longer guarantee that coalesced IRQs are always reported to the IRQ source. Moreover, accumulated APIC timer events could cause a busy loop when a VCPU should rather be halted. The consensus is to remove coalesced tracking from the LAPIC. 
Signed-off-by: Jan Kiszka Acked-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/lapic.c | 57 +++++++++++++++++++++------------------------------- arch/x86/kvm/lapic.h | 6 +++--- virt/kvm/irq_comm.c | 9 ++++++--- 3 files changed, 32 insertions(+), 40 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e1adbb4aca75..9d751931cf84 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -405,17 +405,17 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) return highest_irr; } -static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode, - unsigned long *dest_map); +static void __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, + int vector, int level, int trig_mode, + unsigned long *dest_map); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - unsigned long *dest_map) +void kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map) { struct kvm_lapic *apic = vcpu->arch.apic; - return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, - irq->level, irq->trig_mode, dest_map); + __apic_accept_irq(apic, irq->delivery_mode, irq->vector, + irq->level, irq->trig_mode, dest_map); } static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) @@ -608,7 +608,8 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { - *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); + kvm_apic_set_irq(src->vcpu, irq, dest_map); + *r = 1; return true; } @@ -653,7 +654,8 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, continue; if (*r < 0) *r = 0; - *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); + kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); + *r += 1; } ret = true; @@ -662,15 +664,11 @@ out: return ret; } -/* - * Add a pending IRQ into lapic. - * Return 1 if successfully added and 0 if discarded. 
- */ -static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode, - unsigned long *dest_map) +/* Set an IRQ pending in the lapic. */ +static void __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, + int vector, int level, int trig_mode, + unsigned long *dest_map) { - int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; switch (delivery_mode) { @@ -684,13 +682,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (dest_map) __set_bit(vcpu->vcpu_id, dest_map); - if (kvm_x86_ops->deliver_posted_interrupt) { - result = 1; + if (kvm_x86_ops->deliver_posted_interrupt) kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - } else { - result = !apic_test_and_set_irr(vector, apic); - - if (!result) { + else { + if (apic_test_and_set_irr(vector, apic)) { if (trig_mode) apic_debug("level trig mode repeatedly " "for vector %d", vector); @@ -702,7 +697,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } out: trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, !result); + trig_mode, vector, false); break; case APIC_DM_REMRD: @@ -714,14 +709,12 @@ out: break; case APIC_DM_NMI: - result = 1; kvm_inject_nmi(vcpu); kvm_vcpu_kick(vcpu); break; case APIC_DM_INIT: if (!trig_mode || level) { - result = 1; /* assumes that there are only KVM_APIC_INIT/SIPI */ apic->pending_events = (1UL << KVM_APIC_INIT); /* make sure pending_events is visible before sending @@ -738,7 +731,6 @@ out: case APIC_DM_STARTUP: apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - result = 1; apic->sipi_vector = vector; /* make sure sipi_vector is visible for the receiver */ smp_wmb(); @@ -760,7 +752,6 @@ out: delivery_mode); break; } - return result; } int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) @@ -1470,7 +1461,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -int kvm_apic_local_deliver(struct kvm_lapic 
*apic, int lvt_type) +void kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { u32 reg = kvm_apic_get_reg(apic, lvt_type); int vector, mode, trig_mode; @@ -1479,10 +1470,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode, - NULL); + __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); } - return 0; } void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) @@ -1608,8 +1597,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) return; if (atomic_read(&apic->lapic_timer.pending) > 0) { - if (kvm_apic_local_deliver(apic, APIC_LVTT)) - atomic_dec(&apic->lapic_timer.pending); + kvm_apic_local_deliver(apic, APIC_LVTT); + atomic_set(&apic->lapic_timer.pending, 0); } } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index c730ac9fe801..61a73a01ab0b 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -57,9 +57,9 @@ void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - unsigned long *dest_map); -int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); +void kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map); +void kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e2e6b4473a96..ef1817b61cf4 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -91,7 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if 
(!kvm_is_dm_lowest_prio(irq)) { if (r < 0) r = 0; - r += kvm_apic_set_irq(vcpu, irq, dest_map); + kvm_apic_set_irq(vcpu, irq, dest_map); + r++; } else if (kvm_lapic_enabled(vcpu)) { if (!lowest) lowest = vcpu; @@ -100,8 +101,10 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } } - if (lowest) - r = kvm_apic_set_irq(lowest, irq, dest_map); + if (lowest) { + kvm_apic_set_irq(lowest, irq, dest_map); + r = 1; + } return r; } -- cgit v1.2.3 From 7275acdfe29ba03ad2f6e150386900c4e2d43fb1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 14:31:01 +0100 Subject: ARM: KVM: move GIC/timer code to a common location As KVM/arm64 is looming on the horizon, it makes sense to move some of the common code to a single location in order to reduce duplication. The code could live anywhere. Actually, most of KVM is already built with a bunch of ugly ../../.. hacks in the various Makefiles, so we're not exactly talking about style here. But maybe it is time to start moving in a less ugly direction. The include files must be in a "public" location, as they are accessed from non-KVM files (arch/arm/kernel/asm-offsets.c). For this purpose, introduce two new locations: - virt/kvm/arm/ : x86 and ia64 already share the ioapic code in virt/kvm, so this could be seen as a (very ugly) precedent. - include/kvm/ : there is already an include/xen, and while the intent is slightly different, this seems as good a location as any Eventually, we should probably have independent Makefiles at every level (just like everywhere else in the kernel), but this is just the first step. 
Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov --- arch/arm/include/asm/kvm_arch_timer.h | 85 -- arch/arm/include/asm/kvm_host.h | 4 +- arch/arm/include/asm/kvm_vgic.h | 220 ----- arch/arm/kvm/Makefile | 7 +- arch/arm/kvm/arch_timer.c | 272 ------ arch/arm/kvm/vgic.c | 1499 --------------------------------- include/kvm/arm_arch_timer.h | 85 ++ include/kvm/arm_vgic.h | 220 +++++ virt/kvm/arm/arch_timer.c | 272 ++++++ virt/kvm/arm/vgic.c | 1499 +++++++++++++++++++++++++++++++++ 10 files changed, 2082 insertions(+), 2081 deletions(-) delete mode 100644 arch/arm/include/asm/kvm_arch_timer.h delete mode 100644 arch/arm/include/asm/kvm_vgic.h delete mode 100644 arch/arm/kvm/arch_timer.c delete mode 100644 arch/arm/kvm/vgic.c create mode 100644 include/kvm/arm_arch_timer.h create mode 100644 include/kvm/arm_vgic.h create mode 100644 virt/kvm/arm/arch_timer.c create mode 100644 virt/kvm/arm/vgic.c (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h deleted file mode 100644 index 68cb9e1dfb81..000000000000 --- a/arch/arm/include/asm/kvm_arch_timer.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __ASM_ARM_KVM_ARCH_TIMER_H -#define __ASM_ARM_KVM_ARCH_TIMER_H - -#include -#include -#include - -struct arch_timer_kvm { -#ifdef CONFIG_KVM_ARM_TIMER - /* Is the timer enabled */ - bool enabled; - - /* Virtual offset */ - cycle_t cntvoff; -#endif -}; - -struct arch_timer_cpu { -#ifdef CONFIG_KVM_ARM_TIMER - /* Registers: control register, timer value */ - u32 cntv_ctl; /* Saved/restored */ - cycle_t cntv_cval; /* Saved/restored */ - - /* - * Anything that is not used directly from assembly code goes - * here. - */ - - /* Background timer used when the guest is not running */ - struct hrtimer timer; - - /* Work queued with the above timer expires */ - struct work_struct expired; - - /* Background timer active */ - bool armed; - - /* Timer IRQ */ - const struct kvm_irq_level *irq; -#endif -}; - -#ifdef CONFIG_KVM_ARM_TIMER -int kvm_timer_hyp_init(void); -int kvm_timer_init(struct kvm *kvm); -void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); -void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); -#else -static inline int kvm_timer_hyp_init(void) -{ - return 0; -}; - -static inline int kvm_timer_init(struct kvm *kvm) -{ - return 0; -} - -static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} -#endif - -#endif diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 57cb786a6203..ff5aaf10e6ec 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -23,7 +23,7 @@ 
#include #include #include -#include +#include #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #define KVM_USER_MEM_SLOTS 32 @@ -38,7 +38,7 @@ #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) -#include +#include struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h deleted file mode 100644 index 343744e4809c..000000000000 --- a/arch/arm/include/asm/kvm_vgic.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __ASM_ARM_KVM_VGIC_H -#define __ASM_ARM_KVM_VGIC_H - -#include -#include -#include -#include -#include -#include - -#define VGIC_NR_IRQS 128 -#define VGIC_NR_SGIS 16 -#define VGIC_NR_PPIS 16 -#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) -#define VGIC_MAX_CPUS KVM_MAX_VCPUS -#define VGIC_MAX_LRS (1 << 6) - -/* Sanity checks... 
*/ -#if (VGIC_MAX_CPUS > 8) -#error Invalid number of CPU interfaces -#endif - -#if (VGIC_NR_IRQS & 31) -#error "VGIC_NR_IRQS must be a multiple of 32" -#endif - -#if (VGIC_NR_IRQS > 1024) -#error "VGIC_NR_IRQS must be <= 1024" -#endif - -/* - * The GIC distributor registers describing interrupts have two parts: - * - 32 per-CPU interrupts (SGI + PPI) - * - a bunch of shared interrupts (SPI) - */ -struct vgic_bitmap { - union { - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); - } percpu[VGIC_MAX_CPUS]; - union { - u32 reg[VGIC_NR_SHARED_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); - } shared; -}; - -struct vgic_bytemap { - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; - u32 shared[VGIC_NR_SHARED_IRQS / 4]; -}; - -struct vgic_dist { -#ifdef CONFIG_KVM_ARM_VGIC - spinlock_t lock; - bool ready; - - /* Virtual control interface mapping */ - void __iomem *vctrl_base; - - /* Distributor and vcpu interface mapping in the guest */ - phys_addr_t vgic_dist_base; - phys_addr_t vgic_cpu_base; - - /* Distributor enabled */ - u32 enabled; - - /* Interrupt enabled (one bit per IRQ) */ - struct vgic_bitmap irq_enabled; - - /* Interrupt 'pin' level */ - struct vgic_bitmap irq_state; - - /* Level-triggered interrupt in progress */ - struct vgic_bitmap irq_active; - - /* Interrupt priority. Not used yet. 
*/ - struct vgic_bytemap irq_priority; - - /* Level/edge triggered */ - struct vgic_bitmap irq_cfg; - - /* Source CPU per SGI and target CPU */ - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; - - /* Target CPU for each IRQ */ - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; - - /* Bitmap indicating which CPU has something pending */ - unsigned long irq_pending_on_cpu; -#endif -}; - -struct vgic_cpu { -#ifdef CONFIG_KVM_ARM_VGIC - /* per IRQ to LR mapping */ - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; - - /* Pending interrupts on this VCPU */ - DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); - - /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); - - /* Number of list registers on this CPU */ - int nr_lr; - - /* CPU vif control registers for world switch */ - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr[2]; /* Saved only */ - u32 vgic_elrsr[2]; /* Saved only */ - u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; -#endif -}; - -#define LR_EMPTY 0xff - -struct kvm; -struct kvm_vcpu; -struct kvm_run; -struct kvm_exit_mmio; - -#ifdef CONFIG_KVM_ARM_VGIC -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); -int kvm_vgic_hyp_init(void); -int kvm_vgic_init(struct kvm *kvm); -int kvm_vgic_create(struct kvm *kvm); -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level); -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio); - -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) -#define vgic_initialized(k) ((k)->arch.vgic.ready) - -#else -static inline int kvm_vgic_hyp_init(void) -{ - return 0; -} - -static 
inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - return 0; -} - -static inline int kvm_vgic_init(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_create(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - return false; -} - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 0; -} - -static inline bool vgic_initialized(struct kvm *kvm) -{ - return true; -} -#endif - -#endif diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 53c5ed83d16f..9184a491d172 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I. AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) -kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +KVM := ../../../virt/kvm +kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o -obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c deleted file mode 100644 index c55b6089e923..000000000000 --- a/arch/arm/kvm/arch_timer.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. 
- * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -static struct timecounter *timecounter; -static struct workqueue_struct *wqueue; -static struct kvm_irq_level timer_irq = { - .level = 1, -}; - -static cycle_t kvm_phys_timer_read(void) -{ - return timecounter->cc->read(timecounter->cc); -} - -static bool timer_is_armed(struct arch_timer_cpu *timer) -{ - return timer->armed; -} - -/* timer_arm: as in "arm the timer", not as in ARM the company */ -static void timer_arm(struct arch_timer_cpu *timer, u64 ns) -{ - timer->armed = true; - hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), - HRTIMER_MODE_ABS); -} - -static void timer_disarm(struct arch_timer_cpu *timer) -{ - if (timer_is_armed(timer)) { - hrtimer_cancel(&timer->timer); - cancel_work_sync(&timer->expired); - timer->armed = false; - } -} - -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - vcpu->arch.timer_cpu.irq->irq, - vcpu->arch.timer_cpu.irq->level); -} - -static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) -{ - struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; - - /* - * We disable the timer 
in the world switch and let it be - * handled by kvm_timer_sync_hwstate(). Getting a timer - * interrupt at this point is a sure sign of some major - * breakage. - */ - pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); - return IRQ_HANDLED; -} - -static void kvm_timer_inject_irq_work(struct work_struct *work) -{ - struct kvm_vcpu *vcpu; - - vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); - vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); -} - -static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) -{ - struct arch_timer_cpu *timer; - timer = container_of(hrt, struct arch_timer_cpu, timer); - queue_work(wqueue, &timer->expired); - return HRTIMER_NORESTART; -} - -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu - * @vcpu: The vcpu pointer - * - * Disarm any pending soft timers, since the world-switch code will write the - * virtual timer state back to the physical CPU. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - /* - * We're about to run this vcpu again, so there is no need to - * keep the background timer running, as we're about to - * populate the CPU timer again. - */ - timer_disarm(timer); -} - -/** - * kvm_timer_sync_hwstate - sync timer state from cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer was armed and either schedule a corresponding - * soft timer or inject directly if already expired. - */ -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - u64 ns; - - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - BUG_ON(timer_is_armed(timer)); - - if (cval <= now) { - /* - * Timer has already expired while we were not - * looking. 
Inject the interrupt and carry on. - */ - kvm_timer_inject_irq(vcpu); - return; - } - - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now); - timer_arm(timer, ns); -} - -void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); - hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->timer.function = kvm_timer_expire; - timer->irq = &timer_irq; -} - -static void kvm_timer_init_interrupt(void *info) -{ - enable_percpu_irq(timer_irq.irq, 0); -} - - -static int kvm_timer_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - kvm_timer_init_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(timer_irq.irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block kvm_timer_cpu_nb = { - .notifier_call = kvm_timer_cpu_notify, -}; - -static const struct of_device_id arch_timer_of_match[] = { - { .compatible = "arm,armv7-timer", }, - {}, -}; - -int kvm_timer_hyp_init(void) -{ - struct device_node *np; - unsigned int ppi; - int err; - - timecounter = arch_timer_get_timecounter(); - if (!timecounter) - return -ENODEV; - - np = of_find_matching_node(NULL, arch_timer_of_match); - if (!np) { - kvm_err("kvm_arch_timer: can't find DT node\n"); - return -ENODEV; - } - - ppi = irq_of_parse_and_map(np, 2); - if (!ppi) { - kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); - err = -EINVAL; - goto out; - } - - err = request_percpu_irq(ppi, kvm_arch_timer_handler, - "kvm guest timer", kvm_get_running_vcpus()); - if (err) { - kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", - ppi, err); - goto out; - } - - timer_irq.irq = ppi; - - err = register_cpu_notifier(&kvm_timer_cpu_nb); - if (err) { - kvm_err("Cannot register timer CPU notifier\n"); - goto out_free; - } - - wqueue = 
create_singlethread_workqueue("kvm_arch_timer"); - if (!wqueue) { - err = -ENOMEM; - goto out_free; - } - - kvm_info("%s IRQ%d\n", np->name, ppi); - on_each_cpu(kvm_timer_init_interrupt, NULL, 1); - - goto out; -out_free: - free_percpu_irq(ppi, kvm_get_running_vcpus()); -out: - of_node_put(np); - return err; -} - -void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - timer_disarm(timer); -} - -int kvm_timer_init(struct kvm *kvm) -{ - if (timecounter && wqueue) { - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); - kvm->arch.timer.enabled = 1; - } - - return 0; -} diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c deleted file mode 100644 index 17c5ac7d10ed..000000000000 --- a/arch/arm/kvm/vgic.c +++ /dev/null @@ -1,1499 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -/* - * How the whole thing works (courtesy of Christoffer Dall): - * - * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_state vgic - * bitmap (this bitmap is updated by both user land ioctls and guest - * mmio ops, and other in-kernel peripherals such as the - * arch. timers) and indicate the 'wire' state. - * - Every time the bitmap changes, the irq_pending_on_cpu oracle is - * recalculated - * - To calculate the oracle, we need info for each cpu from - * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_state & dist->irq_enable - * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ICFGR - * registers, stored on each vcpu. We only keep one bit of - * information per interrupt, making sure that only one vcpu can - * accept the interrupt. - * - The same is true when injecting an interrupt, except that we only - * consider a single interrupt at a time. The irq_spi_cpu array - * contains the target CPU for each SPI. - * - * The handling of level interrupts adds some extra complexity. We - * need to track when the interrupt has been EOIed, so we can sample - * the 'line' again. This is achieved as such: - * - * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_active is set. As long as this bit is set, the line - * will be ignored for further interrupts. The interrupt is injected - * into the vcpu with the GICH_LR_EOI bit set (generate a - * maintenance interrupt on EOI). 
- * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_active. This allow the - * interrupt line to be sampled again. - */ - -#define VGIC_ADDR_UNDEF (-1) -#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) - -/* Physical address of vgic virtual cpu interface */ -static phys_addr_t vgic_vcpu_base; - -/* Virtual control interface base address */ -static void __iomem *vgic_vctrl_base; - -static struct device_node *vgic_node; - -#define ACCESS_READ_VALUE (1 << 0) -#define ACCESS_READ_RAZ (0 << 0) -#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) -#define ACCESS_WRITE_IGNORED (0 << 1) -#define ACCESS_WRITE_SETBIT (1 << 1) -#define ACCESS_WRITE_CLEARBIT (2 << 1) -#define ACCESS_WRITE_VALUE (3 << 1) -#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) - -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_update_state(struct kvm *kvm); -static void vgic_kick_vcpus(struct kvm *kvm); -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); -static u32 vgic_nr_lr; - -static unsigned int vgic_maint_irq; - -static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, - int cpuid, u32 offset) -{ - offset >>= 2; - if (!offset) - return x->percpu[cpuid].reg; - else - return x->shared.reg + offset - 1; -} - -static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, - int cpuid, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->percpu[cpuid].reg_ul); - - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); -} - -static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val) -{ - unsigned long *reg; - - if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->percpu[cpuid].reg_ul; - } else { - reg = x->shared.reg_ul; - irq -= VGIC_NR_PRIVATE_IRQS; - } - - if (val) - set_bit(irq, reg); - else - clear_bit(irq, reg); -} - -static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) -{ - if (unlikely(cpuid >= VGIC_MAX_CPUS)) - return NULL; 
- return x->percpu[cpuid].reg_ul; -} - -static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) -{ - return x->shared.reg_ul; -} - -static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) -{ - offset >>= 2; - BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 4) - return x->percpu[cpuid] + offset; - else - return x->shared + offset - 8; -} - -#define VGIC_CFG_LEVEL 0 -#define VGIC_CFG_EDGE 1 - -static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int irq_val; - - irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq); - return irq_val == VGIC_CFG_EDGE; -} - -static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); -} - -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); -} - -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); -} - -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); -} - -static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); -} - -static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); -} - -static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - 
vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); -} - -static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - set_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - clear_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) -{ - return *((u32 *)mmio->data) & mask; -} - -static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) -{ - *((u32 *)mmio->data) = value & mask; -} - -/** - * vgic_reg_access - access vgic register - * @mmio: pointer to the data describing the mmio access - * @reg: pointer to the virtual backing of vgic distributor data - * @offset: least significant 2 bits used for word offset - * @mode: ACCESS_ mode (see defines above) - * - * Helper to make vgic register access easier using one of the access - * modes defined for vgic register access - * (read,raz,write-ignored,setbit,clearbit,write) - */ -static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode) -{ - int word_offset = (offset & 3) * 8; - u32 mask = (1UL << (mmio->len * 8)) - 1; - u32 regval; - - /* - * Any alignment fault should have been delivered to the guest - * directly (ARM ARM B3.12.7 "Prioritization of aborts"). 
- */ - - if (reg) { - regval = *reg; - } else { - BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED)); - regval = 0; - } - - if (mmio->is_write) { - u32 data = mmio_data_read(mmio, mask) << word_offset; - switch (ACCESS_WRITE_MASK(mode)) { - case ACCESS_WRITE_IGNORED: - return; - - case ACCESS_WRITE_SETBIT: - regval |= data; - break; - - case ACCESS_WRITE_CLEARBIT: - regval &= ~data; - break; - - case ACCESS_WRITE_VALUE: - regval = (regval & ~(mask << word_offset)) | data; - break; - } - *reg = regval; - } else { - switch (ACCESS_READ_MASK(mode)) { - case ACCESS_READ_RAZ: - regval = 0; - /* fall through */ - - case ACCESS_READ_VALUE: - mmio_data_write(mmio, mask, regval >> word_offset); - } - } -} - -static bool handle_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - u32 word_offset = offset & 3; - - switch (offset & ~3) { - case 0: /* CTLR */ - reg = vcpu->kvm->arch.vgic.enabled; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = reg & 1; - vgic_update_state(vcpu->kvm); - return true; - } - break; - - case 4: /* TYPER */ - reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (VGIC_NR_IRQS >> 5) - 1; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - - case 8: /* IIDR */ - reg = 0x4B00043B; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - } - - return false; -} - -static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - 
ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - if (mmio->is_write) { - if (offset < 4) /* Force SGI enabled */ - *reg |= 0xffff; - vgic_retire_disabled_irqs(vcpu); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -#define GICD_ITARGETSR_SIZE 32 -#define GICD_CPUTARGETS_BITS 8 -#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) -static u32 vgic_get_target_reg(struct kvm *kvm, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; 
- int i, c; - unsigned long *bmap; - u32 val = 0; - - irq -= VGIC_NR_PRIVATE_IRQS; - - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - if (test_bit(irq + i, bmap)) - val |= 1 << (c + i * 8); - } - - return val; -} - -static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 target; - - irq -= VGIC_NR_PRIVATE_IRQS; - - /* - * Pick the LSB in each byte. This ensures we target exactly - * one vcpu per IRQ. If the byte is null, assume we target - * CPU0. - */ - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { - int shift = i * GICD_CPUTARGETS_BITS; - target = ffs((val >> shift) & 0xffU); - target = target ? (target - 1) : 0; - dist->irq_spi_cpu[irq + i] = target; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - if (c == target) - set_bit(irq + i, bmap); - else - clear_bit(irq + i, bmap); - } - } -} - -static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - /* We treat the banked interrupts targets as read-only */ - if (offset < 32) { - u32 roreg = 1 << vcpu->vcpu_id; - roreg |= roreg << 8; - roreg |= roreg << 16; - - vgic_reg_access(mmio, &roreg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static u32 vgic_cfg_expand(u16 val) -{ - u32 res = 0; - int i; - - /* - * Turn a 16bit value like abcd...mnop into a 32bit word - * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is. 
- */ - for (i = 0; i < 16; i++) - res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1); - - return res; -} - -static u16 vgic_cfg_compress(u32 val) -{ - u16 res = 0; - int i; - - /* - * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like - * abcd...mnop which is what we really care about. - */ - for (i = 0; i < 16; i++) - res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i; - - return res; -} - -/* - * The distributor uses 2 bits per IRQ for the CFG register, but the - * LSB is always 0. As such, we only keep the upper bit, and use the - * two above functions to compress/expand the bits - */ -static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 val; - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - if (offset & 2) - val = *reg >> 16; - else - val = *reg & 0xffff; - - val = vgic_cfg_expand(val); - vgic_reg_access(mmio, &val, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - if (offset < 4) { - *reg = ~0U; /* Force PPIs/SGIs to 1 */ - return false; - } - - val = vgic_cfg_compress(val); - if (offset & 2) { - *reg &= 0xffff; - *reg |= val << 16; - } else { - *reg &= 0xffff << 16; - *reg |= val; - } - } - - return false; -} - -static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_dispatch_sgi(vcpu, reg); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -/* - * I would have liked to use the kvm_bus_io_*() API instead, but it - * cannot cope with banked registers (only the VM pointer is passed - * around, and we need the vcpu). One of these days, someone please - * fix it! 
- */ -struct mmio_range { - phys_addr_t base; - unsigned long len; - bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); -}; - -static const struct mmio_range vgic_ranges[] = { - { - .base = GIC_DIST_CTRL, - .len = 12, - .handle_mmio = handle_mmio_misc, - }, - { - .base = GIC_DIST_IGROUP, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ENABLE_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_set_enable_reg, - }, - { - .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_clear_enable_reg, - }, - { - .base = GIC_DIST_PENDING_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_set_pending_reg, - }, - { - .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_clear_pending_reg, - }, - { - .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_PRI, - .len = VGIC_NR_IRQS, - .handle_mmio = handle_mmio_priority_reg, - }, - { - .base = GIC_DIST_TARGET, - .len = VGIC_NR_IRQS, - .handle_mmio = handle_mmio_target_reg, - }, - { - .base = GIC_DIST_CONFIG, - .len = VGIC_NR_IRQS / 4, - .handle_mmio = handle_mmio_cfg_reg, - }, - { - .base = GIC_DIST_SOFTINT, - .len = 4, - .handle_mmio = handle_mmio_sgi_reg, - }, - {} -}; - -static const -struct mmio_range *find_matching_range(const struct mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t base) -{ - const struct mmio_range *r = ranges; - phys_addr_t addr = mmio->phys_addr - base; - - while (r->len) { - if (addr >= r->base && - (addr + mmio->len) <= (r->base + r->len)) - return r; - r++; - } - - return NULL; -} - -/** - * vgic_handle_mmio - handle an in-kernel MMIO access - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: 
pointer to the data describing the access - * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - const struct mmio_range *range; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long base = dist->vgic_dist_base; - bool updated_state; - unsigned long offset; - - if (!irqchip_in_kernel(vcpu->kvm) || - mmio->phys_addr < base || - (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - range = find_matching_range(vgic_ranges, mmio, base); - if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; - } - - spin_lock(&vcpu->kvm->arch.vgic.lock); - offset = mmio->phys_addr - range->base - base; - updated_state = range->handle_mmio(vcpu, mmio, offset); - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); - kvm_handle_mmio_return(vcpu, run); - - if (updated_state) - vgic_kick_vcpus(vcpu->kvm); - - return true; -} - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int nrcpus = atomic_read(&kvm->online_vcpus); - u8 target_cpus; - int sgi, mode, c, vcpu_id; - - vcpu_id = vcpu->vcpu_id; - - sgi = reg & 0xf; - target_cpus = (reg >> 16) & 0xff; - mode = (reg >> 24) & 3; - - switch (mode) { - case 0: - if (!target_cpus) - return; - - case 1: - target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; - break; - - case 2: - target_cpus = 1 << vcpu_id; - break; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (target_cpus & 1) { - /* Flag the SGI as pending */ - vgic_dist_irq_set(vcpu, sgi); - 
dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; - kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); - } - - target_cpus >>= 1; - } -} - -static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *pending, *enabled, *pend_percpu, *pend_shared; - unsigned long pending_private, pending_shared; - int vcpu_id; - - vcpu_id = vcpu->vcpu_id; - pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; - pend_shared = vcpu->arch.vgic_cpu.pending_shared; - - pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); - enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); - bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - - pending = vgic_bitmap_get_shared_map(&dist->irq_state); - enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); - bitmap_and(pend_shared, pend_shared, - vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - VGIC_NR_SHARED_IRQS); - - pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); - return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < VGIC_NR_SHARED_IRQS); -} - -/* - * Update the interrupt state and determine which CPUs have pending - * interrupts. Must be called with distributor lock held. 
- */ -static void vgic_update_state(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int c; - - if (!dist->enabled) { - set_bit(0, &dist->irq_pending_on_cpu); - return; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) { - pr_debug("CPU%d has pending interrupts\n", c); - set_bit(c, &dist->irq_pending_on_cpu); - } - } -} - -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) -#define MK_LR_PEND(src, irq) \ - (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) - -/* - * An interrupt may have been disabled after being made pending on the - * CPU interface (the classic case is a timer running while we're - * rebooting the guest - the interrupt would kick as soon as the CPU - * interface gets enabled, with deadly consequences). - * - * The solution is to examine already active LRs, and check the - * interrupt is still enabled. If not, just retire it. - */ -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int lr; - - for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - clear_bit(lr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; - if (vgic_irq_is_active(vcpu, irq)) - vgic_irq_clear_active(vcpu, irq); - } - } -} - -/* - * Queue an interrupt to a CPU virtual interface. Return true on success, - * or false if it wasn't possible to queue it. - */ -static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int lr; - - /* Sanitize the input... 
*/ - BUG_ON(sgi_source_id & ~7); - BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= VGIC_NR_IRQS); - - kvm_debug("Queue IRQ%d\n", irq); - - lr = vgic_cpu->vgic_irq_lr_map[irq]; - - /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { - kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_lr[lr]); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; - return true; - } - - /* Try to use another LR for this interrupt */ - lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic_cpu->nr_lr); - if (lr >= vgic_cpu->nr_lr) - return false; - - kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); - vgic_cpu->vgic_irq_lr_map[irq] = lr; - set_bit(lr, vgic_cpu->lr_used); - - if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; - - return true; -} - -static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long sources; - int vcpu_id = vcpu->vcpu_id; - int c; - - sources = dist->irq_sgi_sources[vcpu_id][irq]; - - for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { - if (vgic_queue_irq(vcpu, c, irq)) - clear_bit(c, &sources); - } - - dist->irq_sgi_sources[vcpu_id][irq] = sources; - - /* - * If the sources bitmap has been cleared it means that we - * could queue all the SGIs onto link registers (see the - * clear_bit above), and therefore we are done with them in - * our emulated gic and can get rid of them. 
- */ - if (!sources) { - vgic_dist_irq_clear(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - -static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) -{ - if (vgic_irq_is_active(vcpu, irq)) - return true; /* level interrupt, already queued */ - - if (vgic_queue_irq(vcpu, 0, irq)) { - if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - } else { - vgic_irq_set_active(vcpu, irq); - } - - return true; - } - - return false; -} - -/* - * Fill the list registers with pending interrupts before running the - * guest. - */ -static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i, vcpu_id; - int overflow = 0; - - vcpu_id = vcpu->vcpu_id; - - /* - * We may not have any pending interrupt, or the interrupts - * may have been serviced from another vcpu. In all cases, - * move along. - */ - if (!kvm_vgic_vcpu_pending_irq(vcpu)) { - pr_debug("CPU%d has no pending interrupt\n", vcpu_id); - goto epilog; - } - - /* SGIs */ - for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { - if (!vgic_queue_sgi(vcpu, i)) - overflow = 1; - } - - /* PPIs */ - for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { - if (!vgic_queue_hwirq(vcpu, i)) - overflow = 1; - } - - /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { - if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) - overflow = 1; - } - -epilog: - if (overflow) { - vgic_cpu->vgic_hcr |= GICH_HCR_UIE; - } else { - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; - /* - * We're about to run this VCPU, and we've consumed - * everything the distributor had in store for - * us. Claim we don't have anything pending. We'll - * adjust that if needed while exiting. 
- */ - clear_bit(vcpu_id, &dist->irq_pending_on_cpu); - } -} - -static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - bool level_pending = false; - - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); - - if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { - /* - * Some level interrupts have been EOIed. Clear their - * active bit. - */ - int lr, irq; - - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, - vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; - - /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, irq)) { - vgic_cpu_irq_set(vcpu, irq); - level_pending = true; - } else { - vgic_cpu_irq_clear(vcpu, irq); - } - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; - } - } - - if (vgic_cpu->vgic_misr & GICH_MISR_U) - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; - - return level_pending; -} - -/* - * Sync back the VGIC state after a guest run. The distributor lock is - * needed so we don't get preempted in the middle of the state processing. - */ -static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int lr, pending; - bool level_pending; - - level_pending = vgic_process_maintenance(vcpu); - - /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, - vgic_cpu->nr_lr) { - int irq; - - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) - continue; - - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - BUG_ON(irq >= VGIC_NR_IRQS); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - } - - /* Check if we still have something up our sleeve... 
*/ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, - vgic_cpu->nr_lr); - if (level_pending || pending < vgic_cpu->nr_lr) - set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); -} - -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_flush_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_sync_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); -} - -static void vgic_kick_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - int c; - - /* - * We've injected an interrupt, time to find out who deserves - * a good kick... 
- */ - kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) - kvm_vcpu_kick(vcpu); - } -} - -static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) -{ - int is_edge = vgic_irq_is_edge(vcpu, irq); - int state = vgic_dist_irq_is_pending(vcpu, irq); - - /* - * Only inject an interrupt if: - * - edge triggered and we have a rising edge - * - level triggered and we change level - */ - if (is_edge) - return level > state; - else - return level != state; -} - -static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int is_edge, is_level; - int enabled; - bool ret = true; - - spin_lock(&dist->lock); - - vcpu = kvm_get_vcpu(kvm, cpuid); - is_edge = vgic_irq_is_edge(vcpu, irq_num); - is_level = !is_edge; - - if (!vgic_validate_injection(vcpu, irq_num, level)) { - ret = false; - goto out; - } - - if (irq_num >= VGIC_NR_PRIVATE_IRQS) { - cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; - vcpu = kvm_get_vcpu(kvm, cpuid); - } - - kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - - if (level) - vgic_dist_irq_set(vcpu, irq_num); - else - vgic_dist_irq_clear(vcpu, irq_num); - - enabled = vgic_irq_is_enabled(vcpu, irq_num); - - if (!enabled) { - ret = false; - goto out; - } - - if (is_level && vgic_irq_is_active(vcpu, irq_num)) { - /* - * Level interrupt in progress, will be picked up - * when EOId. 
- */ - ret = false; - goto out; - } - - if (level) { - vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, &dist->irq_pending_on_cpu); - } - -out: - spin_unlock(&dist->lock); - - return ret; -} - -/** - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @irq_num: The IRQ number that is assigned to the device - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: activates an interrupt - * false: deactivates an interrupt - * - * The GIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level) -{ - if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) - vgic_kick_vcpus(kvm); - - return 0; -} - -static irqreturn_t vgic_maintenance_handler(int irq, void *data) -{ - /* - * We cannot rely on the vgic maintenance interrupt to be - * delivered synchronously. This means we can only use it to - * exit the VM, and we perform the handling of EOIed - * interrupts on the exit path (see vgic_process_maintenance). - */ - return IRQ_HANDLED; -} - -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) - return -EBUSY; - - for (i = 0; i < VGIC_NR_IRQS; i++) { - if (i < VGIC_NR_PPIS) - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, VGIC_CFG_EDGE); - - vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; - } - - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. 
Anything else resets to zero - * anyway. - */ - vgic_cpu->vgic_vmcr = 0; - - vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ - - return 0; -} - -static void vgic_init_maintenance_interrupt(void *info) -{ - enable_percpu_irq(vgic_maint_irq, 0); -} - -static int vgic_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - vgic_init_maintenance_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(vgic_maint_irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block vgic_cpu_nb = { - .notifier_call = vgic_cpu_notify, -}; - -int kvm_vgic_hyp_init(void) -{ - int ret; - struct resource vctrl_res; - struct resource vcpu_res; - - vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); - if (!vgic_node) { - kvm_err("error: no compatible vgic node in DT\n"); - return -ENODEV; - } - - vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic_maint_irq) { - kvm_err("error getting vgic maintenance irq from DT\n"); - ret = -ENXIO; - goto out; - } - - ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); - goto out; - } - - ret = register_cpu_notifier(&vgic_cpu_nb); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); - if (ret) { - kvm_err("Cannot obtain VCTRL resource\n"); - goto out_free_irq; - } - - vgic_vctrl_base = of_iomap(vgic_node, 2); - if (!vgic_vctrl_base) { - kvm_err("Cannot ioremap VCTRL\n"); - ret = -ENOMEM; - goto out_free_irq; - } - - vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); - vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic_vctrl_base, - vgic_vctrl_base + resource_size(&vctrl_res), - 
vctrl_res.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out_unmap; - } - vgic_vcpu_base = vcpu_res.start; - - goto out; - -out_unmap: - iounmap(vgic_vctrl_base); -out_free_irq: - free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); -out: - of_node_put(vgic_node); - return ret; -} - -int kvm_vgic_init(struct kvm *kvm) -{ - int ret = 0, i; - - mutex_lock(&kvm->lock); - - if (vgic_initialized(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; - } - - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) - vgic_set_target_reg(kvm, 0, i); - - kvm_timer_init(kvm); - kvm->arch.vgic.ready = true; -out: - mutex_unlock(&kvm->lock); - return ret; -} - -int kvm_vgic_create(struct kvm *kvm) -{ - int ret = 0; - - mutex_lock(&kvm->lock); - - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { - ret = -EEXIST; - goto out; - } - - spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - -out: - mutex_unlock(&kvm->lock); - return ret; -} - -static bool vgic_ioaddr_overlap(struct kvm *kvm) -{ - phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; - phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; - - if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) - return 0; - if ((dist <= 
cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) || - (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist)) - return -EBUSY; - return 0; -} - -static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t size) -{ - int ret; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - if (addr + size < addr) - return -EINVAL; - - ret = vgic_ioaddr_overlap(kvm); - if (ret) - return ret; - *ioaddr = addr; - return ret; -} - -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - int r = 0; - struct vgic_dist *vgic = &kvm->arch.vgic; - - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - - mutex_lock(&kvm->lock); - switch (type) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, - addr, KVM_VGIC_V2_DIST_SIZE); - break; - case KVM_VGIC_V2_ADDR_TYPE_CPU: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, - addr, KVM_VGIC_V2_CPU_SIZE); - break; - default: - r = -ENODEV; - } - - mutex_unlock(&kvm->lock); - return r; -} diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h new file mode 100644 index 000000000000..68cb9e1dfb81 --- /dev/null +++ b/include/kvm/arm_arch_timer.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_KVM_ARCH_TIMER_H +#define __ASM_ARM_KVM_ARCH_TIMER_H + +#include +#include +#include + +struct arch_timer_kvm { +#ifdef CONFIG_KVM_ARM_TIMER + /* Is the timer enabled */ + bool enabled; + + /* Virtual offset */ + cycle_t cntvoff; +#endif +}; + +struct arch_timer_cpu { +#ifdef CONFIG_KVM_ARM_TIMER + /* Registers: control register, timer value */ + u32 cntv_ctl; /* Saved/restored */ + cycle_t cntv_cval; /* Saved/restored */ + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + + /* Background timer used when the guest is not running */ + struct hrtimer timer; + + /* Work queued with the above timer expires */ + struct work_struct expired; + + /* Background timer active */ + bool armed; + + /* Timer IRQ */ + const struct kvm_irq_level *irq; +#endif +}; + +#ifdef CONFIG_KVM_ARM_TIMER +int kvm_timer_hyp_init(void); +int kvm_timer_init(struct kvm *kvm); +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); +#else +static inline int kvm_timer_hyp_init(void) +{ + return 0; +}; + +static inline int kvm_timer_init(struct kvm *kvm) +{ + return 0; +} + +static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} +static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} +static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} +static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} +#endif + +#endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h new file mode 100644 index 000000000000..343744e4809c --- /dev/null +++ b/include/kvm/arm_vgic.h @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2012 ARM Ltd. 
+ * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_KVM_VGIC_H +#define __ASM_ARM_KVM_VGIC_H + +#include +#include +#include +#include +#include +#include + +#define VGIC_NR_IRQS 128 +#define VGIC_NR_SGIS 16 +#define VGIC_NR_PPIS 16 +#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) +#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) +#define VGIC_MAX_CPUS KVM_MAX_VCPUS +#define VGIC_MAX_LRS (1 << 6) + +/* Sanity checks... 
*/ +#if (VGIC_MAX_CPUS > 8) +#error Invalid number of CPU interfaces +#endif + +#if (VGIC_NR_IRQS & 31) +#error "VGIC_NR_IRQS must be a multiple of 32" +#endif + +#if (VGIC_NR_IRQS > 1024) +#error "VGIC_NR_IRQS must be <= 1024" +#endif + +/* + * The GIC distributor registers describing interrupts have two parts: + * - 32 per-CPU interrupts (SGI + PPI) + * - a bunch of shared interrupts (SPI) + */ +struct vgic_bitmap { + union { + u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; + DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); + } percpu[VGIC_MAX_CPUS]; + union { + u32 reg[VGIC_NR_SHARED_IRQS / 32]; + DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); + } shared; +}; + +struct vgic_bytemap { + u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; + u32 shared[VGIC_NR_SHARED_IRQS / 4]; +}; + +struct vgic_dist { +#ifdef CONFIG_KVM_ARM_VGIC + spinlock_t lock; + bool ready; + + /* Virtual control interface mapping */ + void __iomem *vctrl_base; + + /* Distributor and vcpu interface mapping in the guest */ + phys_addr_t vgic_dist_base; + phys_addr_t vgic_cpu_base; + + /* Distributor enabled */ + u32 enabled; + + /* Interrupt enabled (one bit per IRQ) */ + struct vgic_bitmap irq_enabled; + + /* Interrupt 'pin' level */ + struct vgic_bitmap irq_state; + + /* Level-triggered interrupt in progress */ + struct vgic_bitmap irq_active; + + /* Interrupt priority. Not used yet. 
*/ + struct vgic_bytemap irq_priority; + + /* Level/edge triggered */ + struct vgic_bitmap irq_cfg; + + /* Source CPU per SGI and target CPU */ + u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; + + /* Target CPU for each IRQ */ + u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; + struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; + + /* Bitmap indicating which CPU has something pending */ + unsigned long irq_pending_on_cpu; +#endif +}; + +struct vgic_cpu { +#ifdef CONFIG_KVM_ARM_VGIC + /* per IRQ to LR mapping */ + u8 vgic_irq_lr_map[VGIC_NR_IRQS]; + + /* Pending interrupts on this VCPU */ + DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); + + /* Bitmap of used/free list registers */ + DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); + + /* Number of list registers on this CPU */ + int nr_lr; + + /* CPU vif control registers for world switch */ + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr[2]; /* Saved only */ + u32 vgic_elrsr[2]; /* Saved only */ + u32 vgic_apr; + u32 vgic_lr[VGIC_MAX_LRS]; +#endif +}; + +#define LR_EMPTY 0xff + +struct kvm; +struct kvm_vcpu; +struct kvm_run; +struct kvm_exit_mmio; + +#ifdef CONFIG_KVM_ARM_VGIC +int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); +int kvm_vgic_hyp_init(void); +int kvm_vgic_init(struct kvm *kvm); +int kvm_vgic_create(struct kvm *kvm); +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, + bool level); +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); +bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio); + +#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) +#define vgic_initialized(k) ((k)->arch.vgic.ready) + +#else +static inline int kvm_vgic_hyp_init(void) +{ + return 0; +} + +static 
inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) +{ + return 0; +} + +static inline int kvm_vgic_init(struct kvm *kvm) +{ + return 0; +} + +static inline int kvm_vgic_create(struct kvm *kvm) +{ + return 0; +} + +static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} +static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} + +static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, + unsigned int irq_num, bool level) +{ + return 0; +} + +static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + return false; +} + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 0; +} + +static inline bool vgic_initialized(struct kvm *kvm) +{ + return true; +} +#endif + +#endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c new file mode 100644 index 000000000000..2d00b2925780 --- /dev/null +++ b/virt/kvm/arm/arch_timer.c @@ -0,0 +1,272 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +static struct timecounter *timecounter; +static struct workqueue_struct *wqueue; +static struct kvm_irq_level timer_irq = { + .level = 1, +}; + +static cycle_t kvm_phys_timer_read(void) +{ + return timecounter->cc->read(timecounter->cc); +} + +static bool timer_is_armed(struct arch_timer_cpu *timer) +{ + return timer->armed; +} + +/* timer_arm: as in "arm the timer", not as in ARM the company */ +static void timer_arm(struct arch_timer_cpu *timer, u64 ns) +{ + timer->armed = true; + hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), + HRTIMER_MODE_ABS); +} + +static void timer_disarm(struct arch_timer_cpu *timer) +{ + if (timer_is_armed(timer)) { + hrtimer_cancel(&timer->timer); + cancel_work_sync(&timer->expired); + timer->armed = false; + } +} + +static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; + kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + vcpu->arch.timer_cpu.irq->irq, + vcpu->arch.timer_cpu.irq->level); +} + +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) +{ + struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; + + /* + * We disable the timer in the world switch and let it be + * handled by kvm_timer_sync_hwstate(). Getting a timer + * interrupt at this point is a sure sign of some major + * breakage. 
+ */ + pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); + return IRQ_HANDLED; +} + +static void kvm_timer_inject_irq_work(struct work_struct *work) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); + vcpu->arch.timer_cpu.armed = false; + kvm_timer_inject_irq(vcpu); +} + +static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) +{ + struct arch_timer_cpu *timer; + timer = container_of(hrt, struct arch_timer_cpu, timer); + queue_work(wqueue, &timer->expired); + return HRTIMER_NORESTART; +} + +/** + * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu + * @vcpu: The vcpu pointer + * + * Disarm any pending soft timers, since the world-switch code will write the + * virtual timer state back to the physical CPU. + */ +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * We're about to run this vcpu again, so there is no need to + * keep the background timer running, as we're about to + * populate the CPU timer again. + */ + timer_disarm(timer); +} + +/** + * kvm_timer_sync_hwstate - sync timer state from cpu + * @vcpu: The vcpu pointer + * + * Check if the virtual timer was armed and either schedule a corresponding + * soft timer or inject directly if already expired. + */ +void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + cycle_t cval, now; + u64 ns; + + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) + return; + + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + BUG_ON(timer_is_armed(timer)); + + if (cval <= now) { + /* + * Timer has already expired while we were not + * looking. Inject the interrupt and carry on. 
+ */ + kvm_timer_inject_irq(vcpu); + return; + } + + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now); + timer_arm(timer, ns); +} + +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); + hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + timer->timer.function = kvm_timer_expire; + timer->irq = &timer_irq; +} + +static void kvm_timer_init_interrupt(void *info) +{ + enable_percpu_irq(timer_irq.irq, 0); +} + + +static int kvm_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + kvm_timer_init_interrupt(NULL); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_percpu_irq(timer_irq.irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block kvm_timer_cpu_nb = { + .notifier_call = kvm_timer_cpu_notify, +}; + +static const struct of_device_id arch_timer_of_match[] = { + { .compatible = "arm,armv7-timer", }, + {}, +}; + +int kvm_timer_hyp_init(void) +{ + struct device_node *np; + unsigned int ppi; + int err; + + timecounter = arch_timer_get_timecounter(); + if (!timecounter) + return -ENODEV; + + np = of_find_matching_node(NULL, arch_timer_of_match); + if (!np) { + kvm_err("kvm_arch_timer: can't find DT node\n"); + return -ENODEV; + } + + ppi = irq_of_parse_and_map(np, 2); + if (!ppi) { + kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); + err = -EINVAL; + goto out; + } + + err = request_percpu_irq(ppi, kvm_arch_timer_handler, + "kvm guest timer", kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", + ppi, err); + goto out; + } + + timer_irq.irq = ppi; + + err = register_cpu_notifier(&kvm_timer_cpu_nb); + if (err) { + kvm_err("Cannot register timer CPU notifier\n"); + goto out_free; + } + + wqueue = create_singlethread_workqueue("kvm_arch_timer"); + if 
(!wqueue) { + err = -ENOMEM; + goto out_free; + } + + kvm_info("%s IRQ%d\n", np->name, ppi); + on_each_cpu(kvm_timer_init_interrupt, NULL, 1); + + goto out; +out_free: + free_percpu_irq(ppi, kvm_get_running_vcpus()); +out: + of_node_put(np); + return err; +} + +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + timer_disarm(timer); +} + +int kvm_timer_init(struct kvm *kvm) +{ + if (timecounter && wqueue) { + kvm->arch.timer.cntvoff = kvm_phys_timer_read(); + kvm->arch.timer.enabled = 1; + } + + return 0; +} diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c new file mode 100644 index 000000000000..17c5ac7d10ed --- /dev/null +++ b/virt/kvm/arm/vgic.c @@ -0,0 +1,1499 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +/* + * How the whole thing works (courtesy of Christoffer Dall): + * + * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if + * something is pending + * - VGIC pending interrupts are stored on the vgic.irq_state vgic + * bitmap (this bitmap is updated by both user land ioctls and guest + * mmio ops, and other in-kernel peripherals such as the + * arch. 
timers) and indicate the 'wire' state. + * - Every time the bitmap changes, the irq_pending_on_cpu oracle is + * recalculated + * - To calculate the oracle, we need info for each cpu from + * compute_pending_for_cpu, which considers: + * - PPI: dist->irq_state & dist->irq_enabled + * - SPI: dist->irq_state & dist->irq_enabled & dist->irq_spi_target + * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn + * registers, stored on each vcpu. We only keep one bit of + * information per interrupt, making sure that only one vcpu can + * accept the interrupt. + * - The same is true when injecting an interrupt, except that we only + * consider a single interrupt at a time. The irq_spi_cpu array + * contains the target CPU for each SPI. + * + * The handling of level interrupts adds some extra complexity. We + * need to track when the interrupt has been EOIed, so we can sample + * the 'line' again. This is achieved as such: + * + * - When a level interrupt is moved onto a vcpu, the corresponding + * bit in irq_active is set. As long as this bit is set, the line + * will be ignored for further interrupts. The interrupt is injected + * into the vcpu with the GICH_LR_EOI bit set (generate a + * maintenance interrupt on EOI). + * - When the interrupt is EOIed, the maintenance interrupt fires, + * and clears the corresponding bit in irq_active. This allows the + * interrupt line to be sampled again. 
+ */ + +#define VGIC_ADDR_UNDEF (-1) +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) + +/* Physical address of vgic virtual cpu interface */ +static phys_addr_t vgic_vcpu_base; + +/* Virtual control interface base address */ +static void __iomem *vgic_vctrl_base; + +static struct device_node *vgic_node; + +#define ACCESS_READ_VALUE (1 << 0) +#define ACCESS_READ_RAZ (0 << 0) +#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) +#define ACCESS_WRITE_IGNORED (0 << 1) +#define ACCESS_WRITE_SETBIT (1 << 1) +#define ACCESS_WRITE_CLEARBIT (2 << 1) +#define ACCESS_WRITE_VALUE (3 << 1) +#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) + +static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); +static void vgic_update_state(struct kvm *kvm); +static void vgic_kick_vcpus(struct kvm *kvm); +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); +static u32 vgic_nr_lr; + +static unsigned int vgic_maint_irq; + +static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, + int cpuid, u32 offset) +{ + offset >>= 2; + if (!offset) + return x->percpu[cpuid].reg; + else + return x->shared.reg + offset - 1; +} + +static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, + int cpuid, int irq) +{ + if (irq < VGIC_NR_PRIVATE_IRQS) + return test_bit(irq, x->percpu[cpuid].reg_ul); + + return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); +} + +static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, + int irq, int val) +{ + unsigned long *reg; + + if (irq < VGIC_NR_PRIVATE_IRQS) { + reg = x->percpu[cpuid].reg_ul; + } else { + reg = x->shared.reg_ul; + irq -= VGIC_NR_PRIVATE_IRQS; + } + + if (val) + set_bit(irq, reg); + else + clear_bit(irq, reg); +} + +static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) +{ + if (unlikely(cpuid >= VGIC_MAX_CPUS)) + return NULL; + return x->percpu[cpuid].reg_ul; +} + +static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) +{ + return x->shared.reg_ul; +} + +static u32 
*vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) +{ + offset >>= 2; + BUG_ON(offset > (VGIC_NR_IRQS / 4)); + if (offset < 4) + return x->percpu[cpuid] + offset; + else + return x->shared + offset - 8; +} + +#define VGIC_CFG_LEVEL 0 +#define VGIC_CFG_EDGE 1 + +static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int irq_val; + + irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq); + return irq_val == VGIC_CFG_EDGE; +} + +static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); +} + +static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); +} + +static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); +} + +static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); +} + +static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); +} + +static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); +} + +static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) +{ + if (irq < VGIC_NR_PRIVATE_IRQS) + 
set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); + else + set_bit(irq - VGIC_NR_PRIVATE_IRQS, + vcpu->arch.vgic_cpu.pending_shared); +} + +static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) +{ + if (irq < VGIC_NR_PRIVATE_IRQS) + clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); + else + clear_bit(irq - VGIC_NR_PRIVATE_IRQS, + vcpu->arch.vgic_cpu.pending_shared); +} + +static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) +{ + return *((u32 *)mmio->data) & mask; +} + +static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) +{ + *((u32 *)mmio->data) = value & mask; +} + +/** + * vgic_reg_access - access vgic register + * @mmio: pointer to the data describing the mmio access + * @reg: pointer to the virtual backing of vgic distributor data + * @offset: least significant 2 bits used for word offset + * @mode: ACCESS_ mode (see defines above) + * + * Helper to make vgic register access easier using one of the access + * modes defined for vgic register access + * (read,raz,write-ignored,setbit,clearbit,write) + */ +static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, + phys_addr_t offset, int mode) +{ + int word_offset = (offset & 3) * 8; + u32 mask = (1UL << (mmio->len * 8)) - 1; + u32 regval; + + /* + * Any alignment fault should have been delivered to the guest + * directly (ARM ARM B3.12.7 "Prioritization of aborts"). 
+ */ + + if (reg) { + regval = *reg; + } else { + BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED)); + regval = 0; + } + + if (mmio->is_write) { + u32 data = mmio_data_read(mmio, mask) << word_offset; + switch (ACCESS_WRITE_MASK(mode)) { + case ACCESS_WRITE_IGNORED: + return; + + case ACCESS_WRITE_SETBIT: + regval |= data; + break; + + case ACCESS_WRITE_CLEARBIT: + regval &= ~data; + break; + + case ACCESS_WRITE_VALUE: + regval = (regval & ~(mask << word_offset)) | data; + break; + } + *reg = regval; + } else { + switch (ACCESS_READ_MASK(mode)) { + case ACCESS_READ_RAZ: + regval = 0; + /* fall through */ + + case ACCESS_READ_VALUE: + mmio_data_write(mmio, mask, regval >> word_offset); + } + } +} + +static bool handle_mmio_misc(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + u32 word_offset = offset & 3; + + switch (offset & ~3) { + case 0: /* CTLR */ + reg = vcpu->kvm->arch.vgic.enabled; + vgic_reg_access(mmio, &reg, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vcpu->kvm->arch.vgic.enabled = reg & 1; + vgic_update_state(vcpu->kvm); + return true; + } + break; + + case 4: /* TYPER */ + reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; + reg |= (VGIC_NR_IRQS >> 5) - 1; + vgic_reg_access(mmio, &reg, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + break; + + case 8: /* IIDR */ + reg = 0x4B00043B; + vgic_reg_access(mmio, &reg, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + break; + } + + return false; +} + +static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + 
ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + if (mmio->is_write) { + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + if (mmio->is_write) { + if (offset < 4) /* Force SGI enabled */ + *reg |= 0xffff; + vgic_retire_disabled_irqs(vcpu); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + if (mmio->is_write) { + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + if (mmio->is_write) { + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + return false; +} + +#define GICD_ITARGETSR_SIZE 32 +#define GICD_CPUTARGETS_BITS 8 +#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) +static u32 vgic_get_target_reg(struct kvm *kvm, int irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; 
+ int i, c; + unsigned long *bmap; + u32 val = 0; + + irq -= VGIC_NR_PRIVATE_IRQS; + + kvm_for_each_vcpu(c, vcpu, kvm) { + bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) + if (test_bit(irq + i, bmap)) + val |= 1 << (c + i * 8); + } + + return val; +} + +static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i, c; + unsigned long *bmap; + u32 target; + + irq -= VGIC_NR_PRIVATE_IRQS; + + /* + * Pick the LSB in each byte. This ensures we target exactly + * one vcpu per IRQ. If the byte is null, assume we target + * CPU0. + */ + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { + int shift = i * GICD_CPUTARGETS_BITS; + target = ffs((val >> shift) & 0xffU); + target = target ? (target - 1) : 0; + dist->irq_spi_cpu[irq + i] = target; + kvm_for_each_vcpu(c, vcpu, kvm) { + bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); + if (c == target) + set_bit(irq + i, bmap); + else + clear_bit(irq + i, bmap); + } + } +} + +static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + + /* We treat the banked interrupts targets as read-only */ + if (offset < 32) { + u32 roreg = 1 << vcpu->vcpu_id; + roreg |= roreg << 8; + roreg |= roreg << 16; + + vgic_reg_access(mmio, &roreg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); + vgic_reg_access(mmio, &reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static u32 vgic_cfg_expand(u16 val) +{ + u32 res = 0; + int i; + + /* + * Turn a 16bit value like abcd...mnop into a 32bit word + * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is. 
+ */ + for (i = 0; i < 16; i++) + res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1); + + return res; +} + +static u16 vgic_cfg_compress(u32 val) +{ + u16 res = 0; + int i; + + /* + * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like + * abcd...mnop which is what we really care about. + */ + for (i = 0; i < 16; i++) + res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i; + + return res; +} + +/* + * The distributor uses 2 bits per IRQ for the CFG register, but the + * LSB is always 0. As such, we only keep the upper bit, and use the + * two above functions to compress/expand the bits + */ +static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 val; + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset >> 1); + if (offset & 2) + val = *reg >> 16; + else + val = *reg & 0xffff; + + val = vgic_cfg_expand(val); + vgic_reg_access(mmio, &val, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + if (offset < 4) { + *reg = ~0U; /* Force PPIs/SGIs to 1 */ + return false; + } + + val = vgic_cfg_compress(val); + if (offset & 2) { + *reg &= 0xffff; + *reg |= val << 16; + } else { + *reg &= 0xffff << 16; + *reg |= val; + } + } + + return false; +} + +static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + vgic_reg_access(mmio, &reg, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vgic_dispatch_sgi(vcpu, reg); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +/* + * I would have liked to use the kvm_bus_io_*() API instead, but it + * cannot cope with banked registers (only the VM pointer is passed + * around, and we need the vcpu). One of these days, someone please + * fix it! 
+ */ +struct mmio_range { + phys_addr_t base; + unsigned long len; + bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, + phys_addr_t offset); +}; + +static const struct mmio_range vgic_ranges[] = { + { + .base = GIC_DIST_CTRL, + .len = 12, + .handle_mmio = handle_mmio_misc, + }, + { + .base = GIC_DIST_IGROUP, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_ENABLE_SET, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_set_enable_reg, + }, + { + .base = GIC_DIST_ENABLE_CLEAR, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_clear_enable_reg, + }, + { + .base = GIC_DIST_PENDING_SET, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_set_pending_reg, + }, + { + .base = GIC_DIST_PENDING_CLEAR, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_clear_pending_reg, + }, + { + .base = GIC_DIST_ACTIVE_SET, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_ACTIVE_CLEAR, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_PRI, + .len = VGIC_NR_IRQS, + .handle_mmio = handle_mmio_priority_reg, + }, + { + .base = GIC_DIST_TARGET, + .len = VGIC_NR_IRQS, + .handle_mmio = handle_mmio_target_reg, + }, + { + .base = GIC_DIST_CONFIG, + .len = VGIC_NR_IRQS / 4, + .handle_mmio = handle_mmio_cfg_reg, + }, + { + .base = GIC_DIST_SOFTINT, + .len = 4, + .handle_mmio = handle_mmio_sgi_reg, + }, + {} +}; + +static const +struct mmio_range *find_matching_range(const struct mmio_range *ranges, + struct kvm_exit_mmio *mmio, + phys_addr_t base) +{ + const struct mmio_range *r = ranges; + phys_addr_t addr = mmio->phys_addr - base; + + while (r->len) { + if (addr >= r->base && + (addr + mmio->len) <= (r->base + r->len)) + return r; + r++; + } + + return NULL; +} + +/** + * vgic_handle_mmio - handle an in-kernel MMIO access + * @vcpu: pointer to the vcpu performing the access + * @run: pointer to the kvm_run structure + * @mmio: 
pointer to the data describing the access + * + * returns true if the MMIO access has been performed in kernel space, + * and false if it needs to be emulated in user space. + */ +bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + const struct mmio_range *range; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long base = dist->vgic_dist_base; + bool updated_state; + unsigned long offset; + + if (!irqchip_in_kernel(vcpu->kvm) || + mmio->phys_addr < base || + (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE)) + return false; + + /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ + if (mmio->len > 4) { + kvm_inject_dabt(vcpu, mmio->phys_addr); + return true; + } + + range = find_matching_range(vgic_ranges, mmio, base); + if (unlikely(!range || !range->handle_mmio)) { + pr_warn("Unhandled access %d %08llx %d\n", + mmio->is_write, mmio->phys_addr, mmio->len); + return false; + } + + spin_lock(&vcpu->kvm->arch.vgic.lock); + offset = mmio->phys_addr - range->base - base; + updated_state = range->handle_mmio(vcpu, mmio, offset); + spin_unlock(&vcpu->kvm->arch.vgic.lock); + kvm_prepare_mmio(run, mmio); + kvm_handle_mmio_return(vcpu, run); + + if (updated_state) + vgic_kick_vcpus(vcpu->kvm); + + return true; +} + +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *dist = &kvm->arch.vgic; + int nrcpus = atomic_read(&kvm->online_vcpus); + u8 target_cpus; + int sgi, mode, c, vcpu_id; + + vcpu_id = vcpu->vcpu_id; + + sgi = reg & 0xf; + target_cpus = (reg >> 16) & 0xff; + mode = (reg >> 24) & 3; + + switch (mode) { + case 0: + if (!target_cpus) + return; + + case 1: + target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; + break; + + case 2: + target_cpus = 1 << vcpu_id; + break; + } + + kvm_for_each_vcpu(c, vcpu, kvm) { + if (target_cpus & 1) { + /* Flag the SGI as pending */ + vgic_dist_irq_set(vcpu, sgi); + 
dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; + kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); + } + + target_cpus >>= 1; + } +} + +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *pending, *enabled, *pend_percpu, *pend_shared; + unsigned long pending_private, pending_shared; + int vcpu_id; + + vcpu_id = vcpu->vcpu_id; + pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; + pend_shared = vcpu->arch.vgic_cpu.pending_shared; + + pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); + enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); + bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); + + pending = vgic_bitmap_get_shared_map(&dist->irq_state); + enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); + bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); + bitmap_and(pend_shared, pend_shared, + vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), + VGIC_NR_SHARED_IRQS); + + pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); + pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); + return (pending_private < VGIC_NR_PRIVATE_IRQS || + pending_shared < VGIC_NR_SHARED_IRQS); +} + +/* + * Update the interrupt state and determine which CPUs have pending + * interrupts. Must be called with distributor lock held. 
+ */ +static void vgic_update_state(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int c; + + if (!dist->enabled) { + set_bit(0, &dist->irq_pending_on_cpu); + return; + } + + kvm_for_each_vcpu(c, vcpu, kvm) { + if (compute_pending_for_cpu(vcpu)) { + pr_debug("CPU%d has pending interrupts\n", c); + set_bit(c, &dist->irq_pending_on_cpu); + } + } +} + +#define LR_CPUID(lr) \ + (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) +#define MK_LR_PEND(src, irq) \ + (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) + +/* + * An interrupt may have been disabled after being made pending on the + * CPU interface (the classic case is a timer running while we're + * rebooting the guest - the interrupt would kick as soon as the CPU + * interface gets enabled, with deadly consequences). + * + * The solution is to examine already active LRs, and check the + * interrupt is still enabled. If not, just retire it. + */ +static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int lr; + + for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + + if (!vgic_irq_is_enabled(vcpu, irq)) { + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + clear_bit(lr, vgic_cpu->lr_used); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; + if (vgic_irq_is_active(vcpu, irq)) + vgic_irq_clear_active(vcpu, irq); + } + } +} + +/* + * Queue an interrupt to a CPU virtual interface. Return true on success, + * or false if it wasn't possible to queue it. + */ +static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int lr; + + /* Sanitize the input... 
*/ + BUG_ON(sgi_source_id & ~7); + BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); + BUG_ON(irq >= VGIC_NR_IRQS); + + kvm_debug("Queue IRQ%d\n", irq); + + lr = vgic_cpu->vgic_irq_lr_map[irq]; + + /* Do we have an active interrupt for the same CPUID? */ + if (lr != LR_EMPTY && + (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { + kvm_debug("LR%d piggyback for IRQ%d %x\n", + lr, irq, vgic_cpu->vgic_lr[lr]); + BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); + vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; + return true; + } + + /* Try to use another LR for this interrupt */ + lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, + vgic_cpu->nr_lr); + if (lr >= vgic_cpu->nr_lr) + return false; + + kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); + vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); + vgic_cpu->vgic_irq_lr_map[irq] = lr; + set_bit(lr, vgic_cpu->lr_used); + + if (!vgic_irq_is_edge(vcpu, irq)) + vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; + + return true; +} + +static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long sources; + int vcpu_id = vcpu->vcpu_id; + int c; + + sources = dist->irq_sgi_sources[vcpu_id][irq]; + + for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { + if (vgic_queue_irq(vcpu, c, irq)) + clear_bit(c, &sources); + } + + dist->irq_sgi_sources[vcpu_id][irq] = sources; + + /* + * If the sources bitmap has been cleared it means that we + * could queue all the SGIs onto link registers (see the + * clear_bit above), and therefore we are done with them in + * our emulated gic and can get rid of them. 
+ */ + if (!sources) { + vgic_dist_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, irq); + return true; + } + + return false; +} + +static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) +{ + if (vgic_irq_is_active(vcpu, irq)) + return true; /* level interrupt, already queued */ + + if (vgic_queue_irq(vcpu, 0, irq)) { + if (vgic_irq_is_edge(vcpu, irq)) { + vgic_dist_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, irq); + } else { + vgic_irq_set_active(vcpu, irq); + } + + return true; + } + + return false; +} + +/* + * Fill the list registers with pending interrupts before running the + * guest. + */ +static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int i, vcpu_id; + int overflow = 0; + + vcpu_id = vcpu->vcpu_id; + + /* + * We may not have any pending interrupt, or the interrupts + * may have been serviced from another vcpu. In all cases, + * move along. + */ + if (!kvm_vgic_vcpu_pending_irq(vcpu)) { + pr_debug("CPU%d has no pending interrupt\n", vcpu_id); + goto epilog; + } + + /* SGIs */ + for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { + if (!vgic_queue_sgi(vcpu, i)) + overflow = 1; + } + + /* PPIs */ + for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { + if (!vgic_queue_hwirq(vcpu, i)) + overflow = 1; + } + + /* SPIs */ + for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { + if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) + overflow = 1; + } + +epilog: + if (overflow) { + vgic_cpu->vgic_hcr |= GICH_HCR_UIE; + } else { + vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + /* + * We're about to run this VCPU, and we've consumed + * everything the distributor had in store for + * us. Claim we don't have anything pending. We'll + * adjust that if needed while exiting. 
+ */ + clear_bit(vcpu_id, &dist->irq_pending_on_cpu); + } +} + +static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + bool level_pending = false; + + kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); + + if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { + /* + * Some level interrupts have been EOIed. Clear their + * active bit. + */ + int lr, irq; + + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, + vgic_cpu->nr_lr) { + irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + + vgic_irq_clear_active(vcpu, irq); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; + + /* Any additional pending interrupt? */ + if (vgic_dist_irq_is_pending(vcpu, irq)) { + vgic_cpu_irq_set(vcpu, irq); + level_pending = true; + } else { + vgic_cpu_irq_clear(vcpu, irq); + } + + /* + * Despite being EOIed, the LR may not have + * been marked as empty. + */ + set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; + } + } + + if (vgic_cpu->vgic_misr & GICH_MISR_U) + vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + + return level_pending; +} + +/* + * Sync back the VGIC state after a guest run. The distributor lock is + * needed so we don't get preempted in the middle of the state processing. + */ +static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int lr, pending; + bool level_pending; + + level_pending = vgic_process_maintenance(vcpu); + + /* Clear mappings for empty LRs */ + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, + vgic_cpu->nr_lr) { + int irq; + + if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) + continue; + + irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + + BUG_ON(irq >= VGIC_NR_IRQS); + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + } + + /* Check if we still have something up our sleeve... 
*/ + pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, + vgic_cpu->nr_lr); + if (level_pending || pending < vgic_cpu->nr_lr) + set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); +} + +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + spin_lock(&dist->lock); + __kvm_vgic_flush_hwstate(vcpu); + spin_unlock(&dist->lock); +} + +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + spin_lock(&dist->lock); + __kvm_vgic_sync_hwstate(vcpu); + spin_unlock(&dist->lock); +} + +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); +} + +static void vgic_kick_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int c; + + /* + * We've injected an interrupt, time to find out who deserves + * a good kick... 
/*
 * Decide whether a new input level for @irq should be acted upon.
 *
 * An injection is accepted only if:
 * - the interrupt is edge triggered and this is a rising edge
 *   (level is above the current pending state), or
 * - the interrupt is level triggered and the level differs from the
 *   current pending state.
 *
 * Returns non-zero when the injection should proceed.
 */
static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
{
	int edge_triggered = vgic_irq_is_edge(vcpu, irq);
	int pending = vgic_dist_irq_is_pending(vcpu, irq);

	return edge_triggered ? (level > pending) : (level != pending);
}
+ */ + ret = false; + goto out; + } + + if (level) { + vgic_cpu_irq_set(vcpu, irq_num); + set_bit(cpuid, &dist->irq_pending_on_cpu); + } + +out: + spin_unlock(&dist->lock); + + return ret; +} + +/** + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic + * @kvm: The VM structure pointer + * @cpuid: The CPU for PPIs + * @irq_num: The IRQ number that is assigned to the device + * @level: Edge-triggered: true: to trigger the interrupt + * false: to ignore the call + * Level-sensitive true: activates an interrupt + * false: deactivates an interrupt + * + * The GIC is not concerned with devices being active-LOW or active-HIGH for + * level-sensitive interrupts. You can think of the level parameter as 1 + * being HIGH and 0 being LOW and all devices being active-HIGH. + */ +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, + bool level) +{ + if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) + vgic_kick_vcpus(kvm); + + return 0; +} + +static irqreturn_t vgic_maintenance_handler(int irq, void *data) +{ + /* + * We cannot rely on the vgic maintenance interrupt to be + * delivered synchronously. This means we can only use it to + * exit the VM, and we perform the handling of EOIed + * interrupts on the exit path (see vgic_process_maintenance). + */ + return IRQ_HANDLED; +} + +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int i; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + if (vcpu->vcpu_id >= VGIC_MAX_CPUS) + return -EBUSY; + + for (i = 0; i < VGIC_NR_IRQS; i++) { + if (i < VGIC_NR_PPIS) + vgic_bitmap_set_irq_val(&dist->irq_enabled, + vcpu->vcpu_id, i, 1); + if (i < VGIC_NR_PRIVATE_IRQS) + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, VGIC_CFG_EDGE); + + vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; + } + + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. 
Anything else resets to zero + * anyway. + */ + vgic_cpu->vgic_vmcr = 0; + + vgic_cpu->nr_lr = vgic_nr_lr; + vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + + return 0; +} + +static void vgic_init_maintenance_interrupt(void *info) +{ + enable_percpu_irq(vgic_maint_irq, 0); +} + +static int vgic_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + vgic_init_maintenance_interrupt(NULL); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_percpu_irq(vgic_maint_irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block vgic_cpu_nb = { + .notifier_call = vgic_cpu_notify, +}; + +int kvm_vgic_hyp_init(void) +{ + int ret; + struct resource vctrl_res; + struct resource vcpu_res; + + vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); + if (!vgic_node) { + kvm_err("error: no compatible vgic node in DT\n"); + return -ENODEV; + } + + vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic_maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); + goto out; + } + + ret = register_cpu_notifier(&vgic_cpu_nb); + if (ret) { + kvm_err("Cannot register vgic CPU notifier\n"); + goto out_free_irq; + } + + ret = of_address_to_resource(vgic_node, 2, &vctrl_res); + if (ret) { + kvm_err("Cannot obtain VCTRL resource\n"); + goto out_free_irq; + } + + vgic_vctrl_base = of_iomap(vgic_node, 2); + if (!vgic_vctrl_base) { + kvm_err("Cannot ioremap VCTRL\n"); + ret = -ENOMEM; + goto out_free_irq; + } + + vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); + vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; + + ret = create_hyp_io_mappings(vgic_vctrl_base, + vgic_vctrl_base + resource_size(&vctrl_res), + 
vctrl_res.start); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out_unmap; + } + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vctrl_res.start, vgic_maint_irq); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain VCPU resource\n"); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; + + goto out; + +out_unmap: + iounmap(vgic_vctrl_base); +out_free_irq: + free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); +out: + of_node_put(vgic_node); + return ret; +} + +int kvm_vgic_init(struct kvm *kvm) +{ + int ret = 0, i; + + mutex_lock(&kvm->lock); + + if (vgic_initialized(kvm)) + goto out; + + if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + kvm_err("Need to set vgic cpu and dist addresses first\n"); + ret = -ENXIO; + goto out; + } + + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); + if (ret) { + kvm_err("Unable to remap VGIC CPU to VCPU\n"); + goto out; + } + + for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) + vgic_set_target_reg(kvm, 0, i); + + kvm_timer_init(kvm); + kvm->arch.vgic.ready = true; +out: + mutex_unlock(&kvm->lock); + return ret; +} + +int kvm_vgic_create(struct kvm *kvm) +{ + int ret = 0; + + mutex_lock(&kvm->lock); + + if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { + ret = -EEXIST; + goto out; + } + + spin_lock_init(&kvm->arch.vgic.lock); + kvm->arch.vgic.vctrl_base = vgic_vctrl_base; + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + +out: + mutex_unlock(&kvm->lock); + return ret; +} + +static bool vgic_ioaddr_overlap(struct kvm *kvm) +{ + phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; + phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; + + if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) + return 0; + if ((dist <= 
cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) || + (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist)) + return -EBUSY; + return 0; +} + +static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t size) +{ + int ret; + + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) + return -EEXIST; + if (addr + size < addr) + return -EINVAL; + + ret = vgic_ioaddr_overlap(kvm); + if (ret) + return ret; + *ioaddr = addr; + return ret; +} + +int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) +{ + int r = 0; + struct vgic_dist *vgic = &kvm->arch.vgic; + + if (addr & ~KVM_PHYS_MASK) + return -E2BIG; + + if (addr & (SZ_4K - 1)) + return -EINVAL; + + mutex_lock(&kvm->lock); + switch (type) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, + addr, KVM_VGIC_V2_DIST_SIZE); + break; + case KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, + addr, KVM_VGIC_V2_CPU_SIZE); + break; + default: + r = -ENODEV; + } + + mutex_unlock(&kvm->lock); + return r; +} -- cgit v1.2.3 From 6ea34c9b78c10289846db0abeebd6b84d5aca084 Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Sat, 25 May 2013 06:44:15 +0800 Subject: kvm: exclude ioeventfd from counting kvm_io_range limit We can easily reach the 1000 limit by start VM with a couple hundred I/O devices (multifunction=on). The hardcode limit already been adjusted 3 times (6 ~ 200 ~ 300 ~ 1000). In userspace, we already have maximum file descriptor to limit ioeventfd count. But kvm_io_bus devices also are used for pit, pic, ioapic, coalesced_mmio. They couldn't be limited by maximum file descriptor. Currently only ioeventfds take too much kvm_io_bus devices, so just exclude it from counting kvm_io_range limit. 
Also fixed one indent issue in kvm_host.h Signed-off-by: Amos Kong Reviewed-by: Stefan Hajnoczi Signed-off-by: Gleb Natapov --- include/linux/kvm_host.h | 3 ++- virt/kvm/eventfd.c | 2 ++ virt/kvm/kvm_main.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d9a3c30eab2e..e3aae6db276f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -145,7 +145,8 @@ struct kvm_io_range { #define NR_IOBUS_DEVS 1000 struct kvm_io_bus { - int dev_count; + int dev_count; + int ioeventfd_count; struct kvm_io_range range[]; }; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 64ee720b75c7..1550637d1b10 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -753,6 +753,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (ret < 0) goto unlock_fail; + kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); mutex_unlock(&kvm->slots_lock); @@ -798,6 +799,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b547a1ceecbc..1580dd4ace4e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2926,7 +2926,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - if (bus->dev_count > NR_IOBUS_DEVS - 1) + /* exclude ioeventfd which is limited by maximum fd */ + if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * -- cgit v1.2.3 From 5ae7f87a56fab10b8f9b135a8377c144397293ca Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 30 Apr 2013 12:02:15 +0530 Subject: ARM: KVM: Allow host virt timer irq to be different from guest timer 
virt irq The arch_timer irq numbers (or PPI numbers) are implementation dependent, so the host virtual timer irq number can be different from guest virtual timer irq number. This patch ensures that host virtual timer irq number is read from DTB and guest virtual timer irq is determined based on vcpu target type. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 12 ++++++++++++ include/kvm/arm_arch_timer.h | 4 ++++ virt/kvm/arm/arch_timer.c | 29 ++++++++++++++++++++--------- 3 files changed, 36 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b80256b554cd..b7840e7aa452 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -27,6 +27,8 @@ #include #include +#include + /****************************************************************************** * Cortex-A15 Reset Values */ @@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static const struct kvm_irq_level a15_vtimer_irq = { + .irq = 27, + .level = 1, +}; + /******************************************************************************* * Exported reset function @@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *cpu_reset; + const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A15: @@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return -EINVAL; cpu_reset = &a15_regs_reset; vcpu->arch.midr = read_cpuid_id(); + cpu_vtimer_irq = &a15_vtimer_irq; break; default: return -ENODEV; @@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* Reset arch_timer context */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 
68cb9e1dfb81..6d9aeddc09bf 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -61,6 +61,8 @@ struct arch_timer_cpu { #ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); int kvm_timer_init(struct kvm *kvm); +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); @@ -76,6 +78,8 @@ static inline int kvm_timer_init(struct kvm *kvm) return 0; } +static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) {} static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 2d00b2925780..af4583e2c185 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -30,9 +30,7 @@ static struct timecounter *timecounter; static struct workqueue_struct *wqueue; -static struct kvm_irq_level timer_irq = { - .level = 1, -}; +static unsigned int host_vtimer_irq; static cycle_t kvm_phys_timer_read(void) { @@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - vcpu->arch.timer_cpu.irq->irq, - vcpu->arch.timer_cpu.irq->level); + timer->irq->irq, + timer->irq->level); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) timer_arm(timer, ns); } +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * The vcpu timer irq number cannot be determined in + * kvm_timer_vcpu_init() because it is called much before + * 
kvm_vcpu_set_target(). To handle this, we determine + * vcpu timer irq number when the vcpu is reset. + */ + timer->irq = irq; +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -163,12 +175,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; - timer->irq = &timer_irq; } static void kvm_timer_init_interrupt(void *info) { - enable_percpu_irq(timer_irq.irq, 0); + enable_percpu_irq(host_vtimer_irq, 0); } @@ -182,7 +193,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(timer_irq.irq); + disable_percpu_irq(host_vtimer_irq); break; } @@ -229,7 +240,7 @@ int kvm_timer_hyp_init(void) goto out; } - timer_irq.irq = ppi; + host_vtimer_irq = ppi; err = register_cpu_notifier(&kvm_timer_cpu_nb); if (err) { -- cgit v1.2.3 From 24f7bb52e952912b6a936ebcdc4e744b03e9e5cf Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Jun 2013 15:19:15 +0300 Subject: KVM: Fix RTC interrupt coalescing tracking This reverts most of the f1ed0450a5fac7067590317cbf027f566b6ccbca. After the commit kvm_apic_set_irq() no longer returns accurate information about interrupt injection status if injection is done into disabled APIC. RTC interrupt coalescing tracking relies on the information to be accurate and cannot recover if it is not. 
Signed-off-by: Gleb Natapov --- arch/x86/kvm/lapic.c | 53 +++++++++++++++++++++++++++++++--------------------- arch/x86/kvm/lapic.h | 6 +++--- virt/kvm/irq_comm.c | 9 +++------ 3 files changed, 38 insertions(+), 30 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9d751931cf84..9f4bea805bed 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -405,17 +405,17 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) return highest_irr; } -static void __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode, - unsigned long *dest_map); +static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, + int vector, int level, int trig_mode, + unsigned long *dest_map); -void kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - unsigned long *dest_map) +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map) { struct kvm_lapic *apic = vcpu->arch.apic; - __apic_accept_irq(apic, irq->delivery_mode, irq->vector, - irq->level, irq->trig_mode, dest_map); + return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, + irq->level, irq->trig_mode, dest_map); } static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) @@ -608,8 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { - kvm_apic_set_irq(src->vcpu, irq, dest_map); - *r = 1; + *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } @@ -654,8 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, continue; if (*r < 0) *r = 0; - kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); - *r += 1; + *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } ret = true; @@ -664,11 +662,15 @@ out: return ret; } -/* Set an IRQ pending in the lapic. 
*/ -static void __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode, - unsigned long *dest_map) +/* + * Add a pending IRQ into lapic. + * Return 1 if successfully added and 0 if discarded. + */ +static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, + int vector, int level, int trig_mode, + unsigned long *dest_map) { + int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; switch (delivery_mode) { @@ -682,10 +684,13 @@ static void __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (dest_map) __set_bit(vcpu->vcpu_id, dest_map); - if (kvm_x86_ops->deliver_posted_interrupt) + if (kvm_x86_ops->deliver_posted_interrupt) { + result = 1; kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - else { - if (apic_test_and_set_irr(vector, apic)) { + } else { + result = !apic_test_and_set_irr(vector, apic); + + if (!result) { if (trig_mode) apic_debug("level trig mode repeatedly " "for vector %d", vector); @@ -697,7 +702,7 @@ static void __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } out: trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, false); + trig_mode, vector, !result); break; case APIC_DM_REMRD: @@ -709,12 +714,14 @@ out: break; case APIC_DM_NMI: + result = 1; kvm_inject_nmi(vcpu); kvm_vcpu_kick(vcpu); break; case APIC_DM_INIT: if (!trig_mode || level) { + result = 1; /* assumes that there are only KVM_APIC_INIT/SIPI */ apic->pending_events = (1UL << KVM_APIC_INIT); /* make sure pending_events is visible before sending @@ -731,6 +738,7 @@ out: case APIC_DM_STARTUP: apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); + result = 1; apic->sipi_vector = vector; /* make sure sipi_vector is visible for the receiver */ smp_wmb(); @@ -752,6 +760,7 @@ out: delivery_mode); break; } + return result; } int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) @@ -1461,7 +1470,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) 
return 0; } -void kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) +int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { u32 reg = kvm_apic_get_reg(apic, lvt_type); int vector, mode, trig_mode; @@ -1470,8 +1479,10 @@ void kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); + return __apic_accept_irq(apic, mode, vector, 1, trig_mode, + NULL); } + return 0; } void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 61a73a01ab0b..c730ac9fe801 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -57,9 +57,9 @@ void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -void kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - unsigned long *dest_map); -void kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map); +int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ef1817b61cf4..e2e6b4473a96 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -91,8 +91,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (!kvm_is_dm_lowest_prio(irq)) { if (r < 0) r = 0; - kvm_apic_set_irq(vcpu, irq, dest_map); - r++; + r += kvm_apic_set_irq(vcpu, irq, dest_map); } else if (kvm_lapic_enabled(vcpu)) { if (!lowest) lowest = vcpu; @@ -101,10 +100,8 @@ int 
kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } } - if (lowest) { - kvm_apic_set_irq(lowest, irq, dest_map); - r = 1; - } + if (lowest) + r = kvm_apic_set_irq(lowest, irq, dest_map); return r; } -- cgit v1.2.3