From f1a9761fbb00639c5e73835f7f373ef834bb867f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 16 Mar 2022 00:55:37 +0000 Subject: KVM: x86: Allow userspace to opt out of hypercall patching KVM handles the VMCALL/VMMCALL instructions very strangely. Even though both of these instructions really should #UD when executed on the wrong vendor's hardware (i.e. VMCALL on SVM, VMMCALL on VMX), KVM replaces the guest's instruction with the appropriate instruction for the vendor. Nonetheless, older guest kernels without commit c1118b3602c2 ("x86: kvm: use alternatives for VMCALL vs. VMMCALL if kernel text is read-only") do not patch in the appropriate instruction using alternatives, likely motivating KVM's intervention. Add a quirk allowing userspace to opt out of hypercall patching. If the quirk is disabled, KVM synthesizes a #UD in the guest. Signed-off-by: Oliver Upton Message-Id: <20220316005538.2282772-2-oupton@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 93a2671a57cf..d9a825182b84 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1987,6 +1987,7 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_CD_NW_CLEARED | \ KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \ KVM_X86_QUIRK_OUT_7E_INC_RIP | \ - KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ + KVM_X86_QUIRK_FIX_HYPERCALL_INSN) #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.2.3 From a795cd43c5b5819b8ee94690f22caa5e2e4d8620 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:13 +0000 Subject: KVM: x86/xen: Use gfn_to_pfn_cache for runstate area Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-4-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d9a825182b84..3263147a166f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -608,10 +608,9 @@ struct kvm_vcpu_xen { u32 current_runstate; bool vcpu_info_set; bool vcpu_time_info_set; - bool runstate_set; struct gfn_to_hva_cache vcpu_info_cache; struct gfn_to_hva_cache vcpu_time_info_cache; - struct gfn_to_hva_cache runstate_cache; + struct gfn_to_pfn_cache runstate_cache; u64 last_steal; u64 runstate_entry_time; u64 runstate_times[4]; -- cgit v1.2.3 From 916d3608df8265a2e3f6214200dbb0b39a5ca383 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:14 +0000 Subject: KVM: x86: Use gfn_to_pfn_cache for pv_time Add a new kvm_setup_guest_pvclock() which parallels the existing kvm_setup_pvclock_page(). The latter will be removed once we convert all users to the gfn_to_pfn_cache version. Using the new cache, we can potentially let kvm_set_guest_paused() set the PVCLOCK_GUEST_STOPPED bit directly rather than having to delegate to the vCPU via KVM_REQ_CLOCK_UPDATE. But not yet. 
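With the pvclock page permanently mapped via the pfn cache, the update becomes a plain store sequence through gpc->khva following the usual pvclock version protocol, instead of a kvm_write_guest_cached() copy. A minimal sketch of that write side (illustrative only, not the exact code added by this patch; the helper name is invented):

    static void write_guest_pvclock_sketch(struct gfn_to_pfn_cache *gpc,
                                           struct pvclock_vcpu_time_info *hv_clock)
    {
        struct pvclock_vcpu_time_info *guest = gpc->khva;

        /* Make the version odd: tells the guest an update is in flight.
         * (Assumes hv_clock->version is even on entry.) */
        guest->version = hv_clock->version | 1;
        smp_wmb();

        guest->tsc_timestamp = hv_clock->tsc_timestamp;
        guest->system_time = hv_clock->system_time;
        guest->tsc_to_system_mul = hv_clock->tsc_to_system_mul;
        guest->tsc_shift = hv_clock->tsc_shift;
        guest->flags = hv_clock->flags;

        smp_wmb();
        /* Back to an even version: the snapshot is consistent again. */
        guest->version = hv_clock->version + 2;
    }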
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-5-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3263147a166f..5a0ff6a07431 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -751,8 +751,7 @@ struct kvm_vcpu_arch { gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; - struct gfn_to_hva_cache pv_time; - bool pv_time_enabled; + struct gfn_to_pfn_cache pv_time; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; -- cgit v1.2.3 From 7caf9571563eade546976d600dd023674b1c3185 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:15 +0000 Subject: KVM: x86/xen: Use gfn_to_pfn_cache for vcpu_info Currently, the fast path of kvm_xen_set_evtchn_fast() doesn't set the index bits in the target vCPU's evtchn_pending_sel, because it only has a userspace virtual address with which to do so. It just sets them in the kernel, and kvm_xen_has_interrupt() then completes the delivery to the actual vcpu_info structure when the vCPU runs. Using a gfn_to_pfn_cache allows kvm_xen_set_evtchn_fast() to do the full delivery in the common case. Clean up the fallback case too, by moving the deferred delivery out into a separate kvm_xen_inject_pending_events() function which isn't ever called in atomic contexts as __kvm_xen_has_interrupt() is. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-6-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5a0ff6a07431..fce4dca16e5b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -606,9 +606,8 @@ struct kvm_vcpu_hv { struct kvm_vcpu_xen { u64 hypercall_rip; u32 current_runstate; - bool vcpu_info_set; bool vcpu_time_info_set; - struct gfn_to_hva_cache vcpu_info_cache; + struct gfn_to_pfn_cache vcpu_info_cache; struct gfn_to_hva_cache vcpu_time_info_cache; struct gfn_to_pfn_cache runstate_cache; u64 last_steal; -- cgit v1.2.3 From 69d413cfcf77920bb4d7c4bbfbf305781fa53a0e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:16 +0000 Subject: KVM: x86/xen: Use gfn_to_pfn_cache for vcpu_time_info This switches the final pvclock to kvm_setup_pvclock_pfncache() and now the old kvm_setup_pvclock_page() can be removed. 
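All of these conversions share one access pattern on the consumer side: validate the cache under its read lock, refresh it outside the lock when it has been invalidated, then use the kernel mapping. Schematically (a simplified sketch; the real helpers live in virt/kvm/pfncache.c and their exact signatures may differ):

    unsigned long flags;

    read_lock_irqsave(&gpc->lock, flags);
    while (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) {
        read_unlock_irqrestore(&gpc->lock, flags);

        /* Re-map the page; fails if the gfn is no longer backed. */
        if (kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE))
            return;   /* caller falls back or retries later */

        read_lock_irqsave(&gpc->lock, flags);
    }

    /* gpc->khva is stable until the read lock is dropped. */
    read_unlock_irqrestore(&gpc->lock, flags);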
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-7-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fce4dca16e5b..7808491ebba8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -606,9 +606,8 @@ struct kvm_vcpu_hv { struct kvm_vcpu_xen { u64 hypercall_rip; u32 current_runstate; - bool vcpu_time_info_set; struct gfn_to_pfn_cache vcpu_info_cache; - struct gfn_to_hva_cache vcpu_time_info_cache; + struct gfn_to_pfn_cache vcpu_time_info_cache; struct gfn_to_pfn_cache runstate_cache; u64 last_steal; u64 runstate_entry_time; -- cgit v1.2.3 From 2fd6df2f2b47d4301b1ee0fe9d627d1c061a5988 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 3 Mar 2022 15:41:19 +0000 Subject: KVM: x86/xen: intercept EVTCHNOP_send from guests Userspace registers a sending @port to either deliver to an @eventfd or directly back to a local event channel port. After binding events the guest or host may wish to bind those events to a particular vcpu. This is usually done for unbound and interdomain events. Update requests are handled via the KVM_XEN_EVTCHN_UPDATE flag. Unregistered ports are handled by the emulator. Co-developed-by: Ankur Arora Co-developed-by: David Woodhouse Signed-off-by: Joao Martins Signed-off-by: Ankur Arora Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-10-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7808491ebba8..b20f7d99d702 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1024,6 +1024,7 @@ struct kvm_xen { bool long_mode; u8 upcall_vector; struct gfn_to_pfn_cache shinfo_cache; + struct idr evtchn_ports; }; enum kvm_irqchip_mode { -- cgit v1.2.3 From 942c2490c23f2800ad8143f5eb84a79b859aa743 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:21 +0000 Subject: KVM: x86/xen: Add KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID In order to intercept hypercalls such as VCPUOP_set_singleshot_timer, we need to be aware of the Xen CPU numbering. This looks a lot like the Hyper-V handling of vpidx, for obvious reasons.
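On the VMM side this is just one more vCPU attribute; setting it looks roughly like this (illustrative userspace sketch, error handling omitted; xen_vcpu_id is a placeholder for the VMM's own bookkeeping):

    struct kvm_xen_vcpu_attr va = {
        .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
        .u.vcpu_id = xen_vcpu_id,   /* the Xen / ACPI vCPU number */
    };

    ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &va);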
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-12-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b20f7d99d702..9e3408542b41 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -613,6 +613,7 @@ struct kvm_vcpu_xen { u64 runstate_entry_time; u64 runstate_times[4]; unsigned long evtchn_pending_sel; + u32 vcpu_id; /* The Xen / ACPI vCPU ID */ }; struct kvm_vcpu_arch { -- cgit v1.2.3 From 536395260582be7443b0b35b0bbb89ffe3947f62 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 3 Mar 2022 15:41:22 +0000 Subject: KVM: x86/xen: handle PV timers oneshot mode If the guest has offloaded the timer virq, handle the following hypercalls for programming the timer:

    VCPUOP_set_singleshot_timer
    VCPUOP_stop_singleshot_timer
    set_timer_op(timestamp_ns)

The event channel corresponding to the timer virq is then used to inject events once timer deadlines are met. For now we back the PV timer with an hrtimer. [ dwmw2: Add save/restore, 32-bit compat mode, immediate delivery, don't check timer in kvm_vcpu_has_event() ] Signed-off-by: Joao Martins Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-13-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9e3408542b41..8193d5285544 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -614,6 +614,10 @@ struct kvm_vcpu_xen { u64 runstate_times[4]; unsigned long evtchn_pending_sel; u32 vcpu_id; /* The Xen / ACPI vCPU ID */ + u32 timer_virq; + u64 timer_expires; /* In guest epoch */ + atomic_t timer_pending; + struct hrtimer timer; }; struct kvm_vcpu_arch { -- cgit v1.2.3 From 28d1629f751c4a5f9437fbaa0ee4ed81d1a8e587 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:23 +0000 Subject: KVM: x86/xen: Kernel acceleration for XENVER_version Turns out this is a fast path for PV guests because they use it to trigger the event channel upcall. So letting it bounce all the way up to userspace is not great.
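Guests issue HYPERVISOR_xen_version(XENVER_version, NULL) as a cheap way to force delivery of any pending event channel upcall, which is why it is hot. To answer it in the kernel, userspace only needs to tell KVM what value to return; the shape is roughly as follows (a sketch, assuming the new attribute is set through KVM_XEN_HVM_SET_ATTR; the attribute and field names are best-effort, not verified against the uapi header):

    struct kvm_xen_hvm_attr ha = {
        .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
        /* XENVER_version encoding: major << 16 | minor */
        .u.xen_version = (4 << 16) | 16,
    };

    ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &ha);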
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-14-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8193d5285544..f3fba9d8ddc6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1026,6 +1026,7 @@ struct msr_bitmap_range { /* Xen emulation context */ struct kvm_xen { + u32 xen_version; bool long_mode; u8 upcall_vector; struct gfn_to_pfn_cache shinfo_cache; -- cgit v1.2.3 From fde0451be8fb3208d4d146b8602d99ee8139e515 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:24 +0000 Subject: KVM: x86/xen: Support per-vCPU event channel upcall via local APIC Windows uses a per-vCPU vector, and it's delivered via the local APIC basically like an MSI (with associated EOI), unlike the traditional guest-wide vector which is just magically asserted by Xen (and in the KVM case by kvm_xen_has_interrupt() / kvm_cpu_get_extint()). Now that the kernel is able to raise event channel events for itself, being able to do so for Windows guests is also going to be useful. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-15-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f3fba9d8ddc6..998caf7a3ce9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -606,6 +606,7 @@ struct kvm_vcpu_hv { struct kvm_vcpu_xen { u64 hypercall_rip; u32 current_runstate; + u8 upcall_vector; struct gfn_to_pfn_cache vcpu_info_cache; struct gfn_to_pfn_cache vcpu_time_info_cache; struct gfn_to_pfn_cache runstate_cache; -- cgit v1.2.3 From 1a65105a5aba9f7c6ea68cf687e569d055a94685 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 3 Mar 2022 15:41:26 +0000 Subject: KVM: x86/xen: handle PV spinlocks slowpath Add support for the SCHEDOP_poll hypercall. This implementation is optimized for polling for a single channel, which is what Linux does. Polling for multiple channels is not especially efficient (and has not been tested). The PV spinlocks slow path uses this hypercall, and explicitly crashes if it's not supported.
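For reference, the guest side of that slow path boils down to polling the lock's event channel (simplified from the Linux guest's xen_poll_irq(); the port variable is a placeholder for the per-lock kicker port):

    evtchn_port_t port = lock_kicker_port;   /* hypothetical: channel the lock holder kicks */
    struct sched_poll poll = {
        .nr_ports = 1,
        .timeout = 0,       /* no timeout: block until kicked */
    };

    set_xen_guest_handle(poll.ports, &port);
    HYPERVISOR_sched_op(SCHEDOP_poll, &poll);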
[ dwmw2: Rework to use kvm_vcpu_halt(), not supported for 32-bit guests ] Signed-off-by: Boris Ostrovsky Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-17-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 998caf7a3ce9..5370744b789c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -619,6 +619,8 @@ struct kvm_vcpu_xen { u64 timer_expires; /* In guest epoch */ atomic_t timer_pending; struct hrtimer timer; + int poll_evtchn; + struct timer_list poll_timer; }; struct kvm_vcpu_arch { @@ -1032,6 +1034,7 @@ struct kvm_xen { u8 upcall_vector; struct gfn_to_pfn_cache shinfo_cache; struct idr evtchn_ports; + unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; }; enum kvm_irqchip_mode { -- cgit v1.2.3 From ffbb61d09fc56c85e28b110494f3788d0ed4d1f8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 25 Feb 2022 14:53:02 +0000 Subject: KVM: x86: Accept KVM_[GS]ET_TSC_KHZ as a VM ioctl. This sets the default TSC frequency for subsequently created vCPUs. Signed-off-by: David Woodhouse Message-Id: <20220225145304.36166-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5370744b789c..fac990cc189d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1126,6 +1126,8 @@ struct kvm_arch { u64 cur_tsc_generation; int nr_vcpus_matched_tsc; + u32 default_tsc_khz; + seqcount_raw_spinlock_t pvclock_sc; bool use_master_clock; u64 master_kernel_ns; -- cgit v1.2.3 From d5fa597ed87047117dc62ce1d38ba1d007442359 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 22 Mar 2022 19:40:49 +0200 Subject: KVM: x86: allow per cpu apicv inhibit reasons Add optional callback .vcpu_get_apicv_inhibit_reasons returning extra inhibit reasons that prevent APICv from working on this vCPU. 
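The check side then combines the VM-wide inhibits with whatever the vendor callback reports for this vCPU; kvm_vcpu_apicv_activated(), declared below, is essentially (sketch):

    bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
    {
        unsigned long vm_reasons = READ_ONCE(vcpu->kvm->arch.apicv_inhibit_reasons);
        unsigned long vcpu_reasons =
                static_call(kvm_x86_vcpu_get_apicv_inhibit_reasons)(vcpu);

        /* The callback is optional; the static call returns 0 when a
         * vendor does not implement it. */
        return (vm_reasons | vcpu_reasons) == 0;
    }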
Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Message-Id: <20220322174050.241850-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fac990cc189d..676705ad1e23 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1507,6 +1507,11 @@ struct kvm_x86_ops { int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); + + /* + * Returns vCPU specific APICv inhibit reasons + */ + unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); }; struct kvm_x86_nested_ops { @@ -1807,6 +1812,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); bool kvm_apicv_activated(struct kvm *kvm); +bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, enum kvm_apicv_inhibit reason, bool set); -- cgit v1.2.3 From 8176472563fb1a233f46f3f0441738c82afd88da Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Mon, 28 Feb 2022 11:07:49 +0800 Subject: kvm: x86: Adjust the location of pkru_mask of kvm_mmu to reduce memory Move the pkru_mask field so it immediately follows direct_map, restoring 8-byte alignment. This reduces the size of struct kvm_mmu by 8 bytes. Signed-off-by: Peng Hao Message-Id: <20220228030749.88353-1-flyingpeng@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f1dfa066068d..e9c9a23d6623 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -443,14 +443,6 @@ struct kvm_mmu { u8 shadow_root_level; u8 ept_ad; bool direct_map; - struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; - - /* - * Bitmap; bit set = permission fault - * Byte index: page fault error code [4:1] - * Bit index: pte permissions in ACC_* format - */ - u8 permissions[16]; /* * The pkru_mask indicates if protection key checks are needed. It @@ -460,6 +452,15 @@ struct kvm_mmu { */ u32 pkru_mask; + struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; + + /* + * Bitmap; bit set = permission fault + * Byte index: page fault error code [4:1] + * Bit index: pte permissions in ACC_* format + */ + u8 permissions[16]; + u64 *pae_root; u64 *pml4_root; u64 *pml5_root; -- cgit v1.2.3 From fdc298da866165ec0288fe227982f1aa0467bf5c Mon Sep 17 00:00:00 2001 From: Like Xu Date: Tue, 29 Mar 2022 23:50:51 +0000 Subject: KVM: x86: Move kvm_ops_static_call_update() to x86.c kvm_ops_static_call_update() is defined in kvm_host.h. That's completely unnecessary; it should have exactly one caller, kvm_arch_hardware_setup(). Move the helper to x86.c and have it do the actual memcpy() of the ops in addition to the static call updates. This will also allow for cleanly giving kvm_pmu_ops static_call treatment.
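After the move, x86.c owns both the copy of the ops and the static call updates in one place; the helper ends up roughly as follows (a sketch reconstructed from the hunk removed below plus the memcpy() described above; the renamed helper and the #include target, asm/kvm-x86-ops.h, are assumptions):

    static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
    {
        memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));

    #define __KVM_X86_OP(func) \
        static_call_update(kvm_x86_##func, kvm_x86_ops.func);
    #define KVM_X86_OP(func) \
        WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
    #define KVM_X86_OP_OPTIONAL __KVM_X86_OP
    #define KVM_X86_OP_OPTIONAL_RET0(func) \
        static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
                           (void *)__static_call_return0);
    #include <asm/kvm-x86-ops.h>
    #undef __KVM_X86_OP
    }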
Suggested-by: Sean Christopherson Signed-off-by: Like Xu [sean: Move memcpy() into the helper and rename accordingly] Signed-off-by: Sean Christopherson Message-Id: <20220329235054.3534728-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e9c9a23d6623..25d1aac74c55 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1563,20 +1563,6 @@ extern struct kvm_x86_ops kvm_x86_ops; #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP #include -static inline void kvm_ops_static_call_update(void) -{ -#define __KVM_X86_OP(func) \ - static_call_update(kvm_x86_##func, kvm_x86_ops.func); -#define KVM_X86_OP(func) \ - WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func) -#define KVM_X86_OP_OPTIONAL __KVM_X86_OP -#define KVM_X86_OP_OPTIONAL_RET0(func) \ - static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \ - (void *)__static_call_return0); -#include -#undef __KVM_X86_OP -} - #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { -- cgit v1.2.3 From 34886e796c413273908b036df57cc9ae731badae Mon Sep 17 00:00:00 2001 From: Like Xu Date: Tue, 29 Mar 2022 23:50:53 +0000 Subject: KVM: x86: Move .pmu_ops to kvm_x86_init_ops and tag as __initdata The pmu_ops should be moved to kvm_x86_init_ops and tagged as __initdata. That'll save those precious few bytes, and more importantly make the original ops unreachable, i.e. make it harder to sneak in post-init modification bugs. Suggested-by: Sean Christopherson Signed-off-by: Like Xu Reviewed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-Id: <20220329235054.3534728-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 25d1aac74c55..e1c695f72b8b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1465,8 +1465,6 @@ struct kvm_x86_ops { int cpu_dirty_log_size; void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); - /* pmu operations of sub-arch */ - const struct kvm_pmu_ops *pmu_ops; const struct kvm_x86_nested_ops *nested_ops; void (*vcpu_blocking)(struct kvm_vcpu *vcpu); @@ -1542,6 +1540,7 @@ struct kvm_x86_init_ops { unsigned int (*handle_intel_pt_intr)(void); struct kvm_x86_ops *runtime_ops; + struct kvm_pmu_ops *pmu_ops; }; struct kvm_arch_async_pf { -- cgit v1.2.3 From 6819af7597d87d40769b47bb377472877a6b56c0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 3 Mar 2022 20:20:17 -0800 Subject: KVM: x86: Clean up and document nested #PF workaround Replace the per-vendor hack-a-fix for KVM's #PF => #PF => #DF workaround with an explicit, common workaround in kvm_inject_emulated_page_fault(). Aside from being a hack, the current approach is brittle and incomplete, e.g. nSVM's KVM_SET_NESTED_STATE fails to set ->inject_page_fault(), and nVMX fails to apply the workaround when VMX is intercepting #PF due to allow_smaller_maxphyaddr=1. 
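Conceptually, the common workaround keys off the new nested op added below: before injecting the emulated #PF into L2, ask the vendor nested code whether L1 wants to intercept it (sketch, not the literal patch):

    /* In kvm_inject_emulated_page_fault(), roughly: */
    if (is_guest_mode(vcpu) &&
        kvm_x86_ops.nested_ops->handle_page_fault_workaround &&
        kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault))
        return;   /* reflected to L1; don't inject into L2 and risk a #DF */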
Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e1c695f72b8b..e46bd289e5df 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1516,6 +1516,8 @@ struct kvm_x86_ops { struct kvm_x86_nested_ops { void (*leave_nested)(struct kvm_vcpu *vcpu); int (*check_events)(struct kvm_vcpu *vcpu); + bool (*handle_page_fault_workaround)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); void (*triple_fault)(struct kvm_vcpu *vcpu); int (*get_state)(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From e5ed0fb01004f93ddf8a0c632cbc8a3f1ea5b518 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 11 Feb 2022 06:50:11 -0500 Subject: KVM: x86/mmu: split cpu_role from mmu_role Snapshot the state of the processor registers that govern page walk into a new field of struct kvm_mmu. This is a more natural representation than having it *mostly* in mmu_role but not exclusively; the delta right now is represented in other fields, such as root_level. The nested MMU now has only the CPU role; and in fact the new function kvm_calc_cpu_role is analogous to the previous kvm_calc_nested_mmu_role, except that it has role.base.direct equal to !CR0.PG. For a walk-only MMU, "direct" has no meaning, but we set it to !CR0.PG so that role.ext.cr0_pg can go away in a future patch. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e46bd289e5df..50edf52a3ef6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -438,6 +438,7 @@ struct kvm_mmu { struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); struct kvm_mmu_root_info root; + union kvm_mmu_role cpu_role; union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; -- cgit v1.2.3 From ec283cb1dcb934d1a861e5e25ce843e033ee4ab7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Feb 2022 07:32:51 -0500 Subject: KVM: x86/mmu: remove ept_ad field The ept_ad field is used during page walk to determine if the guest PTEs have accessed and dirty bits. In the MMU role, the ad_disabled bit represents whether the *shadow* PTEs have the bits, so it would be incorrect to replace PT_HAVE_ACCESSED_DIRTY with just !mmu->mmu_role.base.ad_disabled. However, the similar field in the CPU mode, ad_disabled, is initialized correctly: to the opposite value of ept_ad for shadow EPT, and zero for non-EPT guest paging modes (which always have A/D bits). It is therefore possible to compute PT_HAVE_ACCESSED_DIRTY from the CPU mode, like other page-format fields; it just has to be inverted to account for the different polarity. In fact, now that the CPU mode is distinct from the MMU roles, it would even be possible to remove PT_HAVE_ACCESSED_DIRTY macro altogether, and use !mmu->cpu_role.base.ad_disabled instead. 
I am not doing this because the macro has a small effect in terms of dead code elimination:

      text	 data	bss	   dec	  hex
    103544	16665	112	120321	1d601	# as of this patch
    103746	16665	112	120523	1d6cb	# without PT_HAVE_ACCESSED_DIRTY

Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 50edf52a3ef6..a299236cfde5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -442,7 +442,6 @@ struct kvm_mmu { union kvm_mmu_role cpu_role; union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; - u8 ept_ad; bool direct_map; /* -- cgit v1.2.3 From 7a458f0e1ba150a6ea012171a43c4b947f1d825d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Feb 2022 08:46:24 -0500 Subject: KVM: x86/mmu: remove extended bits from mmu_role, rename field mmu_role represents the role of the root of the page tables. It does not need any extended bits, as those govern only KVM's page table walking; the is_* functions used for page table walking always use the CPU role. ext.valid is not present anymore in the MMU role, but an all-zero MMU role is impossible because the level field is never zero in the MMU role. So just zap the whole mmu_role in order to force invalidation after CPUID is updated. While making this change, which requires touching almost every occurrence of "mmu_role", rename it to "root_role". Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a299236cfde5..c81221d03a1b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -439,7 +439,7 @@ struct kvm_mmu { void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); struct kvm_mmu_root_info root; union kvm_mmu_role cpu_role; - union kvm_mmu_role mmu_role; + union kvm_mmu_page_role root_role; u8 root_level; u8 shadow_root_level; bool direct_map; -- cgit v1.2.3 From 7a7ae8292391c4d53c4340e606bf48776c3449e7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Feb 2022 07:38:32 -0500 Subject: KVM: x86/mmu: rename kvm_mmu_role union It is quite confusing that the "full" union is called kvm_mmu_role but is used for the "cpu_role" field of struct kvm_mmu. Rename it to kvm_cpu_role. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c81221d03a1b..6bc5550ae530 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -281,7 +281,7 @@ struct kvm_kernel_irq_routing_entry; /* * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page * also includes TDP pages) to determine whether or not a page can be used in - * the given MMU context. This is a subset of the overall kvm_mmu_role to + * the given MMU context. This is a subset of the overall kvm_cpu_role to * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating * 2 bytes per gfn instead of 4 bytes per gfn.
* @@ -378,7 +378,7 @@ union kvm_mmu_extended_role { }; }; -union kvm_mmu_role { +union kvm_cpu_role { u64 as_u64; struct { union kvm_mmu_page_role base; -- cgit v1.2.3 From faf729621c9609367a0714f5383df67fdd8d021c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Feb 2022 07:38:51 -0500 Subject: KVM: x86/mmu: remove redundant bits from extended role Before the separation of the CPU and the MMU role, CR0.PG was not available in the base MMU role, because two-dimensional paging always used direct=1 in the MMU role. However, now that the raw role is snapshotted in mmu->cpu_role, the value of CR0.PG always matches both !cpu_role.base.direct and cpu_role.base.level > 0. There is no need to store it again in union kvm_mmu_extended_role; instead, write an is_cr0_pg accessor by hand that takes care of the conversion. Use cpu_role.base.level since the future of the direct field is unclear. Likewise, CR4.PAE is now always present in the CPU role as !cpu_role.base.has_4_byte_gpte. The inversion makes certain tests on the MMU role easier, and is easily hidden by the is_cr4_pae accessor when operating on the CPU role. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6bc5550ae530..52ceeadbed28 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -367,8 +367,6 @@ union kvm_mmu_extended_role { struct { unsigned int valid:1; unsigned int execonly:1; - unsigned int cr0_pg:1; - unsigned int cr4_pae:1; unsigned int cr4_pse:1; unsigned int cr4_pke:1; unsigned int cr4_smap:1; -- cgit v1.2.3 From a972e29c1d6c957242a23b42f355b1fdf721cbd4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Feb 2022 07:41:19 -0500 Subject: KVM: x86/mmu: replace shadow_root_level with root_role.level root_role.level is always the same value as shadow_root_level:

 - it's kvm_mmu_get_tdp_level(vcpu) when going through init_kvm_tdp_mmu
 - it's the level argument when going through kvm_init_shadow_ept_mmu
 - it's assigned directly from new_role.base.level when going through shadow_mmu_init_context

Remove the duplication and get the level directly from the role. Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 52ceeadbed28..35056341799d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -439,7 +439,6 @@ struct kvm_mmu { union kvm_cpu_role cpu_role; union kvm_mmu_page_role root_role; u8 root_level; - u8 shadow_root_level; bool direct_map; /* -- cgit v1.2.3 From 4d25502aa12ef1fb01e599cbfd341a8d436f4b8b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Feb 2022 07:42:22 -0500 Subject: KVM: x86/mmu: replace root_level with cpu_role.base.level Remove another duplicate field of struct kvm_mmu. This time it's the root level for page table walking; the separate field is always initialized as cpu_role.base.level, so its users can look up the CPU mode directly instead.
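The conversion is mechanical: every reader of the removed field switches to the snapshotted role, e.g. (sketch):

    /* before */ int level = mmu->root_level;
    /* after  */ int level = mmu->cpu_role.base.level;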
Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35056341799d..aea39fe03a99 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -438,7 +438,6 @@ struct kvm_mmu { struct kvm_mmu_root_info root; union kvm_cpu_role cpu_role; union kvm_mmu_page_role root_role; - u8 root_level; bool direct_map; /* -- cgit v1.2.3 From 347a0d0ded16a2e59c35b43ace7ad2b53fb6df57 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Feb 2022 08:00:56 -0500 Subject: KVM: x86/mmu: replace direct_map with root_role.direct direct_map is always equal to the direct field of the root page's role:

 - for shadow paging, direct_map is true if CR0.PG=0 and root_role.direct is copied from cpu_role.base.direct
 - for TDP, it is always true and root_role.direct is also always true
 - for shadow TDP, it is always false and root_role.direct is also always false

Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index aea39fe03a99..752a6d2357ce 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -438,7 +438,6 @@ struct kvm_mmu { struct kvm_mmu_root_info root; union kvm_cpu_role cpu_role; union kvm_mmu_page_role root_role; - bool direct_map; /* * The pkru_mask indicates if protection key checks are needed. It -- cgit v1.2.3 From 84e5ffd045f33e4fa32370135436d987478d0bf7 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 Apr 2022 21:12:04 +0800 Subject: KVM: X86/MMU: Fix shadowing 5-level NPT for 4-level NPT L1 guest When shadowing 5-level NPT for a 4-level NPT L1 guest, the root_sp is allocated with role.level = 5 and the guest pagetable's root gfn. And root_sp->spt[0] is also allocated with the same gfn and the same role except role.level = 4. Luckily, they are different shadow pages, but only root_sp->spt[0] is the real translation of the guest pagetable. Here comes a problem: if the guest switches from gCR4_LA57=0 to gCR4_LA57=1 (or vice versa) and uses the same gfn as the root page for nested NPT before and after the switch, the host (hCR4_LA57=1) might reuse the same root_sp for the guest even though gCR4_LA57 has changed. The guest will see unexpected pages mapped, and L2 may exploit the bug to hurt L1. It is lucky that the problem can't hurt L0. Three special cases need to be handled: The root_sp should sometimes behave as if role.direct=1: its contents are not backed by gptes, and root_sp->gfns is meaningless. (For a normal high level sp in shadow paging, sp->gfns is often unused and kept zero, but it could be relevant and meaningful if sp->gfns is used because they are backed by concrete gptes.) In this case, the root_sp is just a portal contributing root_sp->spt[0]; root_sp->gfns should not be used, and root_sp->spt[0] should not be dropped if gpte[0] of the guest root pagetable changes. Such a root_sp should not be accounted either. So add role.passthrough to distinguish the shadow pages in the hash when gCR4_LA57 is toggled, and fix the above special cases by using it in kvm_mmu_page_{get|set}_gfn() and sp_has_gptes().
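The helper named above then makes the special-casing explicit; sp_has_gptes() is essentially (sketch):

    static bool sp_has_gptes(struct kvm_mmu_page *sp)
    {
        if (sp->role.direct)
            return false;

        /* A passthrough root (e.g. a 5-level root shadowing a 4-level
         * guest) has no gptes behind it either. */
        if (sp->role.passthrough)
            return false;

        return true;
    }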
Signed-off-by: Lai Jiangshan Message-Id: <20220420131204.2850-3-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 752a6d2357ce..f164c6c1514a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -285,7 +285,7 @@ struct kvm_kernel_irq_routing_entry; * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating * 2 bytes per gfn instead of 4 bytes per gfn. * - * Indirect upper-level shadow pages are tracked for write-protection via + * Upper-level shadow pages having gptes are tracked for write-protection via * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise * gfn_track will overflow and explosions will ensue. @@ -331,7 +331,8 @@ union kvm_mmu_page_role { unsigned smap_andnot_wp:1; unsigned ad_disabled:1; unsigned guest_mode:1; - unsigned :6; + unsigned passthrough:1; + unsigned :5; /* * This is left at the top of the word so that -- cgit v1.2.3 From 1075d41efd598d3fd4d52a1e1116b20979975135 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 23 Apr 2022 03:47:49 +0000 Subject: KVM: x86/mmu: Expand and clean up page fault stats Expand and clean up the page fault stats. The current stats are at best incomplete, and at worst misleading. Differentiate between faults that are actually fixed vs those that result in an MMIO SPTE being created, track faults that are spurious, faults that trigger emulation, faults that are fixed in the fast path, and last but not least, track the number of faults that are taken. Note, the number of faults that require emulation for write-protected shadow pages can roughly be calculated by subtracting the number of MMIO SPTEs created from the overall number of faults that trigger emulation. Signed-off-by: Sean Christopherson Message-Id: <20220423034752.1161007-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include/asm/kvm_host.h') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f164c6c1514a..c5fb4115176d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1269,7 +1269,12 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { struct kvm_vcpu_stat_generic generic; + u64 pf_taken; u64 pf_fixed; + u64 pf_emulate; + u64 pf_spurious; + u64 pf_fast; + u64 pf_mmio_spte_created; u64 pf_guest; u64 tlb_flush; u64 invlpg; -- cgit v1.2.3
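As a usage note for the stats in the last commit, the write-protection emulation count called out in its message is simple consumer-side arithmetic over the new fields (hypothetical tooling code, not part of the patch):

    /* Faults emulated because of write-protected shadow pages, roughly: */
    u64 pf_wp_emulated = stat.pf_emulate - stat.pf_mmio_spte_created;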