LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v3 0/3] SVM 5-level page table support
@ 2021-08-18 16:55 Wei Huang
  2021-08-18 16:55 ` [PATCH v3 1/3] KVM: x86: Allow CPU to force vendor-specific TDP level Wei Huang
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Wei Huang @ 2021-08-18 16:55 UTC (permalink / raw)
  To: kvm
  Cc: linux-kernel, pbonzini, vkuznets, seanjc, wanpengli, jmattson,
	joro, tglx, mingo, bp, x86, hpa, wei.huang2

This patch set adds 5-level page table support for AMD SVM. When the
5-level page table is enabled on host OS, the nested page table for guest
VMs will use the same format as host OS (i.e. 5-level NPT). These patches
were tested with various combinations of different settings and test cases
(nested/regular VMs, AMD64/i686 kernels, kvm-unit-tests, etc.)

v2->v3:
 * Change the way of building root_hpa by following the existing flow (Sean)

v1->v2:
 * Remove v1's arch-specific get_tdp_level() and add a new parameter,
   tdp_forced_root_level, to allow forced TDP level (Sean)
 * Add additional comment on tdp_root table chaining trick and change the
   PML root table allocation code (Sean)
 * Revise Patch 1's commit msg (Sean and Jim)

Thanks,
-Wei

Wei Huang (3):
  KVM: x86: Allow CPU to force vendor-specific TDP level
  KVM: x86: Handle the case of 5-level shadow page table
  KVM: SVM: Add 5-level page table support for SVM

 arch/x86/include/asm/kvm_host.h |  6 ++--
 arch/x86/kvm/mmu/mmu.c          | 56 ++++++++++++++++++++++-----------
 arch/x86/kvm/svm/svm.c          | 13 ++++----
 arch/x86/kvm/vmx/vmx.c          |  3 +-
 4 files changed, 49 insertions(+), 29 deletions(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 1/3] KVM: x86: Allow CPU to force vendor-specific TDP level
  2021-08-18 16:55 [PATCH v3 0/3] SVM 5-level page table support Wei Huang
@ 2021-08-18 16:55 ` Wei Huang
  2021-08-18 16:55 ` [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table Wei Huang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: Wei Huang @ 2021-08-18 16:55 UTC (permalink / raw)
  To: kvm
  Cc: linux-kernel, pbonzini, vkuznets, seanjc, wanpengli, jmattson,
	joro, tglx, mingo, bp, x86, hpa, wei.huang2

AMD future CPUs will require a 5-level NPT if host CR4.LA57 is set.
To prevent kvm_mmu_get_tdp_level() from incorrectly changing NPT level
on behalf of CPUs, add a new parameter in kvm_configure_mmu() to force
a fixed TDP level.

Signed-off-by: Wei Huang <wei.huang2@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  5 ++---
 arch/x86/kvm/mmu/mmu.c          | 10 ++++++++--
 arch/x86/kvm/svm/svm.c          |  4 +++-
 arch/x86/kvm/vmx/vmx.c          |  3 ++-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af6ce8d4c86a..9daa86aa5649 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -723,7 +723,6 @@ struct kvm_vcpu_arch {
 
 	u64 reserved_gpa_bits;
 	int maxphyaddr;
-	int max_tdp_level;
 
 	/* emulate context */
 
@@ -1754,8 +1753,8 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
 void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
 
-void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
-		       int tdp_huge_page_level);
+void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
+		       int tdp_max_root_level, int tdp_huge_page_level);
 
 static inline u16 kvm_read_ldt(void)
 {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 47b765270239..14eac57bdaaa 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -97,6 +97,7 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
 bool tdp_enabled = false;
 
 static int max_huge_page_level __read_mostly;
+static int tdp_root_level __read_mostly;
 static int max_tdp_level __read_mostly;
 
 enum {
@@ -4588,6 +4589,10 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 
 static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
 {
+	/* tdp_root_level is architecture forced level, use it if nonzero */
+	if (tdp_root_level)
+		return tdp_root_level;
+
 	/* Use 5-level TDP if and only if it's useful/necessary. */
 	if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
 		return 4;
@@ -5279,10 +5284,11 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 	 */
 }
 
-void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
-		       int tdp_huge_page_level)
+void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
+		       int tdp_max_root_level, int tdp_huge_page_level)
 {
 	tdp_enabled = enable_tdp;
+	tdp_root_level = tdp_forced_root_level;
 	max_tdp_level = tdp_max_root_level;
 
 	/*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 69639f9624f5..b34840a2ffa7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1015,7 +1015,9 @@ static __init int svm_hardware_setup(void)
 	if (!boot_cpu_has(X86_FEATURE_NPT))
 		npt_enabled = false;
 
-	kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
+	/* Force VM NPT level equal to the host's max NPT level */
+	kvm_configure_mmu(npt_enabled, get_max_npt_level(),
+			  get_max_npt_level(), PG_LEVEL_1G);
 	pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
 
 	/* Note, SEV setup consumes npt_enabled. */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 927a552393b9..034e1397c7d5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7803,7 +7803,8 @@ static __init int hardware_setup(void)
 		ept_lpage_level = PG_LEVEL_2M;
 	else
 		ept_lpage_level = PG_LEVEL_4K;
-	kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level);
+	kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
+			  ept_lpage_level);
 
 	/*
 	 * Only enable PML when hardware supports PML feature, and both EPT
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table
  2021-08-18 16:55 [PATCH v3 0/3] SVM 5-level page table support Wei Huang
  2021-08-18 16:55 ` [PATCH v3 1/3] KVM: x86: Allow CPU to force vendor-specific TDP level Wei Huang
@ 2021-08-18 16:55 ` Wei Huang
  2021-08-18 17:15   ` Sean Christopherson
  2021-08-18 18:00   ` Tom Lendacky
  2021-08-18 16:55 ` [PATCH v3 3/3] KVM: SVM: Add 5-level page table support for SVM Wei Huang
  2021-08-19 16:43 ` [PATCH v3 0/3] SVM 5-level page table support Paolo Bonzini
  3 siblings, 2 replies; 15+ messages in thread
From: Wei Huang @ 2021-08-18 16:55 UTC (permalink / raw)
  To: kvm
  Cc: linux-kernel, pbonzini, vkuznets, seanjc, wanpengli, jmattson,
	joro, tglx, mingo, bp, x86, hpa, wei.huang2

When the 5-level page table CPU flag is exposed, KVM code needs to handle
this case by pointing mmu->root_hpa to a properly-constructed 5-level page
table.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu/mmu.c          | 46 +++++++++++++++++++++------------
 2 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9daa86aa5649..0024b72a2b32 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -447,6 +447,7 @@ struct kvm_mmu {
 
 	u64 *pae_root;
 	u64 *pml4_root;
+	u64 *pml5_root;
 
 	/*
 	 * check zero bits on shadow page table entries, these
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 14eac57bdaaa..0fb0937c1ea7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3456,15 +3456,22 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * the shadow page table may be a PAE or a long mode page table.
 	 */
 	pm_mask = PT_PRESENT_MASK | shadow_me_mask;
-	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
 		if (WARN_ON_ONCE(!mmu->pml4_root)) {
 			r = -EIO;
 			goto out_unlock;
 		}
-
 		mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
+
+		if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) {
+			if (WARN_ON_ONCE(!mmu->pml5_root)) {
+				r = -EIO;
+				goto out_unlock;
+			}
+			mmu->pml5_root[0] = __pa(mmu->pml4_root) | pm_mask;
+		}
 	}
 
 	for (i = 0; i < 4; ++i) {
@@ -3483,7 +3490,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		mmu->pae_root[i] = root | pm_mask;
 	}
 
-	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
+	if (mmu->shadow_root_level == PT64_ROOT_5LEVEL)
+		mmu->root_hpa = __pa(mmu->pml5_root);
+	else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
 		mmu->root_hpa = __pa(mmu->pml4_root);
 	else
 		mmu->root_hpa = __pa(mmu->pae_root);
@@ -3499,7 +3508,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-	u64 *pml4_root, *pae_root;
+	u64 *pml5_root, *pml4_root, *pae_root;
 
 	/*
 	 * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3511,21 +3520,15 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	    mmu->shadow_root_level < PT64_ROOT_4LEVEL)
 		return 0;
 
-	/*
-	 * This mess only works with 4-level paging and needs to be updated to
-	 * work with 5-level paging.
-	 */
-	if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
-		return -EIO;
-
-	if (mmu->pae_root && mmu->pml4_root)
+	if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)
 		return 0;
 
 	/*
 	 * The special roots should always be allocated in concert.  Yell and
 	 * bail if KVM ends up in a state where only one of the roots is valid.
 	 */
-	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
+	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root ||
+			 mmu->pml5_root))
 		return -EIO;
 
 	/*
@@ -3537,15 +3540,25 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 		return -ENOMEM;
 
 	pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
-	if (!pml4_root) {
-		free_page((unsigned long)pae_root);
-		return -ENOMEM;
+	if (!pml4_root)
+		goto err_pml4;
+
+	if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) {
+		pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+		if (!pml5_root)
+			goto err_pml5;
 	}
 
 	mmu->pae_root = pae_root;
 	mmu->pml4_root = pml4_root;
+	mmu->pml5_root = pml5_root;
 
 	return 0;
+err_pml5:
+	free_page((unsigned long)pml4_root);
+err_pml4:
+	free_page((unsigned long)pae_root);
+	return -ENOMEM;
 }
 
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -5364,6 +5377,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
 		set_memory_encrypted((unsigned long)mmu->pae_root, 1);
 	free_page((unsigned long)mmu->pae_root);
 	free_page((unsigned long)mmu->pml4_root);
+	free_page((unsigned long)mmu->pml5_root);
 }
 
 static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 3/3] KVM: SVM: Add 5-level page table support for SVM
  2021-08-18 16:55 [PATCH v3 0/3] SVM 5-level page table support Wei Huang
  2021-08-18 16:55 ` [PATCH v3 1/3] KVM: x86: Allow CPU to force vendor-specific TDP level Wei Huang
  2021-08-18 16:55 ` [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table Wei Huang
@ 2021-08-18 16:55 ` Wei Huang
  2021-08-18 17:32   ` Sean Christopherson
  2021-08-19 16:43 ` [PATCH v3 0/3] SVM 5-level page table support Paolo Bonzini
  3 siblings, 1 reply; 15+ messages in thread
From: Wei Huang @ 2021-08-18 16:55 UTC (permalink / raw)
  To: kvm
  Cc: linux-kernel, pbonzini, vkuznets, seanjc, wanpengli, jmattson,
	joro, tglx, mingo, bp, x86, hpa, wei.huang2

When the 5-level page table is enabled on host OS, the nested page table
for guest VMs must use 5-level as well. Update get_max_npt_level() function
to reflect this requirement. In the meantime, remove the code that
prevents kvm-amd driver from being loaded when 5-level page table is
detected.

Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b34840a2ffa7..ecc4bb8e4ea0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -261,7 +261,9 @@ u32 svm_msrpm_offset(u32 msr)
 static int get_max_npt_level(void)
 {
 #ifdef CONFIG_X86_64
-	return PT64_ROOT_4LEVEL;
+	bool la57 = (cr4_read_shadow() & X86_CR4_LA57) != 0;
+
+	return la57 ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 #else
 	return PT32E_ROOT_LEVEL;
 #endif
@@ -462,11 +464,6 @@ static int has_svm(void)
 		return 0;
 	}
 
-	if (pgtable_l5_enabled()) {
-		pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
-		return 0;
-	}
-
 	return 1;
 }
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table
  2021-08-18 16:55 ` [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table Wei Huang
@ 2021-08-18 17:15   ` Sean Christopherson
  2021-08-19 16:36     ` Paolo Bonzini
  2021-08-18 18:00   ` Tom Lendacky
  1 sibling, 1 reply; 15+ messages in thread
From: Sean Christopherson @ 2021-08-18 17:15 UTC (permalink / raw)
  To: Wei Huang
  Cc: kvm, linux-kernel, pbonzini, vkuznets, wanpengli, jmattson, joro,
	tglx, mingo, bp, x86, hpa

The shortlog is very misleading.  KVM already supports 5-level paging for
traditional shadow paging.  This is specifically for shadowing nNPT, and it's
specifically for shadow everything _except_ 5-level nNPT.  Something like:

KVM: x86/mmu: Support shadowing nNPT when 5-level paging is enabled in host


On Wed, Aug 18, 2021, Wei Huang wrote:
> When the 5-level page table CPU flag is exposed, KVM code needs to handle
> this case by pointing mmu->root_hpa to a properly-constructed 5-level page
> table.

Similarly, this is wrong, or maybe just poorly worded.  This has nothing to do
with LA57 being exposed to the guest, it's purely the host using 5-level paging
and NPT being enabled and exposed to L1.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 3/3] KVM: SVM: Add 5-level page table support for SVM
  2021-08-18 16:55 ` [PATCH v3 3/3] KVM: SVM: Add 5-level page table support for SVM Wei Huang
@ 2021-08-18 17:32   ` Sean Christopherson
  2021-08-19 16:38     ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Sean Christopherson @ 2021-08-18 17:32 UTC (permalink / raw)
  To: Wei Huang
  Cc: kvm, linux-kernel, pbonzini, vkuznets, wanpengli, jmattson, joro,
	tglx, mingo, bp, x86, hpa

On Wed, Aug 18, 2021, Wei Huang wrote:
> When the 5-level page table is enabled on host OS, the nested page table
> for guest VMs must use 5-level as well. Update get_npt_level() function
> to reflect this requirement. In the meanwhile, remove the code that
> prevents kvm-amd driver from being loaded when 5-level page table is
> detected.
> 
> Signed-off-by: Wei Huang <wei.huang2@amd.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

If this patch authored by Paolo, he needs to be attributed via From:.  If Paolo
is a co-author, he needs a Co-developed-by:.  If either of those is true, your
SOB needs to be last in the chain since you are the last handler of the patch.
If neither is true, Paolo's SOB should be removed.

> ---
>  arch/x86/kvm/svm/svm.c | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index b34840a2ffa7..ecc4bb8e4ea0 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -261,7 +261,9 @@ u32 svm_msrpm_offset(u32 msr)
>  static int get_max_npt_level(void)
>  {
>  #ifdef CONFIG_X86_64
> -	return PT64_ROOT_4LEVEL;
> +	bool la57 = (cr4_read_shadow() & X86_CR4_LA57) != 0;
> +
> +	return la57 ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;

Why obfuscate this?  KVM is completely hosed if pgtable_l5_enabled() doesn't
match host CR4.  E.g.

	return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;

>  #else
>  	return PT32E_ROOT_LEVEL;
>  #endif
> @@ -462,11 +464,6 @@ static int has_svm(void)
>  		return 0;
>  	}
>  
> -	if (pgtable_l5_enabled()) {
> -		pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
> -		return 0;
> -	}
> -
>  	return 1;
>  }
>  
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table
  2021-08-18 16:55 ` [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table Wei Huang
  2021-08-18 17:15   ` Sean Christopherson
@ 2021-08-18 18:00   ` Tom Lendacky
  1 sibling, 0 replies; 15+ messages in thread
From: Tom Lendacky @ 2021-08-18 18:00 UTC (permalink / raw)
  To: Wei Huang, kvm
  Cc: linux-kernel, pbonzini, vkuznets, seanjc, wanpengli, jmattson,
	joro, tglx, mingo, bp, x86, hpa

On 8/18/21 11:55 AM, Wei Huang wrote:
> When the 5-level page table CPU flag is exposed, KVM code needs to handle
> this case by pointing mmu->root_hpa to a properly-constructed 5-level page
> table.
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Wei Huang <wei.huang2@amd.com>
> ---

...

>  	pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
> -	if (!pml4_root) {
> -		free_page((unsigned long)pae_root);
> -		return -ENOMEM;
> +	if (!pml4_root)
> +		goto err_pml4;
> +
> +	if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) {
> +		pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
> +		if (!pml5_root)
> +			goto err_pml5;
>  	}
>  
>  	mmu->pae_root = pae_root;
>  	mmu->pml4_root = pml4_root;
> +	mmu->pml5_root = pml5_root;

It looks like pml5_root could be used uninitialized here. You should
initialize it to NULL or set it to NULL as an else path of the new check
above.

Thanks,
Tom

>  
>  	return 0;
> +err_pml5:
> +	free_page((unsigned long)pml4_root);
> +err_pml4:
> +	free_page((unsigned long)pae_root);
> +	return -ENOMEM;
>  }
>  
>  void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
> @@ -5364,6 +5377,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
>  		set_memory_encrypted((unsigned long)mmu->pae_root, 1);
>  	free_page((unsigned long)mmu->pae_root);
>  	free_page((unsigned long)mmu->pml4_root);
> +	free_page((unsigned long)mmu->pml5_root);
>  }
>  
>  static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table
  2021-08-18 17:15   ` Sean Christopherson
@ 2021-08-19 16:36     ` Paolo Bonzini
  0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2021-08-19 16:36 UTC (permalink / raw)
  To: Sean Christopherson, Wei Huang
  Cc: kvm, linux-kernel, vkuznets, wanpengli, jmattson, joro, tglx,
	mingo, bp, x86, hpa

On 18/08/21 19:15, Sean Christopherson wrote:
>> When the 5-level page table CPU flag is exposed, KVM code needs to handle
>> this case by pointing mmu->root_hpa to a properly-constructed 5-level page
>> table.
> Similarly, this is wrong, or maybe just poorly worded.  This has nothing to do
> with LA57 being exposed to the guest, it's purely the host using 5-level paging
> and NPT being enabled and exposed to L1.

Like this:

---
KVM: x86/mmu: Support shadowing NPT when 5-level paging is enabled in host

When the 5-level page table CPU flag is set in the host, but the guest
has CR4.LA57=0 (including the case of a 32-bit guest), the top level of
the shadow NPT page tables will be fixed, consisting of one pointer to
a lower-level table and 511 non-present entries.  Extend the existing
code that creates the fixed PML4 or PDP table, to provide a fixed PML5
table if needed.

This is not needed on EPT because the number of layers in the tables
is specified in the EPTP instead of depending on the host CR4.
---

Paolo


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 3/3] KVM: SVM: Add 5-level page table support for SVM
  2021-08-18 17:32   ` Sean Christopherson
@ 2021-08-19 16:38     ` Paolo Bonzini
  0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2021-08-19 16:38 UTC (permalink / raw)
  To: Sean Christopherson, Wei Huang
  Cc: kvm, linux-kernel, vkuznets, wanpengli, jmattson, joro, tglx,
	mingo, bp, x86, hpa

On 18/08/21 19:32, Sean Christopherson wrote:
> On Wed, Aug 18, 2021, Wei Huang wrote:
>> When the 5-level page table is enabled on host OS, the nested page table
>> for guest VMs must use 5-level as well. Update get_npt_level() function
>> to reflect this requirement. In the meanwhile, remove the code that
>> prevents kvm-amd driver from being loaded when 5-level page table is
>> detected.
>>
>> Signed-off-by: Wei Huang <wei.huang2@amd.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> If this patch authored by Paolo, he needs to be attributed via From:.  If Paolo
> is a co-author, he needs a Co-developed-by:.  If either of those is true, your
> SOB needs to be last in the chain since you are the last handler of the patch.
> If neither is true, Paolo's SOB should be removed.

I didn't even remember writing this, but it's possible I pseudocoded in 
an email just like you did below.

>> -	return PT64_ROOT_4LEVEL;
>> +	bool la57 = (cr4_read_shadow() & X86_CR4_LA57) != 0;
>> +
>> +	return la57 ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
> 
> Why obfuscate this?  KVM is completely hosed if pgtable_l5_enabled() doesn't
> match host CR4.  E.g.
> 
> 	return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;

That also suggests the above pseudocoding scenario, where I'd be too 
lazy to look up the correct spelling of pgtable_l5_enabled().

Paolo

>>   #else
>>   	return PT32E_ROOT_LEVEL;
>>   #endif
>> @@ -462,11 +464,6 @@ static int has_svm(void)
>>   		return 0;
>>   	}
>>   
>> -	if (pgtable_l5_enabled()) {
>> -		pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
>> -		return 0;
>> -	}
>> -
>>   	return 1;
>>   }
>>   
>> -- 
>> 2.31.1
>>
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 0/3] SVM 5-level page table support
  2021-08-18 16:55 [PATCH v3 0/3] SVM 5-level page table support Wei Huang
                   ` (2 preceding siblings ...)
  2021-08-18 16:55 ` [PATCH v3 3/3] KVM: SVM: Add 5-level page table support for SVM Wei Huang
@ 2021-08-19 16:43 ` Paolo Bonzini
  2021-08-23  9:20   ` Maxim Levitsky
  3 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2021-08-19 16:43 UTC (permalink / raw)
  To: Wei Huang, kvm
  Cc: linux-kernel, vkuznets, seanjc, wanpengli, jmattson, joro, tglx,
	mingo, bp, x86, hpa

On 18/08/21 18:55, Wei Huang wrote:
> This patch set adds 5-level page table support for AMD SVM. When the
> 5-level page table is enabled on host OS, the nested page table for guest
> VMs will use the same format as host OS (i.e. 5-level NPT). These patches
> were tested with various combination of different settings and test cases
> (nested/regular VMs, AMD64/i686 kernels, kvm-unit-tests, etc.)
> 
> v2->v3:
>   * Change the way of building root_hpa by following the existing flow (Sean)
> 
> v1->v2:
>   * Remove v1's arch-specific get_tdp_level() and add a new parameter,
>     tdp_forced_root_level, to allow forced TDP level (Sean)
>   * Add additional comment on tdp_root table chaining trick and change the
>     PML root table allocation code (Sean)
>   * Revise Patch 1's commit msg (Sean and Jim)
> 
> Thanks,
> -Wei
> 
> Wei Huang (3):
>    KVM: x86: Allow CPU to force vendor-specific TDP level
>    KVM: x86: Handle the case of 5-level shadow page table
>    KVM: SVM: Add 5-level page table support for SVM
> 
>   arch/x86/include/asm/kvm_host.h |  6 ++--
>   arch/x86/kvm/mmu/mmu.c          | 56 ++++++++++++++++++++++-----------
>   arch/x86/kvm/svm/svm.c          | 13 ++++----
>   arch/x86/kvm/vmx/vmx.c          |  3 +-
>   4 files changed, 49 insertions(+), 29 deletions(-)
> 

Queued, thanks, with NULL initializations according to Tom's review.

Paolo


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 0/3] SVM 5-level page table support
  2021-08-19 16:43 ` [PATCH v3 0/3] SVM 5-level page table support Paolo Bonzini
@ 2021-08-23  9:20   ` Maxim Levitsky
  2021-08-23 15:15     ` Wei Huang
  0 siblings, 1 reply; 15+ messages in thread
From: Maxim Levitsky @ 2021-08-23  9:20 UTC (permalink / raw)
  To: Paolo Bonzini, Wei Huang, kvm
  Cc: linux-kernel, vkuznets, seanjc, wanpengli, jmattson, joro, tglx,
	mingo, bp, x86, hpa

On Thu, 2021-08-19 at 18:43 +0200, Paolo Bonzini wrote:
> On 18/08/21 18:55, Wei Huang wrote:
> > This patch set adds 5-level page table support for AMD SVM. When the
> > 5-level page table is enabled on host OS, the nested page table for guest
> > VMs will use the same format as host OS (i.e. 5-level NPT). These patches
> > were tested with various combination of different settings and test cases
> > (nested/regular VMs, AMD64/i686 kernels, kvm-unit-tests, etc.)
> > 
> > v2->v3:
> >   * Change the way of building root_hpa by following the existing flow (Sean)
> > 
> > v1->v2:
> >   * Remove v1's arch-specific get_tdp_level() and add a new parameter,
> >     tdp_forced_root_level, to allow forced TDP level (Sean)
> >   * Add additional comment on tdp_root table chaining trick and change the
> >     PML root table allocation code (Sean)
> >   * Revise Patch 1's commit msg (Sean and Jim)
> > 
> > Thanks,
> > -Wei
> > 
> > Wei Huang (3):
> >    KVM: x86: Allow CPU to force vendor-specific TDP level
> >    KVM: x86: Handle the case of 5-level shadow page table
> >    KVM: SVM: Add 5-level page table support for SVM
> > 
> >   arch/x86/include/asm/kvm_host.h |  6 ++--
> >   arch/x86/kvm/mmu/mmu.c          | 56 ++++++++++++++++++++++-----------
> >   arch/x86/kvm/svm/svm.c          | 13 ++++----
> >   arch/x86/kvm/vmx/vmx.c          |  3 +-
> >   4 files changed, 49 insertions(+), 29 deletions(-)
> > 
> 
> Queued, thanks, with NULL initializations according to Tom's review.
> 
> Paolo
> 

Hi,
Yesterday while testing my SMM patches, I noticed a minor issue: 
It seems that this patchset breaks my 32 bit nested VM testcase with NPT=0.

This hack makes it work again for me (I don't yet use TDP mmu).

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index caa3f9aee7d1..c25e0d40a620 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3562,7 +3562,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
            mmu->shadow_root_level < PT64_ROOT_4LEVEL)
                return 0;
 
-       if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)
+       if (mmu->pae_root && mmu->pml4_root)
                return 0;
 
        /*



Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 0/3] SVM 5-level page table support
  2021-08-23  9:20   ` Maxim Levitsky
@ 2021-08-23 15:15     ` Wei Huang
  2021-08-23 16:10       ` Sean Christopherson
  2021-08-23 18:06       ` Maxim Levitsky
  0 siblings, 2 replies; 15+ messages in thread
From: Wei Huang @ 2021-08-23 15:15 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Paolo Bonzini, kvm, linux-kernel, vkuznets, seanjc, wanpengli,
	jmattson, joro, tglx, mingo, bp, x86, hpa

On 08/23 12:20, Maxim Levitsky wrote:
> On Thu, 2021-08-19 at 18:43 +0200, Paolo Bonzini wrote:
> > On 18/08/21 18:55, Wei Huang wrote:
> > > This patch set adds 5-level page table support for AMD SVM. When the
> > > 5-level page table is enabled on host OS, the nested page table for guest
> > > VMs will use the same format as host OS (i.e. 5-level NPT). These patches
> > > were tested with various combination of different settings and test cases
> > > (nested/regular VMs, AMD64/i686 kernels, kvm-unit-tests, etc.)
> > > 
> > > v2->v3:
> > >   * Change the way of building root_hpa by following the existing flow (Sean)
> > > 
> > > v1->v2:
> > >   * Remove v1's arch-specific get_tdp_level() and add a new parameter,
> > >     tdp_forced_root_level, to allow forced TDP level (Sean)
> > >   * Add additional comment on tdp_root table chaining trick and change the
> > >     PML root table allocation code (Sean)
> > >   * Revise Patch 1's commit msg (Sean and Jim)
> > > 
> > > Thanks,
> > > -Wei
> > > 
> > > Wei Huang (3):
> > >    KVM: x86: Allow CPU to force vendor-specific TDP level
> > >    KVM: x86: Handle the case of 5-level shadow page table
> > >    KVM: SVM: Add 5-level page table support for SVM
> > > 
> > >   arch/x86/include/asm/kvm_host.h |  6 ++--
> > >   arch/x86/kvm/mmu/mmu.c          | 56 ++++++++++++++++++++++-----------
> > >   arch/x86/kvm/svm/svm.c          | 13 ++++----
> > >   arch/x86/kvm/vmx/vmx.c          |  3 +-
> > >   4 files changed, 49 insertions(+), 29 deletions(-)
> > > 
> > 
> > Queued, thanks, with NULL initializations according to Tom's review.
> > 
> > Paolo
> > 
> 
> Hi,
> Yesterday while testing my SMM patches, I noticed a minor issue: 
> It seems that this patchset breaks my 32 bit nested VM testcase with NPT=0.
>

Could you elaborate the detailed setup? NPT=0 for KVM running on L1?
Which VM is 32bit - L1 or L2?

Thanks,
-Wei

> This hack makes it work again for me (I don't yet use TDP mmu).
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index caa3f9aee7d1..c25e0d40a620 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3562,7 +3562,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
>             mmu->shadow_root_level < PT64_ROOT_4LEVEL)
>                 return 0;
>  
> -       if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)
> +       if (mmu->pae_root && mmu->pml4_root)
>                 return 0;
>  
>         /*
> 
> 
> 
> Best regards,
> 	Maxim Levitsky
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 0/3] SVM 5-level page table support
  2021-08-23 15:15     ` Wei Huang
@ 2021-08-23 16:10       ` Sean Christopherson
  2021-08-23 18:10         ` Maxim Levitsky
  2021-08-23 18:06       ` Maxim Levitsky
  1 sibling, 1 reply; 15+ messages in thread
From: Sean Christopherson @ 2021-08-23 16:10 UTC (permalink / raw)
  To: Wei Huang
  Cc: Maxim Levitsky, Paolo Bonzini, kvm, linux-kernel, vkuznets,
	wanpengli, jmattson, joro, tglx, mingo, bp, x86, hpa

On Mon, Aug 23, 2021, Wei Huang wrote:
> On 08/23 12:20, Maxim Levitsky wrote:
> > This hack makes it work again for me (I don't yet use TDP mmu).
> > 
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index caa3f9aee7d1..c25e0d40a620 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -3562,7 +3562,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
> >             mmu->shadow_root_level < PT64_ROOT_4LEVEL)
> >                 return 0;
> >  
> > -       if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)

Maxim, I assume you hit this WARN and bail?

        if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root ||
                         mmu->pml5_root))
		return -EIO;

Because as the comment states, KVM expects all the special roots to be allocated
together.  The 5-level paging support breaks that assumption because pml5_root
will be allocated iff the host is using 5-level paging.

        if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) {
                pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
                if (!pml5_root)
                        goto err_pml5;
        }

I think this is the least awful fix, I'll test and send a proper patch later today.

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4853c033e6ce..93b2ed422b48 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3548,6 +3548,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *mmu = vcpu->arch.mmu;
+       bool need_pml5 = mmu->shadow_root_level > PT64_ROOT_4LEVEL;
        u64 *pml5_root = NULL;
        u64 *pml4_root = NULL;
        u64 *pae_root;
@@ -3562,7 +3563,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
            mmu->shadow_root_level < PT64_ROOT_4LEVEL)
                return 0;

-       if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)
+       /*
+        * NPT, the only paging mode that uses this horror, uses a fixed number
+        * of levels for the shadow page tables, e.g. all MMUs are 4-level or
> +        * all MMUs are 5-level.  Thus, this can safely require that pml5_root
+        * is allocated if the other roots are valid and pml5 is needed, as any
+        * prior MMU would also have required pml5.
+        */
+       if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root))
                return 0;

        /*
@@ -3570,7 +3578,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
         * bail if KVM ends up in a state where only one of the roots is valid.
         */
        if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root ||
-                        mmu->pml5_root))
+                        (need_pml5 && mmu->pml5_root)))
                return -EIO;

        /*

> > +       if (mmu->pae_root && mmu->pml4_root)
> >                 return 0;
> >  
> >         /*
> > 
> > 
> > 
> > Best regards,
> > 	Maxim Levitsky
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 0/3] SVM 5-level page table support
  2021-08-23 15:15     ` Wei Huang
  2021-08-23 16:10       ` Sean Christopherson
@ 2021-08-23 18:06       ` Maxim Levitsky
  1 sibling, 0 replies; 15+ messages in thread
From: Maxim Levitsky @ 2021-08-23 18:06 UTC (permalink / raw)
  To: Wei Huang
  Cc: Paolo Bonzini, kvm, linux-kernel, vkuznets, seanjc, wanpengli,
	jmattson, joro, tglx, mingo, bp, x86, hpa

On Mon, 2021-08-23 at 10:15 -0500, Wei Huang wrote:
> On 08/23 12:20, Maxim Levitsky wrote:
> > On Thu, 2021-08-19 at 18:43 +0200, Paolo Bonzini wrote:
> > > On 18/08/21 18:55, Wei Huang wrote:
> > > > This patch set adds 5-level page table support for AMD SVM. When the
> > > > 5-level page table is enabled on host OS, the nested page table for guest
> > > > VMs will use the same format as host OS (i.e. 5-level NPT). These patches
> > > > were tested with various combination of different settings and test cases
> > > > (nested/regular VMs, AMD64/i686 kernels, kvm-unit-tests, etc.)
> > > > 
> > > > v2->v3:
> > > >   * Change the way of building root_hpa by following the existing flow (Sean)
> > > > 
> > > > v1->v2:
> > > >   * Remove v1's arch-specific get_tdp_level() and add a new parameter,
> > > >     tdp_forced_root_level, to allow forced TDP level (Sean)
> > > >   * Add additional comment on tdp_root table chaining trick and change the
> > > >     PML root table allocation code (Sean)
> > > >   * Revise Patch 1's commit msg (Sean and Jim)
> > > > 
> > > > Thanks,
> > > > -Wei
> > > > 
> > > > Wei Huang (3):
> > > >    KVM: x86: Allow CPU to force vendor-specific TDP level
> > > >    KVM: x86: Handle the case of 5-level shadow page table
> > > >    KVM: SVM: Add 5-level page table support for SVM
> > > > 
> > > >   arch/x86/include/asm/kvm_host.h |  6 ++--
> > > >   arch/x86/kvm/mmu/mmu.c          | 56 ++++++++++++++++++++++-----------
> > > >   arch/x86/kvm/svm/svm.c          | 13 ++++----
> > > >   arch/x86/kvm/vmx/vmx.c          |  3 +-
> > > >   4 files changed, 49 insertions(+), 29 deletions(-)
> > > > 
> > > 
> > > Queued, thanks, with NULL initializations according to Tom's review.
> > > 
> > > Paolo
> > > 
> > 
> > Hi,
> > Yesterday while testing my SMM patches, I noticed a minor issue: 
> > It seems that this patchset breaks my 32 bit nested VM testcase with NPT=0.
> > 
> 
> Could you elaborate the detailed setup? NPT=0 for KVM running on L1?
> Which VM is 32bit - L1 or L2?

NPT=0, L1 and L2 were 32 bit PAE VMs. The test was done to see how well
this setup deals with SMM mode entry/exits with SMM generated by L1 guest,
and see if I have any PDPTR related shenanigans.

I disabled the TDP MMU for now, although in this setup it won't be used anyway.

BIOS was seabios, patched to use PAE itself during boot, as well as in SMM.
(from https://mail.coreboot.org/pipermail/seabios/2015-September/009788.html, patch applied by hand)

Failure was immediate without my hack - L1 died as soon as L2 was started due to an assert in
this code.


Best regards,
	Maxim Levitsky
> 
> Thanks,
> -Wei
> 
> > This hack makes it work again for me (I don't yet use TDP mmu).
> > 
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index caa3f9aee7d1..c25e0d40a620 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -3562,7 +3562,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
> >             mmu->shadow_root_level < PT64_ROOT_4LEVEL)
> >                 return 0;
> >  
> > -       if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)
> > +       if (mmu->pae_root && mmu->pml4_root)
> >                 return 0;
> >  
> >         /*
> > 
> > 
> > 
> > Best regards,
> > 	Maxim Levitsky
> > 



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 0/3] SVM 5-level page table support
  2021-08-23 16:10       ` Sean Christopherson
@ 2021-08-23 18:10         ` Maxim Levitsky
  0 siblings, 0 replies; 15+ messages in thread
From: Maxim Levitsky @ 2021-08-23 18:10 UTC (permalink / raw)
  To: Sean Christopherson, Wei Huang
  Cc: Paolo Bonzini, kvm, linux-kernel, vkuznets, wanpengli, jmattson,
	joro, tglx, mingo, bp, x86, hpa

On Mon, 2021-08-23 at 16:10 +0000, Sean Christopherson wrote:
> On Mon, Aug 23, 2021, Wei Huang wrote:
> > On 08/23 12:20, Maxim Levitsky wrote:
> > > This hack makes it work again for me (I don't yet use TDP mmu).
> > > 
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index caa3f9aee7d1..c25e0d40a620 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -3562,7 +3562,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
> > >             mmu->shadow_root_level < PT64_ROOT_4LEVEL)
> > >                 return 0;
> > >  
> > > -       if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)
> 
> Maxim, I assume you hit this WARN and bail?
Yep.
> 
>         if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root ||
>                          mmu->pml5_root))
> 		return -EIO;
> 
> Because as the comment states, KVM expects all the special roots to be allocated
> together.  The 5-level paging supported breaks that assumption because pml5_root
> will be allocated iff the host is using 5-level paging.
> 
>         if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) {
>                 pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
>                 if (!pml5_root)
>                         goto err_pml5;
>         }
> 
> I think this is the least awful fix, I'll test and send a proper patch later today.
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 4853c033e6ce..93b2ed422b48 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3548,6 +3548,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>  static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_mmu *mmu = vcpu->arch.mmu;
> +       bool need_pml5 = mmu->shadow_root_level > PT64_ROOT_4LEVEL;
>         u64 *pml5_root = NULL;
>         u64 *pml4_root = NULL;
>         u64 *pae_root;
> @@ -3562,7 +3563,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
>             mmu->shadow_root_level < PT64_ROOT_4LEVEL)
>                 return 0;
> 
> -       if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root)
> +       /*
> +        * NPT, the only paging mode that uses this horror, uses a fixed number
> +        * of levels for the shadow page tables, e.g. all MMUs are 4-level or
> +        * all MMUs are 5-level.  Thus, this can safely require that pml5_root
> +        * is allocated if the other roots are valid and pml5 is needed, as any
> +        * prior MMU would also have required pml5.
> +        */
> +       if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root))
>                 return 0;
> 
>         /*
> @@ -3570,7 +3578,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
>          * bail if KVM ends up in a state where only one of the roots is valid.
>          */
>         if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root ||
> -                        mmu->pml5_root))
> +                        (need_pml5 && mmu->pml5_root)))
>                 return -EIO;
> 
>         /*
> 
> > > +       if (mmu->pae_root && mmu->pml4_root)
> > >                 return 0;
> > >  
> > >         /*

Makes sense, works, and without digging too much into this
I expected this to be fixed by something like that, so:

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Thanks,
Best regards,
	Maxim Levitsky



> > > 
> > > 
> > > 
> > > Best regards,
> > > 	Maxim Levitsky
> > > 



^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-08-23 18:11 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-18 16:55 [PATCH v3 0/3] SVM 5-level page table support Wei Huang
2021-08-18 16:55 ` [PATCH v3 1/3] KVM: x86: Allow CPU to force vendor-specific TDP level Wei Huang
2021-08-18 16:55 ` [PATCH v3 2/3] KVM: x86: Handle the case of 5-level shadow page table Wei Huang
2021-08-18 17:15   ` Sean Christopherson
2021-08-19 16:36     ` Paolo Bonzini
2021-08-18 18:00   ` Tom Lendacky
2021-08-18 16:55 ` [PATCH v3 3/3] KVM: SVM: Add 5-level page table support for SVM Wei Huang
2021-08-18 17:32   ` Sean Christopherson
2021-08-19 16:38     ` Paolo Bonzini
2021-08-19 16:43 ` [PATCH v3 0/3] SVM 5-level page table support Paolo Bonzini
2021-08-23  9:20   ` Maxim Levitsky
2021-08-23 15:15     ` Wei Huang
2021-08-23 16:10       ` Sean Christopherson
2021-08-23 18:10         ` Maxim Levitsky
2021-08-23 18:06       ` Maxim Levitsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).