LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
@ 2015-01-24 10:20 Wincy Van
  2015-01-28  8:00 ` Zhang, Yang Z
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Wincy Van @ 2015-01-24 10:20 UTC (permalink / raw)
  To: Paolo Bonzini, gleb, Zhang, Yang Z
  Cc: kvm, linux-kernel, Wanpeng Li, Jan Kiszka, 范文一

Currently, if L1 enables MSR_BITMAP, we will emulate this feature,
all of L2's msr access is intercepted by L0. Since many features
like virtualize x2apic mode has a complicated logic and it is
difficult for us to emulate, we should use hardware and merge
the bitmap.

This patch introduces nested_vmx_merge_msr_bitmap for future use.

Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
---
 arch/x86/kvm/vmx.c |   71 +++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c987374..36d0724 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -798,6 +798,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;

@@ -5812,13 +5813,18 @@ static __init int hardware_setup(void)
                                (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_msr_bitmap_longmode_x2apic)
                goto out4;
+
+       vmx_msr_bitmap_nested = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_nested)
+               goto out5;
+
        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmread_bitmap)
-               goto out5;
+               goto out6;

        vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmwrite_bitmap)
-               goto out6;
+               goto out7;

        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -5834,10 +5840,11 @@ static __init int hardware_setup(void)

        memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
        memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+       memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);

        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
-               goto out7;
+               goto out8;
        }

        if (boot_cpu_has(X86_FEATURE_NX))
@@ -5944,10 +5951,12 @@ static __init int hardware_setup(void)

        return alloc_kvm_area();

-out7:
+out8:
        free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
+out7:
        free_page((unsigned long)vmx_vmread_bitmap);
+out6:
+       free_page((unsigned long)vmx_msr_bitmap_nested);
 out5:
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
@@ -5970,6 +5979,7 @@ static __exit void hardware_unsetup(void)
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
        free_page((unsigned long)vmx_msr_bitmap_legacy);
        free_page((unsigned long)vmx_msr_bitmap_longmode);
+       free_page((unsigned long)vmx_msr_bitmap_nested);
        free_page((unsigned long)vmx_io_bitmap_b);
        free_page((unsigned long)vmx_io_bitmap_a);
        free_page((unsigned long)vmx_vmwrite_bitmap);
@@ -8305,6 +8315,38 @@ static void vmx_start_preemption_timer(struct
kvm_vcpu *vcpu)
                      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }

+static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
+                                               struct vmcs12 *vmcs12)
+{
+       int maxphyaddr;
+       u64 addr;
+
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+               return 0;
+
+       if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+       maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+       if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
+          ((addr + PAGE_SIZE) >> maxphyaddr))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Merge L0's and L1's MSR bitmap, return false to indicate that
+ * we do not use the hardware.
+ */
+static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+                                              struct vmcs12 *vmcs12)
+{
+       return false;
+}
+
 static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
                                       unsigned long count_field,
                                       unsigned long addr_field,
@@ -8637,11 +8679,17 @@ static void prepare_vmcs02(struct kvm_vcpu
*vcpu, struct vmcs12 *vmcs12)
                vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
        }

+       if (cpu_has_vmx_msr_bitmap() &&
+           exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+           nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
+               vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+       } else
+               exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
+
        /*
-        * Merging of IO and MSR bitmaps not currently supported.
+        * Merging of IO bitmap not currently supported.
         * Rather, exit every time.
         */
-       exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
        exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
        exec_control |= CPU_BASED_UNCOND_IO_EXITING;

@@ -8792,15 +8840,13 @@ static int nested_vmx_run(struct kvm_vcpu
*vcpu, bool launch)
                return 1;
        }

-       if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
-                       !PAGE_ALIGNED(vmcs12->msr_bitmap)) {
+       if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
                /*TODO: Also verify bits beyond physical address width are 0*/
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }

-       if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-               /*TODO: Also verify bits beyond physical address width are 0*/
+       if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }
@@ -9356,6 +9402,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        kvm_set_dr(vcpu, 7, 0x400);
        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);

+       if (cpu_has_vmx_msr_bitmap())
+               vmx_set_msr_bitmap(vcpu);
+
        if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
                                vmcs12->vm_exit_msr_load_count))
                nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
--
1.7.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-24 10:20 [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap Wincy Van
@ 2015-01-28  8:00 ` Zhang, Yang Z
  2015-01-28  8:24   ` Wincy Van
  2015-01-28  8:05 ` Zhang, Yang Z
  2015-01-28  8:06 ` Zhang, Yang Z
  2 siblings, 1 reply; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28  8:00 UTC (permalink / raw)
  To: Wincy Van, Paolo Bonzini, gleb; +Cc: kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2376 bytes --]

Wincy Van wrote on 2015-01-24:
> Currently, if L1 enables MSR_BITMAP, we will emulate this feature, all of L2's
> msr access is intercepted by L0. Since many features like virtualize x2apic mode
> has a complicated logic and it is difficult for us to emulate, we should use
> hardware and merge the bitmap.
> 
> This patch introduces nested_vmx_merge_msr_bitmap for future use.
> 
> Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
> ---
>  arch/x86/kvm/vmx.c |   71
> +++++++++++++++++++++++++++++++++++++++++++--------
>  1 files changed, 60 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c987374..36d0724
> 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -798,6 +798,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
> static unsigned long *vmx_msr_bitmap_longmode;  static unsigned long
> *vmx_msr_bitmap_legacy_x2apic;  static unsigned long
> *vmx_msr_bitmap_longmode_x2apic;
> +static unsigned long *vmx_msr_bitmap_nested;
>  static unsigned long *vmx_vmread_bitmap;  static unsigned long
> *vmx_vmwrite_bitmap;
> 
> @@ -5812,13 +5813,18 @@ static __init int hardware_setup(void)
>                                 (unsigned long
> *)__get_free_page(GFP_KERNEL);
>         if (!vmx_msr_bitmap_longmode_x2apic)
>                 goto out4;
> +
> +       vmx_msr_bitmap_nested = (unsigned long
> *)__get_free_page(GFP_KERNEL);
> +       if (!vmx_msr_bitmap_nested)
> +               goto out5;
> +

Since the nested virtualization is off by default. It's better to allocate the page
only when nested is true. Maybe adding the following check is better:

if (nested) {
        vmx_msr_bitmap_nested = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_msr_bitmap_nested)
                goto out5;
}

...snip...

> +
> +/*
> + * Merge L0's and L1's MSR bitmap, return false to indicate that
> + * we do not use the hardware.
> + */
> +static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
> +                                              struct vmcs12
> *vmcs12) {
> +       return false;
> +}
> +

The following patches have nothing to do with the MSR control. Why leave the function empty here?

Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-24 10:20 [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap Wincy Van
  2015-01-28  8:00 ` Zhang, Yang Z
@ 2015-01-28  8:05 ` Zhang, Yang Z
  2015-01-28  8:57   ` Wincy Van
  2015-01-28  8:06 ` Zhang, Yang Z
  2 siblings, 1 reply; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28  8:05 UTC (permalink / raw)
  To: Wincy Van, Paolo Bonzini, gleb; +Cc: kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2087 bytes --]

Wincy Van wrote on 2015-01-24:
> When L2 is using x2apic, we can use virtualize x2apic mode to gain higher
> performance, especially in apicv case.
> 
> This patch also introduces nested_vmx_check_apicv_controls for the nested
> apicv patches.
> 
> Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
> ---

...snip...

>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)  {
>         if (!longmode_only)
> @@ -8344,7 +8394,68 @@ static int
> nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,  static
> inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>                                                struct vmcs12
> *vmcs12)  {
> -       return false;
> +       struct page *page;
> +       unsigned long *msr_bitmap;
> +
> +       if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
> +               return false;
> +
> +       page = nested_get_page(vcpu, vmcs12->msr_bitmap);
> +       if (!page) {
> +               WARN_ON(1);
> +               return false;
> +       }
> +       msr_bitmap = (unsigned long *)kmap(page);
> +       if (!msr_bitmap) {
> +               nested_release_page_clean(page);
> +               WARN_ON(1);
> +               return false;
> +       }
> +
> +       memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
> +
> +       if (nested_cpu_has_virt_x2apic_mode(vmcs12))
> +               /* TPR is allowed */
> +               nested_vmx_disable_intercept_for_msr(msr_bitmap,
> +                               vmx_msr_bitmap_nested,
> +                               APIC_BASE_MSR + (APIC_TASKPRI >>
> 4),
> +                               MSR_TYPE_R | MSR_TYPE_W);

I didn't understand what this function does? Per my understanding, you only need to set the (vmx_msr_bitmap_nested = vmcs01->msr_bitmap | vmcs12->msr_bitmap) and inject the nested vmexit to L1 if the bit in vmcs12->msr_bitmap is setting. Am I missing some patches?

Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-24 10:20 [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap Wincy Van
  2015-01-28  8:00 ` Zhang, Yang Z
  2015-01-28  8:05 ` Zhang, Yang Z
@ 2015-01-28  8:06 ` Zhang, Yang Z
  2 siblings, 0 replies; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28  8:06 UTC (permalink / raw)
  To: Wincy Van, Paolo Bonzini, gleb; +Cc: kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2256 bytes --]

Zhang, Yang Z wrote on 2015-01-28:
> Wincy Van wrote on 2015-01-24:
>> When L2 is using x2apic, we can use virtualize x2apic mode to gain
>> higher performance, especially in apicv case.
>> 
>> This patch also introduces nested_vmx_check_apicv_controls for the
>> nested apicv patches.

Sorry, replied on the wrong thread.:(

>> 
>> Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
>> ---
> 
> ...snip...
> 
>>  static void vmx_disable_intercept_for_msr(u32 msr, bool
>> longmode_only)
> {
>>         if (!longmode_only)
>> @@ -8344,7 +8394,68 @@ static int
>> nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,  static
>> inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>                                                struct vmcs12
>> *vmcs12)  {
>> -       return false;
>> +       struct page *page;
>> +       unsigned long *msr_bitmap;
>> +
>> +       if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
>> +               return false;
>> +
>> +       page = nested_get_page(vcpu, vmcs12->msr_bitmap);
>> +       if (!page) {
>> +               WARN_ON(1);
>> +               return false;
>> +       }
>> +       msr_bitmap = (unsigned long *)kmap(page);
>> +       if (!msr_bitmap) {
>> +               nested_release_page_clean(page);
>> +               WARN_ON(1);
>> +               return false;
>> +       }
>> +
>> +       memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
>> +
>> +       if (nested_cpu_has_virt_x2apic_mode(vmcs12))
>> +               /* TPR is allowed */
>> +               nested_vmx_disable_intercept_for_msr(msr_bitmap,
>> +                               vmx_msr_bitmap_nested,
>> +                               APIC_BASE_MSR + (APIC_TASKPRI >>
>> 4),
>> +                               MSR_TYPE_R | MSR_TYPE_W);
> 
> I didn't understand what this function does? Per my understanding, you
> only need to set the (vmx_msr_bitmap_nested = vmcs01->msr_bitmap |
> vmcs12->msr_bitmap) and inject the nested vmexit to L1 if the bit in
> vmcs12->msr_bitmap is setting. Am I missing some patches?
> 
> Best regards,
> Yang
>


Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28  8:00 ` Zhang, Yang Z
@ 2015-01-28  8:24   ` Wincy Van
  2015-01-28 11:09     ` Zhang, Yang Z
  0 siblings, 1 reply; 16+ messages in thread
From: Wincy Van @ 2015-01-28  8:24 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

On Wed, Jan 28, 2015 at 4:00 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> @@ -5812,13 +5813,18 @@ static __init int hardware_setup(void)
>>                                 (unsigned long
>> *)__get_free_page(GFP_KERNEL);
>>         if (!vmx_msr_bitmap_longmode_x2apic)
>>                 goto out4;
>> +
>> +       vmx_msr_bitmap_nested = (unsigned long
>> *)__get_free_page(GFP_KERNEL);
>> +       if (!vmx_msr_bitmap_nested)
>> +               goto out5;
>> +
>
> Since the nested virtualization is off by default. It's better to allocate the page
> only when nested is true. Maybe adding the following check is better:
>
> if (nested) {
>         vmx_msr_bitmap_nested = (unsigned long *)__get_free_page(GFP_KERNEL);
>         if (!vmx_msr_bitmap_nested)
>                 goto out5;
> }

Agreed. Will do.

>
> ...snip...
>
>> +
>> +/*
>> + * Merge L0's and L1's MSR bitmap, return false to indicate that
>> + * we do not use the hardware.
>> + */
>> +static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>> +                                              struct vmcs12
>> *vmcs12) {
>> +       return false;
>> +}
>> +
>
> The following patches have nothing to do with the MSR control. Why leave the function empty here?
>

No. In patch "Enable nested virtualize x2apic mode", we will return
false if L1 disabled virt_x2apic_mode, then the hardware MSR-bitmap
control is disabled. This is faster than rebuilding the
vmx_msr_bitmap_nested.
This function returns false here to indicate that we do not use the
hardware. Since It is not only virtualize x2apic mode using this,
other features may use this either. I think it isn't suitable to
introduce this function in other patches.


> Best regards,
> Yang
>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28  8:05 ` Zhang, Yang Z
@ 2015-01-28  8:57   ` Wincy Van
  2015-01-28 11:25     ` Zhang, Yang Z
  0 siblings, 1 reply; 16+ messages in thread
From: Wincy Van @ 2015-01-28  8:57 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

On Wed, Jan 28, 2015 at 4:05 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> @@ -8344,7 +8394,68 @@ static int
>> nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,  static
>> inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>                                                struct vmcs12
>> *vmcs12)  {
>> -       return false;
>> +       struct page *page;
>> +       unsigned long *msr_bitmap;
>> +
>> +       if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
>> +               return false;
>> +
>> +       page = nested_get_page(vcpu, vmcs12->msr_bitmap);
>> +       if (!page) {
>> +               WARN_ON(1);
>> +               return false;
>> +       }
>> +       msr_bitmap = (unsigned long *)kmap(page);
>> +       if (!msr_bitmap) {
>> +               nested_release_page_clean(page);
>> +               WARN_ON(1);
>> +               return false;
>> +       }
>> +
>> +       memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
>> +
>> +       if (nested_cpu_has_virt_x2apic_mode(vmcs12))
>> +               /* TPR is allowed */
>> +               nested_vmx_disable_intercept_for_msr(msr_bitmap,
>> +                               vmx_msr_bitmap_nested,
>> +                               APIC_BASE_MSR + (APIC_TASKPRI >>
>> 4),
>> +                               MSR_TYPE_R | MSR_TYPE_W);
>
> I didn't understand what this function does? Per my understanding, you only need to set the (vmx_msr_bitmap_nested = vmcs01->msr_bitmap | vmcs12->msr_bitmap) and inject the nested vmexit to L1 if the bit in vmcs12->msr_bitmap is setting. Am I missing some patches?

In the beginning, I want to do "vmcs01->msr_bitmap |
vmcs12->msr_bitmap", but I remember that there isn't a instruction to
do a bit or operation in two pages effectively, so I do the bit or
operation in nested_vmx_disable_intercept_for_msr. If the hardware do
not support this, I think it is faster if we deal with the bits on
demand. nested_vmx_merge_msr_bitmap is used to merge L0's and L1's
bitmaps, any features can put their logic here.

If there is a faster way, please teach me how to do it : )

Thanks,
Wincy


>
> Best regards,
> Yang
>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28  8:24   ` Wincy Van
@ 2015-01-28 11:09     ` Zhang, Yang Z
  0 siblings, 0 replies; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28 11:09 UTC (permalink / raw)
  To: Wincy Van; +Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2173 bytes --]

Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 4:00 PM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>>> @@ -5812,13 +5813,18 @@ static __init int hardware_setup(void)
>>>                                 (unsigned long
>>> *)__get_free_page(GFP_KERNEL);
>>>         if (!vmx_msr_bitmap_longmode_x2apic)
>>>                 goto out4;
>>> +
>>> +       vmx_msr_bitmap_nested = (unsigned long
>>> *)__get_free_page(GFP_KERNEL);
>>> +       if (!vmx_msr_bitmap_nested)
>>> +               goto out5;
>>> +
>> 
>> Since the nested virtualization is off by default. It's better to
>> allocate the page only when nested is true. Maybe adding the following
>> check is better:
>> 
>> if (nested) {
>>         vmx_msr_bitmap_nested = (unsigned long
>>         *)__get_free_page(GFP_KERNEL); if (!vmx_msr_bitmap_nested)
>>                 goto out5;
>> }
> 
> Agreed. Will do.
> 
>> 
>> ...snip...
>> 
>>> +
>>> +/*
>>> + * Merge L0's and L1's MSR bitmap, return false to indicate that
>>> + * we do not use the hardware.
>>> + */
>>> +static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>> +                                              struct vmcs12
>>> *vmcs12) {
>>> +       return false;
>>> +}
>>> +
>> 
>> The following patches have nothing to do with the MSR control. Why
>> leave the function empty here?
>> 
> 
> No. In patch "Enable nested virtualize x2apic mode", we will return
> false if L1 disabled virt_x2apic_mode, then the hardware MSR-bitmap control is disabled.
> This is faster than rebuilding the vmx_msr_bitmap_nested.
> This function returns false here to indicate that we do not use the hardware.
> Since It is not only virtualize x2apic mode using this, other features
> may use this either. I think it isn't suitable to introduce this function in other patches.

Yes, rebuilding is more costly. But your current implementation cannot leverage the APICv feature correctly. I replied in another thread.

> 
> 
>> Best regards,
>> Yang
>> 
>>


Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28  8:57   ` Wincy Van
@ 2015-01-28 11:25     ` Zhang, Yang Z
  2015-01-28 11:43       ` Wincy Van
  0 siblings, 1 reply; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28 11:25 UTC (permalink / raw)
  To: Wincy Van; +Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2570 bytes --]

Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 4:05 PM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>>> @@ -8344,7 +8394,68 @@ static int
>>> nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,  static
>>> inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>>                                                struct vmcs12
>>> *vmcs12)  { -       return false; +       struct page *page; +      
>>> unsigned long *msr_bitmap; + +       if
>>> (!nested_cpu_has_virt_x2apic_mode(vmcs12)) +               return
>>> false; + +       page = nested_get_page(vcpu, vmcs12->msr_bitmap); +  
>>>     if (!page) { +               WARN_ON(1); +               return
>>> false; +       } +       msr_bitmap = (unsigned long *)kmap(page); +  
>>>     if (!msr_bitmap) { +              
>>> nested_release_page_clean(page); +               WARN_ON(1); +        
>>>       return false; +       } + +       memset(vmx_msr_bitmap_nested,
>>> 0xff, PAGE_SIZE); + +       if
>>> (nested_cpu_has_virt_x2apic_mode(vmcs12)) +               /* TPR is
>>> allowed */ +              
>>> nested_vmx_disable_intercept_for_msr(msr_bitmap, +                    
>>>           vmx_msr_bitmap_nested, +                              
>>> APIC_BASE_MSR + (APIC_TASKPRI >> 4), +                              
>>> MSR_TYPE_R | MSR_TYPE_W);
>> 
>> I didn't understand what this function does? Per my understanding,
>> you only
> need to set the (vmx_msr_bitmap_nested = vmcs01->msr_bitmap |
> vmcs12->msr_bitmap) and inject the nested vmexit to L1 if the bit in
> vmcs12->msr_bitmap is setting. Am I missing some patches?
> 
> In the beginning, I want to do "vmcs01->msr_bitmap |
> vmcs12->msr_bitmap", but I remember that there isn't a instruction to
> do a bit or operation in two pages effectively, so I do the bit or
> operation in nested_vmx_disable_intercept_for_msr. If the hardware do
> not support this, I think it is faster if we deal with the bits on demand.
> nested_vmx_merge_msr_bitmap is used to merge L0's and L1's bitmaps,
> any features can put their logic here.

You construct the nested_msr_bitmap based on vmcs12->msr_bitmap, what happens if vmcs01->msr_bitmap want to trap this msr?

> 
> If there is a faster way, please teach me how to do it : )

You are right. Interception should be much faster.

> 
> Thanks,
> Wincy
> 
> 
>> 
>> Best regards,
>> Yang
>> 
>>


Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 11:25     ` Zhang, Yang Z
@ 2015-01-28 11:43       ` Wincy Van
  2015-01-28 11:52         ` Zhang, Yang Z
  0 siblings, 1 reply; 16+ messages in thread
From: Wincy Van @ 2015-01-28 11:43 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

On Wed, Jan 28, 2015 at 7:25 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
> Wincy Van wrote on 2015-01-28:
>> On Wed, Jan 28, 2015 at 4:05 PM, Zhang, Yang Z <yang.z.zhang@intel.com>
>> wrote:
>>>> @@ -8344,7 +8394,68 @@ static int
>>>> nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,  static
>>>> inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>>>                                                struct vmcs12
>>>> *vmcs12)  { -       return false; +       struct page *page; +
>>>> unsigned long *msr_bitmap; + +       if
>>>> (!nested_cpu_has_virt_x2apic_mode(vmcs12)) +               return
>>>> false; + +       page = nested_get_page(vcpu, vmcs12->msr_bitmap); +
>>>>     if (!page) { +               WARN_ON(1); +               return
>>>> false; +       } +       msr_bitmap = (unsigned long *)kmap(page); +
>>>>     if (!msr_bitmap) { +
>>>> nested_release_page_clean(page); +               WARN_ON(1); +
>>>>       return false; +       } + +       memset(vmx_msr_bitmap_nested,
>>>> 0xff, PAGE_SIZE); + +       if
>>>> (nested_cpu_has_virt_x2apic_mode(vmcs12)) +               /* TPR is
>>>> allowed */ +
>>>> nested_vmx_disable_intercept_for_msr(msr_bitmap, +
>>>>           vmx_msr_bitmap_nested, +
>>>> APIC_BASE_MSR + (APIC_TASKPRI >> 4), +
>>>> MSR_TYPE_R | MSR_TYPE_W);
>>>
>>> I didn't understand what this function does? Per my understanding,
>>> you only
>> need to set the (vmx_msr_bitmap_nested = vmcs01->msr_bitmap |
>> vmcs12->msr_bitmap) and inject the nested vmexit to L1 if the bit in
>> vmcs12->msr_bitmap is setting. Am I missing some patches?
>>
>> In the beginning, I want to do "vmcs01->msr_bitmap |
>> vmcs12->msr_bitmap", but I remember that there isn't a instruction to
>> do a bit or operation in two pages effectively, so I do the bit or
>> operation in nested_vmx_disable_intercept_for_msr. If the hardware do
>> not support this, I think it is faster if we deal with the bits on demand.
>> nested_vmx_merge_msr_bitmap is used to merge L0's and L1's bitmaps,
>> any features can put their logic here.
>
> You construct the nested_msr_bitmap based on vmcs12->msr_bitmap, what happens if vmcs01->msr_bitmap want to trap this msr?
>

If L0 wants to intercept a msr, we should set
vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic),
and that bitmaps should only be loaded in non-nested entry.
Currently we only clear the corresponding bits if L1 enables
virtualize x2apic mode, all the other bits are all set. On nested
entry, we load nested_msr_bitmap, on nested vmexit, we will restore
L0's bitmap.
In the nested entry, L0 only care about L2's msr access, not L1's. I
think that in nested entry, nested_msr_bitmap is "vmcs01->msr_bitmap"
as your mentioned above.

Thanks,
Wincy

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 11:43       ` Wincy Van
@ 2015-01-28 11:52         ` Zhang, Yang Z
  2015-01-28 12:06           ` Wincy Van
  0 siblings, 1 reply; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28 11:52 UTC (permalink / raw)
  To: Wincy Van; +Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 3325 bytes --]

Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 7:25 PM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>> Wincy Van wrote on 2015-01-28:
>>> On Wed, Jan 28, 2015 at 4:05 PM, Zhang, Yang Z
>>> <yang.z.zhang@intel.com>
>>> wrote:
>>>>> @@ -8344,7 +8394,68 @@ static int
>>>>> nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, static
>>>>> inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>>>>                                                struct vmcs12
>>>>> *vmcs12)  { -       return false; +       struct page *page; +
>>>>> unsigned long *msr_bitmap; + +       if
>>>>> (!nested_cpu_has_virt_x2apic_mode(vmcs12)) +               return
>>>>> false; + +       page = nested_get_page(vcpu, vmcs12->msr_bitmap);
> +
>>>>>     if (!page) { +               WARN_ON(1); +
> return
>>>>> false; +       } +       msr_bitmap = (unsigned long *)kmap(page); +
>>>>>     if (!msr_bitmap) { +
>>>>> nested_release_page_clean(page); +               WARN_ON(1); +
>>>>>       return false; +       } + +
> memset(vmx_msr_bitmap_nested,
>>>>> 0xff, PAGE_SIZE); + +       if
>>>>> (nested_cpu_has_virt_x2apic_mode(vmcs12)) +               /* TPR is
>>>>> allowed */ + nested_vmx_disable_intercept_for_msr(msr_bitmap, +
>>>>>           vmx_msr_bitmap_nested, + APIC_BASE_MSR + (APIC_TASKPRI
>>>>>>> 4), + MSR_TYPE_R | MSR_TYPE_W);
>>>> 
>>>> I didn't understand what this function does? Per my understanding,
>>>> you only
>>> need to set the (vmx_msr_bitmap_nested = vmcs01->msr_bitmap |
>>> vmcs12->msr_bitmap) and inject the nested vmexit to L1 if the bit
>>> vmcs12->in msr_bitmap is setting. Am I missing some patches?
>>> 
>>> In the beginning, I want to do "vmcs01->msr_bitmap |
>>> vmcs12->msr_bitmap", but I remember that there isn't a instruction
>>> vmcs12->to
>>> do a bit or operation in two pages effectively, so I do the bit or
>>> operation in nested_vmx_disable_intercept_for_msr. If the hardware
>>> do not support this, I think it is faster if we deal with the bits on demand.
>>> nested_vmx_merge_msr_bitmap is used to merge L0's and L1's bitmaps,
>>> any features can put their logic here.
>> 
>> You construct the nested_msr_bitmap based on vmcs12->msr_bitmap, what
>> happens if vmcs01->msr_bitmap want to trap this msr?
>> 
> 
> If L0 wants to intercept a msr, we should set
> vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic),
> and that bitmaps should only be loaded in non-nested entry.
> Currently we only clear the corresponding bits if L1 enables
> virtualize x2apic mode, all the other bits are all set. On nested
> entry, we load nested_msr_bitmap, on nested vmexit, we will restore L0's bitmap.
> In the nested entry, L0 only care about L2's msr access, not L1's. I
> think that in nested entry, nested_msr_bitmap is "vmcs01->msr_bitmap"
> as your mentioned above.

Mmm... I think if the bit is setting in vmcs01->msr_bitmap means whenever the VCPU(no matter in nested guest mode or not) accesses this msr should cause vmexit, not only L1. That's why need to construct the vmcs02->msr_bitmap based on (vmcs01->msr_bitmap ORed vmcs12->msr_bitmap).

> 
> Thanks,
> Wincy


Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 11:52         ` Zhang, Yang Z
@ 2015-01-28 12:06           ` Wincy Van
  2015-01-28 12:26             ` Zhang, Yang Z
  2015-01-28 12:33             ` Zhang, Yang Z
  0 siblings, 2 replies; 16+ messages in thread
From: Wincy Van @ 2015-01-28 12:06 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

On Wed, Jan 28, 2015 at 7:52 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>>>
>>
>> If L0 wants to intercept a msr, we should set
>> vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic),
>> and that bitmaps should only be loaded in non-nested entry.
>> Currently we only clear the corresponding bits if L1 enables
>> virtualize x2apic mode, all the other bits are all set. On nested
>> entry, we load nested_msr_bitmap, on nested vmexit, we will restore L0's bitmap.
>> In the nested entry, L0 only care about L2's msr access, not L1's. I
>> think that in nested entry, nested_msr_bitmap is "vmcs01->msr_bitmap"
>> as your mentioned above.
>
> Mmm... I think if the bit is setting in vmcs01->msr_bitmap means whenever the VCPU(no matter in nested guest mode or not) accesses this msr should cause vmexit, not only L1. That's why need to construct the vmcs02->msr_bitmap based on (vmcs01->msr_bitmap ORed vmcs12->msr_bitmap).
>

You are right, but this is not fit for all the cases, we should custom
the nested_msr_bitmap.
e.g.  Currently L0 wants to intercept some of the x2apic msrs' reading:
         if (enable_apicv) {
                for (msr = 0x800; msr <= 0x8ff; msr++)
                        vmx_disable_intercept_msr_read_x2apic(msr);

                /* According SDM, in x2apic mode, the whole id reg is used.
                 * But in KVM, it only use the highest eight bits. Need to
                 * intercept it */
                vmx_enable_intercept_msr_read_x2apic(0x802);
                /* TMCCT */
                vmx_enable_intercept_msr_read_x2apic(0x839);
                /* TPR */
                vmx_disable_intercept_msr_write_x2apic(0x808);
                /* EOI */
                vmx_disable_intercept_msr_write_x2apic(0x80b);
                /* SELF-IPI */
                vmx_disable_intercept_msr_write_x2apic(0x83f);
        }
But L1 may not want this. So I think we are better to deal with the msrs
seperately, there is not a common way suit for all the cases. If other
features want to intercept a msr in nested entry, they can put the
custom code in nested_vmx_merge_msr_bitmap.


Thanks,
Wincy

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 12:06           ` Wincy Van
@ 2015-01-28 12:26             ` Zhang, Yang Z
  2015-01-28 12:33             ` Zhang, Yang Z
  1 sibling, 0 replies; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28 12:26 UTC (permalink / raw)
  To: Wincy Van; +Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 3092 bytes --]

Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 7:52 PM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>>>> 
>>> 
>>> If L0 wants to intercept a msr, we should set
>>> vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic),
>>> and that bitmaps should only be loaded in non-nested entry. Currently
>>> we only clear the corresponding bits if L1 enables virtualize x2apic
>>> mode, all the other bits are all set. On nested entry, we load
>>> nested_msr_bitmap, on nested vmexit, we will restore L0's bitmap. In
>>> the nested entry, L0 only care about L2's msr access, not L1's. I
>>> think that in nested entry, nested_msr_bitmap is "vmcs01->msr_bitmap"
>>> as your mentioned above.
>> 
>> Mmm... I think if the bit is setting in vmcs01->msr_bitmap means
>> whenever
> the VCPU(no matter in nested guest mode or not) accesses this msr
> should cause vmexit, not only L1. That's why need to construct the
> vmcs02->msr_bitmap based on (vmcs01->msr_bitmap ORed
> vmcs12->msr_bitmap).
>> 
> 
> You are right, but this is not fit for all the cases, we should custom
> the nested_msr_bitmap.
> e.g.  Currently L0 wants to intercept some of the x2apic msrs' reading:
>          if (enable_apicv) {
>                 for (msr = 0x800; msr <= 0x8ff; msr++)
>                         vmx_disable_intercept_msr_read_x2apic(msr);
>                 /* According SDM, in x2apic mode, the whole id reg is used.
>                  * But in KVM, it only use the highest eight bits. Need to
>                  * intercept it */
>                 vmx_enable_intercept_msr_read_x2apic(0x802); /* TMCCT */
>                 vmx_enable_intercept_msr_read_x2apic(0x839); /* TPR */
>                 vmx_disable_intercept_msr_write_x2apic(0x808); /* EOI */
>                 vmx_disable_intercept_msr_write_x2apic(0x80b); /*
>                 SELF-IPI */
>                 vmx_disable_intercept_msr_write_x2apic(0x83f);
>         }
> But L1 may not want this. So I think we are better to deal with the

Actually, from L0's point, it is totally unaware of the L2. The only thing L0 aware is that the CPU should follow L0's configuration when VCPU is running. So if L0 wants to trap a msr, then the read operation to this msr should cause vmexit unconditionally no matter who is running(who means L1, L2, L3.....).

> msrs seperately, there is not a common way suit for all the cases. If
> other features want to intercept a msr in nested entry, they can put
> the custom code in nested_vmx_merge_msr_bitmap.

Yes, if other features want to do it in 'nested' entry, they can fill nested_vmx_merge_msr_bitmap. But if in non-nested case, it should be our responsibly to handle it correctly, how about add following check:

if (type & MSR_TYPE_R && !test_bit(msr, vmcs01_msr_bitmap) && !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
        __clear_bit(msr, msr_bitmap_nested + 0x000 / f);


> 
> 
> Thanks,
> Wincy


Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 12:06           ` Wincy Van
  2015-01-28 12:26             ` Zhang, Yang Z
@ 2015-01-28 12:33             ` Zhang, Yang Z
  2015-01-28 12:45               ` Wincy Van
  1 sibling, 1 reply; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28 12:33 UTC (permalink / raw)
  To: Wincy Van; +Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 3354 bytes --]

Zhang, Yang Z wrote on 2015-01-28:
> Wincy Van wrote on 2015-01-28:
>> On Wed, Jan 28, 2015 at 7:52 PM, Zhang, Yang Z
>> <yang.z.zhang@intel.com>
>> wrote:
>>>>> 
>>>> 
>>>> If L0 wants to intercept a msr, we should set
>>>> vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic),
>>>> and that bitmaps should only be loaded in non-nested entry. Currently
>>>> we only clear the corresponding bits if L1 enables virtualize x2apic
>>>> mode, all the other bits are all set. On nested entry, we load
>>>> nested_msr_bitmap, on nested vmexit, we will restore L0's bitmap. In
>>>> the nested entry, L0 only care about L2's msr access, not L1's. I
>>>> think that in nested entry, nested_msr_bitmap is "vmcs01->msr_bitmap"
>>>> as your mentioned above.
>>> 
>>> Mmm... I think if the bit is setting in vmcs01->msr_bitmap means
>>> whenever
>> the VCPU(no matter in nested guest mode or not) accesses this msr
>> should cause vmexit, not only L1. That's why need to construct the
>> vmcs02->msr_bitmap based on (vmcs01->msr_bitmap ORed
>> vmcs12->msr_bitmap).
>>> 
>> 
>> You are right, but this is not fit for all the cases, we should
>> custom the nested_msr_bitmap.
>> e.g.  Currently L0 wants to intercept some of the x2apic msrs' reading:
>>          if (enable_apicv) {
>>                 for (msr = 0x800; msr <= 0x8ff; msr++)
> vmx_disable_intercept_msr_read_x2apic(msr);
>>                 /* According SDM, in x2apic mode, the whole id reg
>> is
> used.
>>                  * But in KVM, it only use the highest eight bits. Need to
>>                  * intercept it */
>>                 vmx_enable_intercept_msr_read_x2apic(0x802); /* TMCCT
>>                 */ vmx_enable_intercept_msr_read_x2apic(0x839); /* TPR
>>                 */ vmx_disable_intercept_msr_write_x2apic(0x808); /*
>> EOI
> */
>>                 vmx_disable_intercept_msr_write_x2apic(0x80b); /*
>>                 SELF-IPI */
>>                 vmx_disable_intercept_msr_write_x2apic(0x83f);
>>         }
>> But L1 may not want this. So I think we are better to deal with the
> 
> Actually, from L0's point, it is totally unaware of the L2. The only
> thing L0 aware is that the CPU should follow L0's configuration when
> VCPU is running. So if L0 wants to trap a msr, then the read operation
> to this msr should cause vmexit unconditionally no matter who is running(who means L1, L2, L3.....).
> 
>> msrs seperately, there is not a common way suit for all the cases.
>> If other features want to intercept a msr in nested entry, they can
>> put the custom code in nested_vmx_merge_msr_bitmap.
> 
> Yes, if other features want to do it in 'nested' entry, they can fill
> nested_vmx_merge_msr_bitmap. But if in non-nested case, it should be
> our responsibly to handle it correctly, how about add following check:
> 
> if (type & MSR_TYPE_R && !test_bit(msr, vmcs01_msr_bitmap) &&
> !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
>         __clear_bit(msr, msr_bitmap_nested + 0x000 / f);


Anyway, this is not necessary for your current patch. We can consider it later if there really have other features will use it.

> 
>> 
>> 
>> Thanks,
>> Wincy
> 
> 
> Best regards,
> Yang
>


Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 12:33             ` Zhang, Yang Z
@ 2015-01-28 12:45               ` Wincy Van
  2015-01-28 13:06                 ` Zhang, Yang Z
  0 siblings, 1 reply; 16+ messages in thread
From: Wincy Van @ 2015-01-28 12:45 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

On Wed, Jan 28, 2015 at 8:33 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>>>
>>> You are right, but this is not fit for all the cases, we should
>>> custom the nested_msr_bitmap.
>>> e.g.  Currently L0 wants to intercept some of the x2apic msrs' reading:
>>>          if (enable_apicv) {
>>>                 for (msr = 0x800; msr <= 0x8ff; msr++)
>> vmx_disable_intercept_msr_read_x2apic(msr);
>>>                 /* According SDM, in x2apic mode, the whole id reg
>>> is
>> used.
>>>                  * But in KVM, it only use the highest eight bits. Need to
>>>                  * intercept it */
>>>                 vmx_enable_intercept_msr_read_x2apic(0x802); /* TMCCT
>>>                 */ vmx_enable_intercept_msr_read_x2apic(0x839); /* TPR
>>>                 */ vmx_disable_intercept_msr_write_x2apic(0x808); /*
>>> EOI
>> */
>>>                 vmx_disable_intercept_msr_write_x2apic(0x80b); /*
>>>                 SELF-IPI */
>>>                 vmx_disable_intercept_msr_write_x2apic(0x83f);
>>>         }
>>> But L1 may not want this. So I think we are better to deal with the
>>
>> Actually, from L0's point, it is totally unaware of the L2. The only
>> thing L0 aware is that the CPU should follow L0's configuration when
>> VCPU is running. So if L0 wants to trap a msr, then the read operation
>> to this msr should cause vmexit unconditionally no matter who is running(who means L1, L2, L3.....).
>>
>>> msrs seperately, there is not a common way suit for all the cases.
>>> If other features want to intercept a msr in nested entry, they can
>>> put the custom code in nested_vmx_merge_msr_bitmap.
>>
>> Yes, if other features want to do it in 'nested' entry, they can fill
>> nested_vmx_merge_msr_bitmap. But if in non-nested case, it should be
>> our responsibly to handle it correctly, how about add following check:
>>
>> if (type & MSR_TYPE_R && !test_bit(msr, vmcs01_msr_bitmap) &&
>> !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
>>         __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
>
>
> Anyway, this is not necessary for your current patch. We can consider it later if there really have other features will use it.
>

Yep, I know what you mean now, for other msrs which are not forwarded
access by a mechanism like virtual-apic page, we should intercept it
unconditionally. I think we should ensure the msr can be allowed
before call nested_vmx_disable_intercept_for_msr, if L0 want to
intercept it, just do not call nested_vmx_disable_intercept_for_msr.

 !test_bit(msr, vmcs01_msr_bitmap) will introduce a problem that some
of the msrs will be affcted by vmcs01_msr_bitmap, TMCCT and TPR, for
example.
Intercept reading for these msrs is okay, but it is not efficient.

Thanks,
Wincy

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 12:45               ` Wincy Van
@ 2015-01-28 13:06                 ` Zhang, Yang Z
  2015-01-28 13:57                   ` Wincy Van
  0 siblings, 1 reply; 16+ messages in thread
From: Zhang, Yang Z @ 2015-01-28 13:06 UTC (permalink / raw)
  To: Wincy Van; +Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 3277 bytes --]

Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 8:33 PM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>>>> 
>>>> You are right, but this is not fit for all the cases, we should
>>>> custom the nested_msr_bitmap.
>>>> e.g.  Currently L0 wants to intercept some of the x2apic msrs' reading:
>>>>          if (enable_apicv) {
>>>>                 for (msr = 0x800; msr <= 0x8ff; msr++)
>>>>                 vmx_disable_intercept_msr_read_x2apic(msr); /*
>>>>                 According SDM, in x2apic mode, the whole id reg
>>>> is
>>> used.
>>>>                  * But in KVM, it only use the highest eight bits.
>>>> Need
> to
>>>>                  * intercept it */
>>>>                 vmx_enable_intercept_msr_read_x2apic(0x802); /* TMCCT
>>>>                 */ vmx_enable_intercept_msr_read_x2apic(0x839); /*
>>>>                 TPR */ vmx_disable_intercept_msr_write_x2apic(0x808);
> /*
>>>> EOI
>>> */
>>>>                 vmx_disable_intercept_msr_write_x2apic(0x80b); /*
>>>>                 SELF-IPI */
>>>>                 vmx_disable_intercept_msr_write_x2apic(0x83f);
>>>>         }
>>>> But L1 may not want this. So I think we are better to deal with
>>>> the
>>> 
>>> Actually, from L0's point, it is totally unaware of the L2. The only
>>> thing L0 aware is that the CPU should follow L0's configuration when
>>> VCPU is running. So if L0 wants to trap a msr, then the read operation
>>> to this msr should cause vmexit unconditionally no matter who is
>>> running(who means L1, L2, L3.....).
>>> 
>>>> msrs seperately, there is not a common way suit for all the cases.
>>>> If other features want to intercept a msr in nested entry, they
>>>> can put the custom code in nested_vmx_merge_msr_bitmap.
>>> 
>>> Yes, if other features want to do it in 'nested' entry, they can
>>> fill nested_vmx_merge_msr_bitmap. But if in non-nested case, it
>>> should be our responsibly to handle it correctly, how about add following check:
>>> 
>>> if (type & MSR_TYPE_R && !test_bit(msr, vmcs01_msr_bitmap) &&
>>> !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
>>>         __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
>> 
>> 
>> Anyway, this is not necessary for your current patch. We can consider
>> it later if there really have other features will use it.
>> 
> 
> Yep, I know what you mean now, for other msrs which are not forwarded
> access by a mechanism like virtual-apic page, we should intercept it
> unconditionally. I think we should ensure the msr can be allowed
> before call nested_vmx_disable_intercept_for_msr, if L0 want to
> intercept it, just do not call nested_vmx_disable_intercept_for_msr.

Yes, this is a solution. But I prefer to handle it in nested code path since others may not familiar with nested and may block it by mistake.

> 
>  !test_bit(msr, vmcs01_msr_bitmap) will introduce a problem that some
> of the msrs will be affcted by vmcs01_msr_bitmap, TMCCT and TPR, for example.
> Intercept reading for these msrs is okay, but it is not efficient.

TMCCT is always trapped by most VMM. I don't think TPR is trapped in KVM.

> 
> Thanks,
> Wincy


Best regards,
Yang


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap
  2015-01-28 13:06                 ` Zhang, Yang Z
@ 2015-01-28 13:57                   ` Wincy Van
  0 siblings, 0 replies; 16+ messages in thread
From: Wincy Van @ 2015-01-28 13:57 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Paolo Bonzini, gleb, kvm, linux-kernel, Wanpeng Li, Jan Kiszka

On Wed, Jan 28, 2015 at 9:06 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>>>>         __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
>>>
>>>
>>> Anyway, this is not necessary for your current patch. We can consider
>>> it later if there really have other features will use it.
>>>
>>
>> Yep, I know what you mean now, for other msrs which are not forwarded
>> access by a mechanism like virtual-apic page, we should intercept it
>> unconditionally. I think we should ensure the msr can be allowed
>> before call nested_vmx_disable_intercept_for_msr, if L0 want to
>> intercept it, just do not call nested_vmx_disable_intercept_for_msr.
>
> Yes, this is a solution. But I prefer to handle it in nested code path since others may not familiar with nested and may block it by mistake.
>
>>
>>  !test_bit(msr, vmcs01_msr_bitmap) will introduce a problem that some
>> of the msrs will be affcted by vmcs01_msr_bitmap, TMCCT and TPR, for example.
>> Intercept reading for these msrs is okay, but it is not efficient.
>
> TMCCT is always trapped by most VMM. I don't think TPR is trapped in KVM.
>

Oooops. This piece of code made me confused and I was not think about
that words, just forget it.
( * ^ _ ^ * )


                vmx_enable_intercept_msr_read_x2apic(0x802);
                /* TMCCT */
                vmx_enable_intercept_msr_read_x2apic(0x839);
                /* TPR */


If there are any features use it in the future, let's consider about
this, thank you for your patience.


Thanks,
Wincy

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-01-29  3:51 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-24 10:20 [PATCH v3 1/6] KVM: nVMX: Use hardware MSR bitmap Wincy Van
2015-01-28  8:00 ` Zhang, Yang Z
2015-01-28  8:24   ` Wincy Van
2015-01-28 11:09     ` Zhang, Yang Z
2015-01-28  8:05 ` Zhang, Yang Z
2015-01-28  8:57   ` Wincy Van
2015-01-28 11:25     ` Zhang, Yang Z
2015-01-28 11:43       ` Wincy Van
2015-01-28 11:52         ` Zhang, Yang Z
2015-01-28 12:06           ` Wincy Van
2015-01-28 12:26             ` Zhang, Yang Z
2015-01-28 12:33             ` Zhang, Yang Z
2015-01-28 12:45               ` Wincy Van
2015-01-28 13:06                 ` Zhang, Yang Z
2015-01-28 13:57                   ` Wincy Van
2015-01-28  8:06 ` Zhang, Yang Z

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).