LKML Archive on lore.kernel.org
* [PATCH] vfio iommu type1: improve memory pinning process for raw PFN mapping
@ 2018-03-19  2:30 Jason Cai (Xiang Feng)
  2018-03-20 21:33 ` Alex Williamson
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Cai (Xiang Feng) @ 2018-03-19  2:30 UTC (permalink / raw)
  To: Alex Williamson, pbonzini, kvm, linux-kernel, linux-mm
  Cc: gnehzuil, Jason Cai (Xiang Feng)

When using vfio to pass through a PCIe device (e.g. a GPU card) that
has a huge BAR (e.g. 16GB), a lot of cycles are wasted on memory
pinning because the PFNs of the PCI BAR are not backed by struct
page, and the corresponding VMA has the VM_PFNMAP flag.

With this change, pinning a region that is a raw PFN mapping skips
the unnecessary user-memory pinning process, which significantly
improves a VM's boot-up time when passing through devices via VFIO.
In my test on a 2.6GHz Xeon E5, the time to map a 16GB BAR was
reduced from about 0.4s to 1.5us.

Signed-off-by: Jason Cai (Xiang Feng) <jason.cai@linux.alibaba.com>
---
 drivers/vfio/vfio_iommu_type1.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 45657e2b1ff7..0658f35318b8 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -397,7 +397,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 {
        unsigned long pfn = 0;
        long ret, pinned = 0, lock_acct = 0;
-       bool rsvd;
        dma_addr_t iova = vaddr - dma->vaddr + dma->iova;

        /* This code path is only user initiated */
@@ -408,14 +407,22 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
        if (ret)
                return ret;

+       if (is_invalid_reserved_pfn(*pfn_base)) {
+               struct vm_area_struct *vma;
+               down_read(&current->mm->mmap_sem);
+               vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
+               pinned = min(npage, (long)vma_pages(vma));
+               up_read(&current->mm->mmap_sem);
+               return pinned;
+       }
+
        pinned++;
-       rsvd = is_invalid_reserved_pfn(*pfn_base);

        /*
         * Reserved pages aren't counted against the user, externally pinned
         * pages are already counted against the user.
         */
-       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+       if (!vfio_find_vpfn(dma, iova)) {
                if (!lock_cap && current->mm->locked_vm + 1 > limit) {
                        put_pfn(*pfn_base, dma->prot);
                        pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
@@ -435,13 +442,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
                if (ret)
                        break;

-               if (pfn != *pfn_base + pinned ||
-                   rsvd != is_invalid_reserved_pfn(pfn)) {
+               if (pfn != *pfn_base + pinned) {
                        put_pfn(pfn, dma->prot);
                        break;
                }

-               if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+               if (!vfio_find_vpfn(dma, iova)) {
                        if (!lock_cap &&
                            current->mm->locked_vm + lock_acct + 1 > limit) {
                                put_pfn(pfn, dma->prot);
@@ -459,10 +465,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,

 unpin_out:
        if (ret) {
-               if (!rsvd) {
-                       for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
-                               put_pfn(pfn, dma->prot);
-               }
+               for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
+                       put_pfn(pfn, dma->prot);

                return ret;
        }
--
2.13.6
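
The fast path added above is exercised when userspace (QEMU, for
instance) mmap()s a device BAR through the vfio device fd and then
registers that virtual address range for DMA with VFIO_IOMMU_MAP_DMA;
for a 16GB BAR that is roughly four million 4KiB PFNs that previously
had to be walked one at a time. A minimal sketch of that userspace
sequence, assuming the container and device fds are already opened and
configured (the helper name is illustrative, error handling is mostly
omitted):

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

/* container_fd: the VFIO container, device_fd: the vfio-pci device.
 * Both are assumed to have been opened and configured beforehand. */
static int map_bar0_for_dma(int container_fd, int device_fd, __u64 iova)
{
	struct vfio_region_info reg = { .argsz = sizeof(reg) };
	struct vfio_iommu_type1_dma_map map = { .argsz = sizeof(map) };
	void *bar;

	/* Ask where BAR0 lives in the device fd's offset space. */
	reg.index = VFIO_PCI_BAR0_REGION_INDEX;
	if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &reg))
		return -1;

	/* mmap() the BAR; this creates the VM_PFNMAP VMA in the caller. */
	bar = mmap(NULL, reg.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   device_fd, reg.offset);
	if (bar == MAP_FAILED)
		return -1;

	/* Register the VA range for DMA; this request ends up in
	 * vfio_pin_pages_remote(), where the VM_PFNMAP shortcut applies. */
	map.vaddr = (__u64)(unsigned long)bar;
	map.iova  = iova;
	map.size  = reg.size;
	map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
	return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
}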


* Re: [PATCH] vfio iommu type1: improve memory pinning process for raw PFN mapping
  2018-03-19  2:30 [PATCH] vfio iommu type1: improve memory pinning process for raw PFN mapping Jason Cai (Xiang Feng)
@ 2018-03-20 21:33 ` Alex Williamson
  2018-03-20 22:04   ` Alex Williamson
  0 siblings, 1 reply; 5+ messages in thread
From: Alex Williamson @ 2018-03-20 21:33 UTC (permalink / raw)
  To: Jason Cai (Xiang Feng); +Cc: pbonzini, kvm, linux-kernel, linux-mm, gnehzuil

On Mon, 19 Mar 2018 10:30:24 +0800
"Jason Cai (Xiang Feng)" <jason.cai@linux.alibaba.com> wrote:

> When using vfio to pass through a PCIe device (e.g. a GPU card) that
> has a huge BAR (e.g. 16GB), a lot of cycles are wasted on memory
> pinning because the PFNs of the PCI BAR are not backed by struct
> page, and the corresponding VMA has the VM_PFNMAP flag.
> 
> With this change, pinning a region that is a raw PFN mapping skips
> the unnecessary user-memory pinning process, which significantly
> improves a VM's boot-up time when passing through devices via VFIO.
> In my test on a 2.6GHz Xeon E5, the time to map a 16GB BAR was
> reduced from about 0.4s to 1.5us.
> 
> Signed-off-by: Jason Cai (Xiang Feng) <jason.cai@linux.alibaba.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 24 ++++++++++++++----------
>  1 file changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 45657e2b1ff7..0658f35318b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -397,7 +397,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>  {
>         unsigned long pfn = 0;
>         long ret, pinned = 0, lock_acct = 0;
> -       bool rsvd;
>         dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
> 
>         /* This code path is only user initiated */
> @@ -408,14 +407,22 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>         if (ret)
>                 return ret;
> 
> +       if (is_invalid_reserved_pfn(*pfn_base)) {
> +               struct vm_area_struct *vma;
> +               down_read(&current->mm->mmap_sem);
> +               vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
> +               pinned = min(npage, (long)vma_pages(vma));
> +               up_read(&current->mm->mmap_sem);
> +               return pinned;
> +       }
> +
>         pinned++;
> -       rsvd = is_invalid_reserved_pfn(*pfn_base);
> 
>         /*
>          * Reserved pages aren't counted against the user, externally pinned
>          * pages are already counted against the user.
>          */
> -       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> +       if (!vfio_find_vpfn(dma, iova)) {
>                 if (!lock_cap && current->mm->locked_vm + 1 > limit) {
>                         put_pfn(*pfn_base, dma->prot);
>                         pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
> @@ -435,13 +442,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>                 if (ret)
>                         break;
> 
> -               if (pfn != *pfn_base + pinned ||
> -                   rsvd != is_invalid_reserved_pfn(pfn)) {
> +               if (pfn != *pfn_base + pinned) {
>                         put_pfn(pfn, dma->prot);
>                         break;
>                 }
> 
> -               if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> +               if (!vfio_find_vpfn(dma, iova)) {
>                         if (!lock_cap &&
>                             current->mm->locked_vm + lock_acct + 1 > limit) {
>                                 put_pfn(pfn, dma->prot);
> @@ -459,10 +465,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> 
>  unpin_out:
>         if (ret) {
> -               if (!rsvd) {
> -                       for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
> -                               put_pfn(pfn, dma->prot);
> -               }
> +               for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
> +                       put_pfn(pfn, dma->prot);
> 
>                 return ret;
>         }

Hi Jason,

Something is wrong with your mail setup: the patch looks normal above,
but when I view the source or save it to try to apply it, the diff is
corrupt, as shown below.  It looks like you may be pasting the patch
into your mailer, which is wrapping lines (ending with '='), replacing
actual '=' characters with '=3D', and converting tabs to spaces.
Please fix your mailer and resend.  Thanks,

Alex

diff --git a/drivers/vfio/vfio_iommu_type1.c =
b/drivers/vfio/vfio_iommu_type1.c
index 45657e2b1ff7..0658f35318b8 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -397,7 +397,6 @@ static long vfio_pin_pages_remote(struct vfio_dma =
*dma, unsigned long vaddr,
 {
        unsigned long pfn =3D 0;
        long ret, pinned =3D 0, lock_acct =3D 0;
-       bool rsvd;
        dma_addr_t iova =3D vaddr - dma->vaddr + dma->iova;

        /* This code path is only user initiated */
@@ -408,14 +407,22 @@ static long vfio_pin_pages_remote(struct vfio_dma =
*dma, unsigned long vaddr,
        if (ret)
                return ret;

+       if (is_invalid_reserved_pfn(*pfn_base)) {
+               struct vm_area_struct *vma;
+               down_read(&current->mm->mmap_sem);
+               vma =3D find_vma_intersection(current->mm, vaddr, vaddr =
+ 1);
+               pinned =3D min(npage, (long)vma_pages(vma));
+               up_read(&current->mm->mmap_sem);
+               return pinned;
+       }
+
        pinned++;
-       rsvd =3D is_invalid_reserved_pfn(*pfn_base);

        /*
         * Reserved pages aren't counted against the user, externally =
pinned
         * pages are already counted against the user.
         */
-       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+       if (!vfio_find_vpfn(dma, iova)) {
                if (!lock_cap && current->mm->locked_vm + 1 > limit) {
                        put_pfn(*pfn_base, dma->prot);
                        pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", =
__func__,
@@ -435,13 +442,12 @@ static long vfio_pin_pages_remote(struct vfio_dma =
*dma, unsigned long vaddr,
                if (ret)
                        break;

-               if (pfn !=3D *pfn_base + pinned ||
-                   rsvd !=3D is_invalid_reserved_pfn(pfn)) {
+               if (pfn !=3D *pfn_base + pinned) {
                        put_pfn(pfn, dma->prot);
                        break;
                }

-               if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+               if (!vfio_find_vpfn(dma, iova)) {
                        if (!lock_cap &&
                            current->mm->locked_vm + lock_acct + 1 > =
limit) {
                                put_pfn(pfn, dma->prot);
@@ -459,10 +465,8 @@ static long vfio_pin_pages_remote(struct vfio_dma =
*dma, unsigned long vaddr,

 unpin_out:
        if (ret) {
-               if (!rsvd) {
-                       for (pfn =3D *pfn_base ; pinned ; pfn++, =
pinned--)
-                               put_pfn(pfn, dma->prot);
-               }
+               for (pfn =3D *pfn_base ; pinned ; pfn++, pinned--)
+                       put_pfn(pfn, dma->prot);

                return ret;
        }
--
2.13.6


* Re: [PATCH] vfio iommu type1: improve memory pinning process for raw PFN mapping
  2018-03-20 21:33 ` Alex Williamson
@ 2018-03-20 22:04   ` Alex Williamson
  0 siblings, 0 replies; 5+ messages in thread
From: Alex Williamson @ 2018-03-20 22:04 UTC (permalink / raw)
  To: Jason Cai (Xiang Feng); +Cc: pbonzini, kvm, linux-kernel, linux-mm, gnehzuil

On Tue, 20 Mar 2018 15:33:23 -0600
Alex Williamson <alex.williamson@redhat.com> wrote:

> On Mon, 19 Mar 2018 10:30:24 +0800
> "Jason Cai (Xiang Feng)" <jason.cai@linux.alibaba.com> wrote:
> 
> > When using vfio to pass through a PCIe device (e.g. a GPU card) that
> > has a huge BAR (e.g. 16GB), a lot of cycles are wasted on memory
> > pinning because the PFNs of the PCI BAR are not backed by struct
> > page, and the corresponding VMA has the VM_PFNMAP flag.
> > 
> > With this change, pinning a region that is a raw PFN mapping skips
> > the unnecessary user-memory pinning process, which significantly
> > improves a VM's boot-up time when passing through devices via VFIO.
> > In my test on a 2.6GHz Xeon E5, the time to map a 16GB BAR was
> > reduced from about 0.4s to 1.5us.
> > 
> > Signed-off-by: Jason Cai (Xiang Feng) <jason.cai@linux.alibaba.com>
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 24 ++++++++++++++----------
> >  1 file changed, 14 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > index 45657e2b1ff7..0658f35318b8 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -397,7 +397,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> >  {
> >         unsigned long pfn = 0;
> >         long ret, pinned = 0, lock_acct = 0;
> > -       bool rsvd;
> >         dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
> > 
> >         /* This code path is only user initiated */
> > @@ -408,14 +407,22 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> >         if (ret)
> >                 return ret;
> > 
> > +       if (is_invalid_reserved_pfn(*pfn_base)) {
> > +               struct vm_area_struct *vma;
> > +               down_read(&current->mm->mmap_sem);
> > +               vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
> > +               pinned = min(npage, (long)vma_pages(vma));
> > +               up_read(&current->mm->mmap_sem);
> > +               return pinned;
> > +       }
> > +
> >         pinned++;
> > -       rsvd = is_invalid_reserved_pfn(*pfn_base);
> > 
> >         /*
> >          * Reserved pages aren't counted against the user, externally pinned
> >          * pages are already counted against the user.
> >          */
> > -       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> > +       if (!vfio_find_vpfn(dma, iova)) {
> >                 if (!lock_cap && current->mm->locked_vm + 1 > limit) {
> >                         put_pfn(*pfn_base, dma->prot);
> >                         pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
> > @@ -435,13 +442,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> >                 if (ret)
> >                         break;
> > 
> > -               if (pfn != *pfn_base + pinned ||
> > -                   rsvd != is_invalid_reserved_pfn(pfn)) {
> > +               if (pfn != *pfn_base + pinned) {
> >                         put_pfn(pfn, dma->prot);
> >                         break;
> >                 }
> > 
> > -               if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> > +               if (!vfio_find_vpfn(dma, iova)) {
> >                         if (!lock_cap &&
> >                             current->mm->locked_vm + lock_acct + 1 > limit) {
> >                                 put_pfn(pfn, dma->prot);
> > @@ -459,10 +465,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> > 
> >  unpin_out:
> >         if (ret) {
> > -               if (!rsvd) {
> > -                       for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
> > -                               put_pfn(pfn, dma->prot);
> > -               }
> > +               for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
> > +                       put_pfn(pfn, dma->prot);
> > 
> >                 return ret;
> >         }  
> 
> Hi Jason,
> 
> Something is wrong with your mail setup: the patch looks normal above,
> but when I view the source or save it to try to apply it, the diff is
> corrupt, as shown below.  It looks like you may be pasting the patch
> into your mailer, which is wrapping lines (ending with '='), replacing
> actual '=' characters with '=3D', and converting tabs to spaces.
> Please fix your mailer and resend.  Thanks,

Actually, it's even a little stranger: the copies I received via
the kvm and lkml mailing lists don't have the line wrapping or the
character conversion, but the tab-to-space conversion is still there,
which is what led me to suspect a copy-paste into the mailer.
Between the headers, I see the direct copy has:

Content-Transfer-Encoding: quoted-printable

While the list copy has:

Content-Transfer-Encoding: 8BIT

Perhaps the list is automatically fixing some part of the problem.
Thanks,

Alex

> diff --git a/drivers/vfio/vfio_iommu_type1.c =
> b/drivers/vfio/vfio_iommu_type1.c
> index 45657e2b1ff7..0658f35318b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -397,7 +397,6 @@ static long vfio_pin_pages_remote(struct vfio_dma =
> *dma, unsigned long vaddr,
>  {
>         unsigned long pfn =3D 0;
>         long ret, pinned =3D 0, lock_acct =3D 0;
> -       bool rsvd;
>         dma_addr_t iova =3D vaddr - dma->vaddr + dma->iova;
> 
>         /* This code path is only user initiated */
> @@ -408,14 +407,22 @@ static long vfio_pin_pages_remote(struct vfio_dma =
> *dma, unsigned long vaddr,
>         if (ret)
>                 return ret;
> 
> +       if (is_invalid_reserved_pfn(*pfn_base)) {
> +               struct vm_area_struct *vma;
> +               down_read(&current->mm->mmap_sem);
> +               vma =3D find_vma_intersection(current->mm, vaddr, vaddr =
> + 1);
> +               pinned =3D min(npage, (long)vma_pages(vma));
> +               up_read(&current->mm->mmap_sem);
> +               return pinned;
> +       }
> +
>         pinned++;
> -       rsvd =3D is_invalid_reserved_pfn(*pfn_base);
> 
>         /*
>          * Reserved pages aren't counted against the user, externally =
> pinned
>          * pages are already counted against the user.
>          */
> -       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> +       if (!vfio_find_vpfn(dma, iova)) {
>                 if (!lock_cap && current->mm->locked_vm + 1 > limit) {
>                         put_pfn(*pfn_base, dma->prot);
>                         pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", =
> __func__,
> @@ -435,13 +442,12 @@ static long vfio_pin_pages_remote(struct vfio_dma =
> *dma, unsigned long vaddr,
>                 if (ret)
>                         break;
> 
> -               if (pfn !=3D *pfn_base + pinned ||
> -                   rsvd !=3D is_invalid_reserved_pfn(pfn)) {
> +               if (pfn !=3D *pfn_base + pinned) {
>                         put_pfn(pfn, dma->prot);
>                         break;
>                 }
> 
> -               if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> +               if (!vfio_find_vpfn(dma, iova)) {
>                         if (!lock_cap &&
>                             current->mm->locked_vm + lock_acct + 1 > =
> limit) {
>                                 put_pfn(pfn, dma->prot);
> @@ -459,10 +465,8 @@ static long vfio_pin_pages_remote(struct vfio_dma =
> *dma, unsigned long vaddr,
> 
>  unpin_out:
>         if (ret) {
> -               if (!rsvd) {
> -                       for (pfn =3D *pfn_base ; pinned ; pfn++, =
> pinned--)
> -                               put_pfn(pfn, dma->prot);
> -               }
> +               for (pfn =3D *pfn_base ; pinned ; pfn++, pinned--)
> +                       put_pfn(pfn, dma->prot);
> 
>                 return ret;
>         }
> --
> 2.13.6
> 
> 


* Re: [PATCH] vfio iommu type1: improve memory pinning process for raw PFN mapping
  2018-03-22  4:52 Jason Cai (Xiang Feng)
@ 2018-03-22 21:48 ` Alex Williamson
  0 siblings, 0 replies; 5+ messages in thread
From: Alex Williamson @ 2018-03-22 21:48 UTC (permalink / raw)
  To: Jason Cai (Xiang Feng); +Cc: pbonzini, kvm, linux-kernel, linux-mm, gnehzuil

On Thu, 22 Mar 2018 12:52:16 +0800
"Jason Cai (Xiang Feng)" <jason.cai@linux.alibaba.com> wrote:

> When using vfio to pass through a PCIe device (e.g. a GPU card) that
> has a huge BAR (e.g. 16GB), a lot of cycles are wasted on memory
> pinning because the PFNs of the PCI BAR are not backed by struct
> page, and the corresponding VMA has the VM_PFNMAP flag.
> 
> With this change, pinning a region that is a raw PFN mapping skips
> the unnecessary user-memory pinning process, which significantly
> improves a VM's boot-up time when passing through devices via VFIO.
> In my test on a 2.6GHz Xeon E5, the time to map a 16GB BAR was
> reduced from about 0.4s to 1.5us.
> 
> Signed-off-by: Jason Cai (Xiang Feng) <jason.cai@linux.alibaba.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 24 ++++++++++++++----------
>  1 file changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 45657e2b1ff7..0658f35318b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -397,7 +397,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>  {
>  	unsigned long pfn = 0;
>  	long ret, pinned = 0, lock_acct = 0;
> -	bool rsvd;
>  	dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
>  
>  	/* This code path is only user initiated */
> @@ -408,14 +407,22 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>  	if (ret)
>  		return ret;
>  
> +	if (is_invalid_reserved_pfn(*pfn_base)) {
> +		struct vm_area_struct *vma;

scripts/checkpatch.pl suggests a blank line here to separate the
variable declaration from the code.

> +		down_read(&current->mm->mmap_sem);
> +		vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
> +		pinned = min(npage, (long)vma_pages(vma));

checkpatch also suggests using min_t() rather than casting to a
compatible type, i.e.:

	pinned = min_t(long, npage, vma_pages(vma));
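
With both of those fixes folded in, the new hunk would read roughly
like this (a sketch of the expected end result, not a literal excerpt
from the committed patch):

	if (is_invalid_reserved_pfn(*pfn_base)) {
		struct vm_area_struct *vma;

		down_read(&current->mm->mmap_sem);
		vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
		pinned = min_t(long, npage, vma_pages(vma));
		up_read(&current->mm->mmap_sem);
		return pinned;
	}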

I'll make these updates on commit; please run checkpatch on future
patches.  Applied to the vfio next branch for v4.17.  Thanks,

Alex

> +		up_read(&current->mm->mmap_sem);
> +		return pinned;
> +	}
> +
>  	pinned++;
> -	rsvd = is_invalid_reserved_pfn(*pfn_base);
>  
>  	/*
>  	 * Reserved pages aren't counted against the user, externally pinned
>  	 * pages are already counted against the user.
>  	 */
> -	if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> +	if (!vfio_find_vpfn(dma, iova)) {
>  		if (!lock_cap && current->mm->locked_vm + 1 > limit) {
>  			put_pfn(*pfn_base, dma->prot);
>  			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
> @@ -435,13 +442,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>  		if (ret)
>  			break;
>  
> -		if (pfn != *pfn_base + pinned ||
> -		    rsvd != is_invalid_reserved_pfn(pfn)) {
> +		if (pfn != *pfn_base + pinned) {
>  			put_pfn(pfn, dma->prot);
>  			break;
>  		}
>  
> -		if (!rsvd && !vfio_find_vpfn(dma, iova)) {
> +		if (!vfio_find_vpfn(dma, iova)) {
>  			if (!lock_cap &&
>  			    current->mm->locked_vm + lock_acct + 1 > limit) {
>  				put_pfn(pfn, dma->prot);
> @@ -459,10 +465,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>  
>  unpin_out:
>  	if (ret) {
> -		if (!rsvd) {
> -			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
> -				put_pfn(pfn, dma->prot);
> -		}
> +		for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
> +			put_pfn(pfn, dma->prot);
>  
>  		return ret;
>  	}


* [PATCH] vfio iommu type1: improve memory pinning process for raw PFN mapping
@ 2018-03-22  4:52 Jason Cai (Xiang Feng)
  2018-03-22 21:48 ` Alex Williamson
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Cai (Xiang Feng) @ 2018-03-22  4:52 UTC (permalink / raw)
  To: alex.williamson, pbonzini, kvm, linux-kernel, linux-mm
  Cc: gnehzuil, jason.cai

When using vfio to pass through a PCIe device (e.g. a GPU card) that
has a huge BAR (e.g. 16GB), a lot of cycles are wasted on memory
pinning because the PFNs of the PCI BAR are not backed by struct
page, and the corresponding VMA has the VM_PFNMAP flag.

With this change, pinning a region that is a raw PFN mapping skips
the unnecessary user-memory pinning process, which significantly
improves a VM's boot-up time when passing through devices via VFIO.
In my test on a 2.6GHz Xeon E5, the time to map a 16GB BAR was
reduced from about 0.4s to 1.5us.

Signed-off-by: Jason Cai (Xiang Feng) <jason.cai@linux.alibaba.com>
---
 drivers/vfio/vfio_iommu_type1.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 45657e2b1ff7..0658f35318b8 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -397,7 +397,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 {
 	unsigned long pfn = 0;
 	long ret, pinned = 0, lock_acct = 0;
-	bool rsvd;
 	dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
 
 	/* This code path is only user initiated */
@@ -408,14 +407,22 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	if (ret)
 		return ret;
 
+	if (is_invalid_reserved_pfn(*pfn_base)) {
+		struct vm_area_struct *vma;
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
+		pinned = min(npage, (long)vma_pages(vma));
+		up_read(&current->mm->mmap_sem);
+		return pinned;
+	}
+
 	pinned++;
-	rsvd = is_invalid_reserved_pfn(*pfn_base);
 
 	/*
 	 * Reserved pages aren't counted against the user, externally pinned
 	 * pages are already counted against the user.
 	 */
-	if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+	if (!vfio_find_vpfn(dma, iova)) {
 		if (!lock_cap && current->mm->locked_vm + 1 > limit) {
 			put_pfn(*pfn_base, dma->prot);
 			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
@@ -435,13 +442,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 		if (ret)
 			break;
 
-		if (pfn != *pfn_base + pinned ||
-		    rsvd != is_invalid_reserved_pfn(pfn)) {
+		if (pfn != *pfn_base + pinned) {
 			put_pfn(pfn, dma->prot);
 			break;
 		}
 
-		if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+		if (!vfio_find_vpfn(dma, iova)) {
 			if (!lock_cap &&
 			    current->mm->locked_vm + lock_acct + 1 > limit) {
 				put_pfn(pfn, dma->prot);
@@ -459,10 +465,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 
 unpin_out:
 	if (ret) {
-		if (!rsvd) {
-			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
-				put_pfn(pfn, dma->prot);
-		}
+		for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
+			put_pfn(pfn, dma->prot);
 
 		return ret;
 	}
-- 
2.13.6


Thread overview: 5+ messages
2018-03-19  2:30 [PATCH] vfio iommu type1: improve memory pinning process for raw PFN mapping Jason Cai (Xiang Feng)
2018-03-20 21:33 ` Alex Williamson
2018-03-20 22:04   ` Alex Williamson
2018-03-22  4:52 Jason Cai (Xiang Feng)
2018-03-22 21:48 ` Alex Williamson
