LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH -mm 2/2] kexec/i386: kexec page table code clean up - page table setup in C
@ 2008-01-09  2:57 Huang, Ying
  2008-01-10  1:05 ` Vivek Goyal
  0 siblings, 1 reply; 5+ messages in thread
From: Huang, Ying @ 2008-01-09  2:57 UTC (permalink / raw)
  To: Eric W. Biederman, Vivek Goyal, Andrew Morton; +Cc: linux-kernel

This patch transforms the kexec page tables setup code from assembler
code to C code in machine_kexec_prepare. This improves readability and
reduces code line number.

Signed-off-by: Huang Ying <ying.huang@intel.com>

---
 arch/x86/kernel/machine_kexec_32.c   |   50 +++++++++++----
 arch/x86/kernel/relocate_kernel_32.S |  114 -----------------------------------
 include/asm-x86/kexec_32.h           |   18 -----
 3 files changed, 40 insertions(+), 142 deletions(-)

--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -86,6 +86,42 @@ static void free_page_tables(struct kima
 	free_page((unsigned long)image->arch_kimage.pte1);
 }
 
+static void page_table_set_one(pgd_t *pgd, pmd_t *pmd, pte_t *pte,
+			       unsigned long vaddr, unsigned long paddr)
+{
+	pud_t *pud;
+
+	pgd += pgd_index(vaddr);
+#ifdef CONFIG_X86_PAE
+	if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+		set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
+#endif
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	if (!(pmd_val(*pmd) & _PAGE_PRESENT))
+		set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+	pte = pte_offset_kernel(pmd, vaddr);
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+}
+
+static void prepare_page_tables(struct kimage *image)
+{
+	void *control_page;
+	pmd_t *pmd = 0;
+
+	control_page = page_address(image->control_code_page);
+#ifdef CONFIG_X86_PAE
+	pmd = image->arch_kimage.pmd0;
+#endif
+	page_table_set_one(image->arch_kimage.pgd, pmd, image->arch_kimage.pte0,
+			   (unsigned long)relocate_kernel, __pa(control_page));
+#ifdef CONFIG_X86_PAE
+	pmd = image->arch_kimage.pmd1;
+#endif
+	page_table_set_one(image->arch_kimage.pgd, pmd, image->arch_kimage.pte1,
+			   __pa(control_page), __pa(control_page));
+}
+
 /*
  * A architecture hook called to validate the
  * proposed image and prepare the control pages
@@ -98,6 +134,7 @@ static void free_page_tables(struct kima
  * later.
  *
  * - Allocate page tables
+ * - Setup page tables
  */
 int machine_kexec_prepare(struct kimage *image)
 {
@@ -112,6 +149,7 @@ int machine_kexec_prepare(struct kimage 
 		free_page_tables(image);
 		return -ENOMEM;
 	}
+	prepare_page_tables(image);
 	return 0;
 }
 
@@ -140,19 +178,7 @@ NORET_TYPE void machine_kexec(struct kim
 	memcpy(control_page, relocate_kernel, PAGE_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = __pa(control_page);
-	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
 	page_list[PA_PGD] = __pa(image->arch_kimage.pgd);
-	page_list[VA_PGD] = (unsigned long)image->arch_kimage.pgd;
-#ifdef CONFIG_X86_PAE
-	page_list[PA_PMD_0] = __pa(image->arch_kimage.pmd0);
-	page_list[VA_PMD_0] = (unsigned long)image->arch_kimage.pmd0;
-	page_list[PA_PMD_1] = __pa(image->arch_kimage.pmd1);
-	page_list[VA_PMD_1] = (unsigned long)image->arch_kimage.pmd1;
-#endif
-	page_list[PA_PTE_0] = __pa(image->arch_kimage.pte0);
-	page_list[VA_PTE_0] = (unsigned long)image->arch_kimage.pte0;
-	page_list[PA_PTE_1] = __pa(image->arch_kimage.pte1);
-	page_list[VA_PTE_1] = (unsigned long)image->arch_kimage.pte1;
 
 	/* The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -16,126 +16,12 @@
 
 #define PTR(x) (x << 2)
 #define PAGE_ALIGNED (1 << PAGE_SHIFT)
-#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
-#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
 
 	.text
 	.align PAGE_ALIGNED
 	.globl relocate_kernel
 relocate_kernel:
 	movl	8(%esp), %ebp /* list of pages */
-
-#ifdef CONFIG_X86_PAE
-	/* map the control page at its virtual address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xc0000000, %eax
-	shrl	$27, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PMD_0)(%ebp), %edx
-	orl	$PAE_PGD_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PMD_0)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x3fe00000, %eax
-	shrl	$18, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_0)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_0)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x001ff000, %eax
-	shrl	$9, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	/* identity map the control page at its physical address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xc0000000, %eax
-	shrl	$27, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PMD_1)(%ebp), %edx
-	orl	$PAE_PGD_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PMD_1)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x3fe00000, %eax
-	shrl	$18, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_1)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_1)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x001ff000, %eax
-	shrl	$9, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-#else
-	/* map the control page at its virtual address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xffc00000, %eax
-	shrl	$20, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_0)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_0)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x003ff000, %eax
-	shrl	$10, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	/* identity map the control page at its physical address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xffc00000, %eax
-	shrl	$20, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_1)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_1)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x003ff000, %eax
-	shrl	$10, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-#endif
-
-relocate_new_kernel:
 	/* read the arguments and say goodbye to the stack */
 	movl  4(%esp), %ebx /* page_list */
 	movl  8(%esp), %ebp /* list of pages */
--- a/include/asm-x86/kexec_32.h
+++ b/include/asm-x86/kexec_32.h
@@ -2,22 +2,8 @@
 #define _I386_KEXEC_H
 
 #define PA_CONTROL_PAGE  0
-#define VA_CONTROL_PAGE  1
-#define PA_PGD           2
-#define VA_PGD           3
-#define PA_PTE_0         4
-#define VA_PTE_0         5
-#define PA_PTE_1         6
-#define VA_PTE_1         7
-#ifdef CONFIG_X86_PAE
-#define PA_PMD_0         8
-#define VA_PMD_0         9
-#define PA_PMD_1         10
-#define VA_PMD_1         11
-#define PAGES_NR         12
-#else
-#define PAGES_NR         8
-#endif
+#define PA_PGD           1
+#define PAGES_NR         2
 
 #ifndef __ASSEMBLY__
 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm 2/2] kexec/i386: kexec page table code clean up - page table setup in C
  2008-01-09  2:57 [PATCH -mm 2/2] kexec/i386: kexec page table code clean up - page table setup in C Huang, Ying
@ 2008-01-10  1:05 ` Vivek Goyal
  2008-01-10  2:08   ` Huang, Ying
  0 siblings, 1 reply; 5+ messages in thread
From: Vivek Goyal @ 2008-01-10  1:05 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, Horms, magnus.damn

On Wed, Jan 09, 2008 at 10:57:50AM +0800, Huang, Ying wrote:
> This patch transforms the kexec page tables setup code from asseumbler
> code to iC code in machine_kexec_prepare. This improves readability and
> reduces code line number.
> 

I think this will create issues for Xen. Initially page table setup
was in C but Xen Guests could not modify the page tables. I think Xen
folks implemented a hypercall where they passed all the page table pages
and the control pages and then hypervisor executed the control page(which
in turn setup the page tables). I think that's why page table setup
code is on the control page in assembly.

You might want to go through Xen kexec implementation and dig through
kexec mailing list archive.

CCing Magnus and Horms. They had done the page tables related changes
for Xen.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm 2/2] kexec/i386: kexec page table code clean up - page table setup in C
  2008-01-10  1:05 ` Vivek Goyal
@ 2008-01-10  2:08   ` Huang, Ying
  2008-01-10  7:21     ` Simon Horman
  0 siblings, 1 reply; 5+ messages in thread
From: Huang, Ying @ 2008-01-10  2:08 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, Horms, magnus.damn

On Wed, 2008-01-09 at 20:05 -0500, Vivek Goyal wrote:
> On Wed, Jan 09, 2008 at 10:57:50AM +0800, Huang, Ying wrote:
> > This patch transforms the kexec page tables setup code from asseumbler
> > code to iC code in machine_kexec_prepare. This improves readability and
> > reduces code line number.
> > 
> 
> I think this will create issues for Xen. Initially page table setup
> was in C but Xen Guests could not modify the page tables. I think Xen
> folks implemented a hypercall where they passed all the page table pages
> and the control pages and then hypervisor executed the control page(which
> in turn setup the page tables). I think that's why page table setup
> code is on the control page in assembly.
> 
> You might want to go through Xen kexec implementation and dig through
> kexec mailing list archive.

OK, I will check the Xen kexec implementation.

> CCing Magnus and Horms. They had done the page tables related changes
> for Xen.

Best Regards,
Huang Ying


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm 2/2] kexec/i386: kexec page table code clean up - page table setup in C
  2008-01-10  2:08   ` Huang, Ying
@ 2008-01-10  7:21     ` Simon Horman
  2008-01-14 13:29       ` Ian Campbell
  0 siblings, 1 reply; 5+ messages in thread
From: Simon Horman @ 2008-01-10  7:21 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Vivek Goyal, Eric W. Biederman, Andrew Morton, linux-kernel,
	Magnus Damm, Ian Campbell

[ CCing Ian Campbell who handles much of the maintenance of kexec in Xen ]

On Thu, Jan 10, 2008 at 10:08:09AM +0800, Huang, Ying wrote:
> On Wed, 2008-01-09 at 20:05 -0500, Vivek Goyal wrote:
> > On Wed, Jan 09, 2008 at 10:57:50AM +0800, Huang, Ying wrote:
> > > This patch transforms the kexec page tables setup code from asseumbler
> > > code to iC code in machine_kexec_prepare. This improves readability and
> > > reduces code line number.
> > > 
> > 
> > I think this will create issues for Xen. Initially page table setup
> > was in C but Xen Guests could not modify the page tables. I think Xen
> > folks implemented a hypercall where they passed all the page table pages
> > and the control pages and then hypervisor executed the control page(which
> > in turn setup the page tables). I think that's why page table setup
> > code is on the control page in assembly.
> > 
> > You might want to go through Xen kexec implementation and dig through
> > kexec mailing list archive.
> 
> OK, I will check the Xen kexec implementation.
> 
> > CCing Magnus and Horms. They had done the page tables related changes
> > for Xen.

I think that potentially there is a problem here, though weather or
not it manifests is another question.

In machine_kexec() a PAGE_SIZE blob of code starting at relocate_kernel
gets saved. I think that previously x86 Linux excuded this saved blob,
though the current code seems to execute relocate_kernel() directly -
then again perhaps I'm confusing things with ia64 which still executes
the saved blob.

In any case, in the case of xen, the hypervisor will execute this saved
blob. So I think that the crux of the issue is that the blob contains
all the instructions required to run relocate_kernel, then it should
work from inside xen, if and if it doesn't, then i'll blow up.

The code that you have seems to satisfy this requirement,
so relocate_kernel itself shouldn't be a problem. And as
long as the preparation work does exacltly what the removed
portions of relocate_kernel used to do, which it seems to,
things should be fine.

That said, this certainly warrants testing :-)

I'm all in favour of this kind of consolodation,
so hopefully we can bend Xen's will if it doesn't work as is.

Speaking of which, I strongly suspect machine_kexec_32.c and
machine_kexec_64.c an be consolidated a bit.

-- 
Horms


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm 2/2] kexec/i386: kexec page table code clean up - page table setup in C
  2008-01-10  7:21     ` Simon Horman
@ 2008-01-14 13:29       ` Ian Campbell
  0 siblings, 0 replies; 5+ messages in thread
From: Ian Campbell @ 2008-01-14 13:29 UTC (permalink / raw)
  To: Simon Horman
  Cc: Huang, Ying, Vivek Goyal, Eric W. Biederman, Andrew Morton,
	linux-kernel, Magnus Damm


On Thu, 2008-01-10 at 16:21 +0900, Simon Horman wrote: 
> [ CCing Ian Campbell who handles much of the maintenance of kexec in Xen ]
> 
> On Thu, Jan 10, 2008 at 10:08:09AM +0800, Huang, Ying wrote:
> > On Wed, 2008-01-09 at 20:05 -0500, Vivek Goyal wrote:
> > > On Wed, Jan 09, 2008 at 10:57:50AM +0800, Huang, Ying wrote:
> > > > This patch transforms the kexec page tables setup code from asseumbler
> > > > code to iC code in machine_kexec_prepare. This improves readability and
> > > > reduces code line number.
> > > > 
> > > 
> > > I think this will create issues for Xen. Initially page table setup
> > > was in C but Xen Guests could not modify the page tables. I think Xen
> > > folks implemented a hypercall where they passed all the page table pages
> > > and the control pages and then hypervisor executed the control page(which
> > > in turn setup the page tables). I think that's why page table setup
> > > code is on the control page in assembly.
> > > 
> > > You might want to go through Xen kexec implementation and dig through
> > > kexec mailing list archive.
> > 
> > OK, I will check the Xen kexec implementation.
> > 
> > > CCing Magnus and Horms. They had done the page tables related changes
> > > for Xen.
> 
> I think that potentially there is a problem here, though weather or
> not it manifests is another question.
> 
> In machine_kexec() a PAGE_SIZE blob of code starting at relocate_kernel
> gets saved. I think that previously x86 Linux excuded this saved blob,
> though the current code seems to execute relocate_kernel() directly -
> then again perhaps I'm confusing things with ia64 which still executes
> the saved blob.
> 
> In any case, in the case of xen, the hypervisor will execute this saved
> blob. So I think that the crux of the issue is that the blob contains
> all the instructions required to run relocate_kernel, then it should
> work from inside xen, if and if it doesn't, then i'll blow up.
> 
> The code that you have seems to satisfy this requirement,
> so relocate_kernel itself shouldn't be a problem. And as
> long as the preparation work does exacltly what the removed
> portions of relocate_kernel used to do, which it seems to,
> things should be fine.
> 
> That said, this certainly warrants testing :-)

In the absence of domain 0 or Xen kexec in the upstream kernel testing
might be quite hard.

> I'm all in favour of this kind of consolodation,
> so hopefully we can bend Xen's will if it doesn't work as is.

Indeed, patch seem reasonable to me. Whatever issues this causes for Xen
will be stuff that can be fixed once someone does the kexec for Xen work
in the upstream kernel, I don't think it paints us into an inescapable
corner.

Ian.



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-01-14 13:30 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-09  2:57 [PATCH -mm 2/2] kexec/i386: kexec page table code clean up - page table setup in C Huang, Ying
2008-01-10  1:05 ` Vivek Goyal
2008-01-10  2:08   ` Huang, Ying
2008-01-10  7:21     ` Simon Horman
2008-01-14 13:29       ` Ian Campbell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).