LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: "Yinghai Lu" <yhlu.kernel@gmail.com>
To: "Andi Kleen" <andi@firstfloor.org>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Ingo Molnar" <mingo@elte.hu>,
	"Christoph Lameter" <clameter@sgi.com>
Cc: linux-kernel@vger.kernel.org, pj@sgi.com, linux-mm@kvack.org,
	nickpiggin@yahoo.com.au
Subject: Re: [PATCH] [11/18] Fix alignment bug in bootmem allocator
Date: Mon, 17 Mar 2008 14:27:21 -0700	[thread overview]
Message-ID: <86802c440803171427y7c9b2a54nacb0603916713033@mail.gmail.com> (raw)
In-Reply-To: <86802c440803171152y379560dfp1296aefb0b86b54b@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1159 bytes --]

On Mon, Mar 17, 2008 at 11:52 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> On Mon, Mar 17, 2008 at 1:56 AM, Andi Kleen <andi@firstfloor.org> wrote:
>  > > only happen when align is large than alignment of node_boot_start.
>  >
>  >  Here's an updated version of the patch with this addressed.
>  >  Please review. The patch is somewhat more complicated, but
>  >  actually makes the code a little cleaner now.
>  >
>  >  -Andi
>  >
>  >
>  >  Fix alignment bug in bootmem allocator
>  >
>  >
>  >  Without this fix bootmem can return unaligned addresses when the start of a
>  >  node is not aligned to the align value. Needed for reliably allocating
>  >  gigabyte pages.
>  >
>  >  I removed the offset variable because all tests should align themself correctly
>  >  now. Slight drawback might be that the bootmem allocator will spend
>  >  some more time skipping bits in the bitmap initially, but that shouldn't
>  >  be a big issue.
>  >
>  >
>  >  Signed-off-by: Andi Kleen <ak@suse.de>
>  >
>  how about create local node_boot_start and node_bootmem_map that make
>  sure node_boot_start has bigger alignment than align input.

please check it

YH

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: offset_alloc_bootmem_v2.patch --]
[-- Type: text/x-patch; name=offset_alloc_bootmem_v2.patch, Size: 4834 bytes --]

[PATCH] mm: offset align in alloc_bootmem v2

need offset alignment when node_boot_start's alignment is less than
align required

v2: use local node_boot_start to match align.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -206,9 +206,11 @@ void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 	      unsigned long align, unsigned long goal, unsigned long limit)
 {
-	unsigned long offset, remaining_size, areasize, preferred;
+	unsigned long areasize, preferred;
 	unsigned long i, start = 0, incr, eidx, end_pfn;
 	void *ret;
+	unsigned long node_boot_start;
+	void *node_bootmem_map;
 
 	if (!size) {
 		printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,23 +218,29 @@ __alloc_bootmem_core(struct bootmem_data
 	}
 	BUG_ON(align & (align-1));
 
-	if (limit && bdata->node_boot_start >= limit)
-		return NULL;
-
 	/* on nodes without memory - bootmem_map is NULL */
 	if (!bdata->node_bootmem_map)
 		return NULL;
 
+	/* bdata->node_boot_start is supposed to be (12+6)bits alignment ? */
+	if (align && ffs(align) > ffs(bdata->node_boot_start)) {
+		node_boot_start = ALIGN(bdata->node_boot_start, align);
+		node_bootmem_map = (unsigned char *)bdata->node_bootmem_map +
+		    PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+	} else {
+		node_boot_start = bdata->node_boot_start;
+		node_bootmem_map = bdata->node_bootmem_map;
+	}
+
+	if (limit && node_boot_start >= limit)
+		return NULL;
+
 	end_pfn = bdata->node_low_pfn;
 	limit = PFN_DOWN(limit);
 	if (limit && end_pfn > limit)
 		end_pfn = limit;
 
-	eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
-	offset = 0;
-	if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
-		offset = align - (bdata->node_boot_start & (align - 1UL));
-	offset = PFN_DOWN(offset);
+	eidx = end_pfn - PFN_DOWN(node_boot_start);
 
 	/*
 	 * We try to allocate bootmem pages above 'goal'
@@ -240,15 +248,15 @@ __alloc_bootmem_core(struct bootmem_data
 	 */
 	preferred = 0;
 	if (goal && PFN_DOWN(goal) < end_pfn) {
-		if (goal > bdata->node_boot_start)
-			preferred = goal - bdata->node_boot_start;
+		if (goal > node_boot_start)
+			preferred = goal - node_boot_start;
 
 		if (bdata->last_success >= preferred)
 			if (!limit || (limit && limit > bdata->last_success))
 				preferred = bdata->last_success;
 	}
 
-	preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+	preferred = PFN_DOWN(ALIGN(preferred, align));
 	areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
 	incr = align >> PAGE_SHIFT ? : 1;
 
@@ -256,18 +264,18 @@ restart_scan:
 	for (i = preferred; i < eidx;) {
 		unsigned long j;
 
-		i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+		i = find_next_zero_bit(node_bootmem_map, eidx, i);
 		i = ALIGN(i, incr);
 		if (i >= eidx)
 			break;
-		if (test_bit(i, bdata->node_bootmem_map)) {
+		if (test_bit(i, node_bootmem_map)) {
 			i += incr;
 			continue;
 		}
 		for (j = i + 1; j < i + areasize; ++j) {
 			if (j >= eidx)
 				goto fail_block;
-			if (test_bit(j, bdata->node_bootmem_map))
+			if (test_bit(j, node_bootmem_map))
 				goto fail_block;
 		}
 		start = i;
@@ -278,8 +286,8 @@ restart_scan:
 			i += incr;
 	}
 
-	if (preferred > offset) {
-		preferred = offset;
+	if (preferred > 0) {
+		preferred = 0;
 		goto restart_scan;
 	}
 	return NULL;
@@ -295,6 +303,7 @@ found:
 	 */
 	if (align < PAGE_SIZE &&
 	    bdata->last_offset && bdata->last_pos+1 == start) {
+		unsigned long offset, remaining_size;
 		offset = ALIGN(bdata->last_offset, align);
 		BUG_ON(offset > PAGE_SIZE);
 		remaining_size = PAGE_SIZE - offset;
@@ -303,14 +312,12 @@ found:
 			/* last_pos unchanged */
 			bdata->last_offset = offset + size;
 			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-					   offset +
-					   bdata->node_boot_start);
+					   offset + node_boot_start);
 		} else {
 			remaining_size = size - remaining_size;
 			areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
 			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-					   offset +
-					   bdata->node_boot_start);
+					   offset + node_boot_start);
 			bdata->last_pos = start + areasize - 1;
 			bdata->last_offset = remaining_size;
 		}
@@ -318,14 +325,14 @@ found:
 	} else {
 		bdata->last_pos = start + areasize - 1;
 		bdata->last_offset = size & ~PAGE_MASK;
-		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+		ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
 	}
 
 	/*
 	 * Reserve the area now:
 	 */
 	for (i = start; i < start + areasize; i++)
-		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+		if (unlikely(test_and_set_bit(i, node_bootmem_map)))
 			BUG();
 	memset(ret, 0, size);
 	return ret;

  reply	other threads:[~2008-03-17 21:27 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-17  1:58 [PATCH] [0/18] GB pages hugetlb support Andi Kleen
2008-03-17  1:58 ` [PATCH] [1/18] Convert hugeltlb.c over to pass global state around in a structure Andi Kleen
2008-03-17 20:15   ` Adam Litke
2008-03-18 12:05   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [2/18] Add basic support for more than one hstate in hugetlbfs Andi Kleen
2008-03-17 20:22   ` Adam Litke
2008-03-17 20:44     ` Andi Kleen
2008-03-18 12:23   ` Mel Gorman
2008-03-23 10:38   ` KOSAKI Motohiro
2008-03-23 11:28     ` Andi Kleen
2008-03-23 11:30       ` KOSAKI Motohiro
2008-03-17  1:58 ` [PATCH] [3/18] Convert /proc output code over to report multiple hstates Andi Kleen
2008-03-18 12:28   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [4/18] Add basic support for more than one hstate in hugetlbfs Andi Kleen
2008-03-17  8:09   ` Paul Jackson
2008-03-17  8:15     ` Andi Kleen
2008-03-17 20:28   ` Adam Litke
2008-03-18 14:11   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [5/18] Expand the hugetlbfs sysctls to handle arrays for all hstates Andi Kleen
2008-03-18 14:34   ` Mel Gorman
2008-03-18 16:49     ` Andi Kleen
2008-03-18 17:01       ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [6/18] Add support to have individual hstates for each hugetlbfs mount Andi Kleen
2008-03-18 14:10   ` Adam Litke
2008-03-18 15:02   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [7/18] Abstract out the NUMA node round robin code into a separate function Andi Kleen
2008-03-18 15:42   ` Mel Gorman
2008-03-18 15:47     ` Andi Kleen
2008-03-18 16:04       ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [8/18] Add a __alloc_bootmem_node_nopanic Andi Kleen
2008-03-18 15:54   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [9/18] Export prep_compound_page to the hugetlb allocator Andi Kleen
2008-03-17  1:58 ` [PATCH] [10/18] Factor out new huge page preparation code into separate function Andi Kleen
2008-03-17 20:31   ` Adam Litke
2008-03-18 16:02   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [11/18] Fix alignment bug in bootmem allocator Andi Kleen
2008-03-17  2:19   ` Yinghai Lu
2008-03-17  7:02     ` Andi Kleen
2008-03-17  7:17       ` Yinghai Lu
2008-03-17  7:31         ` Yinghai Lu
2008-03-17  7:41           ` Andi Kleen
2008-03-17  7:53             ` Yinghai Lu
2008-03-17  8:10               ` Yinghai Lu
2008-03-17  8:17                 ` Andi Kleen
2008-03-17  8:56               ` Andi Kleen
2008-03-17 18:52                 ` Yinghai Lu
2008-03-17 21:27                   ` Yinghai Lu [this message]
2008-03-18  2:06                     ` Yinghai Lu
2008-03-18 16:18   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [12/18] Add support to allocate hugetlb pages that are larger than MAX_ORDER Andi Kleen
2008-03-18 16:27   ` Mel Gorman
2008-04-09 16:05   ` Andrew Hastings
2008-04-09 17:56     ` Andi Kleen
2008-03-17  1:58 ` [PATCH] [13/18] Add support to allocate hugepages of different size with hugepages= Andi Kleen
2008-03-18 16:32   ` Mel Gorman
2008-03-18 16:45     ` Andi Kleen
2008-03-18 16:46       ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [14/18] Clean up hugetlb boot time printk Andi Kleen
2008-03-18 16:37   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [15/18] Add support to x86-64 to allocate and lookup GB pages in hugetlb Andi Kleen
2008-03-17  1:58 ` [PATCH] [16/18] Add huge pud support to hugetlbfs Andi Kleen
2008-03-17  1:58 ` [PATCH] [17/18] Add huge pud support to mm/memory.c Andi Kleen
2008-03-17  1:58 ` [PATCH] [18/18] Implement hugepagesz= option for x86-64 Andi Kleen
2008-03-17  9:29   ` Paul Jackson
2008-03-17  9:59     ` Andi Kleen
2008-03-17 10:02       ` Paul Jackson
2008-03-17  3:11 ` [PATCH] [0/18] GB pages hugetlb support Paul Jackson
2008-03-17  7:00   ` Andi Kleen
2008-03-17  7:00     ` Paul Jackson
2008-03-17  7:29       ` Andi Kleen
2008-03-17  5:35 ` Paul Jackson
2008-03-17  6:58   ` Andi Kleen
2008-03-17  9:26 ` Paul Jackson
2008-03-17 15:05 ` Adam Litke
2008-03-17 15:33   ` Andi Kleen
2008-03-17 15:59     ` Adam Litke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=86802c440803171427y7c9b2a54nacb0603916713033@mail.gmail.com \
    --to=yhlu.kernel@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=clameter@sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=nickpiggin@yahoo.com.au \
    --cc=pj@sgi.com \
    --subject='Re: [PATCH] [11/18] Fix alignment bug in bootmem allocator' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).