LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [Resend][PATCH -v2 3/3 -next] Fix the wrong corrupted page information of hugetlb page.
@ 2011-02-07  4:48 Jin Dongming
  0 siblings, 0 replies; only message in thread
From: Jin Dongming @ 2011-02-07  4:48 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Naoya Horiguchi, Huang Ying, Wu Fengguang, Hidetoshi Seto, LKLM

When more than on tail page of the same mapped hugetlb page are
poisoned at the same time, the sum of corrupted page should be
the size of one hugetlb page(512 pages). But HardwareCorrupted
information in /proc/meminfo is 1024 pages or more.

That is because when the above condition happened,
__memory_failure() will be called many times and pages in hugetlb
page are accounted more than one time.

This patch fixed it by incrementing the number of corrupted page
one by one.

-v2 fix checking for "just unpoisoned".

Signed-off-by: Jin Dongming <jin.dongming@np.css.fujitsu.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
---
 mm/memory-failure.c |   75 +++++++++++++++++++++++++++++++-------------------
 1 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0317cb3..0a11f6d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -974,16 +974,22 @@ static void set_page_hwpoison_huge_page(struct page *hpage)
 {
 	int i;
 	int nr_pages = 1 << compound_trans_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		SetPageHWPoison(hpage + i);
+	for (i = 0; i < nr_pages; i++) {
+		if (TestSetPageHWPoison(hpage + i))
+			continue;
+
+		atomic_long_inc(&mce_bad_pages);
+	}
 }
 
 static void clear_page_hwpoison_huge_page(struct page *hpage)
 {
 	int i;
 	int nr_pages = 1 << compound_trans_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		ClearPageHWPoison(hpage + i);
+	for (i = 0; i < nr_pages; i++) {
+		if (TestClearPageHWPoison(hpage + i))
+			atomic_long_dec(&mce_bad_pages);
+	}
 }
 
 int __memory_failure(unsigned long pfn, int trapno, int flags)
@@ -992,7 +998,6 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	struct page *p;
 	struct page *hpage;
 	int res;
-	unsigned int nr_pages;
 
 	if (!sysctl_memory_failure_recovery)
 		panic("Memory failure from trap %d on page %lx", trapno, pfn);
@@ -1011,8 +1016,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 		return 0;
 	}
 
-	nr_pages = 1 << compound_trans_order(hpage);
-	atomic_long_add(nr_pages, &mce_bad_pages);
+	atomic_long_inc(&mce_bad_pages);
 
 	/*
 	 * We need/can do nothing about count=0 pages.
@@ -1034,18 +1038,32 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 			action_result(pfn, "free buddy", DELAYED);
 			return 0;
 		} else if (PageHuge(hpage)) {
-			/*
-			 * Check "just unpoisoned", "filter hit", and
-			 * "race with other subpage."
-			 */
 			lock_page_nosync(hpage);
-			if (!PageHWPoison(p)
-			    || (hwpoison_filter(p) && TestClearPageHWPoison(p))
-			    || (p != hpage && TestSetPageHWPoison(hpage))) {
-				atomic_long_sub(nr_pages, &mce_bad_pages);
+			/* Check "just unpoisoned". */
+			if (!PageHWPoison(p)) {
 				unlock_page(hpage);
 				return 0;
 			}
+
+			/* Check "filter hit". */
+			if (hwpoison_filter(p)) {
+				if (TestClearPageHWPoison(p))
+					atomic_long_dec(&mce_bad_pages);
+
+				unlock_page(hpage);
+				return 0;
+			}
+
+			/* Check "race with other subpage". */
+			if (p != hpage) {
+				if (TestSetPageHWPoison(hpage)) {
+					unlock_page(hpage);
+					return 0;
+				}
+
+				atomic_long_inc(&mce_bad_pages);
+			}
+
 			set_page_hwpoison_huge_page(hpage);
 			res = dequeue_hwpoisoned_huge_page(hpage);
 			action_result(pfn, "free huge",
@@ -1101,7 +1119,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
-			atomic_long_sub(nr_pages, &mce_bad_pages);
+			atomic_long_dec(&mce_bad_pages);
 		unlock_page(hpage);
 		put_page(hpage);
 		return 0;
@@ -1111,12 +1129,16 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * For error on the tail page, we should set PG_hwpoison
 	 * on the head page to show that the hugepage is hwpoisoned
 	 */
-	if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
-		action_result(pfn, "hugepage already hardware poisoned",
+	if (PageHuge(p) && PageTail(p)) {
+		if (TestSetPageHWPoison(hpage)) {
+			action_result(pfn, "hugepage already hardware poisoned",
 				IGNORED);
-		unlock_page(hpage);
-		put_page(hpage);
-		return 0;
+			unlock_page(hpage);
+			put_page(hpage);
+			return 0;
+		}
+
+		atomic_long_inc(&mce_bad_pages);
 	}
 	/*
 	 * Set PG_hwpoison on all pages in an error hugepage,
@@ -1200,7 +1222,6 @@ int unpoison_memory(unsigned long pfn)
 	struct page *page;
 	struct page *p;
 	int freeit = 0;
-	unsigned int nr_pages;
 
 	if (!pfn_valid(pfn))
 		return -ENXIO;
@@ -1213,8 +1234,6 @@ int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	nr_pages = 1 << compound_trans_order(page);
-
 	if (!get_page_unless_zero(page)) {
 		/*
 		 * Since HWPoisoned hugepage should have non-zero refcount,
@@ -1227,7 +1246,7 @@ int unpoison_memory(unsigned long pfn)
 			return 0;
 		}
 		if (TestClearPageHWPoison(p))
-			atomic_long_sub(nr_pages, &mce_bad_pages);
+			atomic_long_dec(&mce_bad_pages);
 		pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
 		return 0;
 	}
@@ -1241,7 +1260,7 @@ int unpoison_memory(unsigned long pfn)
 	 */
 	if (TestClearPageHWPoison(page)) {
 		pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
-		atomic_long_sub(nr_pages, &mce_bad_pages);
+		atomic_long_dec(&mce_bad_pages);
 		freeit = 1;
 		if (PageHuge(page))
 			clear_page_hwpoison_huge_page(page);
@@ -1353,8 +1372,6 @@ static int soft_offline_huge_page(struct page *page, int flags)
 		return ret;
 	}
 done:
-	if (!PageHWPoison(hpage))
-		atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages);
 	set_page_hwpoison_huge_page(hpage);
 	dequeue_hwpoisoned_huge_page(hpage);
 	/* keep elevated page count for bad page */
@@ -1483,7 +1500,7 @@ int soft_offline_page(struct page *page, int flags)
 		return ret;
 
 done:
-	atomic_long_add(1, &mce_bad_pages);
+	atomic_long_inc(&mce_bad_pages);
 	SetPageHWPoison(page);
 	/* keep elevated page count for bad page */
 	return ret;
-- 
1.7.2.2



^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-02-07  4:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-07  4:48 [Resend][PATCH -v2 3/3 -next] Fix the wrong corrupted page information of hugetlb page Jin Dongming

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).