LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
To: linux-ia64@vger.kernel.org, Fenghua Yu <fenghua.yu@intel.com>,
	Tony Luck <tony.luck@intel.com>
Cc: kosaki.motohiro@jp.fujitsu.com,
	LKML <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [2.6.25-rc5-mm1][regression?] BUG: spinlock wrong CPU, modprobe
Date: Mon, 17 Mar 2008 22:32:15 +0900	[thread overview]
Message-ID: <20080317222846.7B58.KOSAKI.MOTOHIRO@jp.fujitsu.com> (raw)
In-Reply-To: <20080317120848.7B3F.KOSAKI.MOTOHIRO@jp.fujitsu.com>

[-- Attachment #1: Type: text/plain, Size: 1778 bytes --]

Hi

If I revert the following patch

    [IA64] Multiple outstanding ptc.g instruction support

then the problem no longer reproduces.

I have attached the revert patch.
Thanks!


> Hi
> 
> I found a strange message while booting 2.6.25-rc5-mm1,
> and it is always reproducible.
> 
> Also, Eric Piel's spinlock-related patch (URL below) does not fix my problem.
> http://marc.info/?l=linux-kernel&m=120561027011749&w=2
> 
> Is this a known bug?
> Or should I bisect?
> 
> -------------------------------------------------------------------
> BUG: spinlock wrong CPU on CPU#3, udevd/2495
>  lock: a040000058246510, .magic: dead4ead, .owner: udevd/2495, .owner_cpu: 2
> 
> Call Trace:
>  [<a000000100015f00>] show_stack+0x80/0xa0
>                                 sp=e00001609262fbd0 bsp=e000016092620fd8
>  [<a000000100015f50>] dump_stack+0x30/0x60
>                                 sp=e00001609262fda0 bsp=e000016092620fc0
>  [<a0000001004674a0>] spin_bug+0x1a0/0x1c0
>                                 sp=e00001609262fda0 bsp=e000016092620f90
>  [<a000000100467590>] _raw_spin_unlock+0xd0/0x120
>                                 sp=e00001609262fda0 bsp=e000016092620f60
>  [<a00000010082bfe0>] _spin_unlock+0x20/0x40
>                                 sp=e00001609262fda0 bsp=e000016092620f40
>  [<a000000100138100>] do_wp_page+0x500/0xfe0
>                                 sp=e00001609262fda0 bsp=e000016092620ed8
>  [<a00000010013c560>] handle_mm_fault+0xe80/0x10e0
>                                 sp=e00001609262fda0 bsp=e000016092620e50
>  [<a00000010006c140>] ia64_do_page_fault+0x600/0xa40
>                                 sp=e00001609262fda0 bsp=e000016092620df0
>  [<a00000010000abe0>] ia64_leave_kernel+0x0/0x270
>                                 sp=e00001609262fe30 bsp=e000016092620df0


[-- Attachment #2: revert-PALO.patch --]
[-- Type: application/octet-stream, Size: 8977 bytes --]

This patch reverts the following patch:

    [IA64] Multiple outstanding ptc.g instruction support
    
That patch was intended to fix a scalability issue, but it causes a more severe scalability issue.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>


---
 arch/ia64/kernel/efi.c      |   46 --------------------
 arch/ia64/kernel/setup.c    |    6 --
 arch/ia64/mm/tlb.c          |   99 ++++++--------------------------------------
 include/asm-ia64/sal.h      |   17 -------
 include/asm-ia64/tlbflush.h |    1 
 5 files changed, 17 insertions(+), 152 deletions(-)

Index: b/arch/ia64/kernel/efi.c
===================================================================
--- a/arch/ia64/kernel/efi.c	2008-03-14 21:38:10.000000000 +0900
+++ b/arch/ia64/kernel/efi.c	2008-03-17 22:30:49.000000000 +0900
@@ -37,7 +37,6 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mca.h>
-#include <asm/tlbflush.h>
 
 #define EFI_DEBUG	0
 
@@ -404,41 +403,6 @@ efi_get_pal_addr (void)
 	return NULL;
 }
 
-
-static u8 __init palo_checksum(u8 *buffer, u32 length)
-{
-	u8 sum = 0;
-	u8 *end = buffer + length;
-
-	while (buffer < end)
-		sum = (u8) (sum + *(buffer++));
-
-	return sum;
-}
-
-/*
- * Parse and handle PALO table which is published at:
- * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
- */
-static void __init handle_palo(unsigned long palo_phys)
-{
-	struct palo_table *palo = __va(palo_phys);
-	u8  checksum;
-
-	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
-		printk(KERN_INFO "PALO signature incorrect.\n");
-		return;
-	}
-
-	checksum = palo_checksum((u8 *)palo, palo->length);
-	if (checksum) {
-		printk(KERN_INFO "PALO checksum incorrect.\n");
-		return;
-	}
-
-	setup_ptcg_sem(palo->max_tlb_purges, 1);
-}
-
 void
 efi_map_pal_code (void)
 {
@@ -468,7 +432,6 @@ efi_init (void)
 	u64 efi_desc_size;
 	char *cp, vendor[100] = "unknown";
 	int i;
-	unsigned long palo_phys;
 
 	/*
 	 * It's too early to be able to use the standard kernel command line
@@ -533,8 +496,6 @@ efi_init (void)
 	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
 	efi.uga        = EFI_INVALID_TABLE_ADDR;
 
-	palo_phys      = EFI_INVALID_TABLE_ADDR;
-
 	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
 		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
 			efi.mps = config_tables[i].table;
@@ -554,17 +515,10 @@ efi_init (void)
 		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
 			efi.hcdp = config_tables[i].table;
 			printk(" HCDP=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid,
-			 PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
-			palo_phys = config_tables[i].table;
-			printk(" PALO=0x%lx", config_tables[i].table);
 		}
 	}
 	printk("\n");
 
-	if (palo_phys != EFI_INVALID_TABLE_ADDR)
-		handle_palo(palo_phys);
-
 	runtime = __va(efi.systab->runtime);
 	efi.get_time = phys_get_time;
 	efi.set_time = phys_set_time;
Index: b/arch/ia64/kernel/setup.c
===================================================================
--- a/arch/ia64/kernel/setup.c	2008-03-14 21:38:10.000000000 +0900
+++ b/arch/ia64/kernel/setup.c	2008-03-17 22:30:49.000000000 +0900
@@ -59,7 +59,6 @@
 #include <asm/setup.h>
 #include <asm/smp.h>
 #include <asm/system.h>
-#include <asm/tlbflush.h>
 #include <asm/unistd.h>
 #include <asm/hpsim.h>
 
@@ -947,10 +946,9 @@ cpu_init (void)
 #endif
 
 	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
-	if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
+	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
 		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
-		setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, 0);
-	} else {
+	else {
 		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
 		max_ctx = (1U << 15) - 1;	/* use architected minimum */
 	}
Index: b/arch/ia64/mm/tlb.c
===================================================================
--- a/arch/ia64/mm/tlb.c	2008-03-14 21:38:10.000000000 +0900
+++ b/arch/ia64/mm/tlb.c	2008-03-17 22:30:49.000000000 +0900
@@ -11,9 +11,6 @@
  * Rohit Seth <rohit.seth@intel.com>
  * Ken Chen <kenneth.w.chen@intel.com>
  * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
- * Copyright (C) 2007 Intel Corp
- *	Fenghua Yu <fenghua.yu@intel.com>
- *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
  */
 #include <linux/module.h>
 #include <linux/init.h>
@@ -29,7 +26,6 @@
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
 #include <asm/dma.h>
-#include <asm/sal.h>
 
 static struct {
 	unsigned long mask;	/* mask of supported purge page-sizes */
@@ -88,77 +84,13 @@ wrap_mmu_context (struct mm_struct *mm)
 	local_flush_tlb_all();
 }
 
-static __DECLARE_SEMAPHORE_GENERIC(ptcg_sem, 1);
-static u16 nptcg = 1;
-static int need_ptcg_sem = 1;
-static int toolatetochangeptcgsem = 0;
-
-/*
- * Maximum number of simultaneous ptc.g purges in the system can
- * be defined by PAL_VM_SUMMARY (in which case we should take
- * the smallest value for any cpu in the system) or by the PAL
- * override table (in which case we should ignore the value from
- * PAL_VM_SUMMARY).
- *
- * Complicating the logic here is the fact that num_possible_cpus()
- * isn't fully setup until we start bringing cpus online.
- */
-void
-setup_ptcg_sem(int max_purges, int from_palo)
-{
-	static int have_palo;
-	static int firstcpu = 1;
-
-	if (toolatetochangeptcgsem) {
-		BUG_ON(max_purges < nptcg);
-		return;
-	}
-
-	if (from_palo) {
-		have_palo = 1;
-
-		/* In PALO max_purges == 0 really means it! */
-		if (max_purges == 0)
-			panic("Whoa! Platform does not support global TLB purges.\n");
-		nptcg = max_purges;
-		if (nptcg == PALO_MAX_TLB_PURGES) {
-			need_ptcg_sem = 0;
-			return;
-		}
-		goto resetsema;
-	}
-	if (have_palo) {
-		if (nptcg != PALO_MAX_TLB_PURGES)
-			need_ptcg_sem = (num_possible_cpus() > nptcg);
-		return;
-	}
-
-	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
-	if (max_purges == 0) max_purges = 1;
-
-	if (firstcpu) {
-		nptcg = max_purges;
-		firstcpu = 0;
-	}
-	if (max_purges < nptcg)
-		nptcg = max_purges;
-	if (nptcg == PAL_MAX_PURGES) {
-		need_ptcg_sem = 0;
-		return;
-	} else
-		need_ptcg_sem = (num_possible_cpus() > nptcg);
-
-resetsema:
-	sema_init(&ptcg_sem, max_purges);
-}
-
 void
 ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 		       unsigned long end, unsigned long nbits)
 {
-	struct mm_struct *active_mm = current->active_mm;
+	static DEFINE_SPINLOCK(ptcg_lock);
 
-	toolatetochangeptcgsem = 1;
+	struct mm_struct *active_mm = current->active_mm;
 
 	if (mm != active_mm) {
 		/* Restore region IDs for mm */
@@ -170,20 +102,19 @@ ia64_global_tlb_purge (struct mm_struct 
 		}
 	}
 
-	if (need_ptcg_sem)
-		down(&ptcg_sem);
-
-	do {
-		/*
-		 * Flush ALAT entries also.
-		 */
-		ia64_ptcga(start, (nbits << 2));
-		ia64_srlz_i();
-		start += (1UL << nbits);
-	} while (start < end);
-
-	if (need_ptcg_sem)
-		up(&ptcg_sem);
+	/* HW requires global serialization of ptc.ga.  */
+	spin_lock(&ptcg_lock);
+	{
+		do {
+			/*
+			 * Flush ALAT entries also.
+			 */
+			ia64_ptcga(start, (nbits<<2));
+			ia64_srlz_i();
+			start += (1UL << nbits);
+		} while (start < end);
+	}
+	spin_unlock(&ptcg_lock);
 
         if (mm != active_mm) {
                 activate_context(active_mm);
Index: b/include/asm-ia64/sal.h
===================================================================
--- a/include/asm-ia64/sal.h	2008-03-14 21:38:13.000000000 +0900
+++ b/include/asm-ia64/sal.h	2008-03-17 22:30:49.000000000 +0900
@@ -296,9 +296,6 @@ enum {
     EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
 #define SAL_PLAT_BUS_ERR_SECT_GUID  \
     EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
-#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \
-    EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \
-		0xca, 0x4d)
 
 #define MAX_CACHE_ERRORS	6
 #define MAX_TLB_ERRORS		6
@@ -882,20 +879,6 @@ extern void ia64_jump_to_sal(struct sal_
 
 extern void ia64_sal_handler_init(void *entry_point, void *gpval);
 
-#define PALO_MAX_TLB_PURGES	0xFFFF
-#define PALO_SIG	"PALO"
-
-struct palo_table {
-	u8  signature[4];	/* Should be "PALO" */
-	u32 length;
-	u8  minor_revision;
-	u8  major_revision;
-	u8  checksum;
-	u8  reserved1[5];
-	u16 max_tlb_purges;
-	u8  reserved2[6];
-};
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_IA64_SAL_H */
Index: b/include/asm-ia64/tlbflush.h
===================================================================
--- a/include/asm-ia64/tlbflush.h	2008-03-14 21:38:13.000000000 +0900
+++ b/include/asm-ia64/tlbflush.h	2008-03-17 22:30:49.000000000 +0900
@@ -17,7 +17,6 @@
  * Now for some TLB flushing routines.  This is the kind of stuff that
  * can be very expensive, so try to avoid them whenever possible.
  */
-extern void setup_ptcg_sem(int max_purges, int from_palo);
 
 /*
  * Flush everything (kernel mapping may also have changed due to

  reply	other threads:[~2008-03-17 13:32 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-17  4:25 KOSAKI Motohiro
2008-03-17 13:32 ` KOSAKI Motohiro [this message]
2008-03-18  7:17 ` Andrew Morton
2008-03-18  9:46   ` KOSAKI Motohiro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080317222846.7B58.KOSAKI.MOTOHIRO@jp.fujitsu.com \
    --to=kosaki.motohiro@jp.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=fenghua.yu@intel.com \
    --cc=linux-ia64@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    --subject='Re: [2.6.25-rc5-mm1][regression?] BUG: spinlock wrong CPU, modprobe' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).