LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range()
@ 2019-05-14  9:05 Christophe Leroy
  2019-05-14  9:05 ` [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE Christophe Leroy
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linux-kernel, linuxppc-dev

On most arches having function flush_dcache_range(), including PPC32,
this function does a writeback and invalidation of the cache bloc.

On PPC64, flush_dcache_range() only does a writeback while
flush_inval_dcache_range() does the invalidation in addition.

In addition it looks like within arch/powerpc/, there are no PPC64
platforms using flush_dcache_range()

This patch drops the existing 64 bits version of flush_dcache_range()
and renames flush_inval_dcache_range() into flush_dcache_range().

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/cacheflush.h |  1 -
 arch/powerpc/kernel/misc_64.S         | 27 ++-------------------------
 arch/powerpc/lib/pmem.c               |  8 ++++----
 arch/powerpc/mm/mem.c                 |  4 ++--
 arch/powerpc/sysdev/dart_iommu.c      |  2 +-
 drivers/macintosh/smu.c               |  4 ++--
 6 files changed, 11 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d5a8d7bf0759..e9a40b110f1d 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -109,7 +109,6 @@ static inline void invalidate_dcache_range(unsigned long start,
 #endif /* CONFIG_PPC32 */
 #ifdef CONFIG_PPC64
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
 #endif
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 262ba9481781..a4fd536efb44 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -121,31 +121,8 @@ EXPORT_SYMBOL(flush_icache_range)
  *
  *    flush all bytes from start to stop-1 inclusive
  */
-_GLOBAL_TOC(flush_dcache_range)
 
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)	/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mtctr	r8
-0:	dcbst	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-_GLOBAL(flush_inval_dcache_range)
+_GLOBAL_TOC(flush_dcache_range)
  	ld	r10,PPC64_CACHES@toc(r2)
 	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
 	addi	r5,r7,-1
@@ -164,7 +141,7 @@ _GLOBAL(flush_inval_dcache_range)
 	sync
 	isync
 	blr
-
+EXPORT_SYMBOL(flush_dcache_range)
 
 /*
  * Flush a particular page from the data cache to RAM.
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 53c018762e1c..36e08bf850e0 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -23,14 +23,14 @@
 void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 }
 EXPORT_SYMBOL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 }
 EXPORT_SYMBOL(arch_invalidate_pmem);
 
@@ -43,7 +43,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
 	unsigned long copied, start = (unsigned long) dest;
 
 	copied = __copy_from_user(dest, src, size);
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 
 	return copied;
 }
@@ -53,7 +53,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size)
 	unsigned long start = (unsigned long) dest;
 
 	memcpy(dest, src, size);
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 
 	return dest;
 }
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index cd525d709072..39e66f033995 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -125,7 +125,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altm
 			start, start + size, rc);
 		return -EFAULT;
 	}
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 
 	return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
@@ -153,7 +153,7 @@ int __ref arch_remove_memory(int nid, u64 start, u64 size,
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 	ret = remove_section_mapping(start, start + size);
 
 	/* Ensure all vmalloc mappings are flushed in case they also
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 2a751795ec1e..bc259a8d3f2d 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -158,7 +158,7 @@ static void dart_cache_sync(unsigned int *base, unsigned int count)
 	unsigned int tmp;
 
 	/* Perform a standard cache flush */
-	flush_inval_dcache_range(start, end);
+	flush_dcache_range(start, end);
 
 	/*
 	 * Perform the sequence described in the CPC925 manual to
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 6a844125cf2d..97758eed03f2 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -133,7 +133,7 @@ static void smu_start_cmd(void)
 	/* Flush command and data to RAM */
 	faddr = (unsigned long)smu->cmd_buf;
 	fend = faddr + smu->cmd_buf->length + 2;
-	flush_inval_dcache_range(faddr, fend);
+	flush_dcache_range(faddr, fend);
 
 
 	/* We also disable NAP mode for the duration of the command
@@ -195,7 +195,7 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
 		 * reply length (it's only 2 cache lines anyway)
 		 */
 		faddr = (unsigned long)smu->cmd_buf;
-		flush_inval_dcache_range(faddr, faddr + 256);
+		flush_dcache_range(faddr, faddr + 256);
 
 		/* Now check ack */
 		ack = (~cmd->cmd) & 0xff;
-- 
2.13.3


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
  2019-05-14  9:05 [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Christophe Leroy
@ 2019-05-14  9:05 ` Christophe Leroy
  2019-07-15  6:49   ` Michael Ellerman
  2019-05-14  9:05 ` [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes Christophe Leroy
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linux-kernel, linuxppc-dev

PPC32 also have flush_dcache_range() so it can also support
ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d7996cfaceca..cf6e30f637be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -127,13 +127,13 @@ config PPC
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MMIOWB			if PPC64
 	select ARCH_HAS_PHYS_TO_DMA
-	select ARCH_HAS_PMEM_API                if PPC64
+	select ARCH_HAS_PMEM_API
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
 	select ARCH_HAS_STRICT_KERNEL_RWX	if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
-	select ARCH_HAS_UACCESS_FLUSHCACHE	if PPC64
+	select ARCH_HAS_UACCESS_FLUSHCACHE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_ZONE_DEVICE		if PPC_BOOK3S_64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
-- 
2.13.3


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes.
  2019-05-14  9:05 [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Christophe Leroy
  2019-05-14  9:05 ` [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE Christophe Leroy
@ 2019-05-14  9:05 ` Christophe Leroy
  2019-07-08  1:19   ` Michael Ellerman
  2019-05-14  9:05 ` [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range() Christophe Leroy
  2019-07-08  1:19 ` [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Michael Ellerman
  3 siblings, 1 reply; 16+ messages in thread
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linux-kernel, linuxppc-dev

This patch defines C helpers to retrieve the size of
cache blocks and uses them in the cacheflush functions.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/cache.h      | 16 ++++++++++++++--
 arch/powerpc/include/asm/cacheflush.h | 24 +++++++++++++++---------
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 40ea5b3781c6..0009a0a82e86 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -33,7 +33,8 @@
 
 #define IFETCH_ALIGN_BYTES	(1 << IFETCH_ALIGN_SHIFT)
 
-#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLY__)
+#ifdef CONFIG_PPC64
 
 struct ppc_cache_info {
 	u32 size;
@@ -53,7 +54,18 @@ struct ppc64_caches {
 };
 
 extern struct ppc64_caches ppc64_caches;
-#endif /* __powerpc64__ && ! __ASSEMBLY__ */
+#else
+static inline u32 l1_cache_shift(void)
+{
+	return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+	return L1_CACHE_BYTES;
+}
+#endif
+#endif /* ! __ASSEMBLY__ */
 
 #if defined(__ASSEMBLY__)
 /*
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e9a40b110f1d..d405f18441cd 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -64,11 +64,13 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
  */
 static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+	unsigned long shift = l1_cache_shift();
+	unsigned long bytes = l1_cache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
-	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbf(addr);
 	mb();	/* sync */
 }
@@ -80,11 +82,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
  */
 static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 {
-	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+	unsigned long shift = l1_cache_shift();
+	unsigned long bytes = l1_cache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
-	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbst(addr);
 	mb();	/* sync */
 }
@@ -97,11 +101,13 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 static inline void invalidate_dcache_range(unsigned long start,
 					   unsigned long stop)
 {
-	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+	unsigned long shift = l1_cache_shift();
+	unsigned long bytes = l1_cache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
-	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbi(addr);
 	mb();	/* sync */
 }
-- 
2.13.3


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
  2019-05-14  9:05 [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Christophe Leroy
  2019-05-14  9:05 ` [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE Christophe Leroy
  2019-05-14  9:05 ` [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes Christophe Leroy
@ 2019-05-14  9:05 ` Christophe Leroy
  2019-07-08  1:19   ` Michael Ellerman
                     ` (2 more replies)
  2019-07-08  1:19 ` [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Michael Ellerman
  3 siblings, 3 replies; 16+ messages in thread
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linux-kernel, linuxppc-dev

This patch drops the assembly PPC64 version of flush_dcache_range()
and re-uses the PPC32 static inline version.

With GCC 8.1, the following code is generated:

void flush_test(unsigned long start, unsigned long stop)
{
	flush_dcache_range(start, stop);
}

0000000000000130 <.flush_test>:
 130:	3d 22 00 00 	addis   r9,r2,0
			132: R_PPC64_TOC16_HA	.data+0x8
 134:	81 09 00 00 	lwz     r8,0(r9)
			136: R_PPC64_TOC16_LO	.data+0x8
 138:	3d 22 00 00 	addis   r9,r2,0
			13a: R_PPC64_TOC16_HA	.data+0xc
 13c:	80 e9 00 00 	lwz     r7,0(r9)
			13e: R_PPC64_TOC16_LO	.data+0xc
 140:	7d 48 00 d0 	neg     r10,r8
 144:	7d 43 18 38 	and     r3,r10,r3
 148:	7c 00 04 ac 	hwsync
 14c:	4c 00 01 2c 	isync
 150:	39 28 ff ff 	addi    r9,r8,-1
 154:	7c 89 22 14 	add     r4,r9,r4
 158:	7c 83 20 50 	subf    r4,r3,r4
 15c:	7c 89 3c 37 	srd.    r9,r4,r7
 160:	41 82 00 1c 	beq     17c <.flush_test+0x4c>
 164:	7d 29 03 a6 	mtctr   r9
 168:	60 00 00 00 	nop
 16c:	60 00 00 00 	nop
 170:	7c 00 18 ac 	dcbf    0,r3
 174:	7c 63 42 14 	add     r3,r3,r8
 178:	42 00 ff f8 	bdnz    170 <.flush_test+0x40>
 17c:	7c 00 04 ac 	hwsync
 180:	4c 00 01 2c 	isync
 184:	4e 80 00 20 	blr
 188:	60 00 00 00 	nop
 18c:	60 00 00 00 	nop

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/cache.h      | 10 ++++++++++
 arch/powerpc/include/asm/cacheflush.h | 14 ++++++++------
 arch/powerpc/kernel/misc_64.S         | 29 -----------------------------
 3 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 0009a0a82e86..45e3137ccd71 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -54,6 +54,16 @@ struct ppc64_caches {
 };
 
 extern struct ppc64_caches ppc64_caches;
+
+static inline u32 l1_cache_shift(void)
+{
+	return ppc64_caches.l1d.log_block_size;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+	return ppc64_caches.l1d.block_size;
+}
 #else
 static inline u32 l1_cache_shift(void)
 {
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d405f18441cd..3cd7ce3dec8b 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -57,7 +57,6 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
 }
 #endif
 
-#ifdef CONFIG_PPC32
 /*
  * Write any modified data cache blocks out to memory and invalidate them.
  * Does not invalidate the corresponding instruction cache blocks.
@@ -70,9 +69,17 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
+	if (IS_ENABLED(CONFIG_PPC64)) {
+		mb();	/* sync */
+		isync();
+	}
+
 	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbf(addr);
 	mb();	/* sync */
+
+	if (IS_ENABLED(CONFIG_PPC64))
+		isync();
 }
 
 /*
@@ -112,11 +119,6 @@ static inline void invalidate_dcache_range(unsigned long start,
 	mb();	/* sync */
 }
 
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void flush_dcache_range(unsigned long start, unsigned long stop);
-#endif
-
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
 	do { \
 		memcpy(dst, src, len); \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index a4fd536efb44..1b0a42c50ef1 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -115,35 +115,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
 EXPORT_SYMBOL(flush_icache_range)
 
 /*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_dcache_range)
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	sync
-	isync
-	mtctr	r8
-0:	dcbf	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	isync
-	blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-/*
  * Flush a particular page from the data cache to RAM.
  * Note: this is necessary because the instruction cache does *not*
  * snoop from the data cache.
-- 
2.13.3


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range()
  2019-05-14  9:05 [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Christophe Leroy
                   ` (2 preceding siblings ...)
  2019-05-14  9:05 ` [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range() Christophe Leroy
@ 2019-07-08  1:19 ` Michael Ellerman
  3 siblings, 0 replies; 16+ messages in thread
From: Michael Ellerman @ 2019-07-08  1:19 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel

On Tue, 2019-05-14 at 09:05:13 UTC, Christophe Leroy wrote:
> On most arches having function flush_dcache_range(), including PPC32,
> this function does a writeback and invalidation of the cache bloc.
> 
> On PPC64, flush_dcache_range() only does a writeback while
> flush_inval_dcache_range() does the invalidation in addition.
> 
> In addition it looks like within arch/powerpc/, there are no PPC64
> platforms using flush_dcache_range()
> 
> This patch drops the existing 64 bits version of flush_dcache_range()
> and renames flush_inval_dcache_range() into flush_dcache_range().
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/1cfb725fb1899dc6fdc88f8b5354a65e8ad260c6

cheers

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
  2019-05-14  9:05 ` [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range() Christophe Leroy
@ 2019-07-08  1:19   ` Michael Ellerman
  2019-07-08 14:21   ` Aneesh Kumar K.V
  2019-08-08  6:53   ` powerpc flush_inval_dcache_range() was buggy until v5.3-rc1 (was Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()) Michael Ellerman
  2 siblings, 0 replies; 16+ messages in thread
From: Michael Ellerman @ 2019-07-08  1:19 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel

On Tue, 2019-05-14 at 09:05:16 UTC, Christophe Leroy wrote:
> This patch drops the assembly PPC64 version of flush_dcache_range()
> and re-uses the PPC32 static inline version.
> 
> With GCC 8.1, the following code is generated:
> 
> void flush_test(unsigned long start, unsigned long stop)
> {
> 	flush_dcache_range(start, stop);
> }
> 
> 0000000000000130 <.flush_test>:
>  130:	3d 22 00 00 	addis   r9,r2,0
> 			132: R_PPC64_TOC16_HA	.data+0x8
>  134:	81 09 00 00 	lwz     r8,0(r9)
> 			136: R_PPC64_TOC16_LO	.data+0x8
>  138:	3d 22 00 00 	addis   r9,r2,0
> 			13a: R_PPC64_TOC16_HA	.data+0xc
>  13c:	80 e9 00 00 	lwz     r7,0(r9)
> 			13e: R_PPC64_TOC16_LO	.data+0xc
>  140:	7d 48 00 d0 	neg     r10,r8
>  144:	7d 43 18 38 	and     r3,r10,r3
>  148:	7c 00 04 ac 	hwsync
>  14c:	4c 00 01 2c 	isync
>  150:	39 28 ff ff 	addi    r9,r8,-1
>  154:	7c 89 22 14 	add     r4,r9,r4
>  158:	7c 83 20 50 	subf    r4,r3,r4
>  15c:	7c 89 3c 37 	srd.    r9,r4,r7
>  160:	41 82 00 1c 	beq     17c <.flush_test+0x4c>
>  164:	7d 29 03 a6 	mtctr   r9
>  168:	60 00 00 00 	nop
>  16c:	60 00 00 00 	nop
>  170:	7c 00 18 ac 	dcbf    0,r3
>  174:	7c 63 42 14 	add     r3,r3,r8
>  178:	42 00 ff f8 	bdnz    170 <.flush_test+0x40>
>  17c:	7c 00 04 ac 	hwsync
>  180:	4c 00 01 2c 	isync
>  184:	4e 80 00 20 	blr
>  188:	60 00 00 00 	nop
>  18c:	60 00 00 00 	nop
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/22e9c88d486a0536d337d6e0973968be0a4cd4b2

cheers

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes.
  2019-05-14  9:05 ` [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes Christophe Leroy
@ 2019-07-08  1:19   ` Michael Ellerman
  0 siblings, 0 replies; 16+ messages in thread
From: Michael Ellerman @ 2019-07-08  1:19 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel

On Tue, 2019-05-14 at 09:05:15 UTC, Christophe Leroy wrote:
> This patch defines C helpers to retrieve the size of
> cache blocks and uses them in the cacheflush functions.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d98fc70fc139b72ae098d24fde42ad70c8ff2f81

cheers

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
  2019-05-14  9:05 ` [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range() Christophe Leroy
  2019-07-08  1:19   ` Michael Ellerman
@ 2019-07-08 14:21   ` Aneesh Kumar K.V
  2019-07-09  2:20     ` Oliver O'Halloran
  2019-08-08  6:53   ` powerpc flush_inval_dcache_range() was buggy until v5.3-rc1 (was Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()) Michael Ellerman
  2 siblings, 1 reply; 16+ messages in thread
From: Aneesh Kumar K.V @ 2019-07-08 14:21 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, Oliver O'Halloran, Segher Boessenkool
  Cc: linux-kernel, linuxppc-dev

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> This patch drops the assembly PPC64 version of flush_dcache_range()
> and re-uses the PPC32 static inline version.
>
> With GCC 8.1, the following code is generated:
>
> void flush_test(unsigned long start, unsigned long stop)
> {
> 	flush_dcache_range(start, stop);
> }
>
> 0000000000000130 <.flush_test>:
>  130:	3d 22 00 00 	addis   r9,r2,0
> 			132: R_PPC64_TOC16_HA	.data+0x8
>  134:	81 09 00 00 	lwz     r8,0(r9)
> 			136: R_PPC64_TOC16_LO	.data+0x8
>  138:	3d 22 00 00 	addis   r9,r2,0
> 			13a: R_PPC64_TOC16_HA	.data+0xc
>  13c:	80 e9 00 00 	lwz     r7,0(r9)
> 			13e: R_PPC64_TOC16_LO	.data+0xc
>  140:	7d 48 00 d0 	neg     r10,r8
>  144:	7d 43 18 38 	and     r3,r10,r3
>  148:	7c 00 04 ac 	hwsync
>  14c:	4c 00 01 2c 	isync
>  150:	39 28 ff ff 	addi    r9,r8,-1
>  154:	7c 89 22 14 	add     r4,r9,r4
>  158:	7c 83 20 50 	subf    r4,r3,r4
>  15c:	7c 89 3c 37 	srd.    r9,r4,r7
>  160:	41 82 00 1c 	beq     17c <.flush_test+0x4c>
>  164:	7d 29 03 a6 	mtctr   r9
>  168:	60 00 00 00 	nop
>  16c:	60 00 00 00 	nop
>  170:	7c 00 18 ac 	dcbf    0,r3
>  174:	7c 63 42 14 	add     r3,r3,r8
>  178:	42 00 ff f8 	bdnz    170 <.flush_test+0x40>
>  17c:	7c 00 04 ac 	hwsync
>  180:	4c 00 01 2c 	isync
>  184:	4e 80 00 20 	blr
>  188:	60 00 00 00 	nop
>  18c:	60 00 00 00 	nop
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>  arch/powerpc/include/asm/cache.h      | 10 ++++++++++
>  arch/powerpc/include/asm/cacheflush.h | 14 ++++++++------
>  arch/powerpc/kernel/misc_64.S         | 29 -----------------------------
>  3 files changed, 18 insertions(+), 35 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
> index 0009a0a82e86..45e3137ccd71 100644
> --- a/arch/powerpc/include/asm/cache.h
> +++ b/arch/powerpc/include/asm/cache.h
> @@ -54,6 +54,16 @@ struct ppc64_caches {
>  };
>  
>  extern struct ppc64_caches ppc64_caches;
> +
> +static inline u32 l1_cache_shift(void)
> +{
> +	return ppc64_caches.l1d.log_block_size;
> +}
> +
> +static inline u32 l1_cache_bytes(void)
> +{
> +	return ppc64_caches.l1d.block_size;
> +}
>  #else
>  static inline u32 l1_cache_shift(void)
>  {
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index d405f18441cd..3cd7ce3dec8b 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -57,7 +57,6 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
>  }
>  #endif
>  
> -#ifdef CONFIG_PPC32
>  /*
>   * Write any modified data cache blocks out to memory and invalidate them.
>   * Does not invalidate the corresponding instruction cache blocks.
> @@ -70,9 +69,17 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
>  	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
>  	unsigned long i;
>  
> +	if (IS_ENABLED(CONFIG_PPC64)) {
> +		mb();	/* sync */
> +		isync();
> +	}
> +
>  	for (i = 0; i < size >> shift; i++, addr += bytes)
>  		dcbf(addr);
>  	mb();	/* sync */
> +
> +	if (IS_ENABLED(CONFIG_PPC64))
> +		isync();
>  }


Was checking with Michael about why we need that extra isync. Michael 
pointed this came via 

https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14

for 970 which doesn't have coherent icache. So possibly isync there is
to flush the prefetch instructions? But even so we would need an icbi
there before that isync.

So overall wondering why we need that extra barriers there. 

>  
>  /*
> @@ -112,11 +119,6 @@ static inline void invalidate_dcache_range(unsigned long start,
>  	mb();	/* sync */
>  }
>  

-aneesh


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
  2019-07-08 14:21   ` Aneesh Kumar K.V
@ 2019-07-09  2:20     ` Oliver O'Halloran
  2019-07-09  2:51       ` Aneesh Kumar K.V
  0 siblings, 1 reply; 16+ messages in thread
From: Oliver O'Halloran @ 2019-07-09  2:20 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, Segher Boessenkool, linuxppc-dev,
	Linux Kernel Mailing List

On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>
> > *snip*
> > +     if (IS_ENABLED(CONFIG_PPC64))
> > +             isync();
> >  }
>
>
> Was checking with Michael about why we need that extra isync. Michael
> pointed this came via
>
> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
>
> for 970 which doesn't have coherent icache. So possibly isync there is
> to flush the prefetch instructions? But even so we would need an icbi
> there before that isync.

I don't think it's that, there's some magic in flush_icache_range() to
handle dropping prefetched instructions on 970.

> So overall wondering why we need that extra barriers there.

I think the isync is needed there because the architecture only
requires sync to provide ordering. A sync alone doesn't guarantee the
dcbfs have actually completed so the isync is necessary to ensure the
flushed cache lines are back in memory. That said, as far as I know
all the IBM book3s chips from power4 onwards will wait for pending
dcbfs when they hit a sync, but that might change in the future.

If it's a problem we could add a cpu-feature section around the isync
to no-op it in the common case. However, when I had a look with perf
it always showed that the sync was the hotspot so I don't think it'll
help much.

Oliver

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
  2019-07-09  2:20     ` Oliver O'Halloran
@ 2019-07-09  2:51       ` Aneesh Kumar K.V
  2019-07-09  5:29         ` Oliver O'Halloran
  2019-07-09 17:04         ` Segher Boessenkool
  0 siblings, 2 replies; 16+ messages in thread
From: Aneesh Kumar K.V @ 2019-07-09  2:51 UTC (permalink / raw)
  To: Oliver O'Halloran
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, Segher Boessenkool, linuxppc-dev,
	Linux Kernel Mailing List

On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>
>>> *snip*
>>> +     if (IS_ENABLED(CONFIG_PPC64))
>>> +             isync();
>>>   }
>>
>>
>> Was checking with Michael about why we need that extra isync. Michael
>> pointed this came via
>>
>> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
>>
>> for 970 which doesn't have coherent icache. So possibly isync there is
>> to flush the prefetch instructions? But even so we would need an icbi
>> there before that isync.
> 
> I don't think it's that, there's some magic in flush_icache_range() to
> handle dropping prefetched instructions on 970.
> 
>> So overall wondering why we need that extra barriers there.
> 
> I think the isync is needed there because the architecture only
> requires sync to provide ordering. A sync alone doesn't guarantee the
> dcbfs have actually completed so the isync is necessary to ensure the
> flushed cache lines are back in memory. That said, as far as I know
> all the IBM book3s chips from power4 onwards will wait for pending
> dcbfs when they hit a sync, but that might change in the future.
> 

ISA doesn't list that as the sequence. Only place where isync was 
mentioned was w.r.t  icbi where want to discards the prefetch.



> If it's a problem we could add a cpu-feature section around the isync
> to no-op it in the common case. However, when I had a look with perf
> it always showed that the sync was the hotspot so I don't think it'll
> help much.
> 

What about the preceding barriers (sync; isync;) before dcbf? Why are 
they needed?

-aneesh

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
  2019-07-09  2:51       ` Aneesh Kumar K.V
@ 2019-07-09  5:29         ` Oliver O'Halloran
  2019-07-09 17:04         ` Segher Boessenkool
  1 sibling, 0 replies; 16+ messages in thread
From: Oliver O'Halloran @ 2019-07-09  5:29 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, Segher Boessenkool, linuxppc-dev,
	Linux Kernel Mailing List

On Tue, Jul 9, 2019 at 12:52 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> > On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
> > <aneesh.kumar@linux.ibm.com> wrote:
> >>
> >> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> >>
> >>> *snip*
> >>> +     if (IS_ENABLED(CONFIG_PPC64))
> >>> +             isync();
> >>>   }
> >>
> >>
> >> Was checking with Michael about why we need that extra isync. Michael
> >> pointed this came via
> >>
> >> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
> >>
> >> for 970 which doesn't have coherent icache. So possibly isync there is
> >> to flush the prefetch instructions? But even so we would need an icbi
> >> there before that isync.
> >
> > I don't think it's that, there's some magic in flush_icache_range() to
> > handle dropping prefetched instructions on 970.
> >
> >> So overall wondering why we need that extra barriers there.
> >
> > I think the isync is needed there because the architecture only
> > requires sync to provide ordering. A sync alone doesn't guarantee the
> > dcbfs have actually completed so the isync is necessary to ensure the
> > flushed cache lines are back in memory. That said, as far as I know
> > all the IBM book3s chips from power4 onwards will wait for pending
> > dcbfs when they hit a sync, but that might change in the future.
> >
>
> ISA doesn't list that as the sequence. Only place where isync was
> mentioned was w.r.t  icbi where want to discards the prefetch.

doesn't list that as the sequence for what?

> > If it's a problem we could add a cpu-feature section around the isync
> > to no-op it in the common case. However, when I had a look with perf
> > it always showed that the sync was the hotspot so I don't think it'll
> > help much.
> >
>
> What about the preceding barriers (sync; isync;) before dcbf? Why are
> they needed?

Dunno, the sync might just be to ensure ordering between prior stores
and the dcbf.

>
> -aneesh

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
  2019-07-09  2:51       ` Aneesh Kumar K.V
  2019-07-09  5:29         ` Oliver O'Halloran
@ 2019-07-09 17:04         ` Segher Boessenkool
  1 sibling, 0 replies; 16+ messages in thread
From: Segher Boessenkool @ 2019-07-09 17:04 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Oliver O'Halloran, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Ellerman, linuxppc-dev,
	Linux Kernel Mailing List

On Tue, Jul 09, 2019 at 08:21:54AM +0530, Aneesh Kumar K.V wrote:
> On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> >I don't think it's that, there's some magic in flush_icache_range() to
> >handle dropping prefetched instructions on 970.
> >
> >>So overall wondering why we need that extra barriers there.
> >
> >I think the isync is needed there because the architecture only
> >requires sync to provide ordering. A sync alone doesn't guarantee the
> >dcbfs have actually completed so the isync is necessary to ensure the
> >flushed cache lines are back in memory. That said, as far as I know
> >all the IBM book3s chips from power4 onwards will wait for pending
> >dcbfs when they hit a sync, but that might change in the future.
> >
> 
> ISA doesn't list that as the sequence. Only place where isync was 
> mentioned was w.r.t  icbi where want to discards the prefetch.

You need an isync to guarantee all icbi insns before the isync have been
performed before any code after the isync is fetched.  Killing the
prefetch is just part of it.

> >If it's a problem we could add a cpu-feature section around the isync
> >to no-op it in the common case. However, when I had a look with perf
> >it always showed that the sync was the hotspot so I don't think it'll
> >help much.
> 
> What about the preceding barriers (sync; isync;) before dcbf? Why are 
> they needed?

This isn't very generic code.  The code seems to be trying to do
coherency in software.  Like you needed to do for DART on U3/U4, or for
some of the PMU/SMU communication -- both are through main memory, but
both are not cache coherent.  Which means all rules go out of the
window.

To do this properly you need some platform-specific code, for example
to kill hardware and software prefetch streams.  Or hope^Wguarantee
those never touch your communication buffers.


I recommend you keep the original function, maybe with a more specific
name, for the DART etc. code; and have all normal(*) dcbf users use a
new more normal function, with just a single sync instruction.


Segher


(*) As far as anything using dcbf can be called "normal"!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
  2019-05-14  9:05 ` [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE Christophe Leroy
@ 2019-07-15  6:49   ` Michael Ellerman
  2019-07-15  7:02     ` Oliver O'Halloran
  2019-07-15 17:03     ` Christophe Leroy
  0 siblings, 2 replies; 16+ messages in thread
From: Michael Ellerman @ 2019-07-15  6:49 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linux-kernel, linuxppc-dev

Christophe Leroy <christophe.leroy@c-s.fr> writes:
> PPC32 also have flush_dcache_range() so it can also support
> ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>  arch/powerpc/Kconfig | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index d7996cfaceca..cf6e30f637be 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -127,13 +127,13 @@ config PPC
>  	select ARCH_HAS_KCOV
>  	select ARCH_HAS_MMIOWB			if PPC64
>  	select ARCH_HAS_PHYS_TO_DMA
> -	select ARCH_HAS_PMEM_API                if PPC64
> +	select ARCH_HAS_PMEM_API
>  	select ARCH_HAS_PTE_SPECIAL
>  	select ARCH_HAS_MEMBARRIER_CALLBACKS
>  	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
>  	select ARCH_HAS_STRICT_KERNEL_RWX	if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
>  	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
> -	select ARCH_HAS_UACCESS_FLUSHCACHE	if PPC64
> +	select ARCH_HAS_UACCESS_FLUSHCACHE
>  	select ARCH_HAS_UBSAN_SANITIZE_ALL
>  	select ARCH_HAS_ZONE_DEVICE		if PPC_BOOK3S_64
>  	select ARCH_HAVE_NMI_SAFE_CMPXCHG

This didn't build for me, probably due to something that's changed in
the long period between you posting it and me applying it?

corenet32_smp_defconfig:

  powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function `_copy_from_iter_flushcache':
  powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_page_flushcache'
  powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
  powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `__copy_from_user_flushcache'
  powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'

cheers

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
  2019-07-15  6:49   ` Michael Ellerman
@ 2019-07-15  7:02     ` Oliver O'Halloran
  2019-07-15 17:03     ` Christophe Leroy
  1 sibling, 0 replies; 16+ messages in thread
From: Oliver O'Halloran @ 2019-07-15  7:02 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Segher Boessenkool, Linux Kernel Mailing List, linuxppc-dev

On Mon, Jul 15, 2019 at 4:49 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> > PPC32 also have flush_dcache_range() so it can also support
> > ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
> >
> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> > ---
> >  arch/powerpc/Kconfig | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index d7996cfaceca..cf6e30f637be 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -127,13 +127,13 @@ config PPC
> >       select ARCH_HAS_KCOV
> >       select ARCH_HAS_MMIOWB                  if PPC64
> >       select ARCH_HAS_PHYS_TO_DMA
> > -     select ARCH_HAS_PMEM_API                if PPC64
> > +     select ARCH_HAS_PMEM_API
> >       select ARCH_HAS_PTE_SPECIAL
> >       select ARCH_HAS_MEMBARRIER_CALLBACKS
> >       select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
> >       select ARCH_HAS_STRICT_KERNEL_RWX       if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
> >       select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
> > -     select ARCH_HAS_UACCESS_FLUSHCACHE      if PPC64
> > +     select ARCH_HAS_UACCESS_FLUSHCACHE
> >       select ARCH_HAS_UBSAN_SANITIZE_ALL
> >       select ARCH_HAS_ZONE_DEVICE             if PPC_BOOK3S_64
> >       select ARCH_HAVE_NMI_SAFE_CMPXCHG
>
> This didn't build for me, probably due to something that's changed in
> the long period between you posting it and me applying it?
>
> corenet32_smp_defconfig:
>
>   powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function `_copy_from_iter_flushcache':
>   powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_page_flushcache'
>   powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
>   powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `__copy_from_user_flushcache'
>   powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'

I think lib/pmem.c just needs to be moved out of obj64-y.

>
> cheers

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
  2019-07-15  6:49   ` Michael Ellerman
  2019-07-15  7:02     ` Oliver O'Halloran
@ 2019-07-15 17:03     ` Christophe Leroy
  1 sibling, 0 replies; 16+ messages in thread
From: Christophe Leroy @ 2019-07-15 17:03 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, linux-kernel, Segher Boessenkool,
	Oliver O'Halloran, Paul Mackerras, Benjamin Herrenschmidt

Michael Ellerman <mpe@ellerman.id.au> a écrit :

> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> PPC32 also have flush_dcache_range() so it can also support
>> ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>  arch/powerpc/Kconfig | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index d7996cfaceca..cf6e30f637be 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -127,13 +127,13 @@ config PPC
>>  	select ARCH_HAS_KCOV
>>  	select ARCH_HAS_MMIOWB			if PPC64
>>  	select ARCH_HAS_PHYS_TO_DMA
>> -	select ARCH_HAS_PMEM_API                if PPC64
>> +	select ARCH_HAS_PMEM_API
>>  	select ARCH_HAS_PTE_SPECIAL
>>  	select ARCH_HAS_MEMBARRIER_CALLBACKS
>>  	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
>>  	select ARCH_HAS_STRICT_KERNEL_RWX	if ((PPC_BOOK3S_64 || PPC32) &&  
>> !RELOCATABLE && !HIBERNATION)
>>  	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
>> -	select ARCH_HAS_UACCESS_FLUSHCACHE	if PPC64
>> +	select ARCH_HAS_UACCESS_FLUSHCACHE
>>  	select ARCH_HAS_UBSAN_SANITIZE_ALL
>>  	select ARCH_HAS_ZONE_DEVICE		if PPC_BOOK3S_64
>>  	select ARCH_HAVE_NMI_SAFE_CMPXCHG
>
> This didn't build for me, probably due to something that's changed in
> the long period between you posting it and me applying it?
>
> corenet32_smp_defconfig:
>
>   powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function  
> `_copy_from_iter_flushcache':
>   powerpc64-unknown-linux-gnu-ld:  
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined  
> reference to `memcpy_page_flushcache'
>   powerpc64-unknown-linux-gnu-ld:  
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined  
> reference to `memcpy_flushcache'
>   powerpc64-unknown-linux-gnu-ld:  
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined  
> reference to `__copy_from_user_flushcache'
>   powerpc64-unknown-linux-gnu-ld:  
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined  
> reference to `memcpy_flushcache'
>


Looks like arch/powerpc/lib/Makefile only builds pmem.o for ppc64

Moving it from obj64-y to obj-y should do it.

I'll update the patch when I'm back in two weeks unless you can fix it before.

Christophe



^ permalink raw reply	[flat|nested] 16+ messages in thread

* powerpc flush_inval_dcache_range() was buggy until v5.3-rc1 (was Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range())
  2019-05-14  9:05 ` [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range() Christophe Leroy
  2019-07-08  1:19   ` Michael Ellerman
  2019-07-08 14:21   ` Aneesh Kumar K.V
@ 2019-08-08  6:53   ` Michael Ellerman
  2 siblings, 0 replies; 16+ messages in thread
From: Michael Ellerman @ 2019-08-08  6:53 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linux-kernel, linuxppc-dev, Aneesh Kumar K.V, Alastair D'Silva

[ deliberately broke threading so this doesn't get buried ]

Christophe Leroy <christophe.leroy@c-s.fr> writes:
> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> index a4fd536efb44..1b0a42c50ef1 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -115,35 +115,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
>  EXPORT_SYMBOL(flush_icache_range)
>  
>  /*
> - * Like above, but only do the D-cache.
> - *
> - * flush_dcache_range(unsigned long start, unsigned long stop)
> - *
> - *    flush all bytes from start to stop-1 inclusive
> - */
> -
> -_GLOBAL_TOC(flush_dcache_range)
> - 	ld	r10,PPC64_CACHES@toc(r2)
> -	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
> -	addi	r5,r7,-1
> -	andc	r6,r3,r5		/* round low to line bdy */
> -	subf	r8,r6,r4		/* compute length */
> -	add	r8,r8,r5		/* ensure we get enough */
> -	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
> -	srw.	r8,r8,r9		/* compute line count */
          ^
> -	beqlr				/* nothing to do? */

Alastair noticed that this was a 32-bit right shift.

Meaning if you called flush_dcache_range() with a range larger than 4GB,
it did nothing and returned.

That code (which was previously called flush_inval_dcache_range()) was
merged back in 2005:

  https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14


Back then it was only used by the smu.c driver, which presumably wasn't
flushing more than 4GB.

Over time it grew more users:

  v4.17 (Apr 2018): fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug")
  v4.15 (Nov 2017): 6c44741d75a2 ("powerpc/lib: Implement UACCESS_FLUSHCACHE API")
  v4.15 (Nov 2017): 32ce3862af3c ("powerpc/lib: Implement PMEM API")
  v4.8  (Jul 2016): c40785ad305b ("powerpc/dart: Use a cachable DART")

The DART case doesn't matter, but the others probably could. I assume
the lack of bug reports is due to the fact that pmem stuff is still in
development and the lack of flushing usually doesn't actually matter? Or
are people flushing/hotplugging < 4G at a time?

Anyway we probably want to backport the fix below to various places?

cheers


diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1ad4089dd110..802f5abbf061 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -148,7 +148,7 @@ _GLOBAL(flush_inval_dcache_range)
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5		/* ensure we get enough */
 	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9		/* compute line count */
+	srd.	r8,r8,r9		/* compute line count */
 	beqlr				/* nothing to do? */
 	sync
 	isync

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2019-08-08  6:53 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-14  9:05 [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Christophe Leroy
2019-05-14  9:05 ` [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE Christophe Leroy
2019-07-15  6:49   ` Michael Ellerman
2019-07-15  7:02     ` Oliver O'Halloran
2019-07-15 17:03     ` Christophe Leroy
2019-05-14  9:05 ` [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes Christophe Leroy
2019-07-08  1:19   ` Michael Ellerman
2019-05-14  9:05 ` [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range() Christophe Leroy
2019-07-08  1:19   ` Michael Ellerman
2019-07-08 14:21   ` Aneesh Kumar K.V
2019-07-09  2:20     ` Oliver O'Halloran
2019-07-09  2:51       ` Aneesh Kumar K.V
2019-07-09  5:29         ` Oliver O'Halloran
2019-07-09 17:04         ` Segher Boessenkool
2019-08-08  6:53   ` powerpc flush_inval_dcache_range() was buggy until v5.3-rc1 (was Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()) Michael Ellerman
2019-07-08  1:19 ` [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Michael Ellerman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).