LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] cache: Workaround HiSilicon Taishan DC CVAU
@ 2021-11-26  9:11 Weilong Chen
  2021-12-13 18:56 ` Will Deacon
  0 siblings, 1 reply; 3+ messages in thread
From: Weilong Chen @ 2021-11-26  9:11 UTC (permalink / raw)
  To: catalin.marinas, will, corbet, chenweilong, linux-kernel, linux-doc

Taishan's L1/L2 cache is inclusive, and the data is consistent.
Any change in L1 does not require a DC operation to clean the cache line
(CL) from L1 to L2. It is therefore safe to skip cleaning the data cache
by address to the point of unification.

Without the IDC feature, the kernel needs to flush the icache as well as
the dcache, which causes performance degradation.

The flaw refers to V110/V200 variant 1.

Signed-off-by: Weilong Chen <chenweilong@huawei.com>
---
 Documentation/arm64/silicon-errata.rst |  2 ++
 arch/arm64/Kconfig                     | 11 +++++++++
 arch/arm64/include/asm/cputype.h       |  2 ++
 arch/arm64/kernel/cpu_errata.c         | 32 ++++++++++++++++++++++++++
 arch/arm64/tools/cpucaps               |  1 +
 5 files changed, 48 insertions(+)

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 5342e895fb60..5fe577eed65f 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -158,6 +158,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Hisilicon      | Hip08 SMMU PMCG | #162001800      | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | TSV{110,200}    | #1980005        | HISILICON_ERRATUM_1980005   |
++----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | Kryo/Falkor v1  | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 517d26c8002d..8943a62ed643 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -913,6 +913,17 @@ config QCOM_QDF2400_ERRATUM_0065
 
 	  If unsure, say Y.
 
+config HISILICON_ERRATUM_1980005
+	bool "Hisilicon erratum IDC support"
+	default y
+	help
+	  The HiSilicon TSV110/200 SoCs support IDC but report the wrong value
+	  to the kernel. Any change in L1 does not require a DC operation to
+	  clean the cache line from L1 to L2, so it is safe to skip cleaning
+	  the data cache by address to the point of unification.
+
+	  If unsure, say Y.
+
 config QCOM_FALKOR_ERRATUM_E1041
 	bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
 	default y
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 19b8441aa8f2..86888a6381e0 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -101,6 +101,7 @@
 #define FUJITSU_CPU_PART_A64FX		0x001
 
 #define HISI_CPU_PART_TSV110		0xD01
+#define HISI_CPU_PART_TSV200		0xD02
 
 #define APPLE_CPU_PART_M1_ICESTORM	0x022
 #define APPLE_CPU_PART_M1_FIRESTORM	0x023
@@ -135,6 +136,7 @@
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
 #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_TSV200 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV200)
 #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
 #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9e1c1aef9ebd..b7daea3c14e9 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -55,6 +55,29 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
 	return model == entry->midr_range.model;
 }
 
+#ifdef CONFIG_HISILICON_ERRATUM_1980005
+static bool
+hisilicon_1980005_match(const struct arm64_cpu_capabilities *entry,
+		int scope)
+{
+	static const struct midr_range idc_support_list[] = {
+		MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+		MIDR_REV(MIDR_HISI_TSV200, 1, 0),
+		{ /* sentinel */ }
+	};
+
+	return  is_midr_in_range_list(read_cpuid_id(), idc_support_list);
+}
+
+static void
+hisilicon_1980005_enable(const struct arm64_cpu_capabilities *__unused)
+{
+	cpus_set_cap(ARM64_HAS_CACHE_IDC);
+	arm64_ftr_reg_ctrel0.sys_val |= BIT(CTR_IDC_SHIFT);
+	sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
+}
+#endif
+
 static bool
 has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
 			  int scope)
@@ -450,6 +473,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 		.cpu_enable = cpu_enable_trap_ctr_access,
 	},
+#ifdef CONFIG_HISILICON_ERRATUM_1980005
+	{
+		.desc = "Taishan IDC coherence workaround",
+		.capability = ARM64_WORKAROUND_HISILICON_1980005,
+		.matches = hisilicon_1980005_match,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.cpu_enable = hisilicon_1980005_enable,
+	},
+#endif
 #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
 	{
 		.desc = "Qualcomm Technologies Falkor/Kryo erratum 1003",
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 870c39537dd0..d47f4904d589 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -69,3 +69,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP
 WORKAROUND_QCOM_FALKOR_E1003
 WORKAROUND_REPEAT_TLBI
 WORKAROUND_SPECULATIVE_AT
+WORKAROUND_HISILICON_1980005
-- 
2.31.GIT


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] cache: Workaround HiSilicon Taishan DC CVAU
  2021-11-26  9:11 [PATCH] cache: Workaround HiSilicon Taishan DC CVAU Weilong Chen
@ 2021-12-13 18:56 ` Will Deacon
  2021-12-29  3:11   ` chenweilong
  0 siblings, 1 reply; 3+ messages in thread
From: Will Deacon @ 2021-12-13 18:56 UTC (permalink / raw)
  To: Weilong Chen; +Cc: catalin.marinas, corbet, linux-kernel, linux-doc

On Fri, Nov 26, 2021 at 05:11:39PM +0800, Weilong Chen wrote:
> Taishan's L1/L2 cache is inclusive, and the data is consistent.
> Any change of L1 does not require DC operation to brush CL in L1 to L2.
> It's safe that don't clean data cache by address to point of unification.
> 
> Without IDC featrue, kernel needs to flush icache as well as dcache,
> causes performance degradation.
> 
> The flaw refers to V110/V200 variant 1.
> 
> Signed-off-by: Weilong Chen <chenweilong@huawei.com>
> ---
>  Documentation/arm64/silicon-errata.rst |  2 ++
>  arch/arm64/Kconfig                     | 11 +++++++++
>  arch/arm64/include/asm/cputype.h       |  2 ++
>  arch/arm64/kernel/cpu_errata.c         | 32 ++++++++++++++++++++++++++
>  arch/arm64/tools/cpucaps               |  1 +
>  5 files changed, 48 insertions(+)

Hmm. We don't usually apply optimisations for specific CPUs on arm64, simply
because the diversity of CPUs out there means it quickly becomes a
fragmented mess.

Is this patch purely a performance improvement? If so, please can you
provide some numbers in an attempt to justify it?

Thanks,

Will

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] cache: Workaround HiSilicon Taishan DC CVAU
  2021-12-13 18:56 ` Will Deacon
@ 2021-12-29  3:11   ` chenweilong
  0 siblings, 0 replies; 3+ messages in thread
From: chenweilong @ 2021-12-29  3:11 UTC (permalink / raw)
  To: Will Deacon; +Cc: catalin.marinas, corbet, linux-kernel, linux-doc

On 2021/12/14 2:56, Will Deacon wrote:
> On Fri, Nov 26, 2021 at 05:11:39PM +0800, Weilong Chen wrote:
>> Taishan's L1/L2 cache is inclusive, and the data is consistent.
>> Any change of L1 does not require DC operation to brush CL in L1 to L2.
>> It's safe that don't clean data cache by address to point of unification.
>>
>> Without IDC featrue, kernel needs to flush icache as well as dcache,
>> causes performance degradation.
>>
>> The flaw refers to V110/V200 variant 1.
>>
>> Signed-off-by: Weilong Chen <chenweilong@huawei.com>
>> ---
>>  Documentation/arm64/silicon-errata.rst |  2 ++
>>  arch/arm64/Kconfig                     | 11 +++++++++
>>  arch/arm64/include/asm/cputype.h       |  2 ++
>>  arch/arm64/kernel/cpu_errata.c         | 32 ++++++++++++++++++++++++++
>>  arch/arm64/tools/cpucaps               |  1 +
>>  5 files changed, 48 insertions(+)
> Hmm. We don't usually apply optimisations for specific CPUs on arm64, simply
> because the diversity of CPUs out there means it quickly becomes a
> fragmented mess.
>
> Is this patch purely a performance improvement? If so, please can you
> provide some numbers in an attempt to justify it?

Yes, it's a performance improvement. I have a test program like this:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/time.h>

/*
 * Micro-benchmark: measure how long one mprotect() call takes to switch a
 * 200 MiB private anonymous mapping from read/write to read/exec.
 *
 * The whole mapping is written with memset() before the timed region, so
 * the measurement covers only the mprotect() call itself.
 *
 * Prints the elapsed wall-clock time in milliseconds. On failure of mmap()
 * or mprotect(), prints a diagnostic and exits with the errno value of the
 * failing call.
 */
int main(void)
{
        const size_t len = 200 * 1024 * 1024;
        struct timeval start, end;
        long interval_us;
        int saved_errno;
        void *tmp;

        tmp = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (tmp == MAP_FAILED) {
                /* perror() may modify errno, so capture it first. */
                saved_errno = errno;
                perror("mmap failed");
                exit(saved_errno);
        }

        /* Write to every byte of the mapping before starting the timer. */
        memset(tmp, 0, len);

        gettimeofday(&start, NULL);
        if (mprotect(tmp, len, PROT_READ | PROT_EXEC)) {
                /* perror() may modify errno, so capture it first. */
                saved_errno = errno;
                perror("Couldn’t mprotect");
                exit(saved_errno);
        }
        gettimeofday(&end, NULL);

        /* Elapsed time in microseconds, reported below in milliseconds. */
        interval_us = 1000000L * (end.tv_sec - start.tv_sec) +
                      (end.tv_usec - start.tv_usec);
        printf("interval = %fms\n", interval_us / 1000.0);

        /* Release the mapping; this happens after the timed region. */
        munmap(tmp, len);
        return 0;
}

Without this fix, the mprotect takes:

interval = 25.608000ms

And with this fix:

interval = 0.689000ms

This is a significant performance improvement.

If you think it is suitable, I will send a v2 patch, as the original patch broke the CPU hotplug checks.

>
> Thanks,
>
> Will
> .



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-12-29  3:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-26  9:11 [PATCH] cache: Workaround HiSilicon Taishan DC CVAU Weilong Chen
2021-12-13 18:56 ` Will Deacon
2021-12-29  3:11   ` chenweilong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).